Diffstat (limited to 'contrib/llvm/lib/CodeGen/SelectionDAG')
24 files changed, 5559 insertions, 3297 deletions
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index c9c4d91..9035602 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -25,7 +25,6 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -43,6 +42,7 @@ STATISTIC(NodesCombined , "Number of dag nodes combined"); STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); +STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); namespace { static cl::opt<bool> @@ -185,7 +185,7 @@ namespace { SDValue visitANY_EXTEND(SDNode *N); SDValue visitSIGN_EXTEND_INREG(SDNode *N); SDValue visitTRUNCATE(SDNode *N); - SDValue visitBIT_CONVERT(SDNode *N); + SDValue visitBITCAST(SDNode *N); SDValue visitBUILD_PAIR(SDNode *N); SDValue visitFADD(SDNode *N); SDValue visitFSUB(SDNode *N); @@ -229,12 +229,13 @@ namespace { SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); - SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, EVT); + SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); SDValue BuildUDIV(SDNode *N); SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL); SDValue ReduceLoadWidth(SDNode *N); SDValue ReduceLoadOpStoreWidth(SDNode *N); + SDValue TransformFPLoadStorePair(SDNode *N); SDValue GetDemandedBits(SDValue V, const APInt &Mask); @@ -248,16 +249,19 @@ namespace { bool isAlias(SDValue Ptr1, int64_t Size1, const Value *SrcValue1, int SrcValueOffset1, unsigned SrcValueAlign1, + const MDNode *TBAAInfo1, SDValue Ptr2, int64_t Size2, const Value *SrcValue2, int SrcValueOffset2, - unsigned SrcValueAlign2) const; + unsigned SrcValueAlign2, + const MDNode *TBAAInfo2) const; /// FindAliasInfo - Extracts the relevant alias information from the memory /// node. Returns true if the operand was a load. bool FindAliasInfo(SDNode *N, SDValue &Ptr, int64_t &Size, const Value *&SrcValue, int &SrcValueOffset, - unsigned &SrcValueAlignment) const; + unsigned &SrcValueAlignment, + const MDNode *&TBAAInfo) const; /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, /// looking for a better chain (aliasing node.) @@ -270,15 +274,15 @@ namespace { /// Run - runs the dag combiner on all nodes in the work list void Run(CombineLevel AtLevel); - + SelectionDAG &getDAG() const { return DAG; } - + /// getShiftAmountTy - Returns a type large enough to hold any valid /// shift amount - before type legalization these can be huge. EVT getShiftAmountTy() { return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy(); } - + /// isTypeLegal - This method returns true if we are running before type /// legalization or if the specified VT is legal. bool isTypeLegal(const EVT &VT) { @@ -631,7 +635,7 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { // Replace the old value with the new one. 
++NodesCombined; - DEBUG(dbgs() << "\nReplacing.2 "; + DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG); dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG); @@ -666,12 +670,13 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { EVT MemVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) - ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD + : ISD::EXTLOAD) : LD->getExtensionType(); Replace = true; - return DAG.getExtLoad(ExtType, PVT, dl, + return DAG.getExtLoad(ExtType, dl, PVT, LD->getChain(), LD->getBasePtr(), - LD->getSrcValue(), LD->getSrcValueOffset(), + LD->getPointerInfo(), MemVT, LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); } @@ -691,7 +696,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { unsigned ExtOpc = Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; return DAG.getNode(ExtOpc, dl, PVT, Op); - } + } } if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT)) @@ -889,11 +894,12 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { LoadSDNode *LD = cast<LoadSDNode>(N); EVT MemVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) - ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD + : ISD::EXTLOAD) : LD->getExtensionType(); - SDValue NewLD = DAG.getExtLoad(ExtType, PVT, dl, + SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, LD->getChain(), LD->getBasePtr(), - LD->getSrcValue(), LD->getSrcValueOffset(), + LD->getPointerInfo(), MemVT, LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD); @@ -975,7 +981,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { RV.getNode()->getOpcode() != ISD::DELETED_NODE && "Node was deleted but visit returned new node!"); - DEBUG(dbgs() << "\nReplacing.3 "; + DEBUG(dbgs() << "\nReplacing.3 "; N->dump(&DAG); dbgs() << "\nWith: "; RV.getNode()->dump(&DAG); @@ -1054,7 +1060,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::ANY_EXTEND: return visitANY_EXTEND(N); case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); case ISD::TRUNCATE: return visitTRUNCATE(N); - case ISD::BIT_CONVERT: return visitBIT_CONVERT(N); + case ISD::BITCAST: return visitBITCAST(N); case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); case ISD::FADD: return visitFADD(N); case ISD::FSUB: return visitFSUB(N); @@ -1225,7 +1231,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { } } } - + SDValue Result; // If we've change things around then replace token factor. @@ -1424,6 +1430,29 @@ SDValue DAGCombiner::visitADD(SDNode *N) { N0.getOperand(0).getOperand(1), N0.getOperand(1))); + if (N1.getOpcode() == ISD::AND) { + SDValue AndOp0 = N1.getOperand(0); + ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1)); + unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0); + unsigned DestBits = VT.getScalarType().getSizeInBits(); + + // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) + // and similar xforms where the inner op is either ~0 or 0. 
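Aside (illustrative, not part of the commit): the fold in the comment above rests on the identity z + (t & 1) == z - t whenever t is known to be either 0 or all-ones, which is what the NumSignBits == DestBits check guarantees. A minimal standalone C++ check of that identity:

    #include <cassert>
    #include <cstdint>

    int main() {
      // t models the (sbbl x, x) result: every bit is a copy of the carry.
      for (int32_t t : {0, -1})
        for (int32_t z : {0, 7, -3, 100000})
          assert(z + (t & 1) == z - t);  // (add z, (and t, 1)) == (sub z, t)
      return 0;
    }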
+ if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) { + DebugLoc DL = N->getDebugLoc(); + return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0); + } + } + + // add (sext i1), X -> sub X, (zext i1) + if (N0.getOpcode() == ISD::SIGN_EXTEND && + N0.getOperand(0).getValueType() == MVT::i1 && + !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) { + DebugLoc DL = N->getDebugLoc(); + SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); + return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); + } + return SDValue(); } @@ -1438,7 +1467,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { if (N->hasNUsesOfValue(0, 1)) return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0), DAG.getNode(ISD::CARRY_FALSE, - N->getDebugLoc(), MVT::Flag)); + N->getDebugLoc(), MVT::Glue)); // canonicalize constant to RHS. if (N0C && !N1C) @@ -1447,7 +1476,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { // fold (addc x, 0) -> x + no carry out if (N1C && N1C->isNullValue()) return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, - N->getDebugLoc(), MVT::Flag)); + N->getDebugLoc(), MVT::Glue)); // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. APInt LHSZero, LHSOne; @@ -1464,7 +1493,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask)) return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1), DAG.getNode(ISD::CARRY_FALSE, - N->getDebugLoc(), MVT::Flag)); + N->getDebugLoc(), MVT::Glue)); } return SDValue(); @@ -1489,6 +1518,22 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { return SDValue(); } +// Since it may not be valid to emit a fold to zero for vector initializers +// check if we can before folding. +static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT, + SelectionDAG &DAG, bool LegalOperations) { + if (!VT.isVector()) { + return DAG.getConstant(0, VT); + } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { + // Produce a vector of zeros. + SDValue El = DAG.getConstant(0, VT.getVectorElementType()); + std::vector<SDValue> Ops(VT.getVectorNumElements(), El); + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, + &Ops[0], Ops.size()); + } + return SDValue(); +} + SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -1503,8 +1548,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { } // fold (sub x, x) -> 0 + // FIXME: Refactor this and xor and other similar operations together. if (N0 == N1) - return DAG.getConstant(0, N->getValueType(0)); + return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations); // fold (sub c1, c2) -> c1-c2 if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C); @@ -1515,6 +1561,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // Canonicalize (sub -1, x) -> ~x, i.e. 
(xor x, -1) if (N0C && N0C->isAllOnesValue()) return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0); + // fold A-(A-B) -> B + if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0)) + return N1.getOperand(1); // fold (A+B)-A -> B if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1) return N0.getOperand(1); @@ -1897,6 +1946,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N->getValueType(0); + DebugLoc DL = N->getDebugLoc(); // fold (mulhs x, 0) -> 0 if (N1C && N1C->isNullValue()) @@ -1910,6 +1960,22 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) return DAG.getConstant(0, VT); + // If the type twice as wide is legal, transform the mulhs to a wider multiply + // plus a shift. + if (VT.isSimple() && !VT.isVector()) { + MVT Simple = VT.getSimpleVT(); + unsigned SimpleSize = Simple.getSizeInBits(); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); + if (TLI.isOperationLegal(ISD::MUL, NewVT)) { + N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0); + N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1); + N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); + N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, + DAG.getConstant(SimpleSize, getShiftAmountTy())); + return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); + } + } + return SDValue(); } @@ -1918,6 +1984,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N->getValueType(0); + DebugLoc DL = N->getDebugLoc(); // fold (mulhu x, 0) -> 0 if (N1C && N1C->isNullValue()) @@ -1929,6 +1996,22 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) return DAG.getConstant(0, VT); + // If the type twice as wide is legal, transform the mulhu to a wider multiply + // plus a shift. + if (VT.isSimple() && !VT.isVector()) { + MVT Simple = VT.getSimpleVT(); + unsigned SimpleSize = Simple.getSizeInBits(); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); + if (TLI.isOperationLegal(ISD::MUL, NewVT)) { + N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0); + N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); + N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); + N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, + DAG.getConstant(SimpleSize, getShiftAmountTy())); + return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); + } + } + return SDValue(); } @@ -1992,6 +2075,29 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS); if (Res.getNode()) return Res; + EVT VT = N->getValueType(0); + DebugLoc DL = N->getDebugLoc(); + + // If the type twice as wide is legal, transform the mulhu to a wider multiply + // plus a shift. + if (VT.isSimple() && !VT.isVector()) { + MVT Simple = VT.getSimpleVT(); + unsigned SimpleSize = Simple.getSizeInBits(); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); + if (TLI.isOperationLegal(ISD::MUL, NewVT)) { + SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0)); + SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1)); + Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); + // Compute the high part as N1. + Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, + DAG.getConstant(SimpleSize, getShiftAmountTy())); + Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); + // Compute the low part as N0. 
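Aside (illustrative, not part of the commit): in scalar terms, the widened SMUL_LOHI above computes both result halves from a single wider multiply. A C++ model of the SIGN_EXTEND/MUL/SRL/TRUNCATE sequence, assuming two's-complement narrowing on truncation:

    #include <cstdint>

    void smul_lohi32(int32_t a, int32_t b, int32_t &lo, int32_t &hi) {
      int64_t wide = (int64_t)a * (int64_t)b;  // sign-extend both, one MUL
      hi = (int32_t)((uint64_t)wide >> 32);    // SRL by SimpleSize, TRUNCATE
      lo = (int32_t)wide;                      // TRUNCATE gives the low half
    }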
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); + return CombineTo(N, Lo, Hi); + } + } + return SDValue(); } @@ -1999,6 +2105,29 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU); if (Res.getNode()) return Res; + EVT VT = N->getValueType(0); + DebugLoc DL = N->getDebugLoc(); + + // If the type twice as wide is legal, transform the mulhu to a wider multiply + // plus a shift. + if (VT.isSimple() && !VT.isVector()) { + MVT Simple = VT.getSimpleVT(); + unsigned SimpleSize = Simple.getSizeInBits(); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); + if (TLI.isOperationLegal(ISD::MUL, NewVT)) { + SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0)); + SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1)); + Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); + // Compute the high part as N1. + Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, + DAG.getConstant(SimpleSize, getShiftAmountTy())); + Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); + // Compute the low part as N0. + Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); + return CombineTo(N, Lo, Hi); + } + } + return SDValue(); } @@ -2116,7 +2245,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N0Op0 = N0.getOperand(0); APInt Mask = ~N1C->getAPIntValue(); - Mask.trunc(N0Op0.getValueSizeInBits()); + Mask = Mask.trunc(N0Op0.getValueSizeInBits()); if (DAG.MaskedValueIsZero(N0Op0, Mask)) { SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), N0.getValueType(), N0Op0); @@ -2198,10 +2327,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), - LN0->getSrcValue(), - LN0->getSrcValueOffset(), MemVT, + LN0->getPointerInfo(), MemVT, LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); AddToWorkList(N); @@ -2221,10 +2349,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) { BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), MemVT, + LN0->getBasePtr(), LN0->getPointerInfo(), + MemVT, LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); AddToWorkList(N); @@ -2253,18 +2381,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (ExtVT == LoadedVT && (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; - - SDValue NewLoad = - DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(), + + SDValue NewLoad = + DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, LN0->getChain(), LN0->getBasePtr(), - LN0->getSrcValue(), LN0->getSrcValueOffset(), + LN0->getPointerInfo(), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); AddToWorkList(N); CombineTo(LN0, NewLoad, NewLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } - + // Do not change the width of a volatile load. 
// Do not generate loads of non-round integer types since these can // be expensive (and would be wrong if the type is not byte sized). @@ -2288,12 +2416,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } AddToWorkList(NewPtr.getNode()); - + EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; SDValue Load = - DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(), + DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, LN0->getChain(), NewPtr, - LN0->getSrcValue(), LN0->getSrcValueOffset(), + LN0->getPointerInfo(), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), Alignment); AddToWorkList(N); @@ -2722,17 +2850,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { N01C->getAPIntValue(), VT)); } // fold (xor x, x) -> 0 - if (N0 == N1) { - if (!VT.isVector()) { - return DAG.getConstant(0, VT); - } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)){ - // Produce a vector of zeros. - SDValue El = DAG.getConstant(0, VT.getVectorElementType()); - std::vector<SDValue> Ops(VT.getVectorNumElements(), El); - return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, - &Ops[0], Ops.size()); - } - } + if (N0 == N1) + return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations); // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) if (N0.getOpcode() == N1.getOpcode()) { @@ -2810,7 +2929,8 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { LHS->getOperand(1), N->getOperand(1)); // Create the new shift. - SDValue NewShift = DAG.getNode(N->getOpcode(), LHS->getOperand(0).getDebugLoc(), + SDValue NewShift = DAG.getNode(N->getOpcode(), + LHS->getOperand(0).getDebugLoc(), VT, LHS->getOperand(0), N->getOperand(1)); // Create the new binop. @@ -2850,7 +2970,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { EVT TruncVT = N1.getValueType(); SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); - TruncC.trunc(TruncVT.getSizeInBits()); + TruncC = TruncC.trunc(TruncVT.getSizeInBits()); return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT, DAG.getNode(ISD::TRUNCATE, @@ -2868,11 +2988,37 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { N0.getOperand(1).getOpcode() == ISD::Constant) { uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); uint64_t c2 = N1C->getZExtValue(); - if (c1 + c2 > OpSizeInBits) + if (c1 + c2 >= OpSizeInBits) return DAG.getConstant(0, VT); return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0), DAG.getConstant(c1 + c2, N1.getValueType())); } + + // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2))) + // For this to be valid, the second form must not preserve any of the bits + // that are shifted out by the inner shift in the first form. This means + // the outer shift size must be >= the number of bits added by the ext. + // As a corollary, we don't care what kind of ext it is. 
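Aside (illustrative, not part of the commit): a concrete instance of the legality condition c2 >= OpSizeInBits - InnerShiftSize described in the comment above, checked in standalone C++:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 0xDEADBEEF;
      // Inner shl by 4 in i32, zero-extend to i64, outer shl by 40.
      uint64_t unfolded = (uint64_t)(x << 4) << 40; // shl (ext (shl x, 4)), 40
      uint64_t folded   = (uint64_t)x << 44;        // ext x, then shl by 4+40
      // Valid because 40 >= 64 - 32: every bit the inner i32 shift discards
      // would have been shifted out of the 64-bit result anyway.
      assert(folded == unfolded);
      return 0;
    }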
+ if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND || + N0.getOpcode() == ISD::ANY_EXTEND || + N0.getOpcode() == ISD::SIGN_EXTEND) && + N0.getOperand(0).getOpcode() == ISD::SHL && + isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) { + uint64_t c1 = + cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + EVT InnerShiftVT = N0.getOperand(0).getValueType(); + uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits(); + if (c2 >= OpSizeInBits - InnerShiftSize) { + if (c1 + c2 >= OpSizeInBits) + return DAG.getConstant(0, VT); + return DAG.getNode(ISD::SHL, N0->getDebugLoc(), VT, + DAG.getNode(N0.getOpcode(), N0->getDebugLoc(), VT, + N0.getOperand(0)->getOperand(0)), + DAG.getConstant(c1 + c2, N1.getValueType())); + } + } + // fold (shl (srl x, c1), c2) -> (shl (and x, (shl -1, c1)), (sub c2, c1)) or // (srl (and x, (shl -1, c1)), (sub c1, c2)) if (N1C && N0.getOpcode() == ISD::SRL && @@ -2973,7 +3119,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (N01C && N1C) { // Determine what the truncate's result bitsize and type would be. EVT TruncVT = - EVT::getIntegerVT(*DAG.getContext(), OpSizeInBits - N1C->getZExtValue()); + EVT::getIntegerVT(*DAG.getContext(), + OpSizeInBits - N1C->getZExtValue()); // Determine the residual right-shift amount. signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); @@ -3006,7 +3153,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { EVT TruncVT = N1.getValueType(); SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); - TruncC.trunc(TruncVT.getScalarType().getSizeInBits()); + TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits()); return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT, @@ -3017,6 +3164,29 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { } } + // fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2)) + // if c1 is equal to the number of bits the trunc removes + if (N0.getOpcode() == ISD::TRUNCATE && + (N0.getOperand(0).getOpcode() == ISD::SRL || + N0.getOperand(0).getOpcode() == ISD::SRA) && + N0.getOperand(0).hasOneUse() && + N0.getOperand(0).getOperand(1).hasOneUse() && + N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) { + EVT LargeVT = N0.getOperand(0).getValueType(); + ConstantSDNode *LargeShiftAmt = + cast<ConstantSDNode>(N0.getOperand(0).getOperand(1)); + + if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits == + LargeShiftAmt->getZExtValue()) { + SDValue Amt = + DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(), + getShiftAmountTy()); + SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), LargeVT, + N0.getOperand(0).getOperand(0), Amt); + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, SRA); + } + } + // Simplify, based on bits shifted out of the LHS. 
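Aside (illustrative, not part of the commit): the (sra (trunc (sr x, c1)), c2) fold added above holds precisely when c1 equals the number of bits the truncate removes. A standalone C++ check for the i64 to i32 case, assuming arithmetic right shift of negative values and two's-complement narrowing:

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t x = INT64_C(-0x123456789ABCDEF0);
      // c1 == 32, exactly the bits removed by truncating i64 to i32.
      int32_t unfolded = (int32_t)((uint64_t)x >> 32) >> 5; // sra(trunc(srl x,32)),5
      int32_t folded   = (int32_t)(x >> 37);                // trunc(sra x, 32+5)
      assert(unfolded == folded);
      return 0;
    }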
if (N1C && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); @@ -3065,12 +3235,33 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { N0.getOperand(1).getOpcode() == ISD::Constant) { uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); uint64_t c2 = N1C->getZExtValue(); - if (c1 + c2 > OpSizeInBits) + if (c1 + c2 >= OpSizeInBits) return DAG.getConstant(0, VT); return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), DAG.getConstant(c1 + c2, N1.getValueType())); } - + + // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2))) + if (N1C && N0.getOpcode() == ISD::TRUNCATE && + N0.getOperand(0).getOpcode() == ISD::SRL && + isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) { + uint64_t c1 = + cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + EVT InnerShiftVT = N0.getOperand(0).getValueType(); + EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType(); + uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits(); + // This is only valid if the OpSizeInBits + c1 = size of inner shift. + if (c1 + OpSizeInBits == InnerShiftSize) { + if (c1 + c2 >= InnerShiftSize) + return DAG.getConstant(0, VT); + return DAG.getNode(ISD::TRUNCATE, N0->getDebugLoc(), VT, + DAG.getNode(ISD::SRL, N0->getDebugLoc(), InnerShiftVT, + N0.getOperand(0)->getOperand(0), + DAG.getConstant(c1 + c2, ShiftCountVT))); + } + } + // fold (srl (shl x, c), c) -> (and x, cst2) if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 && N0.getValueSizeInBits() <= 64) { @@ -3078,7 +3269,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0), DAG.getConstant(~0ULL >> ShAmt, VT)); } - + // fold (srl (anyextend x), c) -> (anyextend (srl x, c)) if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { @@ -3147,7 +3338,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { EVT TruncVT = N1.getValueType(); SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); - TruncC.trunc(TruncVT.getSizeInBits()); + TruncC = TruncC.trunc(TruncVT.getSizeInBits()); return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT, @@ -3182,7 +3373,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // brcond i32 %c ... // // into - // + // // %a = ... // %b = and %a, 2 // %c = setcc eq %b, 0 @@ -3422,7 +3613,7 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, } if (BothLiveOut) // Both unextended and extended values are live out. There had better be - // good a reason for the transformation. + // a good reason for the transformation. 
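Aside (illustrative, not part of the commit): the "both live out" situation guarded against above corresponds roughly to source where the narrow value escapes alongside its extension, e.g.:

    // The truncating store uses the narrow load result directly, while the
    // return value needs it sign-extended: extending the load helps neither.
    int both_live(const short *p, short *q) {
      short s = *p;
      *q = s;
      return s;
    }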
return ExtendNodes.size(); } return true; @@ -3503,10 +3694,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), + LN0->getBasePtr(), LN0->getPointerInfo(), N0.getValueType(), LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); @@ -3547,10 +3737,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) { - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), MemVT, + LN0->getBasePtr(), LN0->getPointerInfo(), + MemVT, LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); CombineTo(N, ExtLoad); @@ -3611,7 +3801,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()), NegOne, DAG.getConstant(0, VT)); - } + } // fold (sext x) -> (zext x) if the sign bit is known zero. if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && @@ -3652,6 +3842,20 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (truncate x)) -> (and x, mask) if (N0.getOpcode() == ISD::TRUNCATE && (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) { + + // fold (zext (truncate (load x))) -> (zext (smaller load x)) + // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n))) + SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); + if (NarrowLoad.getNode()) { + SDNode* oye = N0.getNode()->getOperand(0).getNode(); + if (NarrowLoad.getNode() != N0.getNode()) { + CombineTo(N0.getNode(), NarrowLoad); + // CombineTo deleted the truncate, if needed, but not what's under it. + AddToWorkList(oye); + } + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
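Aside (illustrative, not part of the commit): at the source level the (zext (truncate (load x))) fold covers patterns such as the following, where the combine can emit one narrow load instead of a wide load plus masking, assuming a little-endian layout:

    #include <cstdint>

    uint64_t low_word(const uint64_t *p) {
      return (uint32_t)*p;  // load i64, truncate to i32, zero-extend back
    }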
+ } + SDValue Op = N0.getOperand(0); if (Op.getValueType().bitsLT(VT)) { Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op); @@ -3677,7 +3881,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X); } APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); - Mask.zext(VT.getSizeInBits()); + Mask = Mask.zext(VT.getSizeInBits()); return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X, DAG.getConstant(Mask, VT)); } @@ -3692,10 +3896,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), + LN0->getBasePtr(), LN0->getPointerInfo(), N0.getValueType(), LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); @@ -3736,10 +3939,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), MemVT, + LN0->getBasePtr(), LN0->getPointerInfo(), + MemVT, LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); CombineTo(N, ExtLoad); @@ -3805,21 +4008,27 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { isa<ConstantSDNode>(N0.getOperand(1)) && N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) { + SDValue ShAmt = N0.getOperand(1); + unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue(); if (N0.getOpcode() == ISD::SHL) { + SDValue InnerZExt = N0.getOperand(0); // If the original shl may be shifting out bits, do not perform this // transformation. - unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); - unsigned KnownZeroBits = N0.getOperand(0).getValueType().getSizeInBits() - - N0.getOperand(0).getOperand(0).getValueType().getSizeInBits(); - if (ShAmt > KnownZeroBits) + unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() - + InnerZExt.getOperand(0).getValueType().getSizeInBits(); + if (ShAmtVal > KnownZeroBits) return SDValue(); } - DebugLoc dl = N->getDebugLoc(); - return DAG.getNode(N0.getOpcode(), dl, VT, - DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)), - DAG.getNode(ISD::ZERO_EXTEND, dl, - N0.getOperand(1).getValueType(), - N0.getOperand(1))); + + DebugLoc DL = N->getDebugLoc(); + + // Ensure that the shift amount is wide enough for the shifted value. 
+ if (VT.getSizeInBits() >= 256) + ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt); + + return DAG.getNode(N0.getOpcode(), DL, VT, + DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)), + ShAmt); } return SDValue(); @@ -3879,7 +4088,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X); } APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); - Mask.zext(VT.getSizeInBits()); + Mask = Mask.zext(VT.getSizeInBits()); return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X, DAG.getConstant(Mask, VT)); } @@ -3894,10 +4103,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), + LN0->getBasePtr(), LN0->getPointerInfo(), N0.getValueType(), LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); @@ -3938,11 +4146,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { N0.hasOneUse()) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); EVT MemVT = LN0->getMemoryVT(); - SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), VT, - N->getDebugLoc(), - LN0->getChain(), LN0->getBasePtr(), - LN0->getSrcValue(), - LN0->getSrcValueOffset(), MemVT, + SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(), + VT, LN0->getChain(), LN0->getBasePtr(), + LN0->getPointerInfo(), MemVT, LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); CombineTo(N, ExtLoad); @@ -4053,11 +4259,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { if (Opc == ISD::SIGN_EXTEND_INREG) { ExtType = ISD::SEXTLOAD; ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT(); - if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT)) - return SDValue(); } else if (Opc == ISD::SRL) { - // Annother special-case: SRL is basically zero-extending a narrower - // value. + // Another special-case: SRL is basically zero-extending a narrower value. ExtType = ISD::ZEXTLOAD; N0 = SDValue(N, 0); ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1)); @@ -4065,10 +4268,18 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { ExtVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() - N01->getZExtValue()); } + if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT)) + return SDValue(); unsigned EVTBits = ExtVT.getSizeInBits(); + + // Do not generate loads of non-round integer types since these can + // be expensive (and would be wrong if the type is not byte sized). + if (!ExtVT.isRound()) + return SDValue(); + unsigned ShAmt = 0; - if (N0.getOpcode() == ISD::SRL && N0.hasOneUse() && ExtVT.isRound()) { + if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { ShAmt = N01->getZExtValue(); // Is the shift amount a multiple of size of VT? @@ -4078,52 +4289,88 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0) return SDValue(); } + + // At this point, we must have a load or else we can't do the transform. + if (!isa<LoadSDNode>(N0)) return SDValue(); + + // If the shift amount is larger than the input type then we're not + // accessing any of the loaded bytes. 
If the load was a zextload/extload + // then the result of the shift+trunc is zero/undef (handled elsewhere). + // If the load was a sextload then the result is a splat of the sign bit + // of the extended byte. This is not worth optimizing for. + if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits()) + return SDValue(); } } - // Do not generate loads of non-round integer types since these can - // be expensive (and would be wrong if the type is not byte sized). - if (isa<LoadSDNode>(N0) && N0.hasOneUse() && ExtVT.isRound() && - cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() >= EVTBits && - // Do not change the width of a volatile load. - !cast<LoadSDNode>(N0)->isVolatile()) { - LoadSDNode *LN0 = cast<LoadSDNode>(N0); - EVT PtrType = N0.getOperand(1).getValueType(); - - // For big endian targets, we need to adjust the offset to the pointer to - // load the correct bytes. - if (TLI.isBigEndian()) { - unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); - unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); - ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; - } - - uint64_t PtrOff = ShAmt / 8; - unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); - SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), - PtrType, LN0->getBasePtr(), - DAG.getConstant(PtrOff, PtrType)); - AddToWorkList(NewPtr.getNode()); - - SDValue Load = (ExtType == ISD::NON_EXTLOAD) - ? DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, - LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, - LN0->isVolatile(), LN0->isNonTemporal(), NewAlign) - : DAG.getExtLoad(ExtType, VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, - LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, - ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), - NewAlign); - - // Replace the old load's chain with the new load's chain. - WorkListRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1), - &DeadNodes); + // If the load is shifted left (and the result isn't shifted back right), + // we can fold the truncate through the shift. + unsigned ShLeftAmt = 0; + if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() && + ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) { + if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + ShLeftAmt = N01->getZExtValue(); + N0 = N0.getOperand(0); + } + } + + // If we haven't found a load, we can't narrow it. Don't transform one with + // multiple uses, this would require adding a new load. + if (!isa<LoadSDNode>(N0) || !N0.hasOneUse() || + // Don't change the width of a volatile load. + cast<LoadSDNode>(N0)->isVolatile()) + return SDValue(); + + // Verify that we are actually reducing a load width here. + if (cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() < EVTBits) + return SDValue(); + + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT PtrType = N0.getOperand(1).getValueType(); + + // For big endian targets, we need to adjust the offset to the pointer to + // load the correct bytes. 
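Aside (illustrative, not part of the commit): the offset adjustment above in concrete numbers: narrowing "load i32; srl 24; trunc to i8" uses byte offset ShAmt/8 == 3 on a little-endian target, while a big-endian target needs (LVTStoreBits - EVTStoreBits - ShAmt)/8 == (32 - 8 - 24)/8 == 0. A check of the little-endian half, assuming a little-endian host:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      uint32_t v = 0xAABBCCDDu;
      uint8_t bytes[4];
      std::memcpy(bytes, &v, 4);               // view v's in-memory bytes
      assert(bytes[3] == (uint8_t)(v >> 24));  // bits 24..31 live at offset 3
      return 0;
    }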
+ if (TLI.isBigEndian()) { + unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); + unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); + ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; + } + + uint64_t PtrOff = ShAmt / 8; + unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); + SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), + PtrType, LN0->getBasePtr(), + DAG.getConstant(PtrOff, PtrType)); + AddToWorkList(NewPtr.getNode()); + + SDValue Load; + if (ExtType == ISD::NON_EXTLOAD) + Load = DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, + LN0->getPointerInfo().getWithOffset(PtrOff), + LN0->isVolatile(), LN0->isNonTemporal(), NewAlign); + else + Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr, + LN0->getPointerInfo().getWithOffset(PtrOff), + ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), + NewAlign); + + // Replace the old load's chain with the new load's chain. + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1), + &DeadNodes); - // Return the new loaded value. - return Load; + // Shift the result left, if we've swallowed a left shift. + SDValue Result = Load; + if (ShLeftAmt != 0) { + EVT ShImmTy = getShiftAmountTy(); + if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt)) + ShImmTy = VT; + Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, + Result, DAG.getConstant(ShLeftAmt, ShImmTy)); } - return SDValue(); + // Return the new loaded value. + return Result; } SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { @@ -4196,10 +4443,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), EVT, + LN0->getBasePtr(), LN0->getPointerInfo(), + EVT, LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); CombineTo(N, ExtLoad); @@ -4213,10 +4460,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), EVT, + LN0->getBasePtr(), LN0->getPointerInfo(), + EVT, LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); CombineTo(N, ExtLoad); @@ -4295,7 +4542,9 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); - if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse()) + if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() || + LD1->getPointerInfo().getAddrSpace() != + LD2->getPointerInfo().getAddrSpace()) return SDValue(); EVT LD1VT = LD1->getValueType(0); @@ -4313,14 +4562,14 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { if (NewAlign <= Align && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(), - LD1->getBasePtr(), LD1->getSrcValue(), - LD1->getSrcValueOffset(), false, false, 
Align); + LD1->getBasePtr(), LD1->getPointerInfo(), + false, false, Align); } return SDValue(); } -SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { +SDValue DAGCombiner::visitBITCAST(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -4344,12 +4593,12 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { assert(!DestEltVT.isVector() && "Element type of vector ValueType must not be vector!"); if (isSimple) - return ConstantFoldBIT_CONVERTofBUILD_VECTOR(N0.getNode(), DestEltVT); + return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT); } // If the input is a constant, let getNode fold it. if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { - SDValue Res = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, N0); + SDValue Res = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, N0); if (Res.getNode() != N) { if (!LegalOperations || TLI.isOperationLegal(Res.getNode()->getOpcode(), VT)) @@ -4365,8 +4614,8 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { } // (conv (conv x, t1), t2) -> (conv x, t2) - if (N0.getOpcode() == ISD::BIT_CONVERT) - return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, + if (N0.getOpcode() == ISD::BITCAST) + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, N0.getOperand(0)); // fold (conv (load x)) -> (load (conv*)x) @@ -4382,13 +4631,12 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { if (Align <= OrigAlign) { SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(), - LN0->getBasePtr(), - LN0->getSrcValue(), LN0->getSrcValueOffset(), + LN0->getBasePtr(), LN0->getPointerInfo(), LN0->isVolatile(), LN0->isNonTemporal(), OrigAlign); AddToWorkList(N); CombineTo(N0.getNode(), - DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), + DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), N0.getValueType(), Load), Load.getValue(1)); return Load; @@ -4400,7 +4648,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { // This often reduces constant pool loads. if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) && N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) { - SDValue NewConv = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), VT, + SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT, N0.getOperand(0)); AddToWorkList(NewConv.getNode()); @@ -4423,7 +4671,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); if (isTypeLegal(IntXVT)) { - SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), + SDValue X = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), IntXVT, N0.getOperand(1)); AddToWorkList(X.getNode()); @@ -4448,7 +4696,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { X, DAG.getConstant(SignBit, VT)); AddToWorkList(X.getNode()); - SDValue Cst = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), + SDValue Cst = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT, N0.getOperand(0)); Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT, Cst, DAG.getConstant(~SignBit, VT)); @@ -4473,11 +4721,11 @@ SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { return CombineConsecutiveLoads(N, VT); } -/// ConstantFoldBIT_CONVERTofBUILD_VECTOR - We know that BV is a build_vector +/// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector /// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the /// destination element value type. 
SDValue DAGCombiner:: -ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { +ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { EVT SrcEltVT = BV->getValueType(0).getVectorElementType(); // If this is already the right type, we're done. @@ -4495,10 +4743,10 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // Due to the FP element handling below calling this routine recursively, // we can end up with a scalar-to-vector node here. if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR) - return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT, - DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(), + return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT, + DAG.getNode(ISD::BITCAST, BV->getDebugLoc(), DstEltVT, BV->getOperand(0))); - + SmallVector<SDValue, 8> Ops; for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { SDValue Op = BV->getOperand(i); @@ -4506,7 +4754,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // are promoted and implicitly truncated. Make that explicit here. if (Op.getValueType() != SrcEltVT) Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op); - Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(), + Ops.push_back(DAG.getNode(ISD::BITCAST, BV->getDebugLoc(), DstEltVT, Op)); AddToWorkList(Ops.back().getNode()); } @@ -4522,7 +4770,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // same sizes. assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!"); EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits()); - BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).getNode(); + BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode(); SrcEltVT = IntVT; } @@ -4531,10 +4779,10 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { if (DstEltVT.isFloatingPoint()) { assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!"); EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits()); - SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).getNode(); + SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode(); // Next, convert to FP elements of the same size. - return ConstantFoldBIT_CONVERTofBUILD_VECTOR(Tmp, DstEltVT); + return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT); } // Okay, we know the src/dst types are both integers of differing types. @@ -4556,7 +4804,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { if (Op.getOpcode() == ISD::UNDEF) continue; EltIsUndef = false; - NewBits |= APInt(cast<ConstantSDNode>(Op)->getAPIntValue()). + NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue(). zextOrTrunc(SrcBitSize).zext(DstBitSize); } @@ -4586,13 +4834,13 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { continue; } - APInt OpVal = APInt(cast<ConstantSDNode>(BV->getOperand(i))-> - getAPIntValue()).zextOrTrunc(SrcBitSize); + APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))-> + getAPIntValue().zextOrTrunc(SrcBitSize); for (unsigned j = 0; j != NumOutputsPerInput; ++j) { - APInt ThisVal = APInt(OpVal).trunc(DstBitSize); + APInt ThisVal = OpVal.trunc(DstBitSize); Ops.push_back(DAG.getConstant(ThisVal, DstEltVT)); - if (isS2V && i == 0 && j == 0 && APInt(ThisVal).zext(SrcBitSize) == OpVal) + if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal) // Simply turn this into a SCALAR_TO_VECTOR of the new type. 
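Aside (illustrative, not part of the commit): the element-merging direction of this constant folding behaves like packing adjacent narrow constants into one wide one, e.g. bitcasting a <2 x i32> build_vector to a single i64 element (order shown for a little-endian layout):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t elt0 = 0x11223344, elt1 = 0x55667788;
      uint64_t merged = (uint64_t)elt1 << 32 | elt0;
      assert(merged == UINT64_C(0x5566778811223344));
      return 0;
    }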
return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT, Ops[0]); @@ -4984,10 +5232,9 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), + LN0->getBasePtr(), LN0->getPointerInfo(), N0.getValueType(), LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); @@ -5011,7 +5258,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading // constant pool values. - if (N0.getOpcode() == ISD::BIT_CONVERT && + if (N0.getOpcode() == ISD::BITCAST && !VT.isVector() && N0.getNode()->hasOneUse() && N0.getOperand(0).getValueType().isInteger()) { @@ -5021,7 +5268,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int, DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); AddToWorkList(Int.getNode()); - return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Int); } } @@ -5047,7 +5294,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading // constant pool values. - if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() && + if (N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && N0.getOperand(0).getValueType().isInteger() && !N0.getOperand(0).getValueType().isVector()) { SDValue Int = N0.getOperand(0); @@ -5056,7 +5303,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int, DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); AddToWorkList(Int.getNode()); - return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0), Int); } } @@ -5084,14 +5331,17 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { N1.getOperand(0), N1.getOperand(1), N2); } - SDNode *Trunc = 0; - if (N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) { - // Look past truncate. - Trunc = N1.getNode(); - N1 = N1.getOperand(0); - } + if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) || + ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) && + (N1.getOperand(0).hasOneUse() && + N1.getOperand(0).getOpcode() == ISD::SRL))) { + SDNode *Trunc = 0; + if (N1.getOpcode() == ISD::TRUNCATE) { + // Look pass the truncate. + Trunc = N1.getNode(); + N1 = N1.getOperand(0); + } - if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) { // Match this pattern so that we can generate simpler code: // // %a = ... @@ -5100,7 +5350,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { // brcond i32 %c ... // // into - // + // // %a = ... // %b = and i32 %a, 2 // %c = setcc eq %b, 0 @@ -5146,8 +5396,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { } } } + + if (Trunc) + // Restore N1 if the above transformation doesn't match. + N1 = N->getOperand(1); } - + // Transform br(xor(x, y)) -> br(x != y) // Transform br(xor(xor(x,y), 1)) -> br (x == y) if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) { @@ -5181,9 +5435,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { Equal = true; } - SDValue NodeToReplace = Trunc ? 
SDValue(Trunc, 0) : N1; - - EVT SetCCVT = NodeToReplace.getValueType(); + EVT SetCCVT = N1.getValueType(); if (LegalTypes) SetCCVT = TLI.getSetCCResultType(SetCCVT); SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(), @@ -5192,9 +5444,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { Equal ? ISD::SETEQ : ISD::SETNE); // Replace the uses of XOR with SETCC WorkListRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(NodeToReplace, SetCC, &DeadNodes); - removeFromWorkList(NodeToReplace.getNode()); - DAG.DeleteNode(NodeToReplace.getNode()); + DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes); + removeFromWorkList(N1.getNode()); + DAG.DeleteNode(N1.getNode()); return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), MVT::Other, Chain, SetCC, N2); } @@ -5568,10 +5820,10 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > LD->getAlignment()) - return DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0), - N->getDebugLoc(), - Chain, Ptr, LD->getSrcValue(), - LD->getSrcValueOffset(), LD->getMemoryVT(), + return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), + LD->getValueType(0), + Chain, Ptr, LD->getPointerInfo(), + LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), Align); } } @@ -5587,15 +5839,13 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // Replace the chain to void dependency. if (LD->getExtensionType() == ISD::NON_EXTLOAD) { ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(), - BetterChain, Ptr, - LD->getSrcValue(), LD->getSrcValueOffset(), + BetterChain, Ptr, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); } else { - ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0), - LD->getDebugLoc(), - BetterChain, Ptr, LD->getSrcValue(), - LD->getSrcValueOffset(), + ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), + LD->getValueType(0), + BetterChain, Ptr, LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), @@ -5605,10 +5855,10 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // Create token factor to keep old chain connected. SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, Chain, ReplLoad.getValue(1)); - + // Make sure the new and old chains are cleaned up. AddToWorkList(Token.getNode()); - + // Replace uses with load result and token factor. Don't add users // to work list. return CombineTo(N, ReplLoad.getValue(0), Token, false); @@ -5628,17 +5878,17 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { static std::pair<unsigned, unsigned> CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { std::pair<unsigned, unsigned> Result(0, 0); - + // Check for the structure we're looking for. if (V->getOpcode() != ISD::AND || !isa<ConstantSDNode>(V->getOperand(1)) || !ISD::isNormalLoad(V->getOperand(0).getNode())) return Result; - + // Check the chain and pointer. LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0)); if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer. - + // The store should be chained directly to the load or be an operand of a // tokenfactor. if (LD == Chain.getNode()) @@ -5654,7 +5904,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { } if (!isOk) return Result; } - + // This only handles simple types. 
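Aside (illustrative, not part of the commit): the load/and/or/store shape CheckForMaskedLoad recognizes corresponds to source like the following, which ShrinkLoadReplaceStoreWithStore can rewrite as a single one-byte store instead of a full read-modify-write:

    #include <cstdint>

    // Matches "store (or (and (load p), ~0xFF), zext v), p": only the low
    // byte of *p changes, so a 1-byte store of v suffices.
    void set_low_byte(uint32_t *p, uint8_t v) {
      *p = (*p & ~0xFFu) | v;
    }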
if (V.getValueType() != MVT::i16 && V.getValueType() != MVT::i32 && @@ -5670,7 +5920,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { unsigned NotMaskTZ = CountTrailingZeros_64(NotMask); if (NotMaskTZ & 7) return Result; // Must be multiple of a byte. if (NotMaskLZ == 64) return Result; // All zero mask. - + // See if we have a continuous run of bits. If so, we have 0*1+0* if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64) return Result; @@ -5678,19 +5928,19 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { // Adjust NotMaskLZ down to be from the actual size of the int instead of i64. if (V.getValueType() != MVT::i64 && NotMaskLZ) NotMaskLZ -= 64-V.getValueSizeInBits(); - + unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8; switch (MaskedBytes) { - case 1: - case 2: + case 1: + case 2: case 4: break; default: return Result; // All one mask, or 5-byte mask. } - + // Verify that the first bit starts at a multiple of mask so that the access // is aligned the same as the access width. if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result; - + Result.first = MaskedBytes; Result.second = NotMaskTZ/8; return Result; @@ -5707,20 +5957,20 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, unsigned NumBytes = MaskInfo.first; unsigned ByteShift = MaskInfo.second; SelectionDAG &DAG = DC->getDAG(); - + // Check to see if IVal is all zeros in the part being masked in by the 'or' // that uses this. If not, this is not a replacement. APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(), ByteShift*8, (ByteShift+NumBytes)*8); if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0; - + // Check that it is legal on the target to do this. It is legal if the new // VT we're shrinking to (i8/i16/i32) is legal or we're still before type // legalization. MVT VT = MVT::getIntegerVT(NumBytes*8); if (!DC->isTypeLegal(VT)) return 0; - + // Okay, we can do this! Replace the 'St' store with a store of IVal that is // shifted by ByteShift and truncated down to NumBytes. if (ByteShift) @@ -5735,20 +5985,20 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, StOffset = ByteShift; else StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes; - + SDValue Ptr = St->getBasePtr(); if (StOffset) { Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(), Ptr, DAG.getConstant(StOffset, Ptr.getValueType())); NewAlign = MinAlign(NewAlign, StOffset); } - + // Truncate down to the new size. IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal); - + ++OpsNarrowed; - return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr, - St->getSrcValue(), St->getSrcValueOffset()+StOffset, + return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr, + St->getPointerInfo().getWithOffset(StOffset), false, false, NewAlign).getNode(); } @@ -5771,7 +6021,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { return SDValue(); unsigned Opc = Value.getOpcode(); - + // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst // is a byte mask indicating a consecutive number of bytes, check to see if // Y is known to provide just those bytes. If so, we try to replace the @@ -5784,7 +6034,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, Value.getOperand(1), ST,this)) return SDValue(NewST, 0); - + // Or is commutative, so try swapping X and Y. 
     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
     if (MaskedLoad.first)
@@ -5792,7 +6042,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                   Value.getOperand(0), ST,this))
         return SDValue(NewST, 0);
   }
-
+
   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
       Value.getOperand(1).getOpcode() != ISD::Constant)
     return SDValue();
@@ -5801,7 +6051,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
       Chain == SDValue(N0.getNode(), 1)) {
     LoadSDNode *LD = cast<LoadSDNode>(N0);
-    if (LD->getBasePtr() != Ptr)
+    if (LD->getBasePtr() != Ptr ||
+        LD->getPointerInfo().getAddrSpace() !=
+        ST->getPointerInfo().getAddrSpace())
       return SDValue();
 
     // Find the type to narrow the load / op / store to.
@@ -5850,14 +6102,14 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
                                  DAG.getConstant(PtrOff, Ptr.getValueType()));
       SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(),
                                   LD->getChain(), NewPtr,
-                                  LD->getSrcValue(), LD->getSrcValueOffset(),
+                                  LD->getPointerInfo().getWithOffset(PtrOff),
                                   LD->isVolatile(), LD->isNonTemporal(),
                                   NewAlign);
       SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
                                    DAG.getConstant(NewImm, NewVT));
       SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
                                    NewVal, NewPtr,
-                                   ST->getSrcValue(), ST->getSrcValueOffset(),
+                                   ST->getPointerInfo().getWithOffset(PtrOff),
                                    false, false, NewAlign);
 
       AddToWorkList(NewPtr.getNode());
@@ -5874,6 +6126,63 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
   return SDValue();
 }
 
+/// TransformFPLoadStorePair - For a given floating point load / store pair,
+/// if the load value isn't used by any other operations, then consider
+/// transforming the pair to integer load / store operations if the target
+/// deems the transformation profitable.
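At the source level, the transformation this new combine performs amounts to moving a floating point value through an integer register instead of an FPU register. A minimal standalone C++ sketch of the idea (the function and names are illustrative only, not part of the patch):

#include <cstdint>
#include <cstring>

// Copy *Src to *Dst through an integer register.  This is the shape the
// combine rewrites an f64 load / store pair into when the loaded value has
// no other uses and both accesses meet the integer type's ABI alignment.
void copyDoubleViaInt(const double *Src, double *Dst) {
  uint64_t Bits;
  std::memcpy(&Bits, Src, sizeof Bits);   // i64 load  (was: f64 load)
  std::memcpy(Dst, &Bits, sizeof Bits);   // i64 store (was: f64 store)
}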
+SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
+  StoreSDNode *ST  = cast<StoreSDNode>(N);
+  SDValue Chain = ST->getChain();
+  SDValue Value = ST->getValue();
+  if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
+      Value.hasOneUse() &&
+      Chain == SDValue(Value.getNode(), 1)) {
+    LoadSDNode *LD = cast<LoadSDNode>(Value);
+    EVT VT = LD->getMemoryVT();
+    if (!VT.isFloatingPoint() ||
+        VT != ST->getMemoryVT() ||
+        LD->isNonTemporal() ||
+        ST->isNonTemporal() ||
+        LD->getPointerInfo().getAddrSpace() != 0 ||
+        ST->getPointerInfo().getAddrSpace() != 0)
+      return SDValue();
+
+    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+    if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
+        !TLI.isOperationLegal(ISD::STORE, IntVT) ||
+        !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
+        !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
+      return SDValue();
+
+    unsigned LDAlign = LD->getAlignment();
+    unsigned STAlign = ST->getAlignment();
+    const Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
+    unsigned ABIAlign = TLI.getTargetData()->getABITypeAlignment(IntVTTy);
+    if (LDAlign < ABIAlign || STAlign < ABIAlign)
+      return SDValue();
+
+    SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(),
+                                LD->getChain(), LD->getBasePtr(),
+                                LD->getPointerInfo(),
+                                false, false, LDAlign);
+
+    SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(),
+                                 NewLD, ST->getBasePtr(),
+                                 ST->getPointerInfo(),
+                                 false, false, STAlign);
+
+    AddToWorkList(NewLD.getNode());
+    AddToWorkList(NewST.getNode());
+    WorkListRemover DeadNodes(*this);
+    DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1),
+                                  &DeadNodes);
+    ++LdStFP2Int;
+    return NewST;
+  }
+
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitSTORE(SDNode *N) {
   StoreSDNode *ST  = cast<StoreSDNode>(N);
   SDValue Chain = ST->getChain();
@@ -5882,7 +6191,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
 
   // If this is a store of a bit convert, store the input value if the
   // resultant store does not need a higher alignment than the original.
-  if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() &&
+  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
       ST->isUnindexed()) {
     unsigned OrigAlign = ST->getAlignment();
     EVT SVT = Value.getOperand(0).getValueType();
@@ -5892,8 +6201,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
         ((!LegalOperations && !ST->isVolatile()) ||
          TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
       return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0),
-                          Ptr, ST->getSrcValue(),
-                          ST->getSrcValueOffset(), ST->isVolatile(),
+                          Ptr, ST->getPointerInfo(), ST->isVolatile(),
                           ST->isNonTemporal(), OrigAlign);
   }
 
@@ -5917,8 +6225,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
           Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
                                 bitcastToAPInt().getZExtValue(), MVT::i32);
           return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
-                              Ptr, ST->getSrcValue(),
-                              ST->getSrcValueOffset(), ST->isVolatile(),
+                              Ptr, ST->getPointerInfo(), ST->isVolatile(),
                               ST->isNonTemporal(), ST->getAlignment());
         }
         break;
@@ -5929,8 +6236,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
           Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
                                 getZExtValue(), MVT::i64);
           return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
-                              Ptr, ST->getSrcValue(),
-                              ST->getSrcValueOffset(), ST->isVolatile(),
+                              Ptr, ST->getPointerInfo(), ST->isVolatile(),
                               ST->isNonTemporal(), ST->getAlignment());
         } else if (!ST->isVolatile() &&
                    TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
@@ -5942,23 +6248,20 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
           SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
           if (TLI.isBigEndian()) std::swap(Lo, Hi);
 
-          int SVOffset = ST->getSrcValueOffset();
           unsigned Alignment = ST->getAlignment();
           bool isVolatile = ST->isVolatile();
           bool isNonTemporal = ST->isNonTemporal();
 
           SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo,
-                                     Ptr, ST->getSrcValue(),
-                                     ST->getSrcValueOffset(),
+                                     Ptr, ST->getPointerInfo(),
                                      isVolatile, isNonTemporal,
                                      ST->getAlignment());
           Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr,
                             DAG.getConstant(4, Ptr.getValueType()));
-          SVOffset += 4;
           Alignment = MinAlign(Alignment, 4U);
           SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi,
-                                     Ptr, ST->getSrcValue(),
-                                     SVOffset, isVolatile, isNonTemporal,
+                                     Ptr, ST->getPointerInfo().getWithOffset(4),
+                                     isVolatile, isNonTemporal,
                                      Alignment);
           return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
                              St0, St1);
@@ -5974,12 +6277,17 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
       if (Align > ST->getAlignment())
         return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
-                                 Ptr, ST->getSrcValue(),
-                                 ST->getSrcValueOffset(), ST->getMemoryVT(),
+                                 Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
                                  ST->isVolatile(), ST->isNonTemporal(), Align);
     }
   }
 
+  // Try transforming a pair of floating point load / store ops to integer
+  // load / store ops.
+  SDValue NewST = TransformFPLoadStorePair(N);
+  if (NewST.getNode())
+    return NewST;
+
   if (CombinerAA) {
     // Walk up chain skipping non-aliasing memory nodes.
     SDValue BetterChain = FindBetterChain(N, Chain);
@@ -5991,12 +6299,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
       // Replace the chain to avoid dependency.
       if (ST->isTruncatingStore()) {
         ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr,
-                                      ST->getSrcValue(),ST->getSrcValueOffset(),
+                                      ST->getPointerInfo(),
                                       ST->getMemoryVT(), ST->isVolatile(),
                                       ST->isNonTemporal(), ST->getAlignment());
       } else {
         ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr,
-                                 ST->getSrcValue(), ST->getSrcValueOffset(),
+                                 ST->getPointerInfo(),
                                  ST->isVolatile(), ST->isNonTemporal(),
                                  ST->getAlignment());
       }
@@ -6030,17 +6338,16 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
     AddToWorkList(Value.getNode());
     if (Shorter.getNode())
       return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter,
-                               Ptr, ST->getSrcValue(),
-                               ST->getSrcValueOffset(), ST->getMemoryVT(),
+                               Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
                                ST->isVolatile(), ST->isNonTemporal(),
                                ST->getAlignment());
 
     // Otherwise, see if we can simplify the operation with
     // SimplifyDemandedBits, which only works if the value has a single use.
     if (SimplifyDemandedBits(Value,
-                             APInt::getLowBitsSet(
-                               Value.getValueType().getScalarType().getSizeInBits(),
-                               ST->getMemoryVT().getScalarType().getSizeInBits())))
+                        APInt::getLowBitsSet(
+                          Value.getValueType().getScalarType().getSizeInBits(),
+                          ST->getMemoryVT().getScalarType().getSizeInBits())))
       return SDValue(N, 0);
   }
 
@@ -6064,8 +6371,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
       TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
                             ST->getMemoryVT())) {
     return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0),
-                             Ptr, ST->getSrcValue(),
-                             ST->getSrcValueOffset(), ST->getMemoryVT(),
+                             Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
                              ST->isVolatile(), ST->isNonTemporal(),
                              ST->getAlignment());
   }
@@ -6082,6 +6388,12 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
   if (InVal.getOpcode() == ISD::UNDEF)
     return InVec;
 
+  EVT VT = InVec.getValueType();
+
+  // If we can't generate a legal BUILD_VECTOR, exit
+  if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
+    return SDValue();
+
   // If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new
   // vector with the inserted element.
   if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) {
@@ -6091,13 +6403,12 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
     if (Elt < Ops.size())
       Ops[Elt] = InVal;
     return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
-                       InVec.getValueType(), &Ops[0], Ops.size());
+                       VT, &Ops[0], Ops.size());
   }
-  // If the invec is an UNDEF and if EltNo is a constant, create a new
+  // If the invec is an UNDEF and if EltNo is a constant, create a new
   // BUILD_VECTOR with undef elements and the inserted element.
-  if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF &&
+  if (InVec.getOpcode() == ISD::UNDEF &&
       isa<ConstantSDNode>(EltNo)) {
-    EVT VT = InVec.getValueType();
     EVT EltVT = VT.getVectorElementType();
     unsigned NElts = VT.getVectorNumElements();
     SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EltVT));
@@ -6106,7 +6417,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
     if (Elt < Ops.size())
       Ops[Elt] = InVal;
     return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
-                       InVec.getValueType(), &Ops[0], Ops.size());
+                       VT, &Ops[0], Ops.size());
   }
   return SDValue();
 }
@@ -6138,14 +6449,14 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
   SDValue EltNo = N->getOperand(1);
 
   if (isa<ConstantSDNode>(EltNo)) {
-    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+    int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
     bool NewLoad = false;
     bool BCNumEltsChanged = false;
     EVT VT = InVec.getValueType();
     EVT ExtVT = VT.getVectorElementType();
     EVT LVT = ExtVT;
 
-    if (InVec.getOpcode() == ISD::BIT_CONVERT) {
+    if (InVec.getOpcode() == ISD::BITCAST) {
      EVT BCVT = InVec.getOperand(0).getValueType();
      if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
        return SDValue();
@@ -6176,10 +6487,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
       // Select the input vector, guarding against out of range extract vector.
       unsigned NumElems = VT.getVectorNumElements();
-      int Idx = (Elt > NumElems) ? -1 : SVN->getMaskElt(Elt);
+      int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
       InVec = (Idx < (int)NumElems) ?
        InVec.getOperand(0) : InVec.getOperand(1);
-      if (InVec.getOpcode() == ISD::BIT_CONVERT)
+      if (InVec.getOpcode() == ISD::BITCAST)
         InVec = InVec.getOperand(0);
       if (ISD::isNormalLoad(InVec.getNode())) {
         LN0 = cast<LoadSDNode>(InVec);
@@ -6190,12 +6501,17 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
     if (!LN0 || !LN0->hasOneUse() || LN0->isVolatile())
       return SDValue();
 
+    // If Idx was -1 above, Elt is going to be -1, so just return undef.
+    if (Elt == -1)
+      return DAG.getUNDEF(LN0->getBasePtr().getValueType());
+
     unsigned Align = LN0->getAlignment();
     if (NewLoad) {
       // Check the resultant load doesn't need a higher alignment than the
       // original load.
       unsigned NewAlign =
-        TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
+        TLI.getTargetData()
+          ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
 
       if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
         return SDValue();
@@ -6204,8 +6520,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
     }
 
     SDValue NewPtr = LN0->getBasePtr();
+    unsigned PtrOff = 0;
+
     if (Elt) {
-      unsigned PtrOff = LVT.getSizeInBits() * Elt / 8;
+      PtrOff = LVT.getSizeInBits() * Elt / 8;
       EVT PtrType = NewPtr.getValueType();
       if (TLI.isBigEndian())
         PtrOff = VT.getSizeInBits() / 8 - PtrOff;
@@ -6214,7 +6532,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
     }
 
     return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
-                       LN0->getSrcValue(), LN0->getSrcValueOffset(),
+                       LN0->getPointerInfo().getWithOffset(PtrOff),
                        LN0->isVolatile(), LN0->isNonTemporal(), Align);
   }
 
@@ -6280,7 +6598,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
         unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
         if (ExtIndex > VT.getVectorNumElements())
           return SDValue();
-
+
         Mask.push_back(ExtIndex);
         continue;
       }
@@ -6328,15 +6646,16 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
 
   // FIXME: implement canonicalizations from DAG.getVectorShuffle()
 
-  // If it is a splat, check if the argument vector is a build_vector with
-  // all scalar elements the same.
-  if (cast<ShuffleVectorSDNode>(N)->isSplat()) {
+  // If it is a splat, check if the argument vector is another splat or a
+  // build_vector with all scalar elements the same.
+  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
     SDNode *V = N0.getNode();
 
     // If this is a bit convert that changes the element type of the vector but
     // not the number of vector elements, look through it.  Be careful not to
     // look through conversions that change things like v4f32 to v2f64.
-    if (V->getOpcode() == ISD::BIT_CONVERT) {
+    if (V->getOpcode() == ISD::BITCAST) {
       SDValue ConvInput = V->getOperand(0);
       if (ConvInput.getValueType().isVector() &&
           ConvInput.getValueType().getVectorNumElements() == NumElts)
@@ -6344,30 +6663,28 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
     }
 
     if (V->getOpcode() == ISD::BUILD_VECTOR) {
-      unsigned NumElems = V->getNumOperands();
-      unsigned BaseIdx = cast<ShuffleVectorSDNode>(N)->getSplatIndex();
-      if (NumElems > BaseIdx) {
-        SDValue Base;
-        bool AllSame = true;
-        for (unsigned i = 0; i != NumElems; ++i) {
-          if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
-            Base = V->getOperand(i);
-            break;
-          }
+      assert(V->getNumOperands() == NumElts &&
+             "BUILD_VECTOR has wrong number of operands");
+      SDValue Base;
+      bool AllSame = true;
+      for (unsigned i = 0; i != NumElts; ++i) {
+        if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
+          Base = V->getOperand(i);
+          break;
         }
-        // Splat of <u, u, u, u>, return <u, u, u, u>
-        if (!Base.getNode())
-          return N0;
-        for (unsigned i = 0; i != NumElems; ++i) {
-          if (V->getOperand(i) != Base) {
-            AllSame = false;
-            break;
-          }
+      }
+      // Splat of <u, u, u, u>, return <u, u, u, u>
+      if (!Base.getNode())
+        return N0;
+      for (unsigned i = 0; i != NumElts; ++i) {
+        if (V->getOperand(i) != Base) {
+          AllSame = false;
+          break;
        }
-        // Splat of <x, x, x, x>, return <x, x, x, x>
-        if (AllSame)
-          return N0;
       }
+      // Splat of <x, x, x, x>, return <x, x, x, x>
+      if (AllSame)
+        return N0;
     }
   }
   return SDValue();
@@ -6436,7 +6753,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
   if (N->getOpcode() == ISD::AND) {
-    if (RHS.getOpcode() == ISD::BIT_CONVERT)
+    if (RHS.getOpcode() == ISD::BITCAST)
       RHS = RHS.getOperand(0);
     if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
       SmallVector<int, 8> Indices;
@@ -6464,9 +6781,9 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
                                      DAG.getConstant(0, EltVT));
       SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
                                  RVT, &ZeroOps[0], ZeroOps.size());
-      LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS);
+      LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
       SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
-      return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuf);
+      return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
     }
   }
 
@@ -6480,10 +6797,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
   // things. Simplifying them may result in a loss of legality.
   if (LegalOperations) return SDValue();
 
-  EVT VT = N->getValueType(0);
-  assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
+  assert(N->getValueType(0).isVector() &&
+         "SimplifyVBinOp only works on vectors!");
 
-  EVT EltType = VT.getVectorElementType();
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
   SDValue Shuffle = XformToShuffleWithZero(N);
@@ -6516,14 +6832,10 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
         break;
       }
 
-      // If the vector element type is not legal, the BUILD_VECTOR operands
-      // are promoted and implicitly truncated.  Make that explicit here.
-      if (LHSOp.getValueType() != EltType)
-        LHSOp = DAG.getNode(ISD::TRUNCATE, LHS.getDebugLoc(), EltType, LHSOp);
-      if (RHSOp.getValueType() != EltType)
-        RHSOp = DAG.getNode(ISD::TRUNCATE, RHS.getDebugLoc(), EltType, RHSOp);
-
-      SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), EltType,
+      EVT VT = LHSOp.getValueType();
+      assert(RHSOp.getValueType() == VT &&
+             "SimplifyVBinOp with different BUILD_VECTOR element types");
+      SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT,
                                    LHSOp, RHSOp);
       if (FoldOp.getOpcode() != ISD::UNDEF &&
           FoldOp.getOpcode() != ISD::Constant &&
@@ -6533,11 +6845,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
       AddToWorkList(FoldOp.getNode());
     }
 
-    if (Ops.size() == LHS.getNumOperands()) {
-      EVT VT = LHS.getValueType();
-      return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
-                         &Ops[0], Ops.size());
-    }
+    if (Ops.size() == LHS.getNumOperands())
+      return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+                         LHS.getValueType(), &Ops[0], Ops.size());
   }
 
   return SDValue();
@@ -6580,103 +6890,101 @@ SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0,
 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
                                     SDValue RHS) {
 
+  // Cannot simplify select with vector condition
+  if (TheSelect->getOperand(0).getValueType().isVector()) return false;
+
   // If this is a select from two identical things, try to pull the operation
   // through the select.
-  if (LHS.getOpcode() == RHS.getOpcode() && LHS.hasOneUse() && RHS.hasOneUse()){
-    // If this is a load and the token chain is identical, replace the select
-    // of two loads with a load through a select of the address to load from.
-    // This triggers in things like "select bool X, 10.0, 123.0" after the FP
-    // constants have been dropped into the constant pool.
-    if (LHS.getOpcode() == ISD::LOAD &&
+  if (LHS.getOpcode() != RHS.getOpcode() ||
+      !LHS.hasOneUse() || !RHS.hasOneUse())
+    return false;
+
+  // If this is a load and the token chain is identical, replace the select
+  // of two loads with a load through a select of the address to load from.
+  // This triggers in things like "select bool X, 10.0, 123.0" after the FP
+  // constants have been dropped into the constant pool.
+  if (LHS.getOpcode() == ISD::LOAD) {
+    LoadSDNode *LLD = cast<LoadSDNode>(LHS);
+    LoadSDNode *RLD = cast<LoadSDNode>(RHS);
+
+    // Token chains must be identical.
+    if (LHS.getOperand(0) != RHS.getOperand(0) ||
        // Do not let this transformation reduce the number of volatile loads.
-        !cast<LoadSDNode>(LHS)->isVolatile() &&
-        !cast<LoadSDNode>(RHS)->isVolatile() &&
-        // Token chains must be identical.
-        LHS.getOperand(0) == RHS.getOperand(0)) {
-      LoadSDNode *LLD = cast<LoadSDNode>(LHS);
-      LoadSDNode *RLD = cast<LoadSDNode>(RHS);
-
-      // If this is an EXTLOAD, the VT's must match.
-      if (LLD->getMemoryVT() == RLD->getMemoryVT()) {
+        LLD->isVolatile() || RLD->isVolatile() ||
+        // If this is an EXTLOAD, the VT's must match.
+        LLD->getMemoryVT() != RLD->getMemoryVT() ||
+        // If this is an EXTLOAD, the kind of extension must match.
+        (LLD->getExtensionType() != RLD->getExtensionType() &&
+         // The only exception is if one of the extensions is anyext.
+         LLD->getExtensionType() != ISD::EXTLOAD &&
+         RLD->getExtensionType() != ISD::EXTLOAD) ||
        // FIXME: this discards src value information.  This is
        // over-conservative. It would be beneficial to be able to remember
        // both potential memory locations.  Since we are discarding
        // src value info, don't do the transformation if the memory
        // locations are not in the default address space.
-        unsigned LLDAddrSpace = 0, RLDAddrSpace = 0;
-        if (const Value *LLDVal = LLD->getMemOperand()->getValue()) {
-          if (const PointerType *PT = dyn_cast<PointerType>(LLDVal->getType()))
-            LLDAddrSpace = PT->getAddressSpace();
-        }
-        if (const Value *RLDVal = RLD->getMemOperand()->getValue()) {
-          if (const PointerType *PT = dyn_cast<PointerType>(RLDVal->getType()))
-            RLDAddrSpace = PT->getAddressSpace();
-        }
-        SDValue Addr;
-        if (LLDAddrSpace == 0 && RLDAddrSpace == 0) {
-          if (TheSelect->getOpcode() == ISD::SELECT) {
-            // Check that the condition doesn't reach either load.  If so, folding
-            // this will induce a cycle into the DAG.
-            if ((!LLD->hasAnyUseOfValue(1) ||
-                 !LLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) &&
-                (!RLD->hasAnyUseOfValue(1) ||
-                 !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()))) {
-              Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
-                                 LLD->getBasePtr().getValueType(),
-                                 TheSelect->getOperand(0), LLD->getBasePtr(),
-                                 RLD->getBasePtr());
-            }
-          } else {
-            // Check that the condition doesn't reach either load.  If so, folding
-            // this will induce a cycle into the DAG.
-            if ((!LLD->hasAnyUseOfValue(1) ||
-                 (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
-                  !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()))) &&
-                (!RLD->hasAnyUseOfValue(1) ||
-                 (!RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
-                  !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())))) {
-              Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
-                                 LLD->getBasePtr().getValueType(),
-                                 TheSelect->getOperand(0),
-                                 TheSelect->getOperand(1),
-                                 LLD->getBasePtr(), RLD->getBasePtr(),
-                                 TheSelect->getOperand(4));
-            }
-          }
-        }
-
-        if (Addr.getNode()) {
-          SDValue Load;
-          if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
-            Load = DAG.getLoad(TheSelect->getValueType(0),
-                               TheSelect->getDebugLoc(),
-                               LLD->getChain(),
-                               Addr, 0, 0,
-                               LLD->isVolatile(),
-                               LLD->isNonTemporal(),
-                               LLD->getAlignment());
-          } else {
-            Load = DAG.getExtLoad(LLD->getExtensionType(),
-                                  TheSelect->getValueType(0),
-                                  TheSelect->getDebugLoc(),
-                                  LLD->getChain(), Addr, 0, 0,
-                                  LLD->getMemoryVT(),
-                                  LLD->isVolatile(),
-                                  LLD->isNonTemporal(),
-                                  LLD->getAlignment());
-          }
+        LLD->getPointerInfo().getAddrSpace() != 0 ||
+        RLD->getPointerInfo().getAddrSpace() != 0)
+      return false;
 
-          // Users of the select now use the result of the load.
-          CombineTo(TheSelect, Load);
+    // Check that the select condition doesn't reach either load.  If so,
+    // folding this will induce a cycle into the DAG.  If not, this is safe to
+    // xform, so create a select of the addresses.
+    SDValue Addr;
+    if (TheSelect->getOpcode() == ISD::SELECT) {
+      SDNode *CondNode = TheSelect->getOperand(0).getNode();
+      if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
+          (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
+        return false;
+      Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
+                         LLD->getBasePtr().getValueType(),
+                         TheSelect->getOperand(0), LLD->getBasePtr(),
+                         RLD->getBasePtr());
+    } else {  // Otherwise SELECT_CC
+      SDNode *CondLHS = TheSelect->getOperand(0).getNode();
+      SDNode *CondRHS = TheSelect->getOperand(1).getNode();
+
+      if ((LLD->hasAnyUseOfValue(1) &&
+           (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
+          (RLD->hasAnyUseOfValue(1) &&
+           (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
+        return false;
-      // Users of the select now use the result of the load.
-      CombineTo(TheSelect, Load);
+      Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
+                         LLD->getBasePtr().getValueType(),
+                         TheSelect->getOperand(0),
+                         TheSelect->getOperand(1),
+                         LLD->getBasePtr(), RLD->getBasePtr(),
+                         TheSelect->getOperand(4));
+    }
+
+    SDValue Load;
+    if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
+      Load = DAG.getLoad(TheSelect->getValueType(0),
+                         TheSelect->getDebugLoc(),
+                         // FIXME: Discards pointer info.
+                         LLD->getChain(), Addr, MachinePointerInfo(),
+                         LLD->isVolatile(), LLD->isNonTemporal(),
+                         LLD->getAlignment());
+    } else {
+      Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ?
+                            RLD->getExtensionType() : LLD->getExtensionType(),
+                            TheSelect->getDebugLoc(),
+                            TheSelect->getValueType(0),
+                            // FIXME: Discards pointer info.
+                            LLD->getChain(), Addr, MachinePointerInfo(),
+                            LLD->getMemoryVT(), LLD->isVolatile(),
+                            LLD->isNonTemporal(), LLD->getAlignment());
+    }
+
+    // Users of the select now use the result of the load.
+    CombineTo(TheSelect, Load);
+
+    // Users of the old loads now use the new load's chain.  We know the
+    // old-load value is dead now.
+    CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
+    CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
+    return true;
   }
 
   return false;
@@ -6689,7 +6997,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
                                       ISD::CondCode CC, bool NotExtCompare) {
   // (x ? y : y) -> y.
   if (N2 == N3) return N2;
-
+
   EVT VT = N2.getValueType();
   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
   ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
@@ -6725,7 +7033,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
         return DAG.getNode(ISD::FABS, DL, VT, N3);
     }
   }
-
+
   // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
   // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
   // in it.  This is a win when the constant is not otherwise available because
@@ -6748,7 +7056,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
       };
       const Type *FPTy = Elts[0]->getType();
       const TargetData &TD = *TLI.getTargetData();
-
+
       // Create a ConstantArray of the two constants.
       Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts, 2);
       SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
@@ -6760,7 +7068,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
       SDValue Zero = DAG.getIntPtrConstant(0);
       unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
       SDValue One = DAG.getIntPtrConstant(EltSize);
-
+
       SDValue Cond = DAG.getSetCC(DL,
                                   TLI.getSetCCResultType(N0.getValueType()),
                                   N0, N1, CC);
@@ -6769,11 +7077,11 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
       CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,
                           CstOffset);
       return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
-                         PseudoSourceValue::getConstantPool(), 0, false,
+                         MachinePointerInfo::getConstantPool(), false,
                          false, Alignment);
 
     }
-  }
+  }
 
   // Check to see if we can perform the "gzip trick", transforming
   // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A)
@@ -6818,6 +7126,35 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
     }
   }
 
+  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
+  // where y has a single bit set.
+  // In other words, we can turn the SELECT_CC into an AND when the
+  // condition can be materialized as an all-ones register.  Any
+  // single bit-test can be materialized as an all-ones register with
+  // shift-left and shift-right-arith.
+  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
+      N0->getValueType(0) == VT &&
+      N1C && N1C->isNullValue() &&
+      N2C && N2C->isNullValue()) {
+    SDValue AndLHS = N0->getOperand(0);
+    ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+    if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
+      // Shift the tested bit over the sign bit.
+      APInt AndMask = ConstAndRHS->getAPIntValue();
+      SDValue ShlAmt =
+        DAG.getConstant(AndMask.countLeadingZeros(), getShiftAmountTy());
+      SDValue Shl = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, AndLHS, ShlAmt);
+
+      // Now arithmetic right shift it all the way over, so the result is
+      // either all-ones, or zero.
+      SDValue ShrAmt =
+        DAG.getConstant(AndMask.getBitWidth()-1, getShiftAmountTy());
+      SDValue Shr = DAG.getNode(ISD::SRA, N0.getDebugLoc(), VT, Shl, ShrAmt);
+
+      return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
+    }
+  }
+
   // fold select C, 16, 0 -> shl C, 4
   if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
       TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent) {
@@ -6971,7 +7308,8 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
 }
 
 /// FindBaseOffset - Return true if base is a frame index, which is known not
-// to alias with anything but itself.  Provides base object and offset as results.
+// to alias with anything but itself.  Provides base object and offset as
+// results.
 static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
                            const GlobalValue *&GV, void *&CV) {
   // Assume it is a primitive operation.
@@ -6984,7 +7322,7 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
       Offset += C->getZExtValue();
     }
   }
-
+
   // Return the underlying GlobalValue, and update the Offset.  Return false
   // for GlobalAddressSDNode since the same GlobalAddress may be represented
   // by multiple nodes with different offsets.
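The shift trick used by the new SELECT_CC fold above can be checked in plain C++; this standalone sketch (names are illustrative, and it assumes the usual two's-complement arithmetic right shift) mirrors the emitted DAG nodes one for one:

#include <cassert>
#include <cstdint>

// Branch-free form of ((X & (1u << Bit)) == 0) ? 0 : A.  Shift the tested
// bit into the sign position, then arithmetic-shift it across the whole
// word to produce an all-ones or all-zero mask, and AND that with A.
uint32_t selectOnBitTest(uint32_t X, unsigned Bit, uint32_t A) {
  assert(Bit < 32 && "bit index out of range");
  uint32_t Shl = X << (31 - Bit);     // ISD::SHL by countLeadingZeros(mask)
  int32_t  Shr = (int32_t)Shl >> 31;  // ISD::SRA by BitWidth-1
  return (uint32_t)Shr & A;           // ISD::AND with N3
}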
@@ -7012,9 +7350,11 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
 bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
                           const Value *SrcValue1, int SrcValueOffset1,
                           unsigned SrcValueAlign1,
+                          const MDNode *TBAAInfo1,
                           SDValue Ptr2, int64_t Size2,
                           const Value *SrcValue2, int SrcValueOffset2,
-                          unsigned SrcValueAlign2) const {
+                          unsigned SrcValueAlign2,
+                          const MDNode *TBAAInfo2) const {
   // If they are the same then they must be aliases.
   if (Ptr1 == Ptr2) return true;
 
@@ -7030,8 +7370,19 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
   if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
     return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
 
-  // If we know what the bases are, and they aren't identical, then we know they
-  // cannot alias.
+  // It is possible for different frame indices to alias each other, mostly
+  // when tail call optimization reuses return address slots for arguments.
+  // To catch this case, look up the actual index of frame indices to compute
+  // the real alias relationship.
+  if (isFrameIndex1 && isFrameIndex2) {
+    MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+    Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
+    Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
+    return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+  }
+
+  // Otherwise, if we know what the bases are, and they aren't identical, then
+  // we know they cannot alias.
   if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
     return false;
 
@@ -7044,20 +7395,21 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
       (Size1 == Size2) && (SrcValueAlign1 > Size1)) {
     int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1;
     int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1;
-
+
     // There is no overlap between these relatively aligned accesses of similar
     // size, return no alias.
     if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1)
       return false;
   }
-
+
   if (CombinerGlobalAA) {
     // Use alias analysis information.
     int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
     int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
     int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;
     AliasAnalysis::AliasResult AAResult =
-             AA.alias(SrcValue1, Overlap1, SrcValue2, Overlap2);
+      AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, TBAAInfo1),
+               AliasAnalysis::Location(SrcValue2, Overlap2, TBAAInfo2));
     if (AAResult == AliasAnalysis::NoAlias)
       return false;
   }
@@ -7070,15 +7422,17 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
 /// node.  Returns true if the operand was a load.
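The disjointness test that isAlias() now also applies to spill slots is ordinary half-open interval overlap; a small sketch of the same expression in isolation (the helper name is illustrative):

#include <cstdint>

// Two accesses [Off, Off+Size) can only alias if neither interval ends
// before the other begins, which is the expression the combiner uses for
// constant-offset accesses off a common base or frame slot.
static bool accessesOverlap(int64_t Off1, int64_t Size1,
                            int64_t Off2, int64_t Size2) {
  return !((Off1 + Size1) <= Off2 || (Off2 + Size2) <= Off1);
}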
 bool DAGCombiner::FindAliasInfo(SDNode *N,
                                 SDValue &Ptr, int64_t &Size,
-                                const Value *&SrcValue,
+                                const Value *&SrcValue,
                                 int &SrcValueOffset,
-                                unsigned &SrcValueAlign) const {
+                                unsigned &SrcValueAlign,
+                                const MDNode *&TBAAInfo) const {
   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
     Ptr = LD->getBasePtr();
     Size = LD->getMemoryVT().getSizeInBits() >> 3;
     SrcValue = LD->getSrcValue();
     SrcValueOffset = LD->getSrcValueOffset();
     SrcValueAlign = LD->getOriginalAlignment();
+    TBAAInfo = LD->getTBAAInfo();
     return true;
   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
     Ptr = ST->getBasePtr();
@@ -7086,6 +7440,7 @@ bool DAGCombiner::FindAliasInfo(SDNode *N,
     SrcValue = ST->getSrcValue();
     SrcValueOffset = ST->getSrcValueOffset();
     SrcValueAlign = ST->getOriginalAlignment();
+    TBAAInfo = ST->getTBAAInfo();
   } else {
     llvm_unreachable("FindAliasInfo expected a memory operand");
   }
@@ -7106,26 +7461,27 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
   const Value *SrcValue;
   int SrcValueOffset;
   unsigned SrcValueAlign;
-  bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset,
-                              SrcValueAlign);
+  const MDNode *SrcTBAAInfo;
+  bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset,
+                              SrcValueAlign, SrcTBAAInfo);
 
   // Starting off.
   Chains.push_back(OriginalChain);
   unsigned Depth = 0;
-
+
   // Look at each chain and determine if it is an alias.  If so, add it to the
   // aliases list.  If not, then continue up the chain looking for the next
   // candidate.
   while (!Chains.empty()) {
     SDValue Chain = Chains.back();
     Chains.pop_back();
-
-    // For TokenFactor nodes, look at each operand and only continue up the
-    // chain until we find two aliases.  If we've seen two aliases, assume we'll
+
+    // For TokenFactor nodes, look at each operand and only continue up the
+    // chain until we find two aliases.  If we've seen two aliases, assume we'll
     // find more and revert to original chain since the xform is unlikely to be
     // profitable.
-    //
-    // FIXME: The depth check could be made to return the last non-aliasing
+    //
+    // FIXME: The depth check could be made to return the last non-aliasing
     // chain we found before we hit a tokenfactor rather than the original
     // chain.
     if (Depth > 6 || Aliases.size() == 2) {
@@ -7151,15 +7507,18 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
       const Value *OpSrcValue;
       int OpSrcValueOffset;
       unsigned OpSrcValueAlign;
+      const MDNode *OpSrcTBAAInfo;
       bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
                                     OpSrcValue, OpSrcValueOffset,
-                                    OpSrcValueAlign);
+                                    OpSrcValueAlign,
+                                    OpSrcTBAAInfo);
 
       // If chain is alias then stop here.
       if (!(IsLoad && IsOpLoad) &&
           isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign,
+                  SrcTBAAInfo,
                   OpPtr, OpSize, OpSrcValue, OpSrcValueOffset,
-                  OpSrcValueAlign)) {
+                  OpSrcValueAlign, OpSrcTBAAInfo)) {
         Aliases.push_back(Chain);
       } else {
         // Look further up the chain.
@@ -7206,9 +7565,9 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
     // If a single operand then chain to it.  We don't need to revisit it.
     return Aliases[0];
   }
-
+
   // Construct a custom tailored token factor.
-  return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
+  return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
                      &Aliases[0], Aliases.size());
 }
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index a4eed71..490b857 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -55,6 +55,7 @@
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Debug.h"
 using namespace llvm;
 
 /// startNewBlock - Set the current block to which generated machine
@@ -197,12 +198,12 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
             TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
   }
-
+
   // If target-independent code couldn't handle the value, give target-specific
   // code a try.
   if (!Reg && isa<Constant>(V))
     Reg = TargetMaterializeConstant(cast<Constant>(V));
-
+
   // Don't cache constant materializations in the general ValueMap.
   // To do so would require tracking what uses they dominate.
   if (Reg != 0) {
@@ -234,7 +235,7 @@ unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) {
     LocalValueMap[I] = Reg;
     return Reg;
   }
-
+
   unsigned &AssignedReg = FuncInfo.ValueMap[I];
   if (AssignedReg == 0)
     // Use the new register.
@@ -414,7 +415,7 @@ bool FastISel::SelectGetElementPtr(const User *I) {
       // If this is a constant subscript, handle it quickly.
       if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
         if (CI->isZero()) continue;
-        uint64_t Offs =
+        uint64_t Offs =
           TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
         N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
         if (N == 0)
@@ -423,7 +424,7 @@ bool FastISel::SelectGetElementPtr(const User *I) {
         NIsKill = true;
         continue;
       }
-
+
       // N = N + Idx * ElementSize;
       uint64_t ElementSize = TD.getTypeAllocSize(Ty);
       std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
@@ -467,16 +468,28 @@ bool FastISel::SelectCall(const User *I) {
       return true;
 
     const Value *Address = DI->getAddress();
-    if (!Address)
+    if (!Address || isa<UndefValue>(Address) || isa<AllocaInst>(Address))
       return true;
-    if (isa<UndefValue>(Address))
-      return true;
-    const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
-    // Don't handle byval struct arguments or VLAs, for example.
-    if (!AI)
-      // Building the map above is target independent.  Generating DBG_VALUE
-      // inline is target dependent; do this now.
-      (void)TargetSelectInstruction(cast<Instruction>(I));
+
+    unsigned Reg = 0;
+    unsigned Offset = 0;
+    if (const Argument *Arg = dyn_cast<Argument>(Address)) {
+      if (Arg->hasByValAttr()) {
+        // Byval arguments' frame index is recorded during argument lowering.
+        // Use this info directly.
+        Offset = FuncInfo.getByValArgumentFrameIndex(Arg);
+        if (Offset)
+          Reg = TRI.getFrameRegister(*FuncInfo.MF);
+      }
+    }
+    if (!Reg)
+      Reg = getRegForValue(Address);
+
+    if (Reg)
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+              TII.get(TargetOpcode::DBG_VALUE))
+        .addReg(Reg, RegState::Debug).addImm(Offset)
+        .addMetadata(DI->getVariable());
     return true;
   }
   case Intrinsic::dbg_value: {
@@ -505,11 +518,8 @@ bool FastISel::SelectCall(const User *I) {
     } else {
       // We can't yet handle anything else here because it would require
       // generating code, thus altering codegen because of debug info.
-      // Insert an undef so we can see what we dropped.
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
-        .addReg(0U).addImm(DI->getOffset())
-        .addMetadata(DI->getVariable());
-    }
+      DEBUG(dbgs() << "Dropping debug info for " << DI);
+    }
     return true;
   }
   case Intrinsic::eh_exception: {
@@ -582,12 +592,12 @@ bool FastISel::SelectCall(const User *I) {
 bool FastISel::SelectCast(const User *I, unsigned Opcode) {
   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
   EVT DstVT = TLI.getValueType(I->getType());
-
+
   if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
       DstVT == MVT::Other || !DstVT.isSimple())
     // Unhandled type. Halt "fast" selection and bail.
     return false;
-
+
   // Check if the destination type is legal. Or as a special case,
   // it may be i1 if we're doing a truncate because that's
   // easy and somewhat common.
@@ -629,7 +639,7 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) {
                                   InputReg, InputRegIsKill);
   if (!ResultReg)
     return false;
-
+
   UpdateValueMap(I, ResultReg);
   return true;
 }
@@ -644,23 +654,23 @@ bool FastISel::SelectBitCast(const User *I) {
     return true;
   }
 
-  // Bitcasts of other values become reg-reg copies or BIT_CONVERT operators.
+  // Bitcasts of other values become reg-reg copies or BITCAST operators.
   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
   EVT DstVT = TLI.getValueType(I->getType());
-
+
   if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
       DstVT == MVT::Other || !DstVT.isSimple() ||
       !TLI.isTypeLegal(SrcVT) || !TLI.isTypeLegal(DstVT))
     // Unhandled type. Halt "fast" selection and bail.
     return false;
-
+
   unsigned Op0 = getRegForValue(I->getOperand(0));
   if (Op0 == 0)
     // Unhandled operand. Halt "fast" selection and bail.
     return false;
 
   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
-
+
   // First, try to perform the bitcast by inserting a reg-reg copy.
   unsigned ResultReg = 0;
   if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) {
@@ -673,15 +683,15 @@ bool FastISel::SelectBitCast(const User *I) {
               ResultReg).addReg(Op0);
     }
   }
-
-  // If the reg-reg copy failed, select a BIT_CONVERT opcode.
+
+  // If the reg-reg copy failed, select a BITCAST opcode.
   if (!ResultReg)
     ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
-                           ISD::BIT_CONVERT, Op0, Op0IsKill);
-
+                           ISD::BITCAST, Op0, Op0IsKill);
+
   if (!ResultReg)
     return false;
-
+
   UpdateValueMap(I, ResultReg);
   return true;
 }
@@ -753,7 +763,7 @@ FastISel::SelectFNeg(const User *I) {
     return false;
 
   unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(),
-                               ISD::BIT_CONVERT, OpReg, OpRegIsKill);
+                               ISD::BITCAST, OpReg, OpRegIsKill);
   if (IntReg == 0)
     return false;
 
@@ -765,7 +775,7 @@ FastISel::SelectFNeg(const User *I) {
     return false;
 
   ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(),
-                         ISD::BIT_CONVERT, IntResultReg, /*Kill=*/true);
+                         ISD::BITCAST, IntResultReg, /*Kill=*/true);
   if (ResultReg == 0)
     return false;
 
@@ -845,10 +855,10 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) {
     // Dynamic-sized alloca is not handled yet.
     return false;
-
+
   case Instruction::Call:
     return SelectCall(I);
-
+
   case Instruction::BitCast:
     return SelectBitCast(I);
 
@@ -911,7 +921,7 @@ unsigned FastISel::FastEmit_r(MVT, MVT,
   return 0;
 }
 
-unsigned FastISel::FastEmit_rr(MVT, MVT,
+unsigned FastISel::FastEmit_rr(MVT, MVT,
                                unsigned,
                                unsigned /*Op0*/, bool /*Op0IsKill*/,
                                unsigned /*Op1*/, bool /*Op1IsKill*/) {
@@ -1139,7 +1149,7 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
                                   uint64_t Imm) {
   unsigned ResultReg = createResultReg(RC);
   const TargetInstrDesc &II = TII.get(MachineInstOpcode);
-
+
   if (II.getNumDefs() >= 1)
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm);
   else {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 5ef6404..98582ba 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -29,7 +29,6 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetOptions.h"
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 61c2a90..e309def 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -31,11 +31,11 @@ using namespace llvm;
 
 /// CountResults - The results of target nodes have register or immediate
-/// operands first, then an optional chain, and optional flag operands (which do
+/// operands first, then an optional chain, and optional glue operands (which do
 /// not go into the resulting MachineInstr).
 unsigned InstrEmitter::CountResults(SDNode *Node) {
   unsigned N = Node->getNumValues();
-  while (N && Node->getValueType(N - 1) == MVT::Flag)
+  while (N && Node->getValueType(N - 1) == MVT::Glue)
     --N;
   if (N && Node->getValueType(N - 1) == MVT::Other)
     --N;    // Skip over chain result.
@@ -43,12 +43,12 @@ unsigned InstrEmitter::CountResults(SDNode *Node) {
 }
 
 /// CountOperands - The inputs to target nodes have any actual inputs first,
-/// followed by an optional chain operand, then an optional flag operand.
+/// followed by an optional chain operand, then an optional glue operand.
 /// Compute the number of actual operands that will go into the resulting
 /// MachineInstr.
 unsigned InstrEmitter::CountOperands(SDNode *Node) {
   unsigned N = Node->getNumOperands();
-  while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag)
+  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
     --N;
   if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
     --N;    // Ignore chain if it exists.
@@ -67,7 +67,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
     if (IsClone)
       VRBaseMap.erase(Op);
     bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second;
-    isNew = isNew; // Silence compiler warning.
+    (void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early"); return; } @@ -96,7 +96,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, if (Op.getNode() != Node || Op.getResNo() != ResNo) continue; EVT VT = Node->getValueType(Op.getResNo()); - if (VT == MVT::Other || VT == MVT::Flag) + if (VT == MVT::Other || VT == MVT::Glue) continue; Match = false; if (User->isMachineOpcode()) { @@ -150,7 +150,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, if (IsClone) VRBaseMap.erase(Op); bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; - isNew = isNew; // Silence compiler warning. + (void)isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); } @@ -224,7 +224,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, if (IsClone) VRBaseMap.erase(Op); bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; - isNew = isNew; // Silence compiler warning. + (void)isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); } } @@ -264,8 +264,8 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, DenseMap<SDValue, unsigned> &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned) { assert(Op.getValueType() != MVT::Other && - Op.getValueType() != MVT::Flag && - "Chain and flag operands should occur at end of operand list!"); + Op.getValueType() != MVT::Glue && + "Chain and glue operands should occur at end of operand list!"); // Get/emit the operand. unsigned VReg = getVR(Op, VRBaseMap); assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?"); @@ -377,8 +377,8 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, BA->getTargetFlags())); } else { assert(Op.getValueType() != MVT::Other && - Op.getValueType() != MVT::Flag && - "Chain and flag operands should occur at end of operand list!"); + Op.getValueType() != MVT::Glue && + "Chain and glue operands should occur at end of operand list!"); AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap, IsDebug, IsClone, IsCloned); } @@ -428,31 +428,47 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, // Figure out the register class to create for the destreg. unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); - const TargetRegisterClass *TRC = MRI->getRegClass(VReg); - const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx); - assert(SRC && "Invalid subregister index in EXTRACT_SUBREG"); - - // Figure out the register class to create for the destreg. - // Note that if we're going to directly use an existing register, - // it must be precisely the required class, and not a subclass - // thereof. 
-    if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) {
-      // Create the reg
-      assert(SRC && "Couldn't find source register class");
-      VRBase = MRI->createVirtualRegister(SRC);
-    }
+    MachineInstr *DefMI = MRI->getVRegDef(VReg);
+    unsigned SrcReg, DstReg, DefSubIdx;
+    if (DefMI &&
+        TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) &&
+        SubIdx == DefSubIdx) {
+      // Optimize these:
+      // r1025 = s/zext r1024, 4
+      // r1026 = extract_subreg r1025, 4
+      // to a copy
+      // r1026 = copy r1024
+      const TargetRegisterClass *TRC = MRI->getRegClass(SrcReg);
+      VRBase = MRI->createVirtualRegister(TRC);
+      BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+              TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg);
+    } else {
+      const TargetRegisterClass *TRC = MRI->getRegClass(VReg);
+      const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx);
+      assert(SRC && "Invalid subregister index in EXTRACT_SUBREG");
+
+      // Figure out the register class to create for the destreg.
+      // Note that if we're going to directly use an existing register,
+      // it must be precisely the required class, and not a subclass
+      // thereof.
+      if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) {
+        // Create the reg
+        assert(SRC && "Couldn't find source register class");
+        VRBase = MRI->createVirtualRegister(SRC);
+      }
 
-    // Create the extract_subreg machine instruction.
-    MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
-                               TII->get(TargetOpcode::COPY), VRBase);
+      // Create the extract_subreg machine instruction.
+      MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
+                                 TII->get(TargetOpcode::COPY), VRBase);
 
-    // Add source, and subreg index
-    AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false,
-               IsClone, IsCloned);
-    assert(TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg()) &&
-           "Cannot yet extract from physregs");
-    MI->getOperand(1).setSubReg(SubIdx);
-    MBB->insert(InsertPos, MI);
+      // Add source, and subreg index
+      AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false,
+                 IsClone, IsCloned);
+      assert(TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg())&&
+             "Cannot yet extract from physregs");
+      MI->getOperand(1).setSubReg(SubIdx);
+      MBB->insert(InsertPos, MI);
+    }
   } else if (Opc == TargetOpcode::INSERT_SUBREG ||
              Opc == TargetOpcode::SUBREG_TO_REG) {
     SDValue N0 = Node->getOperand(0);
@@ -496,7 +512,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
 
   SDValue Op(Node, 0);
   bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
-  isNew = isNew; // Silence compiler warning.
+  (void)isNew; // Silence compiler warning.
   assert(isNew && "Node emitted out of order - early");
 }
 
@@ -518,7 +534,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
 
   SDValue Op(Node, 0);
   bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
-  isNew = isNew; // Silence compiler warning.
+  (void)isNew; // Silence compiler warning.
   assert(isNew && "Node emitted out of order - early");
 }
 
@@ -543,9 +559,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
       const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
       const TargetRegisterClass *SRC =
         TRI->getMatchingSuperRegClass(RC, TRC, SubIdx);
-      if (!SRC)
-        llvm_unreachable("Invalid subregister index in REG_SEQUENCE");
-      if (SRC != RC) {
+      if (SRC && SRC != RC) {
         MRI->setRegClass(NewVReg, SRC);
         RC = SRC;
       }
@@ -557,7 +571,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
   MBB->insert(InsertPos, MI);
   SDValue Op(Node, 0);
   bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
-  isNew = isNew; // Silence compiler warning.
+  (void)isNew; // Silence compiler warning.
   assert(isNew && "Node emitted out of order - early");
 }
 
@@ -673,10 +687,10 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
   // The MachineInstr constructor adds implicit-def operands. Scan through
   // these to determine which are dead.
   if (MI->getNumOperands() != 0 &&
-      Node->getValueType(Node->getNumValues()-1) == MVT::Flag) {
+      Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
     // First, collect all used registers.
     SmallVector<unsigned, 8> UsedRegs;
-    for (SDNode *F = Node->getFlaggedUser(); F; F = F->getFlaggedUser())
+    for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser())
       if (F->getOpcode() == ISD::CopyFromReg)
         UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
       else {
@@ -689,7 +703,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
         for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
           if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
             unsigned Reg = R->getReg();
-            if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg))
+            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              UsedRegs.push_back(Reg);
           }
       }
@@ -721,20 +735,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
   // hook knows where in the block to insert the replacement code.
   MBB->insert(InsertPos, MI);
 
-  if (II.usesCustomInsertionHook()) {
-    // Insert this instruction into the basic block using a target
-    // specific inserter which may returns a new basic block.
-    bool AtEnd = InsertPos == MBB->end();
-    MachineBasicBlock *NewMBB = TLI->EmitInstrWithCustomInserter(MI, MBB);
-    if (NewMBB != MBB) {
-      if (AtEnd)
-        InsertPos = NewMBB->end();
-      MBB = NewMBB;
-    }
-    return;
-  }
-
-  // Additional results must be an physical register def.
+  // Additional results must be physical register defs.
   if (HasPhysRegOuts) {
     for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
       unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
@@ -742,17 +743,17 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
         EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
       // If there are no uses, mark the register as dead now, so that
       // MachineLICM/Sink can see that it's dead. Don't do this if the
-      // node has a Flag value, for the benefit of targets still using
-      // Flag for values in physregs.
-      else if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag)
+      // node has a Glue value, for the benefit of targets still using
+      // Glue for values in physregs.
+      else if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
         MI->addRegisterDead(Reg, TRI);
     }
   }
 
   // If the instruction has implicit defs and the node doesn't, mark the
-  // implicit def as dead.  If the node has any flag outputs, we don't do this
-  // because we don't know what implicit defs are being used by flagged nodes.
-  if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag)
+  // implicit def as dead.  If the node has any glue outputs, we don't do this
+  // because we don't know what implicit defs are being used by glued nodes.
+  if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
     if (const unsigned *IDList = II.getImplicitDefs()) {
       for (unsigned i = NumResults, e = II.getNumDefs()+II.getNumImplicitDefs();
            i != e; ++i)
@@ -808,8 +809,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
 
   case ISD::INLINEASM: {
     unsigned NumOps = Node->getNumOperands();
-    if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
-      --NumOps;  // Ignore the flag operand.
+    if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+      --NumOps;  // Ignore the glue operand.
 
     // Create the inline asm machine instruction.
     MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
@@ -820,11 +821,11 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
     const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol();
     MI->addOperand(MachineOperand::CreateES(AsmStr));
 
-    // Add the isAlignStack bit.
-    int64_t isAlignStack =
-      cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_IsAlignStack))->
+    // Add the HasSideEffect and isAlignStack bits.
+    int64_t ExtraInfo =
+      cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))->
                           getZExtValue();
-    MI->addOperand(MachineOperand::CreateImm(isAlignStack));
+    MI->addOperand(MachineOperand::CreateImm(ExtraInfo));
 
     // Add all of the operand registers to the instruction.
     for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 2981cd3..49c862c 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -11,14 +11,15 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Analysis/DebugInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetMachine.h"
@@ -65,11 +66,6 @@ class SelectionDAGLegalize {
   /// against each other, including inserted libcalls.
   SDValue LastCALLSEQ_END;
 
-  /// IsLegalizingCall - This member is used *only* for purposes of providing
-  /// helpful assertions that a libcall isn't created while another call is
-  /// being legalized (which could lead to non-serialized call sequences).
-  bool IsLegalizingCall;
-
   enum LegalizeAction {
     Legal,      // The target natively supports this operation.
     Promote,    // This operation should be executed in a larger type.
@@ -91,6 +87,9 @@ class SelectionDAGLegalize {
     // If someone requests legalization of the new node, return itself.
     if (From != To)
       LegalizedNodes.insert(std::make_pair(To, To));
+
+    // Transfer SDDbgValues.
+ DAG.TransferDbgValues(From, To); } public: @@ -172,6 +171,7 @@ private: SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl); SDValue ExpandExtractFromVectorThroughStack(SDValue Op); + SDValue ExpandInsertToVectorThroughStack(SDValue Op); SDValue ExpandVectorBuildThroughStack(SDNode* Node); std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); @@ -224,7 +224,6 @@ SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag, void SelectionDAGLegalize::LegalizeDAG() { LastCALLSEQ_END = DAG.getEntryNode(); - IsLegalizingCall = false; // The legalize process is inherently a bottom-up recursive process (users // legalize their uses before themselves). Given infinite stack space, we @@ -251,9 +250,16 @@ void SelectionDAGLegalize::LegalizeDAG() { /// FindCallEndFromCallStart - Given a chained node that is part of a call /// sequence, find the CALLSEQ_END node that terminates the call sequence. -static SDNode *FindCallEndFromCallStart(SDNode *Node) { - if (Node->getOpcode() == ISD::CALLSEQ_END) - return Node; +static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) { + // Nested CALLSEQ_START/END constructs aren't yet legal, + // but we can DTRT and handle them correctly here. + if (Node->getOpcode() == ISD::CALLSEQ_START) + depth++; + else if (Node->getOpcode() == ISD::CALLSEQ_END) { + depth--; + if (depth == 0) + return Node; + } if (Node->use_empty()) return 0; // No CallSeqEnd @@ -283,7 +289,7 @@ static SDNode *FindCallEndFromCallStart(SDNode *Node) { SDNode *User = *UI; for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) if (User->getOperand(i) == TheChain) - if (SDNode *Result = FindCallEndFromCallStart(User)) + if (SDNode *Result = FindCallEndFromCallStart(User, depth)) return Result; } return 0; @@ -292,12 +298,26 @@ static SDNode *FindCallEndFromCallStart(SDNode *Node) { /// FindCallStartFromCallEnd - Given a chained node that is part of a call /// sequence, find the CALLSEQ_START node that initiates the call sequence. 
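The reworked FindCallEndFromCallStart threads a depth parameter so that nested CALLSEQ_START/END pairs match up like parentheses. The same counting scheme applied to a flat sequence, as a self-contained sketch:

    #include <vector>

    enum Kind { Start, End, Other };

    // Index of the End matching the Start at 'begin', or -1 if unmatched.
    int findMatchingEnd(const std::vector<Kind> &seq, unsigned begin) {
      int depth = 0;
      for (unsigned i = begin; i < seq.size(); ++i) {
        if (seq[i] == Start)
          ++depth;                      // entering a nested sequence
        else if (seq[i] == End && --depth == 0)
          return (int)i;                // back at the outermost level
      }
      return -1;
    }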
static SDNode *FindCallStartFromCallEnd(SDNode *Node) { + int nested = 0; assert(Node && "Didn't find callseq_start for a call??"); - if (Node->getOpcode() == ISD::CALLSEQ_START) return Node; - - assert(Node->getOperand(0).getValueType() == MVT::Other && - "Node doesn't have a token chain argument!"); - return FindCallStartFromCallEnd(Node->getOperand(0).getNode()); + while (Node->getOpcode() != ISD::CALLSEQ_START || nested) { + Node = Node->getOperand(0).getNode(); + assert(Node->getOperand(0).getValueType() == MVT::Other && + "Node doesn't have a token chain argument!"); + switch (Node->getOpcode()) { + default: + break; + case ISD::CALLSEQ_START: + if (!nested) + return Node; + nested--; + break; + case ISD::CALLSEQ_END: + nested++; + break; + } + } + return 0; } /// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to @@ -377,12 +397,12 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy()); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); if (Extend) - return DAG.getExtLoad(ISD::EXTLOAD, OrigVT, dl, + return DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, DAG.getEntryNode(), - CPIdx, PseudoSourceValue::getConstantPool(), - 0, VT, false, false, Alignment); + CPIdx, MachinePointerInfo::getConstantPool(), + VT, false, false, Alignment); return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, false, false, + MachinePointerInfo::getConstantPool(), false, false, Alignment); } @@ -395,7 +415,6 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Val = ST->getValue(); EVT VT = Val.getValueType(); int Alignment = ST->getAlignment(); - int SVOffset = ST->getSrcValueOffset(); DebugLoc dl = ST->getDebugLoc(); if (ST->getMemoryVT().isFloatingPoint() || ST->getMemoryVT().isVector()) { @@ -404,10 +423,9 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Expand to a bitconvert of the value to the integer type of the // same size, then a (misaligned) int store. // FIXME: Does not handle truncating floating point stores! - SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, intVT, Val); - return DAG.getStore(Chain, dl, Result, Ptr, ST->getSrcValue(), - SVOffset, ST->isVolatile(), ST->isNonTemporal(), - Alignment); + SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val); + return DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), + ST->isVolatile(), ST->isNonTemporal(), Alignment); } else { // Do a (aligned) store to a stack slot, then copy from the stack slot // to the final destination using (unaligned) integer loads and stores. @@ -425,8 +443,8 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Perform the original store, only redirected to the stack slot. SDValue Store = DAG.getTruncStore(Chain, dl, - Val, StackPtr, NULL, 0, StoredVT, - false, false, 0); + Val, StackPtr, MachinePointerInfo(), + StoredVT, false, false, 0); SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); SmallVector<SDValue, 8> Stores; unsigned Offset = 0; @@ -434,11 +452,12 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Do all but one copies using the full register width. for (unsigned i = 1; i < NumRegs; i++) { // Load one integer register's worth from the stack slot. 
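For the misaligned floating-point store above, the BITCAST path simply reinterprets the value as a same-width integer and lets the integer store handle the alignment. The same computation in portable C++, where memcpy spells both the bitcast and the unaligned access:

    #include <cstdint>
    #include <cstring>

    void storeFloatUnaligned(void *p, float v) {
      uint32_t bits;
      std::memcpy(&bits, &v, sizeof bits);  // ISD::BITCAST f32 -> i32
      std::memcpy(p, &bits, sizeof bits);   // the (misaligned) i32 store
    }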
- SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, NULL, 0, + SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, + MachinePointerInfo(), false, false, 0); // Store it to the final location. Remember the store. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, - ST->getSrcValue(), SVOffset + Offset, + ST->getPointerInfo().getWithOffset(Offset), ST->isVolatile(), ST->isNonTemporal(), MinAlign(ST->getAlignment(), Offset))); // Increment the pointers. @@ -455,11 +474,13 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, 8 * (StoredBytes - Offset)); // Load from the stack slot. - SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Store, StackPtr, - NULL, 0, MemVT, false, false, 0); + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, + MachinePointerInfo(), + MemVT, false, false, 0); Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, - ST->getSrcValue(), SVOffset + Offset, + ST->getPointerInfo() + .getWithOffset(Offset), MemVT, ST->isVolatile(), ST->isNonTemporal(), MinAlign(ST->getAlignment(), Offset))); @@ -484,13 +505,13 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Store the two parts SDValue Store1, Store2; Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr, - ST->getSrcValue(), SVOffset, NewStoredVT, + ST->getPointerInfo(), NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, TLI.getPointerTy())); Alignment = MinAlign(Alignment, IncrementSize); Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr, - ST->getSrcValue(), SVOffset + IncrementSize, + ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), Alignment); @@ -501,7 +522,6 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, static SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, const TargetLowering &TLI) { - int SVOffset = LD->getSrcValueOffset(); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); EVT VT = LD->getValueType(0); @@ -512,74 +532,75 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, if (TLI.isTypeLegal(intVT)) { // Expand to a (misaligned) integer load of the same size, // then bitconvert to floating point or vector. - SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getSrcValue(), - SVOffset, LD->isVolatile(), + SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(), + LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); - SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, LoadedVT, newLoad); + SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad); if (VT.isFloatingPoint() && LoadedVT != VT) Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result); SDValue Ops[] = { Result, Chain }; return DAG.getMergeValues(Ops, 2, dl); - } else { - // Copy the value to a (aligned) stack slot using (unaligned) integer - // loads and stores, then do a (aligned) load from the stack slot. - EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT); - unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8; - unsigned RegBytes = RegVT.getSizeInBits() / 8; - unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes; - - // Make sure the stack slot is also aligned for the register type. 
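When no matching legal integer type exists, the final fallback above splits the store into two half-width stores, swapping which half lands at the lower address on big-endian targets. A sketch for an i32 store, under the assumption that the host's byte order matches the target's:

    #include <cstdint>
    #include <cstring>

    void store32AsTwo16(unsigned char *p, uint32_t v, bool littleEndian) {
      uint16_t lo = (uint16_t)v;
      uint16_t hi = (uint16_t)(v >> 16);
      uint16_t first  = littleEndian ? lo : hi;   // isLittleEndian()?Lo:Hi
      uint16_t second = littleEndian ? hi : lo;
      std::memcpy(p, &first, 2);       // at the original alignment
      std::memcpy(p + 2, &second, 2);  // at MinAlign(Alignment, 2)
    }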
- SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT); - - SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); - SmallVector<SDValue, 8> Stores; - SDValue StackPtr = StackBase; - unsigned Offset = 0; - - // Do all but one copies using the full register width. - for (unsigned i = 1; i < NumRegs; i++) { - // Load one integer register's worth from the original location. - SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, LD->getSrcValue(), - SVOffset + Offset, LD->isVolatile(), - LD->isNonTemporal(), - MinAlign(LD->getAlignment(), Offset)); - // Follow the load with a store to the stack slot. Remember the store. - Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, - NULL, 0, false, false, 0)); - // Increment the pointers. - Offset += RegBytes; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); - StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - Increment); - } + } - // The last copy may be partial. Do an extending load. - EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), - 8 * (LoadedBytes - Offset)); - SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Chain, Ptr, - LD->getSrcValue(), SVOffset + Offset, - MemVT, LD->isVolatile(), - LD->isNonTemporal(), - MinAlign(LD->getAlignment(), Offset)); + // Copy the value to a (aligned) stack slot using (unaligned) integer + // loads and stores, then do a (aligned) load from the stack slot. + EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT); + unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8; + unsigned RegBytes = RegVT.getSizeInBits() / 8; + unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes; + + // Make sure the stack slot is also aligned for the register type. + SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT); + + SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); + SmallVector<SDValue, 8> Stores; + SDValue StackPtr = StackBase; + unsigned Offset = 0; + + // Do all but one copies using the full register width. + for (unsigned i = 1; i < NumRegs; i++) { + // Load one integer register's worth from the original location. + SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, + LD->getPointerInfo().getWithOffset(Offset), + LD->isVolatile(), LD->isNonTemporal(), + MinAlign(LD->getAlignment(), Offset)); // Follow the load with a store to the stack slot. Remember the store. - // On big-endian machines this requires a truncating store to ensure - // that the bits end up in the right place. - Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr, - NULL, 0, MemVT, false, false, 0)); - - // The order of the stores doesn't matter - say it with a TokenFactor. - SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], - Stores.size()); - - // Finally, perform the original load only redirected to the stack slot. - Load = DAG.getExtLoad(LD->getExtensionType(), VT, dl, TF, StackBase, - NULL, 0, LoadedVT, false, false, 0); - - // Callers expect a MERGE_VALUES node. - SDValue Ops[] = { Load, TF }; - return DAG.getMergeValues(Ops, 2, dl); + Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, + MachinePointerInfo(), false, false, 0)); + // Increment the pointers. + Offset += RegBytes; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + Increment); } + + // The last copy may be partial. Do an extending load. 
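The restructured ExpandUnalignedLoad keeps the same stack-slot strategy: copy the operand into an aligned temporary in register-width chunks, then perform the original load against the temporary. Roughly, for an 8-byte value on a machine with 4-byte registers (the sizes are illustrative):

    #include <cstdint>
    #include <cstring>

    uint64_t loadViaStackSlot(const unsigned char *src) {
      alignas(8) unsigned char slot[8];           // CreateStackTemporary
      for (unsigned off = 0; off < 8; off += 4)   // register-width copies
        std::memcpy(slot + off, src + off, 4);
      uint64_t v;
      std::memcpy(&v, slot, sizeof v);            // final, aligned load
      return v;
    }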
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), + 8 * (LoadedBytes - Offset)); + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr, + LD->getPointerInfo().getWithOffset(Offset), + MemVT, LD->isVolatile(), + LD->isNonTemporal(), + MinAlign(LD->getAlignment(), Offset)); + // Follow the load with a store to the stack slot. Remember the store. + // On big-endian machines this requires a truncating store to ensure + // that the bits end up in the right place. + Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr, + MachinePointerInfo(), MemVT, + false, false, 0)); + + // The order of the stores doesn't matter - say it with a TokenFactor. + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], + Stores.size()); + + // Finally, perform the original load only redirected to the stack slot. + Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, + MachinePointerInfo(), LoadedVT, false, false, 0); + + // Callers expect a MERGE_VALUES node. + SDValue Ops[] = { Load, TF }; + return DAG.getMergeValues(Ops, 2, dl); } assert(LoadedVT.isInteger() && !LoadedVT.isVector() && "Unaligned load of unsupported type."); @@ -602,22 +623,24 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // Load the value in two parts SDValue Lo, Hi; if (TLI.isLittleEndian()) { - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(), - SVOffset, NewLoadedVT, LD->isVolatile(), + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(), + NewLoadedVT, LD->isVolatile(), LD->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, TLI.getPointerTy())); - Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getSrcValue(), - SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(), + Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + NewLoadedVT, LD->isVolatile(), LD->isNonTemporal(), MinAlign(Alignment,IncrementSize)); } else { - Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getSrcValue(), - SVOffset, NewLoadedVT, LD->isVolatile(), + Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(), + NewLoadedVT, LD->isVolatile(), LD->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, TLI.getPointerTy())); - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(), - SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(), + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + NewLoadedVT, LD->isVolatile(), LD->isNonTemporal(), MinAlign(Alignment,IncrementSize)); } @@ -660,7 +683,7 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, // Store the vector. SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr, - PseudoSourceValue::getFixedStack(SPFI), 0, + MachinePointerInfo::getFixedStack(SPFI), false, false, 0); // Truncate or zero extend offset to target pointer type. @@ -671,13 +694,11 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,DAG.getConstant(EltSize, IdxVT)); SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr); // Store the scalar value. 
- Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, - PseudoSourceValue::getFixedStack(SPFI), 0, EltVT, + Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT, false, false, 0); // Load the updated vector. return DAG.getLoad(VT, dl, Ch, StackPtr, - PseudoSourceValue::getFixedStack(SPFI), 0, - false, false, 0); + MachinePointerInfo::getFixedStack(SPFI), false, false, 0); } @@ -719,7 +740,6 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { SDValue Tmp1 = ST->getChain(); SDValue Tmp2 = ST->getBasePtr(); SDValue Tmp3; - int SVOffset = ST->getSrcValueOffset(); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); @@ -730,29 +750,34 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { Tmp3 = DAG.getConstant(CFP->getValueAPF(). bitcastToAPInt().zextOrTrunc(32), MVT::i32); - return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, isNonTemporal, Alignment); - } else if (CFP->getValueType(0) == MVT::f64) { + return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); + } + + if (CFP->getValueType(0) == MVT::f64) { // If this target supports 64-bit registers, do a single 64-bit store. if (getTypeAction(MVT::i64) == Legal) { Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). zextOrTrunc(64), MVT::i64); - return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, isNonTemporal, Alignment); - } else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) { + return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); + } + + if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) { // Otherwise, if the target supports 32-bit registers, use 2 32-bit // stores. If the target supports neither 32- nor 64-bits, this // xform is certainly not worth it. const APInt &IntVal =CFP->getValueAPF().bitcastToAPInt(); - SDValue Lo = DAG.getConstant(APInt(IntVal).trunc(32), MVT::i32); + SDValue Lo = DAG.getConstant(IntVal.trunc(32), MVT::i32); SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32); if (TLI.isBigEndian()) std::swap(Lo, Hi); - Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, isNonTemporal, Alignment); + Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getPointerInfo(), isVolatile, + isNonTemporal, Alignment); Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, DAG.getIntPtrConstant(4)); - Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4, + Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, + ST->getPointerInfo().getWithOffset(4), isVolatile, isNonTemporal, MinAlign(Alignment, 4U)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); @@ -792,7 +817,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { bool isCustom = false; // Figure out the correct action; the way to query this varies by opcode - TargetLowering::LegalizeAction Action; + TargetLowering::LegalizeAction Action = TargetLowering::Legal; bool SimpleFinishLegalizing = true; switch (Node->getOpcode()) { case ISD::INTRINSIC_W_CHAIN: @@ -860,6 +885,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::FRAME_TO_ARGS_OFFSET: case ISD::EH_SJLJ_SETJMP: case ISD::EH_SJLJ_LONGJMP: + case ISD::EH_SJLJ_DISPATCHSETUP: // These operations lie about being legal: when they claim to be legal, // they should actually be expanded. 
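OptimizeFloatStore, above, exploits the fact that storing a floating-point *constant* never needs an FP register: the bit pattern is known at compile time, so the store can be rewritten over integers. The two-i32 fallback for f64, sketched for a little-endian target with 1.0 as the constant:

    #include <cstdint>
    #include <cstring>

    void storeDoubleOneAsTwoInts(unsigned char *p) {
      const uint64_t bits = 0x3FF0000000000000ULL;  // bitcastToAPInt() of 1.0
      uint32_t lo = (uint32_t)bits;                 // IntVal.trunc(32)
      uint32_t hi = (uint32_t)(bits >> 32);         // IntVal.lshr(32).trunc(32)
      std::memcpy(p, &lo, 4);                       // low word first (LE)
      std::memcpy(p + 4, &hi, 4);                   // MinAlign(Alignment, 4)
    }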
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); @@ -996,6 +1022,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } break; case ISD::CALLSEQ_START: { + static int depth = 0; SDNode *CallEnd = FindCallEndFromCallStart(Node); // Recursively Legalize all of the inputs of the call end that do not lead @@ -1013,7 +1040,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Merge in the last call to ensure that this call starts after the last // call ended. - if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) { + if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken && depth == 0) { Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Tmp1, LastCALLSEQ_END); Tmp1 = LegalizeOp(Tmp1); @@ -1036,14 +1063,18 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // sequence have been legalized, legalize the call itself. During this // process, no libcalls can/will be inserted, guaranteeing that no calls // can overlap. - assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!"); + + SDValue Saved_LastCALLSEQ_END = LastCALLSEQ_END ; // Note that we are selecting this call! LastCALLSEQ_END = SDValue(CallEnd, 0); - IsLegalizingCall = true; + depth++; // Legalize the call, starting from the CALLSEQ_END. LegalizeOp(LastCALLSEQ_END); - assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!"); + depth--; + assert(depth >= 0 && "Un-matched CALLSEQ_START?"); + if (depth > 0) + LastCALLSEQ_END = Saved_LastCALLSEQ_END; return Result; } case ISD::CALLSEQ_END: @@ -1062,7 +1093,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. // Do not try to legalize the target-specific arguments (#1+), except for // an optional flag input. - if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Flag){ + if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Glue){ if (Tmp1 != Node->getOperand(0)) { SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end()); Ops[0] = Tmp1; @@ -1082,10 +1113,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result.getResNo()); } } - assert(IsLegalizingCall && "Call sequence imbalance between start/end?"); // This finishes up call legalization. - IsLegalizingCall = false; - // If the CALLSEQ_END node has a flag, remember that we legalized it. AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0)); if (Node->getNumValues() == 2) @@ -1136,11 +1164,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Change base type to a different vector type. EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); - Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getSrcValue(), - LD->getSrcValueOffset(), + Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); - Tmp3 = LegalizeOp(DAG.getNode(ISD::BIT_CONVERT, dl, VT, Tmp1)); + Tmp3 = LegalizeOp(DAG.getNode(ISD::BITCAST, dl, VT, Tmp1)); Tmp4 = LegalizeOp(Tmp1.getValue(1)); break; } @@ -1150,227 +1177,224 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { AddLegalizedOperand(SDValue(Node, 0), Tmp3); AddLegalizedOperand(SDValue(Node, 1), Tmp4); return Op.getResNo() ? 
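The CALLSEQ_START hunk above replaces the old IsLegalizingCall flag with a depth counter plus save/restore of LastCALLSEQ_END, so a call sequence legalized inside another no longer trips an assertion. Its control flow, reduced to a skeleton (the names and int stand-ins are illustrative only):

    #include <cassert>

    static int depth = 0;
    static int lastCallSeqEnd = 0;        // stand-in for LastCALLSEQ_END

    void legalizeCallSequence(int callEnd) {
      int saved = lastCallSeqEnd;         // Saved_LastCALLSEQ_END
      lastCallSeqEnd = callEnd;
      ++depth;
      // ... legalize the call; this may re-enter legalizeCallSequence ...
      --depth;
      assert(depth >= 0 && "Un-matched CALLSEQ_START?");
      if (depth > 0)
        lastCallSeqEnd = saved;           // restore for the enclosing call
    }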
Tmp4 : Tmp3; - } else { - EVT SrcVT = LD->getMemoryVT(); - unsigned SrcWidth = SrcVT.getSizeInBits(); - int SVOffset = LD->getSrcValueOffset(); - unsigned Alignment = LD->getAlignment(); - bool isVolatile = LD->isVolatile(); - bool isNonTemporal = LD->isNonTemporal(); - - if (SrcWidth != SrcVT.getStoreSizeInBits() && - // Some targets pretend to have an i1 loading operation, and actually - // load an i8. This trick is correct for ZEXTLOAD because the top 7 - // bits are guaranteed to be zero; it helps the optimizers understand - // that these bits are zero. It is also useful for EXTLOAD, since it - // tells the optimizers that those bits are undefined. It would be - // nice to have an effective generic way of getting these benefits... - // Until such a way is found, don't insist on promoting i1 here. - (SrcVT != MVT::i1 || - TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) { - // Promote to a byte-sized load if not loading an integral number of - // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24. - unsigned NewWidth = SrcVT.getStoreSizeInBits(); - EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth); - SDValue Ch; - - // The extra bits are guaranteed to be zero, since we stored them that - // way. A zext load from NVT thus automatically gives zext from SrcVT. - - ISD::LoadExtType NewExtType = - ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; - - Result = DAG.getExtLoad(NewExtType, Node->getValueType(0), dl, - Tmp1, Tmp2, LD->getSrcValue(), SVOffset, - NVT, isVolatile, isNonTemporal, Alignment); - - Ch = Result.getValue(1); // The chain. - - if (ExtType == ISD::SEXTLOAD) - // Having the top bits zero doesn't help when sign extending. - Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, - Result.getValueType(), - Result, DAG.getValueType(SrcVT)); - else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType()) - // All the top bits are guaranteed to be zero - inform the optimizers. - Result = DAG.getNode(ISD::AssertZext, dl, - Result.getValueType(), Result, - DAG.getValueType(SrcVT)); - - Tmp1 = LegalizeOp(Result); - Tmp2 = LegalizeOp(Ch); - } else if (SrcWidth & (SrcWidth - 1)) { - // If not loading a power-of-2 number of bits, expand as two loads. - assert(!SrcVT.isVector() && "Unsupported extload!"); - unsigned RoundWidth = 1 << Log2_32(SrcWidth); - assert(RoundWidth < SrcWidth); - unsigned ExtraWidth = SrcWidth - RoundWidth; - assert(ExtraWidth < RoundWidth); - assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && - "Load size not an integral number of bytes!"); - EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); - EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); - SDValue Lo, Hi, Ch; - unsigned IncrementSize; + } - if (TLI.isLittleEndian()) { - // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) - // Load the bottom RoundWidth bits. - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, Node->getValueType(0), dl, - Tmp1, Tmp2, - LD->getSrcValue(), SVOffset, RoundVT, isVolatile, - isNonTemporal, Alignment); - - // Load the remaining ExtraWidth bits. - IncrementSize = RoundWidth / 8; - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(IncrementSize)); - Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2, - LD->getSrcValue(), SVOffset + IncrementSize, - ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); - - // Build a factor node to remember that this load is independent of - // the other one. 
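The promotion comment above is worth unpacking: a truncating store of an iN writes ceil(N/8) bytes with the pad bits zeroed, so a zero-extending load of all of those bytes recovers a correctly zero-extended iN. Hypothetical i20 helpers make the round trip concrete (little-endian assumed; the names are invented):

    #include <cstdint>
    #include <cstring>

    void truncstore_i20(unsigned char *p, uint32_t v) {
      uint32_t bits = v & 0xFFFFF;   // pad bits above bit 19 stored as zero
      std::memcpy(p, &bits, 3);      // the promoted i24 store (LE low bytes)
    }

    uint32_t zextload_i20(const unsigned char *p) {
      uint32_t v = 0;
      std::memcpy(&v, p, 3);         // a zext load from i24 ...
      return v;                      // ... is automatically zext from i20
    }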
- Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), - Hi.getValue(1)); - - // Move the top bits to the right place. - Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, - DAG.getConstant(RoundWidth, TLI.getShiftAmountTy())); + EVT SrcVT = LD->getMemoryVT(); + unsigned SrcWidth = SrcVT.getSizeInBits(); + unsigned Alignment = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); + + if (SrcWidth != SrcVT.getStoreSizeInBits() && + // Some targets pretend to have an i1 loading operation, and actually + // load an i8. This trick is correct for ZEXTLOAD because the top 7 + // bits are guaranteed to be zero; it helps the optimizers understand + // that these bits are zero. It is also useful for EXTLOAD, since it + // tells the optimizers that those bits are undefined. It would be + // nice to have an effective generic way of getting these benefits... + // Until such a way is found, don't insist on promoting i1 here. + (SrcVT != MVT::i1 || + TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) { + // Promote to a byte-sized load if not loading an integral number of + // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24. + unsigned NewWidth = SrcVT.getStoreSizeInBits(); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth); + SDValue Ch; + + // The extra bits are guaranteed to be zero, since we stored them that + // way. A zext load from NVT thus automatically gives zext from SrcVT. + + ISD::LoadExtType NewExtType = + ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; + + Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), + Tmp1, Tmp2, LD->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); + + Ch = Result.getValue(1); // The chain. + + if (ExtType == ISD::SEXTLOAD) + // Having the top bits zero doesn't help when sign extending. + Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, + Result.getValueType(), + Result, DAG.getValueType(SrcVT)); + else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType()) + // All the top bits are guaranteed to be zero - inform the optimizers. + Result = DAG.getNode(ISD::AssertZext, dl, + Result.getValueType(), Result, + DAG.getValueType(SrcVT)); + + Tmp1 = LegalizeOp(Result); + Tmp2 = LegalizeOp(Ch); + } else if (SrcWidth & (SrcWidth - 1)) { + // If not loading a power-of-2 number of bits, expand as two loads. + assert(!SrcVT.isVector() && "Unsupported extload!"); + unsigned RoundWidth = 1 << Log2_32(SrcWidth); + assert(RoundWidth < SrcWidth); + unsigned ExtraWidth = SrcWidth - RoundWidth; + assert(ExtraWidth < RoundWidth); + assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && + "Load size not an integral number of bytes!"); + EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); + EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); + SDValue Lo, Hi, Ch; + unsigned IncrementSize; + + if (TLI.isLittleEndian()) { + // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) + // Load the bottom RoundWidth bits. + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), + Tmp1, Tmp2, + LD->getPointerInfo(), RoundVT, isVolatile, + isNonTemporal, Alignment); + + // Load the remaining ExtraWidth bits. 
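The little-endian comment EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) translates directly into C:

    #include <cstdint>
    #include <cstring>

    uint32_t load24LE(const unsigned char *p) {
      uint16_t lo;
      std::memcpy(&lo, p, 2);              // bottom RoundWidth bits
      uint32_t hi = p[2];                  // remaining ExtraWidth bits
      return (uint32_t)lo | (hi << 16);    // shl RoundWidth, then or
    }

The big-endian branch that follows is the mirror image: the high bits load first, and the shift amount becomes ExtraWidth instead of RoundWidth.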
+ IncrementSize = RoundWidth / 8; + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, + LD->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of + // the other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Move the top bits to the right place. + Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, + DAG.getConstant(RoundWidth, TLI.getShiftAmountTy())); + + // Join the hi and lo parts. + Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + } else { + // Big endian - avoid unaligned loads. + // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 + // Load the top RoundWidth bits. + Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, + LD->getPointerInfo(), RoundVT, isVolatile, + isNonTemporal, Alignment); + + // Load the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(IncrementSize)); + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, + dl, Node->getValueType(0), Tmp1, Tmp2, + LD->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of + // the other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Move the top bits to the right place. + Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, + DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy())); + + // Join the hi and lo parts. + Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + } - // Join the hi and lo parts. - Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + Tmp1 = LegalizeOp(Result); + Tmp2 = LegalizeOp(Ch); + } else { + switch (TLI.getLoadExtAction(ExtType, SrcVT)) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Custom: + isCustom = true; + // FALLTHROUGH + case TargetLowering::Legal: + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + Tmp1, Tmp2, LD->getOffset()), + Result.getResNo()); + Tmp1 = Result.getValue(0); + Tmp2 = Result.getValue(1); + + if (isCustom) { + Tmp3 = TLI.LowerOperation(Result, DAG); + if (Tmp3.getNode()) { + Tmp1 = LegalizeOp(Tmp3); + Tmp2 = LegalizeOp(Tmp3.getValue(1)); + } } else { - // Big endian - avoid unaligned loads. - // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 - // Load the top RoundWidth bits. - Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2, - LD->getSrcValue(), SVOffset, RoundVT, isVolatile, - isNonTemporal, Alignment); - - // Load the remaining ExtraWidth bits. - IncrementSize = RoundWidth / 8; - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(IncrementSize)); - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, - Node->getValueType(0), dl, Tmp1, Tmp2, - LD->getSrcValue(), SVOffset + IncrementSize, - ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); - - // Build a factor node to remember that this load is independent of - // the other one. - Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), - Hi.getValue(1)); - - // Move the top bits to the right place. 
- Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, - DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy())); - - // Join the hi and lo parts. - Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); - } - - Tmp1 = LegalizeOp(Result); - Tmp2 = LegalizeOp(Ch); - } else { - switch (TLI.getLoadExtAction(ExtType, SrcVT)) { - default: assert(0 && "This action is not supported yet!"); - case TargetLowering::Custom: - isCustom = true; - // FALLTHROUGH - case TargetLowering::Legal: - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - Tmp1, Tmp2, LD->getOffset()), - Result.getResNo()); - Tmp1 = Result.getValue(0); - Tmp2 = Result.getValue(1); - - if (isCustom) { - Tmp3 = TLI.LowerOperation(Result, DAG); - if (Tmp3.getNode()) { - Tmp1 = LegalizeOp(Tmp3); - Tmp2 = LegalizeOp(Tmp3.getValue(1)); - } - } else { - // If this is an unaligned load and the target doesn't support it, - // expand it. - if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { - const Type *Ty = - LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = - TLI.getTargetData()->getABITypeAlignment(Ty); - if (LD->getAlignment() < ABIAlignment){ - Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), - DAG, TLI); - Tmp1 = Result.getOperand(0); - Tmp2 = Result.getOperand(1); - Tmp1 = LegalizeOp(Tmp1); - Tmp2 = LegalizeOp(Tmp2); - } + // If this is an unaligned load and the target doesn't support it, + // expand it. + if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { + const Type *Ty = + LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = + TLI.getTargetData()->getABITypeAlignment(Ty); + if (LD->getAlignment() < ABIAlignment){ + Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), + DAG, TLI); + Tmp1 = Result.getOperand(0); + Tmp2 = Result.getOperand(1); + Tmp1 = LegalizeOp(Tmp1); + Tmp2 = LegalizeOp(Tmp2); } } - break; - case TargetLowering::Expand: - if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && isTypeLegal(SrcVT)) { - SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(), - LD->getSrcValueOffset(), - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); - unsigned ExtendOp; - switch (ExtType) { - case ISD::EXTLOAD: - ExtendOp = (SrcVT.isFloatingPoint() ? - ISD::FP_EXTEND : ISD::ANY_EXTEND); - break; - case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break; - case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; - default: llvm_unreachable("Unexpected extend load type!"); - } - Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); - Tmp1 = LegalizeOp(Result); // Relegalize new nodes. - Tmp2 = LegalizeOp(Load.getValue(1)); + } + break; + case TargetLowering::Expand: + if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && isTypeLegal(SrcVT)) { + SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, + LD->getPointerInfo(), + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); + unsigned ExtendOp; + switch (ExtType) { + case ISD::EXTLOAD: + ExtendOp = (SrcVT.isFloatingPoint() ? + ISD::FP_EXTEND : ISD::ANY_EXTEND); break; + case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break; + case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; + default: llvm_unreachable("Unexpected extend load type!"); } - // FIXME: This does not work for vectors on most targets. Sign- and - // zero-extend operations are currently folded into extending loads, - // whether they are legal or not, and then we end up here without any - // support for legalizing them. 
- assert(ExtType != ISD::EXTLOAD && - "EXTLOAD should always be supported!"); - // Turn the unsupported load into an EXTLOAD followed by an explicit - // zero/sign extend inreg. - Result = DAG.getExtLoad(ISD::EXTLOAD, Node->getValueType(0), dl, - Tmp1, Tmp2, LD->getSrcValue(), - LD->getSrcValueOffset(), SrcVT, - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); - SDValue ValRes; - if (ExtType == ISD::SEXTLOAD) - ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, - Result.getValueType(), - Result, DAG.getValueType(SrcVT)); - else - ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT); - Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. - Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes. + Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); + Tmp1 = LegalizeOp(Result); // Relegalize new nodes. + Tmp2 = LegalizeOp(Load.getValue(1)); break; } + // FIXME: This does not work for vectors on most targets. Sign- and + // zero-extend operations are currently folded into extending loads, + // whether they are legal or not, and then we end up here without any + // support for legalizing them. + assert(ExtType != ISD::EXTLOAD && + "EXTLOAD should always be supported!"); + // Turn the unsupported load into an EXTLOAD followed by an explicit + // zero/sign extend inreg. + Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), + Tmp1, Tmp2, LD->getPointerInfo(), SrcVT, + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); + SDValue ValRes; + if (ExtType == ISD::SEXTLOAD) + ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, + Result.getValueType(), + Result, DAG.getValueType(SrcVT)); + else + ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); + Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. + Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes. + break; } - - // Since loads produce two values, make sure to remember that we legalized - // both of them. - AddLegalizedOperand(SDValue(Node, 0), Tmp1); - AddLegalizedOperand(SDValue(Node, 1), Tmp2); - return Op.getResNo() ? Tmp2 : Tmp1; } + + // Since loads produce two values, make sure to remember that we legalized + // both of them. + AddLegalizedOperand(SDValue(Node, 0), Tmp1); + AddLegalizedOperand(SDValue(Node, 1), Tmp2); + return Op.getResNo() ? Tmp2 : Tmp1; } case ISD::STORE: { StoreSDNode *ST = cast<StoreSDNode>(Node); Tmp1 = LegalizeOp(ST->getChain()); // Legalize the chain. Tmp2 = LegalizeOp(ST->getBasePtr()); // Legalize the pointer. 
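For targets without a native extending load, the fallback above is a plain EXTLOAD followed by an explicit in-register extension. SIGN_EXTEND_INREG of an n-bit value is the familiar two-shift pattern (this sketch relies on arithmetic right shift of signed values, which every mainstream compiler provides):

    #include <cstdint>

    int32_t signExtendInReg(int32_t x, unsigned n) {
      unsigned sh = 32 - n;                        // n = SrcVT width in bits
      return (int32_t)((uint32_t)x << sh) >> sh;   // arithmetic shift back
    }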
- int SVOffset = ST->getSrcValueOffset(); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); @@ -1408,10 +1432,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { break; case TargetLowering::Promote: assert(VT.isVector() && "Unknown legal promote case!"); - Tmp3 = DAG.getNode(ISD::BIT_CONVERT, dl, + Tmp3 = DAG.getNode(ISD::BITCAST, dl, TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3); Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, - ST->getSrcValue(), SVOffset, isVolatile, + ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment); break; } @@ -1430,9 +1454,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StVT.getStoreSizeInBits()); Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT); - Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, NVT, isVolatile, isNonTemporal, - Alignment); + Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. assert(!StVT.isVector() && "Unsupported truncstore!"); @@ -1450,8 +1473,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (TLI.isLittleEndian()) { // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) // Store the bottom RoundWidth bits. - Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, RoundVT, + Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + RoundVT, isVolatile, isNonTemporal, Alignment); // Store the remaining ExtraWidth bits. @@ -1460,9 +1483,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3, DAG.getConstant(RoundWidth, TLI.getShiftAmountTy())); - Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), - SVOffset + IncrementSize, ExtraVT, isVolatile, - isNonTemporal, + Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, + ST->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); } else { // Big endian - avoid unaligned stores. @@ -1470,17 +1493,16 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Store the top RoundWidth bits. Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3, DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy())); - Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), - SVOffset, RoundVT, isVolatile, isNonTemporal, - Alignment); + Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getPointerInfo(), + RoundVT, isVolatile, isNonTemporal, Alignment); // Store the remaining ExtraWidth bits. 
IncrementSize = RoundWidth / 8; Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, DAG.getIntPtrConstant(IncrementSize)); - Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset + IncrementSize, ExtraVT, isVolatile, - isNonTemporal, + Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, + ST->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); } @@ -1514,9 +1536,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // TRUNCSTORE:i16 i32 -> STORE i16 assert(isTypeLegal(StVT) && "Do not know how to expand this store!"); Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3); - Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, isNonTemporal, - Alignment); + Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); break; } } @@ -1543,8 +1564,8 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { DebugLoc dl = Op.getDebugLoc(); // Store the value to a temporary stack slot, then LOAD the returned part. SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); - SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0, - false, false, 0); + SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, + MachinePointerInfo(), false, false, 0); // Add the offset to the index. unsigned EltSize = @@ -1560,12 +1581,56 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); if (Op.getValueType().isVector()) - return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0, + return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(), false, false, 0); + return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, + MachinePointerInfo(), + Vec.getValueType().getVectorElementType(), + false, false, 0); +} + +SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { + assert(Op.getValueType().isVector() && "Non-vector insert subvector!"); + + SDValue Vec = Op.getOperand(0); + SDValue Part = Op.getOperand(1); + SDValue Idx = Op.getOperand(2); + DebugLoc dl = Op.getDebugLoc(); + + // Store the value to a temporary stack slot, then LOAD the returned part. + + SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); + int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI); + + // First store the whole vector. + SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo, + false, false, 0); + + // Then store the inserted part. + + // Add the offset to the index. + unsigned EltSize = + Vec.getValueType().getVectorElementType().getSizeInBits()/8; + + Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, + DAG.getConstant(EltSize, Idx.getValueType())); + + if (Idx.getValueType().bitsGT(TLI.getPointerTy())) + Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx); else - return DAG.getExtLoad(ISD::EXTLOAD, Op.getValueType(), dl, Ch, StackPtr, - NULL, 0, Vec.getValueType().getVectorElementType(), - false, false, 0); + Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx); + + SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, + StackPtr); + + // Store the subvector. 
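ExpandExtractFromVectorThroughStack and the new ExpandInsertToVectorThroughStack share one trick: spill the vector to a stack temporary and address the element at base + index * element-size. A miniature of the extract side, with a fixed 4 x float vector as the assumed type:

    #include <cstring>

    float extractElement(const float *vec4, unsigned idx) {
      float slot[4];
      std::memcpy(slot, vec4, sizeof slot);  // store the whole vector
      return slot[idx];                      // load from StackPtr + Idx*EltSize
    }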
+ Ch = DAG.getStore(DAG.getEntryNode(), dl, Part, SubStackPtr, + MachinePointerInfo(), false, false, 0); + + // Finally, load the updated vector. + return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo, + false, false, 0); } SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { @@ -1578,7 +1643,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { DebugLoc dl = Node->getDebugLoc(); SDValue FIPtr = DAG.CreateStackTemporary(VT); int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); - const Value *SV = PseudoSourceValue::getFixedStack(FI); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI); // Emit a store of each element to the stack slot. SmallVector<SDValue, 8> Stores; @@ -1597,11 +1662,13 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { // element type, only store the bits necessary. if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) { Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl, - Node->getOperand(i), Idx, SV, Offset, + Node->getOperand(i), Idx, + PtrInfo.getWithOffset(Offset), EltVT, false, false, 0)); } else Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, - Node->getOperand(i), Idx, SV, Offset, + Node->getOperand(i), Idx, + PtrInfo.getWithOffset(Offset), false, false, 0)); } @@ -1613,7 +1680,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { StoreChain = DAG.getEntryNode(); // Result is a load from the stack slot. - return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0, false, false, 0); + return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, false, false, 0); } SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { @@ -1628,7 +1695,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits()); if (isTypeLegal(IVT)) { // Convert to an integer with the same sign bit. - SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, IVT, Tmp2); + SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2); } else { // Store the float to memory, then load the sign part out as an integer. MVT LoadTy = TLI.getPointerTy(); @@ -1636,12 +1703,13 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy); // Then store the float to it. SDValue Ch = - DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, NULL, 0, + DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, MachinePointerInfo(), false, false, 0); if (TLI.isBigEndian()) { assert(FloatVT.isByteSized() && "Unsupported floating point type!"); // Load out a legal integer with the same sign bit as the float. - SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, NULL, 0, false, false, 0); + SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(), + false, false, 0); } else { // Little endian SDValue LoadPtr = StackPtr; // The float may be wider than the integer we are going to load. Advance @@ -1651,7 +1719,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), LoadPtr, DAG.getIntPtrConstant(ByteOffset)); // Load a legal integer containing the sign bit. - SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, NULL, 0, false, false, 0); + SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(), + false, false, 0); // Move the sign bit to the top bit of the loaded integer. 
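The ExpandFCOPYSIGN hunk above shows only how the sign bit of the second operand is fetched as an integer; the eventual combine is the standard mask-and-or. The whole computation for f32, as a bit-level sketch:

    #include <cstdint>
    #include <cstring>

    float copysignBits(float x, float y) {
      uint32_t xb, yb;
      std::memcpy(&xb, &x, 4);               // ISD::BITCAST f32 -> i32
      std::memcpy(&yb, &y, 4);
      uint32_t r = (xb & 0x7FFFFFFFu)        // magnitude of x
                 | (yb & 0x80000000u);       // sign of y
      std::memcpy(&x, &r, 4);
      return x;
    }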
unsigned BitShift = LoadTy.getSizeInBits() - (FloatVT.getSizeInBits() - 8 * ByteOffset); @@ -1694,7 +1763,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); Chain = SP.getValue(1); unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue(); - unsigned StackAlign = TM.getFrameInfo()->getStackAlignment(); + unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); if (Align > StackAlign) SP = DAG.getNode(ISD::AND, dl, VT, SP, DAG.getConstant(-(uint64_t)Align, VT)); @@ -1768,7 +1837,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr); int SPFI = StackPtrFI->getIndex(); - const Value *SV = PseudoSourceValue::getFixedStack(SPFI); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI); unsigned SrcSize = SrcOp.getValueType().getSizeInBits(); unsigned SlotSize = SlotVT.getSizeInBits(); @@ -1782,21 +1851,21 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, if (SrcSize > SlotSize) Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, - SV, 0, SlotVT, false, false, SrcAlign); + PtrInfo, SlotVT, false, false, SrcAlign); else { assert(SrcSize == SlotSize && "Invalid store"); Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, - SV, 0, false, false, SrcAlign); + PtrInfo, false, false, SrcAlign); } // Result is a load from the stack slot. if (SlotSize == DestSize) - return DAG.getLoad(DestVT, dl, Store, FIPtr, SV, 0, false, false, - DestAlign); + return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, + false, false, DestAlign); assert(SlotSize < DestSize && "Unknown extension!"); - return DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl, Store, FIPtr, SV, 0, SlotVT, - false, false, DestAlign); + return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, + PtrInfo, SlotVT, false, false, DestAlign); } SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { @@ -1810,11 +1879,11 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0), StackPtr, - PseudoSourceValue::getFixedStack(SPFI), 0, + MachinePointerInfo::getFixedStack(SPFI), Node->getValueType(0).getVectorElementType(), false, false, 0); return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr, - PseudoSourceValue::getFixedStack(SPFI), 0, + MachinePointerInfo::getFixedStack(SPFI), false, false, 0); } @@ -1888,7 +1957,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy()); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, Alignment); } @@ -1924,7 +1993,6 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { // and leave the Hi part unset. SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { - assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); // The input chain to this libcall is the entry node of the function. // Legalizing the call will automatically add the previous call to the // dependence. @@ -1945,12 +2013,20 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, // Splice the libcall in wherever FindInputOutputChains tells us to. 
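In the ExpandDYNAMIC_STACKALLOC hunk, SP & -Align rounds the downward-growing stack pointer to the requested power-of-two boundary. The same trick in plain C++:

    #include <cstdint>

    uintptr_t alignDown(uintptr_t sp, uintptr_t align) {
      return sp & -align;   // valid only for power-of-two 'align'
    }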
const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + + // isTailCall may be true since the callee does not reference caller stack + // frame. Check if it's in the right position. + bool isTailCall = isInTailCallPosition(DAG, Node, TLI); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, - 0, TLI.getLibcallCallingConv(LC), false, + 0, TLI.getLibcallCallingConv(LC), isTailCall, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); + if (!CallInfo.second.getNode()) + // It's a tailcall, return the chain (which is the DAG root). + return DAG.getRoot(); + // Legalize the call sequence, starting with the chain. This will advance // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that // was added by LowerCallTo (guaranteeing proper serialization of calls). @@ -1964,7 +2040,6 @@ std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { - assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); SDValue InChain = Node->getOperand(0); TargetLowering::ArgListTy Args; @@ -1985,7 +2060,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, - 0, TLI.getLibcallCallingConv(LC), false, + 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); @@ -2064,16 +2139,17 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, } // store the lo of the constructed double - based on integer input SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, - Op0Mapped, Lo, NULL, 0, + Op0Mapped, Lo, MachinePointerInfo(), false, false, 0); // initial hi portion of constructed double SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32); // store the hi of the constructed double - biased exponent - SDValue Store2=DAG.getStore(Store1, dl, InitialHi, Hi, NULL, 0, - false, false, 0); + SDValue Store2 = DAG.getStore(Store1, dl, InitialHi, Hi, + MachinePointerInfo(), + false, false, 0); // load the constructed double - SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, NULL, 0, - false, false, 0); + SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, + MachinePointerInfo(), false, false, 0); // FP constant to bias correct the final result SDValue Bias = DAG.getConstantFP(isSigned ? BitsToDouble(0x4330000080000000ULL) : @@ -2116,17 +2192,40 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, DAG.getConstant(32, MVT::i64)); SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52); SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84); - SDValue LoFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, LoOr); - SDValue HiFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, HiOr); + SDValue LoFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, LoOr); + SDValue HiFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, HiOr); SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt, TwoP84PlusTwoP52); return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub); } - // Implementation of unsigned i64 to f32. This implementation has the - // advantage of performing rounding correctly. + // Implementation of unsigned i64 to f32. // TODO: Generalize this for use with other types. 
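The store pair in ExpandLegalINT_TO_FP builds the classic double-magic conversion: park the 32-bit integer in the low word of a double whose high word (0x43300000) encodes the 2^52 exponent, then subtract the bias. Assuming little-endian IEEE f64, the unsigned case works out to:

    #include <cstdint>
    #include <cstring>

    double u32ToDouble(uint32_t x) {
      uint64_t bits = 0x4330000000000000ULL | x;  // Hi = 0x43300000, Lo = x
      double d;
      std::memcpy(&d, &bits, sizeof d);           // the "constructed double"
      return d - 4503599627370496.0;              // subtract the 2^52 bias
    }

This is exact because any 32-bit integer fits in the 52-bit mantissa; the signed variant differs only in the mapping (XOR with 0x80000000) and the bias constant.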
if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) { + // For unsigned conversions, convert them to signed conversions using the + // algorithm from the x86_64 __floatundidf in compiler_rt. + if (!isSigned) { + SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0); + + SDValue ShiftConst = DAG.getConstant(1, TLI.getShiftAmountTy()); + SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst); + SDValue AndConst = DAG.getConstant(1, MVT::i64); + SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst); + SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr); + + SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Or); + SDValue Slow = DAG.getNode(ISD::FADD, dl, MVT::f32, SignCvt, SignCvt); + + // TODO: This really should be implemented using a branch rather than a + // select. We happen to get lucky and machinesink does the right + // thing most of the time. This would be a good candidate for a + //pseudo-op, or, even better, for whole-function isel. + SDValue SignBitTest = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64), + Op0, DAG.getConstant(0, MVT::i64), ISD::SETLT); + return DAG.getNode(ISD::SELECT, dl, MVT::f32, SignBitTest, Slow, Fast); + } + + // Otherwise, implement the fully general conversion. EVT SHVT = TLI.getShiftAmountTy(); SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, @@ -2140,7 +2239,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Sel = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ne, Or, Op0); SDValue Ge = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64), Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64), - ISD::SETUGE); + ISD::SETUGE); SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0); SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2, @@ -2155,7 +2254,6 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2); return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd, DAG.getIntPtrConstant(0)); - } SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); @@ -2189,13 +2287,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue FudgeInReg; if (DestVT == MVT::f32) FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, Alignment); else { FudgeInReg = - LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl, + LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), MVT::f32, false, false, Alignment)); } @@ -2332,6 +2430,18 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { } } +/// SplatByte - Distribute ByteVal over NumBits bits. +// FIXME: Move this helper to a common place. +static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) { + APInt Val = APInt(NumBits, ByteVal); + unsigned Shift = 8; + for (unsigned i = NumBits; i > 8; i >>= 1) { + Val = (Val << Shift) | Val; + Shift <<= 1; + } + return Val; +} + /// ExpandBitCount - Expand the specified bitcount instruction into operations. 
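The new unsigned i64 -> f32 path follows the compiler_rt shape the comment credits: convert directly when the sign bit is clear, otherwise halve the value with the low bit OR'd back in (so the rounding step still sees it) and double the result. In C:

    #include <cstdint>

    float u64ToFloat(uint64_t x) {
      if ((int64_t)x >= 0)                 // Fast path: sign bit clear
        return (float)(int64_t)x;
      uint64_t t = (x >> 1) | (x & 1);     // Shr | And keeps the sticky bit
      float f = (float)(int64_t)t;         // SignCvt
      return f + f;                        // Slow = SignCvt + SignCvt
    }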
/// SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, @@ -2339,26 +2449,45 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, switch (Opc) { default: assert(0 && "Cannot expand this yet!"); case ISD::CTPOP: { - static const uint64_t mask[6] = { - 0x5555555555555555ULL, 0x3333333333333333ULL, - 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL, - 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL - }; EVT VT = Op.getValueType(); EVT ShVT = TLI.getShiftAmountTy(); - unsigned len = VT.getSizeInBits(); - for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { - //x = (x & mask[i][len/8]) + (x >> (1 << i) & mask[i][len/8]) - unsigned EltSize = VT.isVector() ? - VT.getVectorElementType().getSizeInBits() : len; - SDValue Tmp2 = DAG.getConstant(APInt(EltSize, mask[i]), VT); - SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT); - Op = DAG.getNode(ISD::ADD, dl, VT, - DAG.getNode(ISD::AND, dl, VT, Op, Tmp2), - DAG.getNode(ISD::AND, dl, VT, - DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3), - Tmp2)); - } + unsigned Len = VT.getSizeInBits(); + + assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 && + "CTPOP not implemented for this type."); + + // This is the "best" algorithm from + // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel + + SDValue Mask55 = DAG.getConstant(SplatByte(Len, 0x55), VT); + SDValue Mask33 = DAG.getConstant(SplatByte(Len, 0x33), VT); + SDValue Mask0F = DAG.getConstant(SplatByte(Len, 0x0F), VT); + SDValue Mask01 = DAG.getConstant(SplatByte(Len, 0x01), VT); + + // v = v - ((v >> 1) & 0x55555555...) + Op = DAG.getNode(ISD::SUB, dl, VT, Op, + DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::SRL, dl, VT, Op, + DAG.getConstant(1, ShVT)), + Mask55)); + // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...) + Op = DAG.getNode(ISD::ADD, dl, VT, + DAG.getNode(ISD::AND, dl, VT, Op, Mask33), + DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::SRL, dl, VT, Op, + DAG.getConstant(2, ShVT)), + Mask33)); + // v = (v + (v >> 4)) & 0x0F0F0F0F... + Op = DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::ADD, dl, VT, Op, + DAG.getNode(ISD::SRL, dl, VT, Op, + DAG.getConstant(4, ShVT))), + Mask0F); + // v = (v * 0x01010101...) >> (Len - 8) + Op = DAG.getNode(ISD::SRL, dl, VT, + DAG.getNode(ISD::MUL, dl, VT, Op, Mask01), + DAG.getConstant(Len - 8, ShVT)); + return Op; } case ISD::CTLZ: { @@ -2516,9 +2645,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::PREFETCH: case ISD::VAEND: case ISD::EH_SJLJ_LONGJMP: + case ISD::EH_SJLJ_DISPATCHSETUP: + // If the target didn't expand these, there's nothing to do, so just + // preserve the chain and be done. Results.push_back(Node->getOperand(0)); break; case ISD::EH_SJLJ_SETJMP: + // If the target didn't expand this, just return 'zero' and preserve the + // chain. Results.push_back(DAG.getConstant(0, MVT::i32)); Results.push_back(Node->getOperand(0)); break; @@ -2527,7 +2661,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, TargetLowering::ArgListTy Args; std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), - false, false, false, false, 0, CallingConv::C, false, + false, false, false, false, 0, CallingConv::C, + /*isTailCall=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__sync_synchronize", TLI.getPointerTy()), @@ -2538,7 +2673,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, // By default, atomic intrinsics are marked Legal and lowered. 
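The masks fed to the new CTPOP sequence come from SplatByte (0x55…, 0x33…, 0x0F…, 0x01… repeated across the type width); written out for 32 bits, the emitted computation is the familiar parallel bit count (a plain C++ sketch, hypothetical name):

    #include <cstdint>

    // "Best" bithacks popcount: fold to 2-bit, 4-bit, then 8-bit partial
    // sums, and finally add all bytes at once with a 0x01010101 multiply.
    uint32_t popcount32(uint32_t v) {
      v = v - ((v >> 1) & 0x55555555);                // 2-bit sums
      v = (v & 0x33333333) + ((v >> 2) & 0x33333333); // 4-bit sums
      v = (v + (v >> 4)) & 0x0F0F0F0F;                // 8-bit sums per byte
      return (v * 0x01010101) >> 24;                  // byte total -> top byte
    }

The final SRL by Len - 8 in the DAG version is the general-width form of the >> 24 here.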
Targets // which don't support them directly, however, may want libcalls, in which // case they mark them Expand, and we get here. - // FIXME: Unimplemented for now. Add libcalls. case ISD::ATOMIC_SWAP: case ISD::ATOMIC_LOAD_ADD: case ISD::ATOMIC_LOAD_SUB: @@ -2578,7 +2712,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, TargetLowering::ArgListTy Args; std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), - false, false, false, false, 0, CallingConv::C, false, + false, false, false, false, 0, CallingConv::C, + /*isTailCall=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("abort", TLI.getPointerTy()), Args, DAG, dl); @@ -2586,7 +2721,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, break; } case ISD::FP_ROUND: - case ISD::BIT_CONVERT: + case ISD::BITCAST: Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0), Node->getValueType(0), dl); Results.push_back(Tmp1); @@ -2637,8 +2772,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, SDValue True, False; EVT VT = Node->getOperand(0).getValueType(); EVT NVT = Node->getValueType(0); - const uint64_t zero[] = {0, 0}; - APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero)); + APFloat apf(APInt::getNullValue(VT.getSizeInBits())); APInt x = APInt::getSignBit(NVT.getSizeInBits()); (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven); Tmp1 = DAG.getConstantFP(apf, VT); @@ -2662,8 +2796,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Tmp2 = Node->getOperand(1); unsigned Align = Node->getConstantOperandVal(3); - SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0, - false, false, 0); + SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, + MachinePointerInfo(V), false, false, 0); SDValue VAList = VAListLoad; if (Align > TLI.getMinStackArgumentAlignment()) { @@ -2674,7 +2808,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, TLI.getPointerTy())); VAList = DAG.getNode(ISD::AND, dl, TLI.getPointerTy(), VAList, - DAG.getConstant(-Align, + DAG.getConstant(-(int64_t)Align, TLI.getPointerTy())); } @@ -2684,10 +2818,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), TLI.getPointerTy())); // Store the incremented VAList to the legalized pointer - Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, V, 0, - false, false, 0); + Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, + MachinePointerInfo(V), false, false, 0); // Load the actual argument out of the pointer VAList - Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0, + Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(), false, false, 0)); Results.push_back(Results[0].getValue(1)); break; @@ -2698,16 +2832,17 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue(); const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue(); Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0), - Node->getOperand(2), VS, 0, false, false, 0); - Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), VD, 0, - false, false, 0); + Node->getOperand(2), MachinePointerInfo(VS), + false, false, 0); + Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), + MachinePointerInfo(VD), false, false, 0); Results.push_back(Tmp1); break; } case ISD::EXTRACT_VECTOR_ELT: if (Node->getOperand(0).getValueType().getVectorNumElements() == 1) // This 
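The va_list fix-up above is plain round-up-to-alignment arithmetic; the cast in -(int64_t)Align is the point of the change, since negating the unsigned Align and zero-extending it to pointer width would produce a mask with cleared high bits. A sketch (hypothetical name, assumes a power-of-two align):

    #include <cstdint>

    // Round ptr up to the next multiple of align: ADD(align - 1), then
    // AND with -(int64_t)align, i.e. ~(align - 1) sign-extended.
    uint64_t align_up(uint64_t ptr, uint64_t align) {
      return (ptr + align - 1) & (uint64_t)-(int64_t)align;
    }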
must be an access of the only element. Return it. - Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, Node->getValueType(0), + Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Node->getOperand(0)); else Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0)); @@ -2716,6 +2851,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::EXTRACT_SUBVECTOR: Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0))); break; + case ISD::INSERT_SUBVECTOR: + Results.push_back(ExpandInsertToVectorThroughStack(SDValue(Node, 0))); + break; case ISD::CONCAT_VECTORS: { Results.push_back(ExpandVectorBuildThroughStack(Node)); break; @@ -3094,14 +3232,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS, RHS); TopHalf = BottomHalf.getValue(1); - } else { - // FIXME: We should be able to fall back to a libcall with an illegal - // type in some cases. - // Also, we can fall back to a division in some cases, but that's a big - // performance hit in the general case. - assert(TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), - VT.getSizeInBits() * 2)) && - "Don't know how to expand this operation yet!"); + } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), + VT.getSizeInBits() * 2))) { EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); @@ -3110,6 +3242,30 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, DAG.getIntPtrConstant(0)); TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1, DAG.getIntPtrConstant(1)); + } else { + // We can fall back to a libcall with an illegal type for the MUL if we + // have a libcall big enough. + // Also, we can fall back to a division in some cases, but that's a big + // performance hit in the general case. 
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (WideVT == MVT::i16) + LC = RTLIB::MUL_I16; + else if (WideVT == MVT::i32) + LC = RTLIB::MUL_I32; + else if (WideVT == MVT::i64) + LC = RTLIB::MUL_I64; + else if (WideVT == MVT::i128) + LC = RTLIB::MUL_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!"); + LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); + RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); + + SDValue Ret = ExpandLibCall(LC, Node, isSigned); + BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Ret); + TopHalf = DAG.getNode(ISD::SRL, dl, Ret.getValueType(), Ret, + DAG.getConstant(VT.getSizeInBits(), TLI.getPointerTy())); + TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, TopHalf); } if (isSigned) { Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy()); @@ -3165,8 +3321,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); - SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, PTy, dl, Chain, Addr, - PseudoSourceValue::getJumpTable(), 0, MemVT, + SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr, + MachinePointerInfo::getJumpTable(), MemVT, false, false, 0); Addr = LD; if (TM.getRelocationModel() == Reloc::PIC_) { @@ -3329,8 +3485,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, case ISD::XOR: { unsigned ExtOp, TruncOp; if (OVT.isVector()) { - ExtOp = ISD::BIT_CONVERT; - TruncOp = ISD::BIT_CONVERT; + ExtOp = ISD::BITCAST; + TruncOp = ISD::BITCAST; } else { assert(OVT.isInteger() && "Cannot promote logic operation"); ExtOp = ISD::ANY_EXTEND; @@ -3347,8 +3503,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, case ISD::SELECT: { unsigned ExtOp, TruncOp; if (Node->getValueType(0).isVector()) { - ExtOp = ISD::BIT_CONVERT; - TruncOp = ISD::BIT_CONVERT; + ExtOp = ISD::BITCAST; + TruncOp = ISD::BITCAST; } else if (Node->getValueType(0).isInteger()) { ExtOp = ISD::ANY_EXTEND; TruncOp = ISD::TRUNCATE; @@ -3375,12 +3531,12 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, cast<ShuffleVectorSDNode>(Node)->getMask(Mask); // Cast the two input vectors. - Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(0)); - Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(1)); + Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(1)); // Convert the shuffle mask to the right # elements. 
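The new fallback recovers both halves of the widened product from one libcall result; in scalar terms (a sketch with hypothetical names, using a plain multiply to stand in for the RTLIB call):

    #include <cstdint>

    // 32-bit multiply with only a 64-bit multiply available: widen both
    // operands, multiply once, then split the result - the TRUNCATE gives
    // BottomHalf and the SRL-by-32 plus TRUNCATE gives TopHalf.
    void mul_with_halves(uint32_t a, uint32_t b,
                         uint32_t &bottom, uint32_t &top) {
      uint64_t wide = (uint64_t)a * (uint64_t)b; // stands in for MUL_I64
      bottom = (uint32_t)wide;                   // TRUNCATE
      top    = (uint32_t)(wide >> 32);           // SRL, TRUNCATE
    }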
Tmp1 = ShuffleWithNarrowerEltType(NVT, OVT, dl, Tmp1, Tmp2, Mask); - Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Tmp1); + Tmp1 = DAG.getNode(ISD::BITCAST, dl, OVT, Tmp1); Results.push_back(Tmp1); break; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 650ee5a..2775212 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -55,7 +55,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { #endif llvm_unreachable("Do not know how to soften the result of this operator!"); - case ISD::BIT_CONVERT: R = SoftenFloatRes_BIT_CONVERT(N); break; + case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break; case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(cast<ConstantFPSDNode>(N)); @@ -102,7 +102,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { SetSoftenedFloat(SDValue(N, ResNo), R); } -SDValue DAGTypeLegalizer::SoftenFloatRes_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) { return BitConvertToInteger(N->getOperand(0)); } @@ -133,8 +133,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { unsigned Size = NVT.getSizeInBits(); // Mask = ~(1 << (Size-1)) - SDValue Mask = DAG.getConstant(APInt::getAllOnesValue(Size).clear(Size-1), - NVT); + APInt API = APInt::getAllOnesValue(Size); + API.clearBit(Size-1); + SDValue Mask = DAG.getConstant(API, NVT); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return DAG.getNode(ISD::AND, N->getDebugLoc(), NVT, Op, Mask); } @@ -455,7 +456,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { if (L->getExtensionType() == ISD::NON_EXTLOAD) { NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(), - L->getSrcValue(), L->getSrcValueOffset(), NVT, + L->getPointerInfo(), NVT, L->isVolatile(), L->isNonTemporal(), L->getAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -466,8 +467,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { // Do a non-extending load followed by FP_EXTEND. 
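The FABS softening above clears the sign bit of the integer carrying the float; as a scalar sketch (hypothetical name):

    #include <cstdint>
    #include <cstring>

    // Softened fabs for f32-in-i32: AND with ~(1 << 31), the all-ones APInt
    // with bit Size-1 cleared that the code now builds via clearBit().
    float soft_fabsf(float x) {
      uint32_t bits;
      std::memcpy(&bits, &x, sizeof bits);
      bits &= ~(UINT32_C(1) << 31);  // drop the sign bit, keep the magnitude
      std::memcpy(&x, &bits, sizeof x);
      return x;
    }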
NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD, L->getMemoryVT(), dl, L->getChain(), - L->getBasePtr(), L->getOffset(), - L->getSrcValue(), L->getSrcValueOffset(), + L->getBasePtr(), L->getOffset(), L->getPointerInfo(), L->getMemoryVT(), L->isVolatile(), L->isNonTemporal(), L->getAlignment()); // Legalized the chain result - switch anything that used the old chain to @@ -558,7 +558,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { #endif llvm_unreachable("Do not know how to soften this operator's operand!"); - case ISD::BIT_CONVERT: Res = SoftenFloatOp_BIT_CONVERT(N); break; + case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break; case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break; @@ -670,8 +670,8 @@ void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, } } -SDValue DAGTypeLegalizer::SoftenFloatOp_BIT_CONVERT(SDNode *N) { - return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0), +SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0), GetSoftenedFloat(N->getOperand(0))); } @@ -780,7 +780,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { Val = GetSoftenedFloat(Val); return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(), - ST->getSrcValue(), ST->getSrcValueOffset(), + ST->getPointerInfo(), ST->isVolatile(), ST->isNonTemporal(), ST->getAlignment()); } @@ -816,7 +816,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; - case ISD::BIT_CONVERT: ExpandRes_BIT_CONVERT(N, Lo, Hi); break; + case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break; case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break; case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break; case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break; @@ -1110,9 +1110,8 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, assert(NVT.isByteSized() && "Expanded type not byte sized!"); assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?"); - Hi = DAG.getExtLoad(LD->getExtensionType(), NVT, dl, Chain, Ptr, - LD->getSrcValue(), LD->getSrcValueOffset(), - LD->getMemoryVT(), LD->isVolatile(), + Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr, + LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); // Remember the chain. 
@@ -1222,7 +1221,7 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { #endif llvm_unreachable("Do not know how to expand this operator's operand!"); - case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break; + case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break; case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break; case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; @@ -1421,7 +1420,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { GetExpandedOp(ST->getValue(), Lo, Hi); return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr, - ST->getSrcValue(), ST->getSrcValueOffset(), + ST->getPointerInfo(), ST->getMemoryVT(), ST->isVolatile(), ST->isNonTemporal(), ST->getAlignment()); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index f8c5890..f0752df 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -49,7 +49,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { llvm_unreachable("Do not know how to promote this operator!"); case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break; case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break; - case ISD::BIT_CONVERT: Res = PromoteIntRes_BIT_CONVERT(N); break; + case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break; case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break; case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break; case ISD::Constant: Res = PromoteIntRes_Constant(N); break; @@ -143,7 +143,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) { SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), N->getMemoryVT(), N->getChain(), N->getBasePtr(), - Op2, N->getSrcValue(), N->getAlignment()); + Op2, N->getMemOperand()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -155,14 +155,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) { SDValue Op3 = GetPromotedInteger(N->getOperand(3)); SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), N->getMemoryVT(), N->getChain(), N->getBasePtr(), - Op2, Op3, N->getSrcValue(), N->getAlignment()); + Op2, Op3, N->getMemOperand()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); return Res; } -SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { SDValue InOp = N->getOperand(0); EVT InVT = InOp.getValueType(); EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); @@ -179,8 +179,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) { case PromoteInteger: if (NOutVT.bitsEq(NInVT)) // The input promotes to the same size. Convert the promoted value. - return DAG.getNode(ISD::BIT_CONVERT, dl, - NOutVT, GetPromotedInteger(InOp)); + return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp)); break; case SoftenFloat: // Promote the integer operand by hand. @@ -193,7 +192,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) { return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, BitConvertToInteger(GetScalarizedVector(InOp))); case SplitVector: { - // For example, i32 = BIT_CONVERT v2i16 on alpha. 
Convert the split + // For example, i32 = BITCAST v2i16 on alpha. Convert the split // pieces of the input into integers and reassemble in the final type. SDValue Lo, Hi; GetSplitVector(N->getOperand(0), Lo, Hi); @@ -207,12 +206,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) { EVT::getIntegerVT(*DAG.getContext(), NOutVT.getSizeInBits()), JoinIntegers(Lo, Hi)); - return DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, InOp); + return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp); } case WidenVector: if (OutVT.bitsEq(NInVT)) // The input is widened to the same size. Convert to the widened value. - return DAG.getNode(ISD::BIT_CONVERT, dl, OutVT, GetWidenedVector(InOp)); + return DAG.getNode(ISD::BITCAST, dl, OutVT, GetWidenedVector(InOp)); } return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, @@ -293,7 +292,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { // value was zero. This can be handled by setting the bit just off // the top of the original type. APInt TopBit(NVT.getSizeInBits(), 0); - TopBit.set(OVT.getSizeInBits()); + TopBit.setBit(OVT.getSizeInBits()); Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT)); return DAG.getNode(ISD::CTTZ, dl, NVT, Op); } @@ -371,8 +370,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType(); DebugLoc dl = N->getDebugLoc(); - SDValue Res = DAG.getExtLoad(ExtType, NVT, dl, N->getChain(), N->getBasePtr(), - N->getSrcValue(), N->getSrcValueOffset(), + SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(), + N->getPointerInfo(), N->getMemoryVT(), N->isVolatile(), N->isNonTemporal(), N->getAlignment()); @@ -549,6 +548,48 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) { return Res; } +SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { + // Promote the overflow bit trivially. + if (ResNo == 1) + return PromoteIntRes_Overflow(N); + + SDValue LHS = N->getOperand(0), RHS = N->getOperand(1); + DebugLoc DL = N->getDebugLoc(); + EVT SmallVT = LHS.getValueType(); + + // To determine if the result overflowed in a larger type, we extend the input + // to the larger type, do the multiply, then check the high bits of the result + // to see if the overflow happened. + if (N->getOpcode() == ISD::SMULO) { + LHS = SExtPromotedInteger(LHS); + RHS = SExtPromotedInteger(RHS); + } else { + LHS = ZExtPromotedInteger(LHS); + RHS = ZExtPromotedInteger(RHS); + } + SDValue Mul = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS); + + // Overflow occurred iff the high part of the result does not zero/sign-extend + // the low part. + SDValue Overflow; + if (N->getOpcode() == ISD::UMULO) { + // Unsigned overflow occurred iff the high part is non-zero. + SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul, + DAG.getIntPtrConstant(SmallVT.getSizeInBits())); + Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi, + DAG.getConstant(0, Hi.getValueType()), ISD::SETNE); + } else { + // Signed overflow occurred iff the high part does not sign extend the low. + SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(), + Mul, DAG.getValueType(SmallVT)); + Overflow = DAG.getSetCC(DL, N->getValueType(1), SExt, Mul, ISD::SETNE); + } + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(N, 1), Overflow); + return Mul; +} + SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) { // Zero extend the input. 
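The two promoted overflow checks in PromoteIntRes_XMULO are easiest to see at fixed widths; a C++ sketch for i16 promoted to i32 (hypothetical names):

    #include <cstdint>

    // UMULO: zero-extend, multiply wide, overflow iff the high half is
    // non-zero (the SRL + SETNE pair above).
    bool umul16_overflows(uint16_t a, uint16_t b) {
      uint32_t m = (uint32_t)a * b;
      return (m >> 16) != 0;
    }

    // SMULO: sign-extend, multiply wide, overflow iff sign-extending the
    // low half fails to reproduce the product (SIGN_EXTEND_INREG + SETNE).
    bool smul16_overflows(int16_t a, int16_t b) {
      int32_t m = (int32_t)a * b;
      return (int32_t)(int16_t)m != m;
    }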
SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); @@ -602,11 +643,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { return Res; } -SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { - assert(ResNo == 1 && "Only boolean result promotion currently supported!"); - return PromoteIntRes_Overflow(N); -} - //===----------------------------------------------------------------------===// // Integer Operand Promotion //===----------------------------------------------------------------------===// @@ -631,7 +667,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { llvm_unreachable("Do not know how to promote this operator's operand!"); case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break; - case ISD::BIT_CONVERT: Res = PromoteIntOp_BIT_CONVERT(N); break; + case ISD::BITCAST: Res = PromoteIntOp_BITCAST(N); break; case ISD::BR_CC: Res = PromoteIntOp_BR_CC(N, OpNo); break; case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break; case ISD::BUILD_PAIR: Res = PromoteIntOp_BUILD_PAIR(N); break; @@ -713,7 +749,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) { return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), Op); } -SDValue DAGTypeLegalizer::PromoteIntOp_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) { // This should only occur in unusual situations like bitcasting to an // x86_fp80, so just turn it into a store+load return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0)); @@ -889,7 +925,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); SDValue Ch = N->getChain(), Ptr = N->getBasePtr(); - int SVOffset = N->getSrcValueOffset(); unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); bool isNonTemporal = N->isNonTemporal(); @@ -898,8 +933,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value. // Truncate the value and store the result. 
- return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getSrcValue(), - SVOffset, N->getMemoryVT(), + return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getPointerInfo(), + N->getMemoryVT(), isVolatile, isNonTemporal, Alignment); } @@ -951,7 +986,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; - case ISD::BIT_CONVERT: ExpandRes_BIT_CONVERT(N, Lo, Hi); break; + case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break; case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break; case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break; case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break; @@ -978,6 +1013,23 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::UREM: ExpandIntRes_UREM(N, Lo, Hi); break; case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break; + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_SWAP: { + std::pair<SDValue, SDValue> Tmp = ExpandAtomic(N); + SplitInteger(Tmp.first, Lo, Hi); + ReplaceValueWith(SDValue(N, 1), Tmp.second); + break; + } + case ISD::AND: case ISD::OR: case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break; @@ -999,6 +1051,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break; case ISD::UADDO: case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break; + case ISD::UMULO: + case ISD::SMULO: ExpandIntRes_UMULSMULO(N, Lo, Hi); break; } // If Lo/Hi is null, the sub-method took care of registering results etc. @@ -1006,11 +1060,98 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { SetExpandedInteger(SDValue(N, ResNo), Lo, Hi); } +/// Lower an atomic node to the appropriate builtin call. 
+std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { + unsigned Opc = Node->getOpcode(); + MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); + RTLIB::Libcall LC; + + switch (Opc) { + default: + llvm_unreachable("Unhandled atomic intrinsic Expand!"); + break; + case ISD::ATOMIC_SWAP: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break; + case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break; + case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break; + case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break; + } + break; + case ISD::ATOMIC_CMP_SWAP: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break; + case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break; + case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break; + case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break; + } + break; + case ISD::ATOMIC_LOAD_ADD: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break; + } + break; + case ISD::ATOMIC_LOAD_SUB: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break; + } + break; + case ISD::ATOMIC_LOAD_AND: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break; + } + break; + case ISD::ATOMIC_LOAD_OR: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break; + } + break; + case ISD::ATOMIC_LOAD_XOR: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break; + } + break; + case ISD::ATOMIC_LOAD_NAND: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break; + } + break; + } + + return ExpandChainLibCall(LC, Node, false); +} + /// ExpandShiftByConstant - N is a shift by a value that needs to be expanded, /// and the shift amount is a constant 'Amt'. Expand the operation. 
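Each expanded atomic becomes a chain libcall with the usual __sync_* contract; semantically (a non-atomic sketch of the contract only, hypothetical name - the real routine performs the read-modify-write atomically):

    #include <cstdint>

    // What SYNC_FETCH_AND_ADD_4 computes: return the old value, store the
    // sum. The DAG code then splits the returned value into Lo/Hi and wires
    // the call's out-chain back in via ReplaceValueWith.
    uint32_t fetch_and_add_4_contract(uint32_t *addr, uint32_t amount) {
      uint32_t old = *addr;
      *addr = old + amount;
      return old;
    }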
void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + DebugLoc DL = N->getDebugLoc(); // Expand the incoming operand to be shifted, so that we have its parts SDValue InL, InH; GetExpandedInteger(N->getOperand(0), InL, InH); @@ -1025,8 +1166,8 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, Lo = Hi = DAG.getConstant(0, NVT); } else if (Amt > NVTBits) { Lo = DAG.getConstant(0, NVT); - Hi = DAG.getNode(ISD::SHL, dl, - NVT, InL, DAG.getConstant(Amt-NVTBits,ShTy)); + Hi = DAG.getNode(ISD::SHL, DL, + NVT, InL, DAG.getConstant(Amt-NVTBits, ShTy)); } else if (Amt == NVTBits) { Lo = DAG.getConstant(0, NVT); Hi = InL; @@ -1034,17 +1175,17 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, TLI.isOperationLegalOrCustom(ISD::ADDC, TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) { // Emit this X << 1 as X+X. - SDVTList VTList = DAG.getVTList(NVT, MVT::Flag); + SDVTList VTList = DAG.getVTList(NVT, MVT::Glue); SDValue LoOps[2] = { InL, InL }; - Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); + Lo = DAG.getNode(ISD::ADDC, DL, VTList, LoOps, 2); SDValue HiOps[3] = { InH, InH, Lo.getValue(1) }; - Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3); + Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps, 3); } else { - Lo = DAG.getNode(ISD::SHL, dl, NVT, InL, DAG.getConstant(Amt, ShTy)); - Hi = DAG.getNode(ISD::OR, dl, NVT, - DAG.getNode(ISD::SHL, dl, NVT, InH, + Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, ShTy)); + Hi = DAG.getNode(ISD::OR, DL, NVT, + DAG.getNode(ISD::SHL, DL, NVT, InH, DAG.getConstant(Amt, ShTy)), - DAG.getNode(ISD::SRL, dl, NVT, InL, + DAG.getNode(ISD::SRL, DL, NVT, InL, DAG.getConstant(NVTBits-Amt, ShTy))); } return; @@ -1055,43 +1196,43 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, Lo = DAG.getConstant(0, NVT); Hi = DAG.getConstant(0, NVT); } else if (Amt > NVTBits) { - Lo = DAG.getNode(ISD::SRL, dl, + Lo = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt-NVTBits,ShTy)); Hi = DAG.getConstant(0, NVT); } else if (Amt == NVTBits) { Lo = InH; Hi = DAG.getConstant(0, NVT); } else { - Lo = DAG.getNode(ISD::OR, dl, NVT, - DAG.getNode(ISD::SRL, dl, NVT, InL, + Lo = DAG.getNode(ISD::OR, DL, NVT, + DAG.getNode(ISD::SRL, DL, NVT, InL, DAG.getConstant(Amt, ShTy)), - DAG.getNode(ISD::SHL, dl, NVT, InH, + DAG.getNode(ISD::SHL, DL, NVT, InH, DAG.getConstant(NVTBits-Amt, ShTy))); - Hi = DAG.getNode(ISD::SRL, dl, NVT, InH, DAG.getConstant(Amt, ShTy)); + Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, ShTy)); } return; } assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); if (Amt > VTBits) { - Hi = Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, + Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(NVTBits-1, ShTy)); } else if (Amt > NVTBits) { - Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, + Lo = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt-NVTBits, ShTy)); - Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, + Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(NVTBits-1, ShTy)); } else if (Amt == NVTBits) { Lo = InH; - Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, + Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(NVTBits-1, ShTy)); } else { - Lo = DAG.getNode(ISD::OR, dl, NVT, - DAG.getNode(ISD::SRL, dl, NVT, InL, + Lo = DAG.getNode(ISD::OR, DL, NVT, + DAG.getNode(ISD::SRL, DL, NVT, InL, DAG.getConstant(Amt, ShTy)), - DAG.getNode(ISD::SHL, dl, NVT, InH, + DAG.getNode(ISD::SHL, DL, NVT, InH, 
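The constant-shift expansion splits into the Amt == 0, > NVTBits, == NVTBits, and general cases; the general case is the usual funnel shift across the halves. A 128-bit sketch in two 64-bit words (hypothetical names, 0 < amt < 64):

    #include <cstdint>

    // SHL: the low half shifts normally; the bits it loses arrive at the
    // bottom of the high half via SRL(lo, 64 - amt) OR'd in.
    void shl128(uint64_t lo, uint64_t hi, unsigned amt,
                uint64_t &outLo, uint64_t &outHi) {
      outLo = lo << amt;
      outHi = (hi << amt) | (lo >> (64 - amt));
    }

    // SRL: the mirror image - high-half bits flow into the top of the low half.
    void srl128(uint64_t lo, uint64_t hi, unsigned amt,
                uint64_t &outLo, uint64_t &outHi) {
      outLo = (lo >> amt) | (hi << (64 - amt));
      outHi = hi >> amt;
    }

SRA differs only in shifting the high half arithmetically and filling with copies of the sign bit in the oversized-amount cases.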
DAG.getConstant(NVTBits-Amt, ShTy))); - Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, DAG.getConstant(Amt, ShTy)); + Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, ShTy)); } } @@ -1269,7 +1410,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, // Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support // them. TODO: Teach operation legalization how to expand unsupported // ADDC/ADDE/SUBC/SUBE. The problem is that these operations generate - // a carry of type MVT::Flag, but there doesn't seem to be any way to + // a carry of type MVT::Glue, but there doesn't seem to be any way to // generate a value of this type in the expanded code sequence. bool hasCarry = TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ? @@ -1277,7 +1418,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); if (hasCarry) { - SDVTList VTList = DAG.getVTList(NVT, MVT::Flag); + SDVTList VTList = DAG.getVTList(NVT, MVT::Glue); if (N->getOpcode() == ISD::ADD) { Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); HiOps[2] = Lo.getValue(1); @@ -1287,31 +1428,32 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, HiOps[2] = Lo.getValue(1); Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3); } + return; + } + + if (N->getOpcode() == ISD::ADD) { + Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2); + Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2); + SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0], + ISD::SETULT); + SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1, + DAG.getConstant(1, NVT), + DAG.getConstant(0, NVT)); + SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1], + ISD::SETULT); + SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2, + DAG.getConstant(1, NVT), Carry1); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2); } else { - if (N->getOpcode() == ISD::ADD) { - Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2); - Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2); - SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0], - ISD::SETULT); - SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1, - DAG.getConstant(1, NVT), - DAG.getConstant(0, NVT)); - SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1], - ISD::SETULT); - SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2, - DAG.getConstant(1, NVT), Carry1); - Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2); - } else { - Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2); - Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2); - SDValue Cmp = - DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()), - LoOps[0], LoOps[1], ISD::SETULT); - SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, - DAG.getConstant(1, NVT), - DAG.getConstant(0, NVT)); - Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow); - } + Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2); + Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2); + SDValue Cmp = + DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()), + LoOps[0], LoOps[1], ISD::SETULT); + SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, + DAG.getConstant(1, NVT), + DAG.getConstant(0, NVT)); + Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow); } } @@ -1322,7 +1464,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N, DebugLoc dl = N->getDebugLoc(); GetExpandedInteger(N->getOperand(0), LHSL, LHSH); GetExpandedInteger(N->getOperand(1), RHSL, RHSH); - SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag); + SDVTList VTList = 
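When ADDC/ADDE are unavailable, the rewritten ExpandIntRes_ADDSUB path derives the carry from an unsigned wraparound test; the 128-bit scalar equivalent (hypothetical name):

    #include <cstdint>

    // After lo = aLo + bLo, a carry occurred exactly when the sum wrapped
    // below an input - the SETULT + SELECT chain above. Subtraction is the
    // mirror case: borrow = (aLo < bLo).
    void add128(uint64_t aLo, uint64_t aHi, uint64_t bLo, uint64_t bHi,
                uint64_t &lo, uint64_t &hi) {
      lo = aLo + bLo;
      uint64_t carry = (lo < aLo) ? 1 : 0;
      hi = aHi + bHi + carry;
    }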
DAG.getVTList(LHSL.getValueType(), MVT::Glue); SDValue LoOps[2] = { LHSL, RHSL }; SDValue HiOps[3] = { LHSH, RHSH }; @@ -1348,7 +1490,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N, DebugLoc dl = N->getDebugLoc(); GetExpandedInteger(N->getOperand(0), LHSL, LHSH); GetExpandedInteger(N->getOperand(1), RHSL, RHSH); - SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag); + SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue); SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) }; SDValue HiOps[3] = { LHSH, RHSH }; @@ -1437,7 +1579,7 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned NBitWidth = NVT.getSizeInBits(); const APInt &Cst = cast<ConstantSDNode>(N)->getAPIntValue(); - Lo = DAG.getConstant(APInt(Cst).trunc(NBitWidth), NVT); + Lo = DAG.getConstant(Cst.trunc(NBitWidth), NVT); Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT); } @@ -1524,7 +1666,6 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); ISD::LoadExtType ExtType = N->getExtensionType(); - int SVOffset = N->getSrcValueOffset(); unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); bool isNonTemporal = N->isNonTemporal(); @@ -1535,7 +1676,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, if (N->getMemoryVT().bitsLE(NVT)) { EVT MemVT = N->getMemoryVT(); - Lo = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset, + Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), MemVT, isVolatile, isNonTemporal, Alignment); // Remember the chain. @@ -1557,7 +1698,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, } } else if (TLI.isLittleEndian()) { // Little-endian - low bits are at low addresses. - Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset, + Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), isVolatile, isNonTemporal, Alignment); unsigned ExcessBits = @@ -1568,8 +1709,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, unsigned IncrementSize = NVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); - Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), - SVOffset+IncrementSize, NEVT, + Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), NEVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); @@ -1586,7 +1727,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, unsigned ExcessBits = (EBytes - IncrementSize)*8; // Load both the high bits and maybe some of the low bits. - Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset, + Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits() - ExcessBits), isVolatile, isNonTemporal, Alignment); @@ -1595,8 +1736,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); // Load the rest of the low bits. 
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, NVT, dl, Ch, Ptr, N->getSrcValue(), - SVOffset+IncrementSize, + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), EVT::getIntegerVT(*DAG.getContext(), ExcessBits), isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); @@ -1987,6 +2128,31 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, ReplaceValueWith(SDValue(N, 1), Ofl); } +void DAGTypeLegalizer::ExpandIntRes_UMULSMULO(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() / 2); + // Expand the result by simply replacing it with the equivalent + // non-overflow-checking operation. + SDValue Ret = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS); + SplitInteger(Ret, Lo, Hi); + + // Now calculate overflow. + SDValue Ofl; + if (N->getOpcode() == ISD::UMULO) + Ofl = DAG.getSetCC(dl, N->getValueType(1), Hi, + DAG.getConstant(0, VT), ISD::SETNE); + else { + SDValue Tmp = DAG.getConstant(VT.getSizeInBits() - 1, HalfVT); + Tmp = DAG.getNode(ISD::SRA, dl, HalfVT, Lo, Tmp); + Ofl = DAG.getSetCC(dl, N->getValueType(1), Hi, Tmp, ISD::SETNE); + } + ReplaceValueWith(SDValue(N, 1), Ofl); +} + void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); @@ -2078,7 +2244,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { #endif llvm_unreachable("Do not know how to expand this operator's operand!"); - case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break; + case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break; case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break; case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break; case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; @@ -2308,7 +2474,6 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); - int SVOffset = N->getSrcValueOffset(); unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); bool isNonTemporal = N->isNonTemporal(); @@ -2319,14 +2484,16 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { if (N->getMemoryVT().bitsLE(NVT)) { GetExpandedInteger(N->getValue(), Lo, Hi); - return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, + return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), N->getMemoryVT(), isVolatile, isNonTemporal, Alignment); - } else if (TLI.isLittleEndian()) { + } + + if (TLI.isLittleEndian()) { // Little-endian - low bits are at low addresses. 
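The UMULSMULO expansion derives the overflow flag purely from the two halves of the truncated product; what the emitted check computes, shown at fixed widths (a sketch with hypothetical names - it tests whether the product fits in the low half):

    #include <cstdint>

    // Unsigned: overflow flag set iff Hi != 0.
    bool umul_hi_nonzero(uint64_t product) {
      return (uint32_t)(product >> 32) != 0;
    }

    // Signed: overflow flag set iff Hi is not the sign-extension of Lo,
    // i.e. Hi != SRA(Lo, bits - 1).
    bool smul_hi_mismatch(int64_t product) {
      int32_t lo = (int32_t)product;
      int32_t hi = (int32_t)((uint64_t)product >> 32);
      return hi != (lo >> 31);
    }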
GetExpandedInteger(N->getValue(), Lo, Hi); - Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, + Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), isVolatile, isNonTemporal, Alignment); unsigned ExcessBits = @@ -2337,50 +2504,49 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned IncrementSize = NVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); - Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), - SVOffset+IncrementSize, NEVT, - isVolatile, isNonTemporal, + Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), + NEVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); - } else { - // Big-endian - high bits are at low addresses. Favor aligned stores at - // the cost of some bit-fiddling. - GetExpandedInteger(N->getValue(), Lo, Hi); - - EVT ExtVT = N->getMemoryVT(); - unsigned EBytes = ExtVT.getStoreSize(); - unsigned IncrementSize = NVT.getSizeInBits()/8; - unsigned ExcessBits = (EBytes - IncrementSize)*8; - EVT HiVT = EVT::getIntegerVT(*DAG.getContext(), - ExtVT.getSizeInBits() - ExcessBits); + } - if (ExcessBits < NVT.getSizeInBits()) { - // Transfer high bits from the top of Lo to the bottom of Hi. - Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi, - DAG.getConstant(NVT.getSizeInBits() - ExcessBits, - TLI.getPointerTy())); - Hi = DAG.getNode(ISD::OR, dl, NVT, Hi, - DAG.getNode(ISD::SRL, dl, NVT, Lo, - DAG.getConstant(ExcessBits, - TLI.getPointerTy()))); - } + // Big-endian - high bits are at low addresses. Favor aligned stores at + // the cost of some bit-fiddling. + GetExpandedInteger(N->getValue(), Lo, Hi); + + EVT ExtVT = N->getMemoryVT(); + unsigned EBytes = ExtVT.getStoreSize(); + unsigned IncrementSize = NVT.getSizeInBits()/8; + unsigned ExcessBits = (EBytes - IncrementSize)*8; + EVT HiVT = EVT::getIntegerVT(*DAG.getContext(), + ExtVT.getSizeInBits() - ExcessBits); + + if (ExcessBits < NVT.getSizeInBits()) { + // Transfer high bits from the top of Lo to the bottom of Hi. + Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi, + DAG.getConstant(NVT.getSizeInBits() - ExcessBits, + TLI.getPointerTy())); + Hi = DAG.getNode(ISD::OR, dl, NVT, Hi, + DAG.getNode(ISD::SRL, dl, NVT, Lo, + DAG.getConstant(ExcessBits, + TLI.getPointerTy()))); + } - // Store both the high bits and maybe some of the low bits. - Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), - SVOffset, HiVT, isVolatile, isNonTemporal, - Alignment); + // Store both the high bits and maybe some of the low bits. + Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), + HiVT, isVolatile, isNonTemporal, Alignment); - // Increment the pointer to the other half. - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); - // Store the lowest ExcessBits bits in the second half. - Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), - SVOffset+IncrementSize, - EVT::getIntegerVT(*DAG.getContext(), ExcessBits), - isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); - } + // Increment the pointer to the other half. + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + // Store the lowest ExcessBits bits in the second half. 
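The big-endian path's bit-fiddling is easiest with concrete widths; for a 40-bit truncating store split into a 32-bit piece plus an 8-bit tail (a sketch, hypothetical name): the 8 excess bits sit at the top of Lo, so they are funneled into the bottom of Hi before Hi is stored at the low address.

    #include <cstdint>

    // value's bits 8..39 go out first (high bits at low addresses), then
    // its lowest ExcessBits = 8 bits follow - the SHL/SRL/OR trio above.
    void be_store40(uint64_t value, uint32_t &firstWord, uint8_t &tailByte) {
      uint32_t lo = (uint32_t)value;
      uint32_t hi = (uint32_t)(value >> 32);       // 8 significant bits
      const unsigned excess = 8;                   // EBytes*8 - 32
      hi = (hi << (32 - excess)) | (lo >> excess); // move excess bits into Hi
      firstWord = hi;
      tailByte  = (uint8_t)lo;
    }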
+ Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), + EVT::getIntegerVT(*DAG.getContext(), ExcessBits), + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) { @@ -2460,8 +2626,10 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { // Load the value out, extending it from f32 to the destination float type. // FIXME: Avoid the extend by constructing the right constant pool? - SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, DstVT, dl, DAG.getEntryNode(), - FudgePtr, NULL, 0, MVT::f32, + SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), + FudgePtr, + MachinePointerInfo::getConstantPool(), + MVT::f32, false, false, Alignment); return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 6e56c98..cedda7e 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -714,6 +714,11 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { if (M->getNodeId() == Processed) RemapValue(NewVal); DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL); + // OldVal may be a target of the ReplacedValues map which was marked + // NewNode to force reanalysis because it was updated. Ensure that + // anything that ReplacedValues mapped to OldVal will now be mapped + // all the way to NewVal. + ReplacedValues[OldVal] = NewVal; } // The original node continues to exist in the DAG, marked NewNode. } @@ -858,7 +863,7 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { /// BitConvertToInteger - Convert to an integer of the same size. SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) { unsigned BitWidth = Op.getValueType().getSizeInBits(); - return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(), + return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(), EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op); } @@ -869,7 +874,7 @@ SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) { unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits(); EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth); unsigned NumElts = Op.getValueType().getVectorNumElements(); - return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(), + return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(), EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op); } @@ -880,10 +885,11 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, // the source and destination types. SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT); // Emit a store to the stack slot. - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, NULL, 0, - false, false, 0); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, + MachinePointerInfo(), false, false, 0); // Result is a load from the stack slot. - return DAG.getLoad(DestVT, dl, Store, StackPtr, NULL, 0, false, false, 0); + return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(), + false, false, 0); } /// CustomLowerNode - Replace the node's results with custom code provided @@ -1049,6 +1055,39 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT, return CallInfo.first; } +// ExpandChainLibCall - Expand a node into a call to a libcall. 
Similar to +// ExpandLibCall except that the first operand is the in-chain. +std::pair<SDValue, SDValue> +DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, + SDNode *Node, + bool isSigned) { + SDValue InChain = Node->getOperand(0); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) { + EVT ArgVT = Node->getOperand(i).getValueType(); + const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Node->getOperand(i); + Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + // Splice the libcall in wherever FindInputOutputChains tells us to. + const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + std::pair<SDValue, SDValue> CallInfo = + TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, + 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, + /*isReturnValueUsed=*/true, + Callee, Args, DAG, Node->getDebugLoc()); + + return CallInfo; +} + /// PromoteTargetBoolean - Promote the given target boolean to a target boolean /// of the given type. A target boolean is an integer value, not necessarily of /// type i1, the bits of which conform to getBooleanContents. diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index d560292..3f81bbb 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -99,7 +99,7 @@ private: return SoftenFloat; return ExpandFloat; } - + if (VT.getVectorNumElements() == 1) return ScalarizeVector; return SplitVector; @@ -192,6 +192,10 @@ private: SDValue MakeLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, unsigned NumOps, bool isSigned, DebugLoc dl); + std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, + SDNode *Node, bool isSigned); + std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); + SDValue PromoteTargetBoolean(SDValue Bool, EVT VT); void ReplaceValueWith(SDValue From, SDValue To); void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi); @@ -244,7 +248,7 @@ private: SDValue PromoteIntRes_AssertZext(SDNode *N); SDValue PromoteIntRes_Atomic1(AtomicSDNode *N); SDValue PromoteIntRes_Atomic2(AtomicSDNode *N); - SDValue PromoteIntRes_BIT_CONVERT(SDNode *N); + SDValue PromoteIntRes_BITCAST(SDNode *N); SDValue PromoteIntRes_BSWAP(SDNode *N); SDValue PromoteIntRes_BUILD_PAIR(SDNode *N); SDValue PromoteIntRes_Constant(SDNode *N); @@ -278,7 +282,7 @@ private: // Integer Operand Promotion. bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo); SDValue PromoteIntOp_ANY_EXTEND(SDNode *N); - SDValue PromoteIntOp_BIT_CONVERT(SDNode *N); + SDValue PromoteIntOp_BITCAST(SDNode *N); SDValue PromoteIntOp_BUILD_PAIR(SDNode *N); SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo); @@ -344,6 +348,7 @@ private: void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_UMULSMULO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue &Lo, SDValue &Hi); @@ -352,7 +357,7 @@ private: // Integer Operand Expansion. 
bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo); - SDValue ExpandIntOp_BIT_CONVERT(SDNode *N); + SDValue ExpandIntOp_BITCAST(SDNode *N); SDValue ExpandIntOp_BR_CC(SDNode *N); SDValue ExpandIntOp_BUILD_VECTOR(SDNode *N); SDValue ExpandIntOp_EXTRACT_ELEMENT(SDNode *N); @@ -387,7 +392,7 @@ private: // Result Float to Integer Conversion. void SoftenFloatResult(SDNode *N, unsigned OpNo); - SDValue SoftenFloatRes_BIT_CONVERT(SDNode *N); + SDValue SoftenFloatRes_BITCAST(SDNode *N); SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N); SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N); @@ -426,7 +431,7 @@ private: // Operand Float to Integer Conversion. bool SoftenFloatOperand(SDNode *N, unsigned OpNo); - SDValue SoftenFloatOp_BIT_CONVERT(SDNode *N); + SDValue SoftenFloatOp_BITCAST(SDNode *N); SDValue SoftenFloatOp_BR_CC(SDNode *N); SDValue SoftenFloatOp_FP_ROUND(SDNode *N); SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N); @@ -515,7 +520,7 @@ private: SDValue ScalarizeVecRes_UnaryOp(SDNode *N); SDValue ScalarizeVecRes_InregOp(SDNode *N); - SDValue ScalarizeVecRes_BIT_CONVERT(SDNode *N); + SDValue ScalarizeVecRes_BITCAST(SDNode *N); SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N); SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N); SDValue ScalarizeVecRes_FPOWI(SDNode *N); @@ -532,7 +537,7 @@ private: // Vector Operand Scalarization: <1 x ty> -> ty. bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); - SDValue ScalarizeVecOp_BIT_CONVERT(SDNode *N); + SDValue ScalarizeVecOp_BITCAST(SDNode *N); SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo); @@ -557,7 +562,7 @@ private: void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -577,11 +582,12 @@ private: bool SplitVectorOperand(SDNode *N, unsigned OpNo); SDValue SplitVecOp_UnaryOp(SDNode *N); - SDValue SplitVecOp_BIT_CONVERT(SDNode *N); + SDValue SplitVecOp_BITCAST(SDNode *N); SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N); + SDValue SplitVecOp_FP_ROUND(SDNode *N); //===--------------------------------------------------------------------===// // Vector Widening Support: LegalizeVectorTypes.cpp @@ -603,7 +609,7 @@ private: // Widen Vector Result Promotion. void WidenVectorResult(SDNode *N, unsigned ResNo); - SDValue WidenVecRes_BIT_CONVERT(SDNode* N); + SDValue WidenVecRes_BITCAST(SDNode* N); SDValue WidenVecRes_BUILD_VECTOR(SDNode* N); SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N); SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N); @@ -628,7 +634,7 @@ private: // Widen Vector Operand. bool WidenVectorOperand(SDNode *N, unsigned ResNo); - SDValue WidenVecOp_BIT_CONVERT(SDNode *N); + SDValue WidenVecOp_BITCAST(SDNode *N); SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N); SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); @@ -721,7 +727,7 @@ private: } // Generic Result Expansion. 
- void ExpandRes_BIT_CONVERT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_BITCAST (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandRes_BUILD_PAIR (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandRes_EXTRACT_ELEMENT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -729,7 +735,7 @@ private: void ExpandRes_VAARG (SDNode *N, SDValue &Lo, SDValue &Hi); // Generic Operand Expansion. - SDValue ExpandOp_BIT_CONVERT (SDNode *N); + SDValue ExpandOp_BITCAST (SDNode *N); SDValue ExpandOp_BUILD_VECTOR (SDNode *N); SDValue ExpandOp_EXTRACT_ELEMENT (SDNode *N); SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 9c2b1d9..a75ae87 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -32,8 +32,7 @@ using namespace llvm; // little/big-endian machines, followed by the Hi/Lo part. This means that // they cannot be used as is on vectors, for which Lo is always stored first. -void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, - SDValue &Hi) { +void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); SDValue InOp = N->getOperand(0); @@ -50,31 +49,31 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, case SoftenFloat: // Convert the integer operand instead. SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; case ExpandInteger: case ExpandFloat: // Convert the expanded pieces of the input. GetExpandedOp(InOp, Lo, Hi); - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; case SplitVector: GetSplitVector(InOp, Lo, Hi); if (TLI.isBigEndian()) std::swap(Lo, Hi); - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; case ScalarizeVector: // Convert the element instead. 
SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi); - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; case WidenVector: { - assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BIT_CONVERT"); + assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST"); InOp = GetWidenedVector(InOp); EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), InVT.getVectorNumElements()/2); @@ -84,19 +83,19 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, DAG.getIntPtrConstant(InNVT.getVectorNumElements())); if (TLI.isBigEndian()) std::swap(Lo, Hi); - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; } } if (InVT.isVector() && OutVT.isInteger()) { - // Handle cases like i64 = BIT_CONVERT v1i64 on x86, where the operand + // Handle cases like i64 = BITCAST v1i64 on x86, where the operand // is legal but the result is not. EVT NVT = EVT::getVectorVT(*DAG.getContext(), NOutVT, 2); if (isTypeLegal(NVT)) { - SDValue CastInOp = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, InOp); + SDValue CastInOp = DAG.getNode(ISD::BITCAST, dl, NVT, InOp); Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp, DAG.getIntPtrConstant(0)); Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp, @@ -119,14 +118,14 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, getTypeForEVT(*DAG.getContext())); SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment); int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); - const Value *SV = PseudoSourceValue::getFixedStack(SPFI); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI); // Emit a store to the stack slot. - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0, + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo, false, false, 0); // Load the first half from the stack slot. - Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0, false, false, 0); + Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, false, false, 0); // Increment the pointer to the other half. unsigned IncrementSize = NOutVT.getSizeInBits() / 8; @@ -134,7 +133,8 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, DAG.getIntPtrConstant(IncrementSize)); // Load the second half from the stack slot. - Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, IncrementSize, false, + Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, + PtrInfo.getWithOffset(IncrementSize), false, false, MinAlign(Alignment, IncrementSize)); // Handle endianness of the load. 
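The repeated std::swap(Lo, Hi) calls above encode one convention: the legalizer always names Lo the low-order bits of the expanded value, while on a big-endian target the high-order half is the one stored at the lower address. A standalone C++ illustration of the rule, independent of the DAG types:

    #include <cstdint>
    #include <utility>

    // Split a 64-bit value into halves named by significance (Lo = low bits),
    // then reorder them into memory order for the given endianness.
    static std::pair<uint32_t, uint32_t> splitForMemory(uint64_t V,
                                                        bool BigEndian) {
      uint32_t Lo = static_cast<uint32_t>(V);        // low-order half
      uint32_t Hi = static_cast<uint32_t>(V >> 32);  // high-order half
      if (BigEndian)
        std::swap(Lo, Hi);  // big-endian: high half goes at the lower address
      return std::make_pair(Lo, Hi);  // (lower address, higher address)
    }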
@@ -172,7 +172,7 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, EVT OldVT = N->getValueType(0); EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT); - SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl, + SDValue NewVec = DAG.getNode(ISD::BITCAST, dl, EVT::getVectorVT(*DAG.getContext(), NewVT, 2*OldElts), OldVec); @@ -204,22 +204,21 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); - int SVOffset = LD->getSrcValueOffset(); unsigned Alignment = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); - Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset, + Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), isVolatile, isNonTemporal, Alignment); // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits() / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); - Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), - SVOffset+IncrementSize, + Hi = DAG.getLoad(NVT, dl, Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); @@ -262,14 +261,14 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { // Generic Operand Expansion. //===--------------------------------------------------------------------===// -SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) { DebugLoc dl = N->getDebugLoc(); if (N->getValueType(0).isVector()) { // An illegal expanding type is being converted to a legal vector type. // Make a two element vector out of the expanded parts and convert that // instead, but only if the new vector type is legal (otherwise there // is no point, and it might create expansion loops). For example, on - // x86 this turns v1i64 = BIT_CONVERT i64 into v1i64 = BIT_CONVERT v2i32. + // x86 this turns v1i64 = BITCAST i64 into v1i64 = BITCAST v2i32. EVT OVT = N->getOperand(0).getValueType(); EVT NVT = EVT::getVectorVT(*DAG.getContext(), TLI.getTypeToTransformTo(*DAG.getContext(), OVT), @@ -283,7 +282,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) { std::swap(Parts[0], Parts[1]); SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2); - return DAG.getNode(ISD::BIT_CONVERT, dl, N->getValueType(0), Vec); + return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec); } } @@ -322,7 +321,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { &NewElts[0], NewElts.size()); // Convert the new vector to the old vector type. - return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec); + return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec); } SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) { @@ -347,7 +346,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { // Bitconvert to a vector of twice the length with elements of the expanded // type, insert the expanded vector elements, and then convert back. 
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEVT, NumElts*2); - SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl, + SDValue NewVec = DAG.getNode(ISD::BITCAST, dl, NewVecVT, N->getOperand(0)); SDValue Lo, Hi; @@ -363,7 +362,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx); // Convert the new vector to the old vector type. - return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec); + return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec); } SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) { @@ -390,7 +389,6 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { St->getValue().getValueType()); SDValue Chain = St->getChain(); SDValue Ptr = St->getBasePtr(); - int SVOffset = St->getSrcValueOffset(); unsigned Alignment = St->getAlignment(); bool isVolatile = St->isVolatile(); bool isNonTemporal = St->isNonTemporal(); @@ -404,14 +402,14 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { if (TLI.isBigEndian()) std::swap(Lo, Hi); - Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getSrcValue(), SVOffset, + Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), isVolatile, isNonTemporal, Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!"); - Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getSrcValue(), - SVOffset + IncrementSize, + Hi = DAG.getStore(Chain, dl, Hi, Ptr, + St->getPointerInfo().getWithOffset(IncrementSize), isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 621c087..167dbe0 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -241,14 +241,14 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { for (unsigned j = 0; j != Op.getNumOperands(); ++j) { if (Op.getOperand(j).getValueType().isVector()) - Operands[j] = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Op.getOperand(j)); + Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j)); else Operands[j] = Op.getOperand(j); } Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size()); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Op); + return DAG.getNode(ISD::BITCAST, dl, VT, Op); } SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 93bc2d0..182f8fc 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -46,7 +46,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { #endif llvm_unreachable("Do not know how to scalarize the result of this operator!"); - case ISD::BIT_CONVERT: R = ScalarizeVecRes_BIT_CONVERT(N); break; + case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break; case ISD::BUILD_VECTOR: R = N->getOperand(0); break; case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break; case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; @@ -122,9 +122,9 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) { LHS.getValueType(), LHS, RHS); } -SDValue 
DAGTypeLegalizer::ScalarizeVecRes_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) { EVT NewVT = N->getValueType(0).getVectorElementType(); - return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), NewVT, N->getOperand(0)); } @@ -171,7 +171,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { N->getDebugLoc(), N->getChain(), N->getBasePtr(), DAG.getUNDEF(N->getBasePtr().getValueType()), - N->getSrcValue(), N->getSrcValueOffset(), + N->getPointerInfo(), N->getMemoryVT().getVectorElementType(), N->isVolatile(), N->isNonTemporal(), N->getOriginalAlignment()); @@ -296,8 +296,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { dbgs() << "\n"; #endif llvm_unreachable("Do not know how to scalarize this operator's operand!"); - case ISD::BIT_CONVERT: - Res = ScalarizeVecOp_BIT_CONVERT(N); + case ISD::BITCAST: + Res = ScalarizeVecOp_BITCAST(N); break; case ISD::CONCAT_VECTORS: Res = ScalarizeVecOp_CONCAT_VECTORS(N); @@ -326,11 +326,11 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { return false; } -/// ScalarizeVecOp_BIT_CONVERT - If the value to convert is a vector that needs +/// ScalarizeVecOp_BITCAST - If the value to convert is a vector that needs /// to be scalarized, it must be <1 x ty>. Convert the element instead. -SDValue DAGTypeLegalizer::ScalarizeVecOp_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) { SDValue Elt = GetScalarizedVector(N->getOperand(0)); - return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0), Elt); } @@ -365,14 +365,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ if (N->isTruncatingStore()) return DAG.getTruncStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), - N->getBasePtr(), - N->getSrcValue(), N->getSrcValueOffset(), + N->getBasePtr(), N->getPointerInfo(), N->getMemoryVT().getVectorElementType(), N->isVolatile(), N->isNonTemporal(), N->getAlignment()); return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), - N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(), + N->getBasePtr(), N->getPointerInfo(), N->isVolatile(), N->isNonTemporal(), N->getOriginalAlignment()); } @@ -407,7 +406,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; - case ISD::BIT_CONVERT: SplitVecRes_BIT_CONVERT(N, Lo, Hi); break; + case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break; case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break; case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break; case ISD::CONVERT_RNDSAT: SplitVecRes_CONVERT_RNDSAT(N, Lo, Hi); break; @@ -497,8 +496,8 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi); } -void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, - SDValue &Hi) { +void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, + SDValue &Hi) { // We know the result is a vector. The input may be either a vector or a // scalar value. 
EVT LoVT, HiVT; @@ -526,8 +525,8 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, GetExpandedOp(InOp, Lo, Hi); if (TLI.isBigEndian()) std::swap(Lo, Hi); - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); return; } break; @@ -535,8 +534,8 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, // If the input is a vector that needs to be split, convert each split // piece of the input now. GetSplitVector(InOp, Lo, Hi); - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); return; } @@ -550,8 +549,8 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, if (TLI.isBigEndian()) std::swap(Lo, Hi); - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); } void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, @@ -626,9 +625,9 @@ void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), LoVT.getVectorNumElements()); VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0)); VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); break; } } @@ -646,16 +645,15 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Vec = N->getOperand(0); SDValue Idx = N->getOperand(1); - EVT IdxVT = Idx.getValueType(); DebugLoc dl = N->getDebugLoc(); EVT LoVT, HiVT; GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); - Idx = DAG.getNode(ISD::ADD, dl, IdxVT, Idx, - DAG.getConstant(LoVT.getVectorNumElements(), IdxVT)); - Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, Idx); + uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, + DAG.getIntPtrConstant(IdxVal + LoVT.getVectorNumElements())); } void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, @@ -705,8 +703,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, EVT VecVT = Vec.getValueType(); EVT EltVT = VecVT.getVectorElementType(); SDValue StackPtr = DAG.CreateStackTemporary(VecVT); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0, - false, false, 0); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, + MachinePointerInfo(), false, false, 0); // Store the new element. This may be larger than the vector element type, // so use a truncating store. @@ -714,11 +712,11 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, const Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); unsigned Alignment = TLI.getTargetData()->getPrefTypeAlignment(VecType); - Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT, + Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT, false, false, 0); // Load the Lo part from the stack slot. 
- Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, NULL, 0, + Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), false, false, 0); // Increment the pointer to the other part. @@ -727,8 +725,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, DAG.getIntPtrConstant(IncrementSize)); // Load the Hi part from the stack slot. - Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, NULL, 0, false, - false, MinAlign(Alignment, IncrementSize)); + Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), + false, false, MinAlign(Alignment, IncrementSize)); } void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, @@ -751,8 +749,6 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue Ch = LD->getChain(); SDValue Ptr = LD->getBasePtr(); SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); - const Value *SV = LD->getSrcValue(); - int SVOffset = LD->getSrcValueOffset(); EVT MemoryVT = LD->getMemoryVT(); unsigned Alignment = LD->getOriginalAlignment(); bool isVolatile = LD->isVolatile(); @@ -762,14 +758,15 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, - SV, SVOffset, LoMemVT, isVolatile, isNonTemporal, Alignment); + LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal, + Alignment); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); - SVOffset += IncrementSize; Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, - SV, SVOffset, HiMemVT, isVolatile, isNonTemporal, Alignment); + LD->getPointerInfo().getWithOffset(IncrementSize), + HiMemVT, isVolatile, isNonTemporal, Alignment); // Build a factor node to remember that this load is independent of the // other one. @@ -980,10 +977,11 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { #endif llvm_unreachable("Do not know how to split this operator's operand!"); - case ISD::BIT_CONVERT: Res = SplitVecOp_BIT_CONVERT(N); break; + case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break; case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break; + case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break; case ISD::STORE: Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo); break; @@ -995,6 +993,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::FP_TO_UINT: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: + case ISD::FP_EXTEND: + case ISD::FTRUNC: case ISD::TRUNCATE: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: @@ -1036,8 +1036,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); } -SDValue DAGTypeLegalizer::SplitVecOp_BIT_CONVERT(SDNode *N) { - // For example, i64 = BIT_CONVERT v4i16 on alpha. Typically the vector will +SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) { + // For example, i64 = BITCAST v4i16 on alpha. Typically the vector will // end up being split all the way down to individual components. Convert the // split pieces into integers and reassemble. 
SDValue Lo, Hi; @@ -1048,13 +1048,12 @@ SDValue DAGTypeLegalizer::SplitVecOp_BIT_CONVERT(SDNode *N) { GetSplitVector(N->getOperand(0), Lo, Hi); if (TLI.isBigEndian()) std::swap(Lo, Hi); - return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0), + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0), JoinIntegers(Lo, Hi)); } SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { - // We know that the extracted result type is legal. For now, assume the index - // is a constant. + // We know that the extracted result type is legal. EVT SubVT = N->getValueType(0); SDValue Idx = N->getOperand(1); DebugLoc dl = N->getDebugLoc(); @@ -1099,15 +1098,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { EVT EltVT = VecVT.getVectorElementType(); DebugLoc dl = N->getDebugLoc(); SDValue StackPtr = DAG.CreateStackTemporary(VecVT); - int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); - const Value *SV = PseudoSourceValue::getFixedStack(SPFI); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, SV, 0, - false, false, 0); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, + MachinePointerInfo(), false, false, 0); // Load back the required element. StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); - return DAG.getExtLoad(ISD::EXTLOAD, N->getValueType(0), dl, Store, StackPtr, - SV, 0, EltVT, false, false, 0); + return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, + MachinePointerInfo(), EltVT, false, false, 0); } SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -1118,7 +1115,6 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { bool isTruncating = N->isTruncatingStore(); SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); - int SVOffset = N->getSrcValueOffset(); EVT MemoryVT = N->getMemoryVT(); unsigned Alignment = N->getOriginalAlignment(); bool isVol = N->isVolatile(); @@ -1132,22 +1128,23 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned IncrementSize = LoMemVT.getSizeInBits()/8; if (isTruncating) - Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset, + Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), LoMemVT, isVol, isNT, Alignment); else - Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset, + Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), isVol, isNT, Alignment); // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); - SVOffset += IncrementSize; if (isTruncating) - Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset, + Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), HiMemVT, isVol, isNT, Alignment); else - Hi = DAG.getStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset, + Hi = DAG.getStore(Ch, DL, Hi, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), isVol, isNT, Alignment); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); @@ -1155,7 +1152,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { DebugLoc DL = N->getDebugLoc(); - + // The input operands must all have the same type, and we know the // result type is valid. Convert this to a buildvector which extracts all the // input elements.
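SplitVecRes_INSERT_VECTOR_ELT and SplitVecOp_EXTRACT_VECTOR_ELT above share one fallback idiom: spill the whole vector to a CreateStackTemporary slot, touch the element through the slot, and load the data back. A plain C++ analogue of the round trip, with illustrative names, assuming 32-bit int and 64-bit long long:

    #include <cstring>

    struct V4I32 { int Elts[4]; };  // stands in for a v4i32 DAG value

    // Copy the vector to a temporary, overwrite one element in memory, then
    // reload the value as two halves, matching the store/truncstore/load
    // sequence the legalizer emits above.
    static void insertEltViaStack(const V4I32 &Vec, int Elt, unsigned Idx,
                                  unsigned long long &Lo,
                                  unsigned long long &Hi) {
      V4I32 Slot = Vec;                            // DAG.getStore of the vector
      Slot.Elts[Idx] = Elt;                        // element store into the slot
      std::memcpy(&Lo, &Slot.Elts[0], sizeof Lo);  // load the Lo half
      std::memcpy(&Hi, &Slot.Elts[2], sizeof Hi);  // load the Hi half (offset 8)
    }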
@@ -1172,11 +1169,29 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { } } - + return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), &Elts[0], Elts.size()); } +SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) { + // The result has a legal vector type, but the input needs splitting. + EVT ResVT = N->getValueType(0); + SDValue Lo, Hi; + DebugLoc DL = N->getDebugLoc(); + GetSplitVector(N->getOperand(0), Lo, Hi); + EVT InVT = Lo.getValueType(); + + EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), + InVT.getVectorNumElements()); + + Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1)); + Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1)); + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi); +} + + //===----------------------------------------------------------------------===// // Result Vector Widening @@ -1201,7 +1216,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { #endif llvm_unreachable("Do not know how to widen the result of this operator!"); - case ISD::BIT_CONVERT: Res = WidenVecRes_BIT_CONVERT(N); break; + case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break; case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break; case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break; case ISD::CONVERT_RNDSAT: Res = WidenVecRes_CONVERT_RNDSAT(N); break; @@ -1297,7 +1312,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { EVT WidenEltVT = WidenVT.getVectorElementType(); EVT VT = WidenVT; unsigned NumElts = VT.getVectorNumElements(); - while (!TLI.isTypeSynthesizable(VT) && NumElts != 1) { + while (!TLI.isTypeLegal(VT) && NumElts != 1) { NumElts = NumElts / 2; VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); } @@ -1308,11 +1323,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { SDValue InOp2 = GetWidenedVector(N->getOperand(1)); return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2); } - + // No legal vector version so unroll the vector operation and then widen. if (NumElts == 1) return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); - + // Since the operation can trap, apply operation on the original vector. EVT MaxVT = VT; SDValue InOp1 = GetWidenedVector(N->getOperand(0)); @@ -1323,7 +1338,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { unsigned ConcatEnd = 0; // Current ConcatOps index. int Idx = 0; // Current Idx into input vectors. - // NumElts := greatest synthesizable vector size (at most WidenVT) + // NumElts := greatest legal vector size (at most WidenVT) // while (orig. 
vector has unhandled elements) { // take munches of size NumElts from the beginning and add to ConcatOps // NumElts := next smaller supported vector size or 1 @@ -1341,13 +1356,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { do { NumElts = NumElts / 2; VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); - } while (!TLI.isTypeSynthesizable(VT) && NumElts != 1); + } while (!TLI.isTypeLegal(VT) && NumElts != 1); if (NumElts == 1) { for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { - SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, + SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp1, DAG.getIntPtrConstant(Idx)); - SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, + SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp2, DAG.getIntPtrConstant(Idx)); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, EOp1, EOp2); @@ -1378,7 +1393,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { do { NextSize *= 2; NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize); - } while (!TLI.isTypeSynthesizable(NextVT)); + } while (!TLI.isTypeLegal(NextVT)); if (!VT.isVector()) { // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT @@ -1415,7 +1430,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { if (VT == WidenVT) return ConcatOps[0]; } - + // add undefs of size MaxVT until ConcatOps grows to length of WidenVT unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements(); if (NumOps != ConcatEnd ) { @@ -1428,7 +1443,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { SDValue InOp = N->getOperand(0); - DebugLoc dl = N->getDebugLoc(); + DebugLoc DL = N->getDebugLoc(); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); @@ -1444,11 +1459,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { InOp = GetWidenedVector(N->getOperand(0)); InVT = InOp.getValueType(); InVTNumElts = InVT.getVectorNumElements(); - if (InVTNumElts == WidenNumElts) - return DAG.getNode(Opcode, dl, WidenVT, InOp); + if (InVTNumElts == WidenNumElts) { + if (N->getNumOperands() == 1) + return DAG.getNode(Opcode, DL, WidenVT, InOp); + return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1)); + } } - if (TLI.isTypeSynthesizable(InWidenVT)) { + if (TLI.isTypeLegal(InWidenVT)) { // Because the result and the input are different vector types, widening // the result could create a legal type but widening the input might make // it an illegal type that might lead to repeatedly splitting the input @@ -1462,16 +1480,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { SDValue UndefVal = DAG.getUNDEF(InVT); for (unsigned i = 1; i != NumConcat; ++i) Ops[i] = UndefVal; - return DAG.getNode(Opcode, dl, WidenVT, - DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, - &Ops[0], NumConcat)); + SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, + &Ops[0], NumConcat); + if (N->getNumOperands() == 1) + return DAG.getNode(Opcode, DL, WidenVT, InVec); + return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1)); } if (InVTNumElts % WidenNumElts == 0) { + SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, + InOp, DAG.getIntPtrConstant(0)); // Extract the input and convert the shorten input vector. 
- return DAG.getNode(Opcode, dl, WidenVT, - DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, - InOp, DAG.getIntPtrConstant(0))); + if (N->getNumOperands() == 1) + return DAG.getNode(Opcode, DL, WidenVT, InVal); + return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1)); } } @@ -1480,16 +1502,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { EVT EltVT = WidenVT.getVectorElementType(); unsigned MinElts = std::min(InVTNumElts, WidenNumElts); unsigned i; - for (i=0; i < MinElts; ++i) - Ops[i] = DAG.getNode(Opcode, dl, EltVT, - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getIntPtrConstant(i))); + for (i=0; i < MinElts; ++i) { + SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, + DAG.getIntPtrConstant(i)); + if (N->getNumOperands() == 1) + Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val); + else + Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1)); + } SDValue UndefVal = DAG.getUNDEF(EltVT); for (; i < WidenNumElts; ++i) Ops[i] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); + return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, &Ops[0], WidenNumElts); } SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) { @@ -1536,7 +1562,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) { WidenVT, WidenLHS, DAG.getValueType(ExtVT)); } -SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { SDValue InOp = N->getOperand(0); EVT InVT = InOp.getValueType(); EVT VT = N->getValueType(0); @@ -1555,7 +1581,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { InOp = GetPromotedInteger(InOp); InVT = InOp.getValueType(); if (WidenVT.bitsEq(InVT)) - return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp); + return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp); break; case SoftenFloat: case ExpandInteger: @@ -1570,13 +1596,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { InVT = InOp.getValueType(); if (WidenVT.bitsEq(InVT)) // The input widens to the same size. Convert to the widened value. - return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp); + return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp); break; } unsigned WidenSize = WidenVT.getSizeInBits(); unsigned InSize = InVT.getSizeInBits(); - if (WidenSize % InSize == 0) { + // x86mmx is not an acceptable vector element type, so don't try. + if (WidenSize % InSize == 0 && InVT != MVT::x86mmx) { // Determine new input vector type. The new input vector type will use // the same element type (if it's a vector) or use the input type as a // vector. It is the same size as the type to widen to.
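Stepping back to WidenVecRes_Binary above: its comment sketches the loop as "take munches of size NumElts ... NumElts := next smaller supported vector size or 1". A standalone distillation of that chunking, assuming only power-of-two vector widths up to Max are legal:

    #include <vector>

    // For 7 leftover elements with Max = 4 this yields chunks of 4, 2, 1,
    // mirroring how ConcatOps is filled one munch at a time.
    static std::vector<unsigned> munchChunks(unsigned NumElts, unsigned Max) {
      std::vector<unsigned> Chunks;
      unsigned Cur = Max;
      while (NumElts != 0) {
        while (Cur > NumElts)
          Cur /= 2;              // next smaller supported size, down to 1
        Chunks.push_back(Cur);   // take a munch of size Cur
        NumElts -= Cur;
      }
      return Chunks;
    }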
@@ -1590,7 +1617,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts); } - if (TLI.isTypeSynthesizable(NewInVT)) { + if (TLI.isTypeLegal(NewInVT)) { // Because the result and the input are different vector types, widening // the result could create a legal type but widening the input might make // it an illegal type that might lead to repeatedly splitting the input @@ -1609,7 +1636,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { else NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, &Ops[0], NewNumElts); - return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, NewVec); + return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec); } } @@ -1730,7 +1757,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { SatOp, CvtCode); } - if (TLI.isTypeSynthesizable(InWidenVT)) { + if (TLI.isTypeLegal(InWidenVT)) { // Because the result and the input are different vector types, widening // the result could create a legal type but widening the input might make // it an illegal type that might lead to repeatedly splitting the input @@ -1794,39 +1821,25 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { EVT InVT = InOp.getValueType(); - ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx); - if (CIdx) { - unsigned IdxVal = CIdx->getZExtValue(); - // Check if we can just return the input vector after widening. - if (IdxVal == 0 && InVT == WidenVT) - return InOp; - - // Check if we can extract from the vector. - unsigned InNumElts = InVT.getVectorNumElements(); - if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts) - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx); - } + // Check if we can just return the input vector after widening. + uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + if (IdxVal == 0 && InVT == WidenVT) + return InOp; + + // Check if we can extract from the vector. + unsigned InNumElts = InVT.getVectorNumElements(); + if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts) + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx); // We could try widening the input to the right length but for now, extract // the original elements, fill the rest with undefs and build a vector. 
SmallVector<SDValue, 16> Ops(WidenNumElts); EVT EltVT = VT.getVectorElementType(); - EVT IdxVT = Idx.getValueType(); unsigned NumElts = VT.getVectorNumElements(); unsigned i; - if (CIdx) { - unsigned IdxVal = CIdx->getZExtValue(); - for (i=0; i < NumElts; ++i) - Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(IdxVal+i, IdxVT)); - } else { - Ops[0] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, Idx); - for (i=1; i < NumElts; ++i) { - SDValue NewIdx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, - DAG.getConstant(i, IdxVT)); - Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, NewIdx); - } - } + for (i=0; i < NumElts; ++i) + Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getIntPtrConstant(IdxVal+i)); SDValue UndefVal = DAG.getUNDEF(EltVT); for (; i < WidenNumElts; ++i) @@ -1985,7 +1998,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) { #endif llvm_unreachable("Do not know how to widen this operator's operand!"); - case ISD::BIT_CONVERT: Res = WidenVecOp_BIT_CONVERT(N); break; + case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break; case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; @@ -2044,7 +2057,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); } -SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { EVT VT = N->getValueType(0); SDValue InOp = GetWidenedVector(N->getOperand(0)); EVT InWidenVT = InOp.getValueType(); @@ -2053,11 +2066,12 @@ SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) { // Check if we can convert between two legal vector types and extract. unsigned InWidenSize = InWidenVT.getSizeInBits(); unsigned Size = VT.getSizeInBits(); - if (InWidenSize % Size == 0 && !VT.isVector()) { + // x86mmx is not an acceptable vector element type, so don't try. 
+ if (InWidenSize % Size == 0 && !VT.isVector() && VT != MVT::x86mmx) { unsigned NewNumElts = InWidenSize / Size; EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts); - if (TLI.isTypeSynthesizable(NewVT)) { - SDValue BitOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, InOp); + if (TLI.isTypeLegal(NewVT)) { + SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, DAG.getIntPtrConstant(0)); } @@ -2146,7 +2160,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, if (Width == WidenEltWidth) return RetVT; - // See if there is larger legal integer than the element type to load/store + // See if there is larger legal integer than the element type to load/store unsigned VT; for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE; VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) { @@ -2154,7 +2168,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, unsigned MemVTWidth = MemVT.getSizeInBits(); if (MemVT.getSizeInBits() <= WidenEltWidth) break; - if (TLI.isTypeSynthesizable(MemVT) && (WidenWidth % MemVTWidth) == 0 && + if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { RetVT = MemVT; @@ -2168,7 +2182,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) { EVT MemVT = (MVT::SimpleValueType) VT; unsigned MemVTWidth = MemVT.getSizeInBits(); - if (TLI.isTypeSynthesizable(MemVT) && WidenEltVT == MemVT.getVectorElementType() && + if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() && (WidenWidth % MemVTWidth) == 0 && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { @@ -2201,7 +2215,7 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, if (NewLdTy != LdTy) { NumElts = Width / NewLdTy.getSizeInBits(); NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts); - VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp); + VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp); // Readjust position and vector position based on new load type Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits(); LdTy = NewLdTy; @@ -2209,11 +2223,11 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i], DAG.getIntPtrConstant(Idx++)); } - return DAG.getNode(ISD::BIT_CONVERT, dl, VecTy, VecOp); + return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp); } -SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, - LoadSDNode * LD) { +SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain, + LoadSDNode *LD) { // The strategy assumes that we can efficiently load powers of two widths. 
// The routine chops the vector into the largest vector loads with the same // element type or scalar loads and then recombines them into the widened vector @@ -2228,11 +2242,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, // Load information SDValue Chain = LD->getChain(); SDValue BasePtr = LD->getBasePtr(); - int SVOffset = LD->getSrcValueOffset(); unsigned Align = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); - const Value *SV = LD->getSrcValue(); int LdWidth = LdVT.getSizeInBits(); int WidthDiff = WidenWidth - LdWidth; // Difference @@ -2241,7 +2253,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, // Find the vector type that can be loaded from. EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); int NewVTWidth = NewVT.getSizeInBits(); - SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV, SVOffset, + SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(), isVolatile, isNonTemporal, Align); LdChain.push_back(LdOp.getValue(1)); @@ -2251,7 +2263,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, unsigned NumElts = WidenWidth / NewVTWidth; EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); - return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, VecOp); + return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); } if (NewVT == WidenVT) return LdOp; @@ -2286,8 +2298,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, NewVTWidth = NewVT.getSizeInBits(); } - SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV, - SVOffset+Offset, isVolatile, + SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, + LD->getPointerInfo().getWithOffset(Offset), + isVolatile, isNonTemporal, MinAlign(Align, Increment)); LdChain.push_back(LdOp.getValue(1)); LdOps.push_back(LdOp); @@ -2300,7 +2313,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, if (!LdOps[0].getValueType().isVector()) // All the loads are scalar loads. return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End); - + // If the load contains vectors, build the vector using concat vector. // All of the vector loads are a power of 2 in width, and the scalar loads // can be combined to make a power of 2 vector.
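A standalone walk-through of the chopping strategy just described, assuming the legal memory widths are 64/32/16/8 bits. MinAlign, the helper from llvm/Support/MathExtras.h used throughout these routines, is sketched alongside, since each piece after the first can only claim the alignment compatible with its byte offset:

    #include <utility>
    #include <vector>

    // Largest power of two dividing both A and B (the lowest set bit of A|B),
    // e.g. minAlign(16, 4) == 4. A sketch of MinAlign's effect, not the header.
    static inline unsigned minAlign(unsigned A, unsigned B) {
      return (A | B) & -(A | B);
    }

    // Chop a load of LdBits (a multiple of 8) into legal power-of-two pieces,
    // pairing each piece with the alignment usable at its running offset.
    // e.g. 96 bits at alignment 16 -> (64 bits @ 16), (32 bits @ 8).
    static std::vector<std::pair<unsigned, unsigned> >
    chopLoad(unsigned LdBits, unsigned Align) {
      static const unsigned Mem[] = {64, 32, 16, 8};
      std::vector<std::pair<unsigned, unsigned> > Pieces;
      unsigned OffsetBytes = 0;
      while (LdBits != 0) {
        for (unsigned i = 0; i != 4; ++i) {
          if (Mem[i] <= LdBits) {
            unsigned A = OffsetBytes ? minAlign(Align, OffsetBytes) : Align;
            Pieces.push_back(std::make_pair(Mem[i], A));
            LdBits -= Mem[i];
            OffsetBytes += Mem[i] / 8;
            break;
          }
        }
      }
      return Pieces;
    }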
@@ -2362,11 +2375,9 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, // Load information SDValue Chain = LD->getChain(); SDValue BasePtr = LD->getBasePtr(); - int SVOffset = LD->getSrcValueOffset(); unsigned Align = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); - const Value *SV = LD->getSrcValue(); EVT EltVT = WidenVT.getVectorElementType(); EVT LdEltVT = LdVT.getVectorElementType(); @@ -2376,16 +2387,17 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, unsigned WidenNumElts = WidenVT.getVectorNumElements(); SmallVector<SDValue, 16> Ops(WidenNumElts); unsigned Increment = LdEltVT.getSizeInBits() / 8; - Ops[0] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, BasePtr, SV, SVOffset, + Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, + LD->getPointerInfo(), LdEltVT, isVolatile, isNonTemporal, Align); LdChain.push_back(Ops[0].getValue(1)); unsigned i = 0, Offset = Increment; for (i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getIntPtrConstant(Offset)); - Ops[i] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, NewBasePtr, SV, - SVOffset + Offset, LdEltVT, isVolatile, - isNonTemporal, Align); + Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, + LD->getPointerInfo().getWithOffset(Offset), LdEltVT, + isVolatile, isNonTemporal, Align); LdChain.push_back(Ops[i].getValue(1)); } @@ -2405,8 +2417,6 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, // element type or scalar stores. SDValue Chain = ST->getChain(); SDValue BasePtr = ST->getBasePtr(); - const Value *SV = ST->getSrcValue(); - int SVOffset = ST->getSrcValueOffset(); unsigned Align = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); @@ -2433,9 +2443,9 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, do { SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp, DAG.getIntPtrConstant(Idx)); - StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV, - SVOffset + Offset, isVolatile, - isNonTemporal, + StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, + ST->getPointerInfo().getWithOffset(Offset), + isVolatile, isNonTemporal, MinAlign(Align, Offset))); StWidth -= NewVTWidth; Offset += Increment; @@ -2447,15 +2457,16 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, // Cast the vector to the scalar type we can store unsigned NumElts = ValWidth / NewVTWidth; EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); - SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp); + SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp); // Readjust index position based on new vector type Idx = Idx * ValEltWidth / NewVTWidth; do { SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp, DAG.getIntPtrConstant(Idx++)); - StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV, - SVOffset + Offset, isVolatile, - isNonTemporal, MinAlign(Align, Offset))); + StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, + ST->getPointerInfo().getWithOffset(Offset), + isVolatile, isNonTemporal, + MinAlign(Align, Offset))); StWidth -= NewVTWidth; Offset += Increment; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, @@ -2474,14 +2485,12 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, // and then store it. 
Instead, we extract each element and then store it. SDValue Chain = ST->getChain(); SDValue BasePtr = ST->getBasePtr(); - const Value *SV = ST->getSrcValue(); - int SVOffset = ST->getSrcValueOffset(); unsigned Align = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); SDValue ValOp = GetWidenedVector(ST->getValue()); DebugLoc dl = ST->getDebugLoc(); - + EVT StVT = ST->getMemoryVT(); EVT ValVT = ValOp.getValueType(); @@ -2499,8 +2508,8 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, unsigned NumElts = StVT.getVectorNumElements(); SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, DAG.getIntPtrConstant(0)); - StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV, - SVOffset, StEltVT, + StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, + ST->getPointerInfo(), StEltVT, isVolatile, isNonTemporal, Align)); unsigned Offset = Increment; for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { @@ -2508,9 +2517,9 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, BasePtr, DAG.getIntPtrConstant(Offset)); SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, DAG.getIntPtrConstant(0)); - StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV, - SVOffset + Offset, StEltVT, - isVolatile, isNonTemporal, + StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, + ST->getPointerInfo().getWithOffset(Offset), + StEltVT, isVolatile, isNonTemporal, MinAlign(Align, Offset))); } } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index ac2d338..2dcb229 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -16,7 +16,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Support/DebugLoc.h" -#include "llvm/System/DataTypes.h" +#include "llvm/Support/DataTypes.h" namespace llvm { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index fae2729..e3da208 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -205,7 +205,7 @@ void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled /// successors to the newly created node. 
SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { - if (SU->getNode()->getFlaggedNode()) + if (SU->getNode()->getGluedNode()) return NULL; SDNode *N = SU->getNode(); @@ -216,7 +216,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { bool TryUnfold = false; for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { EVT VT = N->getValueType(i); - if (VT == MVT::Flag) + if (VT == MVT::Glue) return NULL; else if (VT == MVT::Other) TryUnfold = true; @@ -224,7 +224,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { const SDValue &Op = N->getOperand(i); EVT VT = Op.getNode()->getValueType(Op.getResNo()); - if (VT == MVT::Flag) + if (VT == MVT::Glue) return NULL; } @@ -476,12 +476,12 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, } } - for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) { + for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) { if (Node->getOpcode() == ISD::INLINEASM) { // Inline asm can clobber physical defs. unsigned NumOps = Node->getNumOperands(); - if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) - --NumOps; // Ignore the flag operand. + if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) + --NumOps; // Ignore the glue operand. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp index 56f5ded..430283d 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp @@ -40,7 +40,7 @@ STATISTIC(NumStalls, "Number of pipeline stalls"); static RegisterScheduler tdListDAGScheduler("list-td", "Top-down list scheduler", createTDListDAGScheduler); - + namespace { //===----------------------------------------------------------------------===// /// ScheduleDAGList - The actual list scheduler implementation. This supports @@ -51,7 +51,7 @@ private: /// AvailableQueue - The priority queue to use for the available SUnits. /// SchedulingPriorityQueue *AvailableQueue; - + /// PendingQueue - This contains all of the instructions whose operands have /// been issued, but their results are not ready yet (due to the latency of /// the operation). Once the operands become available, the instruction is @@ -63,11 +63,12 @@ private: public: ScheduleDAGList(MachineFunction &mf, - SchedulingPriorityQueue *availqueue, - ScheduleHazardRecognizer *HR) - : ScheduleDAGSDNodes(mf), - AvailableQueue(availqueue), HazardRec(HR) { - } + SchedulingPriorityQueue *availqueue) + : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue) { + + const TargetMachine &tm = mf.getTarget(); + HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); + } ~ScheduleDAGList() { delete HazardRec; @@ -87,14 +88,14 @@ private: /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGList::Schedule() { DEBUG(dbgs() << "********** List Scheduling **********\n"); - + // Build the scheduling graph. BuildSchedGraph(NULL); AvailableQueue->initNodes(SUnits); - + ListScheduleTopDown(); - + AvailableQueue->releaseState(); } @@ -118,7 +119,7 @@ void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) { --SuccSU->NumPredsLeft; SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency()); - + // If all the node's predecessors are scheduled, this node is ready // to be scheduled. Ignore the special ExitSU node. 
if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) @@ -142,7 +143,7 @@ void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) { void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); - + Sequence.push_back(SU); assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); SU->setDepthToAtLeast(CurCycle); @@ -168,7 +169,7 @@ void ScheduleDAGList::ListScheduleTopDown() { SUnits[i].isAvailable = true; } } - + // While Available queue is not empty, grab the node with the highest // priority. If it is not ready put it back. Schedule the node. std::vector<SUnit*> NotReady; @@ -187,7 +188,7 @@ void ScheduleDAGList::ListScheduleTopDown() { assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?"); } } - + // If there are no instructions available, don't try to issue anything, and // don't advance the hazard recognizer. if (AvailableQueue->empty()) { @@ -196,24 +197,24 @@ void ScheduleDAGList::ListScheduleTopDown() { } SUnit *FoundSUnit = 0; - + bool HasNoopHazards = false; while (!AvailableQueue->empty()) { SUnit *CurSUnit = AvailableQueue->pop(); - + ScheduleHazardRecognizer::HazardType HT = - HazardRec->getHazardType(CurSUnit); + HazardRec->getHazardType(CurSUnit, 0/*no stalls*/); if (HT == ScheduleHazardRecognizer::NoHazard) { FoundSUnit = CurSUnit; break; } - + // Remember if this is a noop hazard. HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard; - + NotReady.push_back(CurSUnit); } - + // Add the nodes that aren't ready back onto the available list. if (!NotReady.empty()) { AvailableQueue->push_all(NotReady); @@ -228,7 +229,7 @@ void ScheduleDAGList::ListScheduleTopDown() { // If this is a pseudo-op node, we don't want to increment the current // cycle. if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops! - ++CurCycle; + ++CurCycle; } else if (!HasNoopHazards) { // Otherwise, we have a pipeline stall, but no other problem, just advance // the current cycle and try again. @@ -257,12 +258,8 @@ void ScheduleDAGList::ListScheduleTopDown() { // Public Constructor Functions //===----------------------------------------------------------------------===// -/// createTDListDAGScheduler - This creates a top-down list scheduler with a -/// new hazard recognizer. This scheduler takes ownership of the hazard -/// recognizer and deletes it when done. +/// createTDListDAGScheduler - This creates a top-down list scheduler. 
ScheduleDAGSDNodes * llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { - return new ScheduleDAGList(*IS->MF, - new LatencyPriorityQueue(), - IS->CreateTargetHazardRecognizer()); + return new ScheduleDAGList(*IS->MF, new LatencyPriorityQueue()); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 4c3e4e3..0b548b2 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -20,6 +20,7 @@ #include "llvm/InlineAsm.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" @@ -65,6 +66,10 @@ static RegisterScheduler "which tries to balance ILP and register pressure", createILPListDAGScheduler); +static cl::opt<bool> DisableSchedCycles( + "disable-sched-cycles", cl::Hidden, cl::init(false), + cl::desc("Disable cycle-level precision during preRA scheduling")); + namespace { //===----------------------------------------------------------------------===// /// ScheduleDAGRRList - The actual register reduction list scheduler @@ -83,31 +88,56 @@ private: /// AvailableQueue - The priority queue to use for the available SUnits. SchedulingPriorityQueue *AvailableQueue; + /// PendingQueue - This contains all of the instructions whose operands have + /// been issued, but their results are not ready yet (due to the latency of + /// the operation). Once the operands become available, the instruction is + /// added to the AvailableQueue. + std::vector<SUnit*> PendingQueue; + + /// HazardRec - The hazard recognizer to use. + ScheduleHazardRecognizer *HazardRec; + + /// CurCycle - The current scheduler state corresponds to this cycle. + unsigned CurCycle; + + /// MinAvailableCycle - Cycle of the soonest available instruction. + unsigned MinAvailableCycle; + /// LiveRegDefs - A set of physical registers and their definitions /// that are "live". These nodes must be scheduled before any other nodes that /// modify the registers can be scheduled. unsigned NumLiveRegs; std::vector<SUnit*> LiveRegDefs; - std::vector<unsigned> LiveRegCycles; + std::vector<SUnit*> LiveRegGens; /// Topo - A topological ordering for SUnits which permits fast IsReachable /// and similar queries. ScheduleDAGTopologicalSort Topo; public: - ScheduleDAGRRList(MachineFunction &mf, - bool isbottomup, bool needlatency, - SchedulingPriorityQueue *availqueue) - : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency), - AvailableQueue(availqueue), Topo(SUnits) { - } + ScheduleDAGRRList(MachineFunction &mf, bool needlatency, + SchedulingPriorityQueue *availqueue, + CodeGenOpt::Level OptLevel) + : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()), + NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0), + Topo(SUnits) { + + const TargetMachine &tm = mf.getTarget(); + if (DisableSchedCycles || !NeedLatency) + HazardRec = new ScheduleHazardRecognizer(); + else + HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); + } ~ScheduleDAGRRList() { + delete HazardRec; delete AvailableQueue; } void Schedule(); + ScheduleHazardRecognizer *getHazardRec() { return HazardRec; } + /// IsReachable - Checks if SU is reachable from TargetSU.
 bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
@@ -136,24 +166,37 @@ public:
   }

 private:
+  bool isReady(SUnit *SU) {
+    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
+      AvailableQueue->isReady(SU);
+  }
+
   void ReleasePred(SUnit *SU, const SDep *PredEdge);
-  void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
+  void ReleasePredecessors(SUnit *SU);
   void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
   void ReleaseSuccessors(SUnit *SU);
+  void ReleasePending();
+  void AdvanceToCycle(unsigned NextCycle);
+  void AdvancePastStalls(SUnit *SU);
+  void EmitNode(SUnit *SU);
+  void ScheduleNodeBottomUp(SUnit*);
   void CapturePred(SDep *PredEdge);
-  void ScheduleNodeBottomUp(SUnit*, unsigned);
-  void ScheduleNodeTopDown(SUnit*, unsigned);
   void UnscheduleNodeBottomUp(SUnit*);
-  void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
+  void RestoreHazardCheckerBottomUp();
+  void BacktrackBottomUp(SUnit*, SUnit*);
   SUnit *CopyAndMoveSuccessors(SUnit*);
   void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                 const TargetRegisterClass*,
                                 const TargetRegisterClass*,
                                 SmallVector<SUnit*, 2>&);
   bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
-  void ListScheduleTopDown();
+
+  SUnit *PickNodeToScheduleBottomUp();
   void ListScheduleBottomUp();
+
+  void ScheduleNodeTopDown(SUnit*);
+  void ListScheduleTopDown();
+
   /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
   /// Updates the topological ordering if required.
@@ -190,11 +233,13 @@ private:
 void ScheduleDAGRRList::Schedule() {
   DEBUG(dbgs() << "********** List Scheduling BB#" << BB->getNumber()
-        << " **********\n");
+        << " '" << BB->getName() << "' **********\n");
+
+  CurCycle = 0;
+  MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
   NumLiveRegs = 0;
-  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
-  LiveRegCycles.resize(TRI->getNumRegs(), 0);
+  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
+  LiveRegGens.resize(TRI->getNumRegs(), NULL);

   // Build the scheduling graph.
   BuildSchedGraph(NULL);
@@ -204,13 +249,15 @@ void ScheduleDAGRRList::Schedule() {
   Topo.InitDAGTopologicalSorting();

   AvailableQueue->initNodes(SUnits);
-
+
+  HazardRec->Reset();
+
   // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
   if (isBottomUp)
     ListScheduleBottomUp();
   else
     ListScheduleTopDown();
-
+
   AvailableQueue->releaseState();
 }

@@ -243,33 +290,197 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
   // to be scheduled. Ignore the special EntrySU node.
   if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
     PredSU->isAvailable = true;
-    AvailableQueue->push(PredSU);
+
+    unsigned Height = PredSU->getHeight();
+    if (Height < MinAvailableCycle)
+      MinAvailableCycle = Height;
+
+    if (isReady(PredSU)) {
+      AvailableQueue->push(PredSU);
+    }
+    // CapturePred and others may have left the node in the pending queue;
+    // avoid adding it twice.
+    else if (!PredSU->isPending) {
+      PredSU->isPending = true;
+      PendingQueue.push_back(PredSU);
+    }
   }
 }

-void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
+/// Call ReleasePred for each predecessor, then update register live def/gen.
+/// Always update LiveRegDefs for a register dependence even if the current SU
+/// also defines the register. This effectively creates one large live range
+/// across a sequence of two-address nodes. This is important because the
+/// entire chain must be scheduled together.
Example: +/// +/// flags = (3) add +/// flags = (2) addc flags +/// flags = (1) addc flags +/// +/// results in +/// +/// LiveRegDefs[flags] = 3 +/// LiveRegGens[flags] = 1 +/// +/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid +/// interference on flags. +void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) { // Bottom up: release predecessors for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { ReleasePred(SU, &*I); if (I->isAssignedRegDep()) { // This is a physical register dependency and it's impossible or - // expensive to copy the register. Make sure nothing that can + // expensive to copy the register. Make sure nothing that can // clobber the register is scheduled between the predecessor and // this node. - if (!LiveRegDefs[I->getReg()]) { + SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef; + assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) && + "interference on register dependence"); + LiveRegDefs[I->getReg()] = I->getSUnit(); + if (!LiveRegGens[I->getReg()]) { ++NumLiveRegs; - LiveRegDefs[I->getReg()] = I->getSUnit(); - LiveRegCycles[I->getReg()] = CurCycle; + LiveRegGens[I->getReg()] = SU; } } } } +/// Check to see if any of the pending instructions are ready to issue. If +/// so, add them to the available queue. +void ScheduleDAGRRList::ReleasePending() { + if (DisableSchedCycles) { + assert(PendingQueue.empty() && "pending instrs not allowed in this mode"); + return; + } + + // If the available queue is empty, it is safe to reset MinAvailableCycle. + if (AvailableQueue->empty()) + MinAvailableCycle = UINT_MAX; + + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. + for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { + unsigned ReadyCycle = + isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth(); + if (ReadyCycle < MinAvailableCycle) + MinAvailableCycle = ReadyCycle; + + if (PendingQueue[i]->isAvailable) { + if (!isReady(PendingQueue[i])) + continue; + AvailableQueue->push(PendingQueue[i]); + } + PendingQueue[i]->isPending = false; + PendingQueue[i] = PendingQueue.back(); + PendingQueue.pop_back(); + --i; --e; + } +} + +/// Move the scheduler state forward by the specified number of Cycles. +void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) { + if (NextCycle <= CurCycle) + return; + + AvailableQueue->setCurCycle(NextCycle); + if (!HazardRec->isEnabled()) { + // Bypass lots of virtual calls in case of long latency. + CurCycle = NextCycle; + } + else { + for (; CurCycle != NextCycle; ++CurCycle) { + if (isBottomUp) + HazardRec->RecedeCycle(); + else + HazardRec->AdvanceCycle(); + } + } + // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the + // available Q to release pending nodes at least once before popping. + ReleasePending(); +} + +/// Move the scheduler state forward until the specified node's dependents are +/// ready and can be scheduled with no resource conflicts. +void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { + if (DisableSchedCycles) + return; + + unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth(); + + // Bump CurCycle to account for latency. We assume the latency of other + // available instructions may be hidden by the stall (not a full pipe stall). + // This updates the hazard recognizer's cycle before reserving resources for + // this instruction. 
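The flags example above, replayed in a standalone sketch of the def/gen bookkeeping: scheduling a user bottom-up always overwrites Defs with the released predecessor, while Gens is anchored at the first released user, so one record covers the whole two-address chain. Plain ints stand in for SUnits; this is not the LLVM API.

#include <cassert>
#include <vector>

// One register (id 0) is enough to model the flags chain.
struct LiveRegState {
  std::vector<int> Defs = std::vector<int>(1, -1); // farthest def in chain
  std::vector<int> Gens = std::vector<int>(1, -1); // first released user
  unsigned NumLive = 0;

  // Scheduling `user` (bottom-up) releases its predecessor `def` over `reg`.
  void releaseRegDep(int user, int def, unsigned reg) {
    // Overwrite Defs[reg] even when it is already live: consecutive
    // two-address defs extend a single live range instead of opening one.
    assert(Defs[reg] == -1 || Defs[reg] == user || Defs[reg] == def);
    Defs[reg] = def;
    if (Gens[reg] == -1) {
      ++NumLive;
      Gens[reg] = user; // unscheduling back past this kills the range
    }
  }
};

int main() {
  // flags = (3) add;  flags = (2) addc flags;  flags = (1) addc flags
  LiveRegState L;
  L.releaseRegDep(/*user=*/1, /*def=*/2, 0); // scheduling (1) releases (2)
  L.releaseRegDep(/*user=*/2, /*def=*/3, 0); // scheduling (2) releases (3)
  assert(L.Defs[0] == 3 && L.Gens[0] == 1 && L.NumLive == 1);
}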
+  AdvanceToCycle(ReadyCycle);
+
+  // Calls are scheduled in their preceding cycle, so don't conflict with
+  // hazards from instructions after the call. EmitNode will reset the
+  // scoreboard state before emitting the call.
+  if (isBottomUp && SU->isCall)
+    return;
+
+  // FIXME: For resource conflicts in very long non-pipelined stages, we
+  // should probably skip ahead here to avoid useless scoreboard checks.
+  int Stalls = 0;
+  while (true) {
+    ScheduleHazardRecognizer::HazardType HT =
+      HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls);
+
+    if (HT == ScheduleHazardRecognizer::NoHazard)
+      break;
+
+    ++Stalls;
+  }
+  AdvanceToCycle(CurCycle + Stalls);
+}
+
+/// Record this SUnit in the HazardRecognizer.
+/// Does not update CurCycle.
+void ScheduleDAGRRList::EmitNode(SUnit *SU) {
+  if (!HazardRec->isEnabled())
+    return;
+
+  // Check for phys reg copy.
+  if (!SU->getNode())
+    return;
+
+  switch (SU->getNode()->getOpcode()) {
+  default:
+    assert(SU->getNode()->isMachineOpcode() &&
+           "This target-independent node should not be scheduled.");
+    break;
+  case ISD::MERGE_VALUES:
+  case ISD::TokenFactor:
+  case ISD::CopyToReg:
+  case ISD::CopyFromReg:
+  case ISD::EH_LABEL:
+    // Noops don't affect the scoreboard state. Copies are likely to be
+    // removed.
+    return;
+  case ISD::INLINEASM:
+    // For inline asm, clear the pipeline state.
+    HazardRec->Reset();
+    return;
+  }
+  if (isBottomUp && SU->isCall) {
+    // Calls are scheduled with their preceding instructions. For bottom-up
+    // scheduling, clear the pipeline state before emitting.
+    HazardRec->Reset();
+  }
+
+  HazardRec->EmitInstruction(SU);
+
+  if (!isBottomUp && SU->isCall) {
+    HazardRec->Reset();
+  }
+}
+
 /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
 /// count of its predecessors. If a predecessor pending count is zero, add it to
 /// the Available queue.
-void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
+void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
   DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
   DEBUG(SU->dump(this));

@@ -278,36 +489,51 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
     DEBUG(dbgs() << "   Height [" << SU->getHeight() << "] pipeline stall!\n");
 #endif

-  // FIXME: Handle noop hazard.
+  // FIXME: Do not modify node height. It may interfere with
+  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
+  // node, its ready cycle can aid heuristics, and after scheduling it can
+  // indicate the scheduled cycle.
   SU->setHeightToAtLeast(CurCycle);
+
+  // Reserve resources for the scheduled instruction.
+  EmitNode(SU);
+
   Sequence.push_back(SU);

   AvailableQueue->ScheduledNode(SU);

-  ReleasePredecessors(SU, CurCycle);
+  // Update liveness of predecessors before successors to avoid treating a
+  // two-address node as a live range def.
+  ReleasePredecessors(SU);

   // Release all the implicit physical register defs that are live.
   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
        I != E; ++I) {
-    if (I->isAssignedRegDep()) {
-      if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
-        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
-        assert(LiveRegDefs[I->getReg()] == SU &&
-               "Physical register dependency violated?");
-        --NumLiveRegs;
-        LiveRegDefs[I->getReg()] = NULL;
-        LiveRegCycles[I->getReg()] = 0;
-      }
+    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
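The stall search in AdvancePastStalls, reduced to one function: first cover latency by jumping to the ready cycle, then probe the hazard state with growing stall counts and jump once more. The toy recognizer (a single resource busy through a fixed cycle) is an assumption for illustration.

#include <cstdio>

struct ToyHazardRec {
  unsigned busyUntil = 7; // hypothetical: resource busy through cycle 7
  bool wouldIssue(unsigned cycle) const { return cycle > busyUntil; }
};

unsigned advancePastStalls(unsigned curCycle, unsigned readyCycle,
                           const ToyHazardRec &rec) {
  if (readyCycle > curCycle)
    curCycle = readyCycle;           // first cover latency (AdvanceToCycle)
  unsigned stalls = 0;
  while (!rec.wouldIssue(curCycle + stalls))
    ++stalls;                        // then cover resource hazards
  return curCycle + stalls;
}

int main() {
  ToyHazardRec rec;
  std::printf("issue at cycle %u\n", advancePastStalls(4, 6, rec)); // 8
}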
+    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
+      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+      --NumLiveRegs;
+      LiveRegDefs[I->getReg()] = NULL;
+      LiveRegGens[I->getReg()] = NULL;
     }
   }

   SU->isScheduled = true;
+
+  // Conditions under which the scheduler should eagerly advance the cycle:
+  // (1) No available instructions
+  // (2) All pipelines full, so available instructions must have hazards.
+  //
+  // If HazardRec is disabled, count each inst as one cycle.
+  if (!HazardRec->isEnabled() || HazardRec->atIssueLimit()
+      || AvailableQueue->empty())
+    AdvanceToCycle(CurCycle + 1);
 }

 /// CapturePred - This does the opposite of ReleasePred. Since SU is being
 /// unscheduled, increase the succ left count of its predecessors. Remove
 /// them from AvailableQueue if necessary.
-void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
+void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
   SUnit *PredSU = PredEdge->getSUnit();
   if (PredSU->isAvailable) {
     PredSU->isAvailable = false;
@@ -328,59 +554,98 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
        I != E; ++I) {
     CapturePred(&*I);
-    if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]){
+    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
       assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
       assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
              "Physical register dependency violated?");
       --NumLiveRegs;
       LiveRegDefs[I->getReg()] = NULL;
-      LiveRegCycles[I->getReg()] = 0;
+      LiveRegGens[I->getReg()] = NULL;
     }
   }

   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
        I != E; ++I) {
     if (I->isAssignedRegDep()) {
+      // This becomes the nearest def. Note that an earlier def may still be
+      // pending if this is a two-address node.
+      if (!LiveRegDefs[I->getReg()])
+        ++NumLiveRegs;
+      LiveRegDefs[I->getReg()] = SU;
-      if (!LiveRegDefs[I->getReg()]) {
-        LiveRegDefs[I->getReg()] = SU;
-        ++NumLiveRegs;
-      }
-      if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
-        LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
+      if (LiveRegGens[I->getReg()] == NULL ||
+          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
+        LiveRegGens[I->getReg()] = I->getSUnit();
     }
   }
+  if (SU->getHeight() < MinAvailableCycle)
+    MinAvailableCycle = SU->getHeight();

   SU->setHeightDirty();
   SU->isScheduled = false;
   SU->isAvailable = true;
-  AvailableQueue->push(SU);
+  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
+    // Don't make available until backtracking is complete.
+    SU->isPending = true;
+    PendingQueue.push_back(SU);
+  }
+  else {
+    AvailableQueue->push(SU);
+  }
   AvailableQueue->UnscheduledNode(SU);
 }

+/// After backtracking, the hazard checker needs to be restored to a state
+/// corresponding to the current cycle.
+void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
+  HazardRec->Reset();
+
+  unsigned LookAhead = std::min((unsigned)Sequence.size(),
+                                HazardRec->getMaxLookAhead());
+  if (LookAhead == 0)
+    return;
+
+  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
+  unsigned HazardCycle = (*I)->getHeight();
+  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
+    SUnit *SU = *I;
+    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
+      HazardRec->RecedeCycle();
+    }
+    EmitNode(SU);
+  }
+}
+
 /// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
 /// BTCycle in order to schedule a specific node.
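The replay idea in RestoreHazardCheckerBottomUp, as a standalone sketch: only the last maxLookAhead scheduled entries can still influence hazard decisions, so reset the scoreboard and re-emit just that tail, receding once per height step. Toy Scoreboard/Unit types, not the LLVM interfaces.

#include <algorithm>
#include <vector>

struct Unit { unsigned height; };

struct Scoreboard {
  unsigned maxLookAhead = 4;
  void reset() {}
  void recedeCycle() {}
  void emit(const Unit &) {}
};

void restoreBottomUp(const std::vector<Unit> &sequence, Scoreboard &sb) {
  sb.reset();
  unsigned lookAhead =
      std::min<unsigned>(sequence.size(), sb.maxLookAhead);
  if (lookAhead == 0)
    return;
  auto it = sequence.end() - lookAhead;
  unsigned hazardCycle = it->height;
  for (auto e = sequence.end(); it != e; ++it) {
    // Recede once per height step so the window lines up with CurCycle.
    for (; it->height > hazardCycle; ++hazardCycle)
      sb.recedeCycle();
    sb.emit(*it);
  }
}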
-void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, - unsigned &CurCycle) { - SUnit *OldSU = NULL; - while (CurCycle > BtCycle) { - OldSU = Sequence.back(); +void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) { + SUnit *OldSU = Sequence.back(); + while (true) { Sequence.pop_back(); if (SU->isSucc(OldSU)) // Don't try to remove SU from AvailableQueue. SU->isAvailable = false; + // FIXME: use ready cycle instead of height + CurCycle = OldSU->getHeight(); UnscheduleNodeBottomUp(OldSU); - --CurCycle; AvailableQueue->setCurCycle(CurCycle); + if (OldSU == BtSU) + break; + OldSU = Sequence.back(); } assert(!SU->isSucc(OldSU) && "Something is wrong!"); + RestoreHazardCheckerBottomUp(); + + ReleasePending(); + ++NumBacktracks; } static bool isOperandOf(const SUnit *SU, SDNode *N) { for (const SDNode *SUNode = SU->getNode(); SUNode; - SUNode = SUNode->getFlaggedNode()) { + SUNode = SUNode->getGluedNode()) { if (SUNode->isOperandOf(N)) return true; } @@ -390,18 +655,18 @@ static bool isOperandOf(const SUnit *SU, SDNode *N) { /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled /// successors to the newly created node. SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { - if (SU->getNode()->getFlaggedNode()) - return NULL; - SDNode *N = SU->getNode(); if (!N) return NULL; + if (SU->getNode()->getGluedNode()) + return NULL; + SUnit *NewSU; bool TryUnfold = false; for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { EVT VT = N->getValueType(i); - if (VT == MVT::Flag) + if (VT == MVT::Glue) return NULL; else if (VT == MVT::Other) TryUnfold = true; @@ -409,7 +674,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { const SDValue &Op = N->getOperand(i); EVT VT = Op.getNode()->getValueType(Op.getResNo()); - if (VT == MVT::Flag) + if (VT == MVT::Glue) return NULL; } @@ -441,13 +706,15 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { } else { LoadSU = CreateNewSUnit(LoadNode); LoadNode->setNodeId(LoadSU->NodeNum); + + InitNumRegDefsLeft(LoadSU); ComputeLatency(LoadSU); } SUnit *NewSU = CreateNewSUnit(N); assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NewSU->NodeNum); - + const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); for (unsigned i = 0; i != TID.getNumOperands(); ++i) { if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { @@ -457,6 +724,8 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { } if (TID.isCommutable()) NewSU->isCommutable = true; + + InitNumRegDefsLeft(NewSU); ComputeLatency(NewSU); // Record all the edges to and from the old SU, by category. @@ -507,6 +776,10 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { RemovePred(SuccDep, D); D.setSUnit(NewSU); AddPred(SuccDep, D); + // Balance register pressure. + if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled + && !D.isCtrl() && NewSU->NumRegDefsLeft > 0) + --NewSU->NumRegDefsLeft; } for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { SDep D = ChainSuccs[i]; @@ -517,7 +790,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { D.setSUnit(LoadSU); AddPred(SuccDep, D); } - } + } // Add a data dependency to reflect that NewSU reads the value defined // by LoadSU. @@ -633,52 +906,52 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, /// CheckForLiveRegDef - Return true and update live register vector if the /// specified register def of the specified SUnit clobbers any "live" registers. 
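The unwind loop in BacktrackBottomUp, stripped to its shape: pop scheduled units off the sequence until the backtrack target itself has been unscheduled, rolling the cycle back as you go. Assumes, like the original, that the target is in the sequence; `scheduled = false` stands in for UnscheduleNodeBottomUp.

#include <vector>

struct Unit { unsigned height; bool scheduled = true; };

unsigned backtrackTo(std::vector<Unit*> &sequence, Unit *btu,
                     unsigned curCycle) {
  while (true) {
    Unit *old = sequence.back();
    sequence.pop_back();
    curCycle = old->height;   // the patch's FIXME: use ready cycle instead
    old->scheduled = false;   // stands in for UnscheduleNodeBottomUp(old)
    if (old == btu)
      break;
  }
  return curCycle;
}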
-static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
+static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                                std::vector<SUnit*> &LiveRegDefs,
                                SmallSet<unsigned, 4> &RegAdded,
                                SmallVector<unsigned, 4> &LRegs,
                                const TargetRegisterInfo *TRI) {
-  bool Added = false;
-  if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
-    if (RegAdded.insert(Reg)) {
-      LRegs.push_back(Reg);
-      Added = true;
-    }
-  }
-  for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
-    if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
-      if (RegAdded.insert(*Alias)) {
-        LRegs.push_back(*Alias);
-        Added = true;
-      }
-    }
-  return Added;
+  for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
+
+    // Check if Reg or an overlapping register is live.
+    if (!LiveRegDefs[*AliasI]) continue;
+
+    // Allow multiple uses of the same def.
+    if (LiveRegDefs[*AliasI] == SU) continue;
+
+    // Add the live overlapping register to the set of interfering regs.
+    if (RegAdded.insert(*AliasI))
+      LRegs.push_back(*AliasI);
+  }
 }

 /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
 /// scheduling of the given node to satisfy live physical register dependencies.
 /// If the specific node is the last one that's available to schedule, do
 /// whatever is necessary (i.e. backtracking or cloning) to make it possible.
-bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
-                                                 SmallVector<unsigned, 4> &LRegs){
+bool ScheduleDAGRRList::
+DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
   if (NumLiveRegs == 0)
     return false;

   SmallSet<unsigned, 4> RegAdded;
   // If this node would clobber any "live" register, then it's not ready.
+  //
+  // If SU is the currently live definition of the same register that it uses,
+  // then we are free to schedule it.
   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
        I != E; ++I) {
-    if (I->isAssignedRegDep())
+    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
       CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
                          RegAdded, LRegs, TRI);
   }

-  for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
+  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
     if (Node->getOpcode() == ISD::INLINEASM) {
       // Inline asm can clobber physical defs.
       unsigned NumOps = Node->getNumOperands();
-      if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
-        --NumOps;  // Ignore the flag operand.
+      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+        --NumOps;  // Ignore the glue operand.

       for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
         unsigned Flags =
@@ -708,17 +981,151 @@ bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
   for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
     CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
   }
+
   return !LRegs.empty();
 }

+/// Return a node that can be scheduled in this cycle. Requirements:
+/// (1) Ready: latency has been satisfied
+/// (2) No Hazards: resources are available
+/// (3) No Interferences: may unschedule to break register interferences.
+SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
+  SmallVector<SUnit*, 4> Interferences;
+  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
+
+  SUnit *CurSU = AvailableQueue->pop();
+  while (CurSU) {
+    SmallVector<unsigned, 4> LRegs;
+    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+      break;
+    LRegsMap.insert(std::make_pair(CurSU, LRegs));
+
+    CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
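The interference test in miniature: a def of register r by node su interferes if any overlapping register holds a live def from a different node. The overlap table below is a hypothetical stand-in for the target-provided one (think AX overlapping AL/AH); -1 marks "not live".

#include <set>
#include <vector>

using Reg = unsigned;

// Hypothetical overlap table: register 1 overlaps {1, 2}.
static const std::vector<std::vector<Reg>> Overlaps = {
    {0}, {1, 2}, {2, 1}, {3}};

bool clobbersLiveReg(int su, Reg r, const std::vector<int> &liveDefs,
                     std::set<Reg> &lregs) {
  bool added = false;
  for (Reg a : Overlaps[r]) {
    if (liveDefs[a] == -1) continue; // alias not live
    if (liveDefs[a] == su) continue; // our own def: extra uses are fine
    added |= lregs.insert(a).second; // collect the interfering register
  }
  return added;
}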
+    Interferences.push_back(CurSU);
+    CurSU = AvailableQueue->pop();
+  }
+  if (CurSU) {
+    // Add the nodes that aren't ready back onto the available list.
+    for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
+      Interferences[i]->isPending = false;
+      assert(Interferences[i]->isAvailable && "must still be available");
+      AvailableQueue->push(Interferences[i]);
+    }
+    return CurSU;
+  }
+
+  // All candidates are delayed due to live physical reg dependencies.
+  // Try backtracking, code duplication, or inserting cross class copies
+  // to resolve it.
+  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
+    SUnit *TrySU = Interferences[i];
+    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+
+    // Try unscheduling up to the point where it's safe to schedule
+    // this node.
+    SUnit *BtSU = NULL;
+    unsigned LiveCycle = UINT_MAX;
+    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
+      unsigned Reg = LRegs[j];
+      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
+        BtSU = LiveRegGens[Reg];
+        LiveCycle = BtSU->getHeight();
+      }
+    }
+    if (!WillCreateCycle(TrySU, BtSU)) {
+      BacktrackBottomUp(TrySU, BtSU);
+
+      // Force the current node to be scheduled before the node that
+      // requires the physical reg dep.
+      if (BtSU->isAvailable) {
+        BtSU->isAvailable = false;
+        if (!BtSU->isPending)
+          AvailableQueue->remove(BtSU);
+      }
+      AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
                          /*Reg=*/0, /*isNormalMemory=*/false,
                          /*isMustAlias=*/false, /*isArtificial=*/true));
+
+      // If one or more successors have been unscheduled, then the current
+      // node is no longer available. Schedule a successor that's now
+      // available instead.
+      if (!TrySU->isAvailable) {
+        CurSU = AvailableQueue->pop();
+      }
+      else {
+        CurSU = TrySU;
+        TrySU->isPending = false;
+        Interferences.erase(Interferences.begin()+i);
+      }
+      break;
+    }
+  }
+
+  if (!CurSU) {
+    // Can't backtrack. If it's too expensive to copy the value, then try
+    // to duplicate the nodes that produce these "too expensive to copy"
+    // values to break the dependency. In case even that doesn't work,
+    // insert cross class copies.
+    // If it's not too expensive, i.e. cost != -1, issue copies.
+    SUnit *TrySU = Interferences[0];
+    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+    assert(LRegs.size() == 1 && "Can't handle this yet!");
+    unsigned Reg = LRegs[0];
+    SUnit *LRDef = LiveRegDefs[Reg];
+    EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+    const TargetRegisterClass *RC =
+      TRI->getMinimalPhysRegClass(Reg, VT);
+    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+    // If cross copy register class is null, then it must be possible to copy
+    // the value directly. Do not try to duplicate the def.
+    SUnit *NewDef = 0;
+    if (DestRC)
+      NewDef = CopyAndMoveSuccessors(LRDef);
+    else
+      DestRC = RC;
+    if (!NewDef) {
+      // Issue copies, these can be expensive cross register class copies.
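The backtrack-target choice above, isolated: among the interfering registers, unwind to the live-range generator scheduled at the smallest height, since everything scheduled above it has to come off anyway. Toy Unit type; indices into liveRegGens play the role of physical registers.

#include <climits>
#include <vector>

struct Unit { unsigned height; };

Unit *pickBacktrackTarget(const std::vector<unsigned> &lregs,
                          const std::vector<Unit*> &liveRegGens) {
  Unit *btu = nullptr;
  unsigned liveCycle = UINT_MAX;
  for (unsigned reg : lregs) {
    if (liveRegGens[reg]->height < liveCycle) {
      btu = liveRegGens[reg];      // earliest generator wins
      liveCycle = btu->height;
    }
  }
  return btu;
}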
+ SmallVector<SUnit*, 2> Copies; + InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); + DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum + << " to SU #" << Copies.front()->NodeNum << "\n"); + AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, + /*isArtificial=*/true)); + NewDef = Copies.back(); + } + + DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum + << " to SU #" << TrySU->NodeNum << "\n"); + LiveRegDefs[Reg] = NewDef; + AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, + /*isArtificial=*/true)); + TrySU->isAvailable = false; + CurSU = NewDef; + } + + assert(CurSU && "Unable to resolve live physical register dependencies!"); + + // Add the nodes that aren't ready back onto the available list. + for (unsigned i = 0, e = Interferences.size(); i != e; ++i) { + Interferences[i]->isPending = false; + // May no longer be available due to backtracking. + if (Interferences[i]->isAvailable) { + AvailableQueue->push(Interferences[i]); + } + } + return CurSU; +} /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up /// schedulers. void ScheduleDAGRRList::ListScheduleBottomUp() { - unsigned CurCycle = 0; - // Release any predecessors of the special Exit node. - ReleasePredecessors(&ExitSU, CurCycle); + ReleasePredecessors(&ExitSU); // Add root to Available queue. if (!SUnits.empty()) { @@ -730,135 +1137,29 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { // While Available queue is not empty, grab the node with the highest // priority. If it is not ready put it back. Schedule the node. - SmallVector<SUnit*, 4> NotReady; - DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; Sequence.reserve(SUnits.size()); while (!AvailableQueue->empty()) { - bool Delayed = false; - LRegsMap.clear(); - SUnit *CurSU = AvailableQueue->pop(); - while (CurSU) { - SmallVector<unsigned, 4> LRegs; - if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) - break; - Delayed = true; - LRegsMap.insert(std::make_pair(CurSU, LRegs)); + DEBUG(dbgs() << "\n*** Examining Available\n"; + AvailableQueue->dump(this)); - CurSU->isPending = true; // This SU is not in AvailableQueue right now. - NotReady.push_back(CurSU); - CurSU = AvailableQueue->pop(); - } + // Pick the best node to schedule taking all constraints into + // consideration. + SUnit *SU = PickNodeToScheduleBottomUp(); - // All candidates are delayed due to live physical reg dependencies. - // Try backtracking, code duplication, or inserting cross class copies - // to resolve it. - if (Delayed && !CurSU) { - for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { - SUnit *TrySU = NotReady[i]; - SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; - - // Try unscheduling up to the point where it's safe to schedule - // this node. - unsigned LiveCycle = CurCycle; - for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { - unsigned Reg = LRegs[j]; - unsigned LCycle = LiveRegCycles[Reg]; - LiveCycle = std::min(LiveCycle, LCycle); - } - SUnit *OldSU = Sequence[LiveCycle]; - if (!WillCreateCycle(TrySU, OldSU)) { - BacktrackBottomUp(TrySU, LiveCycle, CurCycle); - // Force the current node to be scheduled before the node that - // requires the physical reg dep. 
- if (OldSU->isAvailable) { - OldSU->isAvailable = false; - AvailableQueue->remove(OldSU); - } - AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, /*isArtificial=*/true)); - // If one or more successors has been unscheduled, then the current - // node is no longer avaialable. Schedule a successor that's now - // available instead. - if (!TrySU->isAvailable) - CurSU = AvailableQueue->pop(); - else { - CurSU = TrySU; - TrySU->isPending = false; - NotReady.erase(NotReady.begin()+i); - } - break; - } - } + AdvancePastStalls(SU); - if (!CurSU) { - // Can't backtrack. If it's too expensive to copy the value, then try - // duplicate the nodes that produces these "too expensive to copy" - // values to break the dependency. In case even that doesn't work, - // insert cross class copies. - // If it's not too expensive, i.e. cost != -1, issue copies. - SUnit *TrySU = NotReady[0]; - SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; - assert(LRegs.size() == 1 && "Can't handle this yet!"); - unsigned Reg = LRegs[0]; - SUnit *LRDef = LiveRegDefs[Reg]; - EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); - const TargetRegisterClass *RC = - TRI->getMinimalPhysRegClass(Reg, VT); - const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); - - // If cross copy register class is null, then it must be possible copy - // the value directly. Do not try duplicate the def. - SUnit *NewDef = 0; - if (DestRC) - NewDef = CopyAndMoveSuccessors(LRDef); - else - DestRC = RC; - if (!NewDef) { - // Issue copies, these can be expensive cross register class copies. - SmallVector<SUnit*, 2> Copies; - InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); - DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum - << " to SU #" << Copies.front()->NodeNum << "\n"); - AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, - /*isArtificial=*/true)); - NewDef = Copies.back(); - } + ScheduleNodeBottomUp(SU); - DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum - << " to SU #" << TrySU->NodeNum << "\n"); - LiveRegDefs[Reg] = NewDef; - AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, - /*isArtificial=*/true)); - TrySU->isAvailable = false; - CurSU = NewDef; - } - - assert(CurSU && "Unable to resolve live physical register dependencies!"); - } - - // Add the nodes that aren't ready back onto the available list. - for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { - NotReady[i]->isPending = false; - // May no longer be available due to backtracking. - if (NotReady[i]->isAvailable) - AvailableQueue->push(NotReady[i]); + while (AvailableQueue->empty() && !PendingQueue.empty()) { + // Advance the cycle to free resources. Skip ahead to the next ready SU. + assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized"); + AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle)); } - NotReady.clear(); - - if (CurSU) - ScheduleNodeBottomUp(CurSU, CurCycle); - ++CurCycle; - AvailableQueue->setCurCycle(CurCycle); } // Reverse the order if it is bottom up. std::reverse(Sequence.begin(), Sequence.end()); - + #ifndef NDEBUG VerifySchedule(isBottomUp); #endif @@ -905,7 +1206,7 @@ void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) { /// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending /// count of its successors. 
If a successor pending count is zero, add it to /// the Available queue. -void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { +void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) { DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); @@ -921,7 +1222,6 @@ void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { /// ListScheduleTopDown - The main loop of list scheduling for top-down /// schedulers. void ScheduleDAGRRList::ListScheduleTopDown() { - unsigned CurCycle = 0; AvailableQueue->setCurCycle(CurCycle); // Release any successors of the special Entry node. @@ -935,19 +1235,19 @@ void ScheduleDAGRRList::ListScheduleTopDown() { SUnits[i].isAvailable = true; } } - + // While Available queue is not empty, grab the node with the highest // priority. If it is not ready put it back. Schedule the node. Sequence.reserve(SUnits.size()); while (!AvailableQueue->empty()) { SUnit *CurSU = AvailableQueue->pop(); - + if (CurSU) - ScheduleNodeTopDown(CurSU, CurCycle); + ScheduleNodeTopDown(CurSU); ++CurCycle; AvailableQueue->setCurCycle(CurCycle); } - + #ifndef NDEBUG VerifySchedule(isBottomUp); #endif @@ -955,70 +1255,288 @@ void ScheduleDAGRRList::ListScheduleTopDown() { //===----------------------------------------------------------------------===// -// RegReductionPriorityQueue Implementation +// RegReductionPriorityQueue Definition //===----------------------------------------------------------------------===// // // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers // to reduce register pressure. -// +// namespace { - template<class SF> - class RegReductionPriorityQueue; - - /// bu_ls_rr_sort - Priority function for bottom up register pressure - // reduction scheduler. - struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { - RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ; - bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {} - bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} - - bool operator()(const SUnit* left, const SUnit* right) const; +class RegReductionPQBase; + +struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> { + bool isReady(SUnit* SU, unsigned CurCycle) const { return true; } +}; + +/// bu_ls_rr_sort - Priority function for bottom up register pressure +// reduction scheduler. +struct bu_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = false }; - // td_ls_rr_sort - Priority function for top down register pressure reduction - // scheduler. - struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { - RegReductionPriorityQueue<td_ls_rr_sort> *SPQ; - td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {} - td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} - - bool operator()(const SUnit* left, const SUnit* right) const; + RegReductionPQBase *SPQ; + bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} + bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} + + bool operator()(SUnit* left, SUnit* right) const; +}; + +// td_ls_rr_sort - Priority function for top down register pressure reduction +// scheduler. +struct td_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = false, + HasReadyFilter = false }; - // src_ls_rr_sort - Priority function for source order scheduler. 
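The restructuring above hangs strategy choices off enum constants in each comparator, which a queue template can read at compile time. A self-contained sketch of that trick, with ints standing in for SUnits and a made-up comparator:

#include <iostream>

struct queue_sort_base {
  bool isReady(int /*SU*/, unsigned /*CurCycle*/) const { return true; }
};

struct bu_sort : queue_sort_base {
  enum { IsBottomUp = true, HasReadyFilter = false };
  bool operator()(int a, int b) const { return a > b; }
};

template <class SF> struct PrioQueue {
  SF Picker{};
  bool isBottomUp() const { return SF::IsBottomUp; }
  bool isReady(int su, unsigned cycle) const {
    // For bu_sort the filter test folds away at compile time.
    return SF::HasReadyFilter && Picker.isReady(su, cycle);
  }
};

int main() {
  PrioQueue<bu_sort> q;
  std::cout << q.isBottomUp() << ' ' << q.isReady(0, 0) << '\n'; // 1 0
}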
- struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { - RegReductionPriorityQueue<src_ls_rr_sort> *SPQ; - src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq) - : SPQ(spq) {} - src_ls_rr_sort(const src_ls_rr_sort &RHS) - : SPQ(RHS.SPQ) {} - - bool operator()(const SUnit* left, const SUnit* right) const; + RegReductionPQBase *SPQ; + td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} + td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} + + bool operator()(const SUnit* left, const SUnit* right) const; +}; + +// src_ls_rr_sort - Priority function for source order scheduler. +struct src_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = false }; - // hybrid_ls_rr_sort - Priority function for hybrid scheduler. - struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { - RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ; - hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq) - : SPQ(spq) {} - hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS) - : SPQ(RHS.SPQ) {} + RegReductionPQBase *SPQ; + src_ls_rr_sort(RegReductionPQBase *spq) + : SPQ(spq) {} + src_ls_rr_sort(const src_ls_rr_sort &RHS) + : SPQ(RHS.SPQ) {} + + bool operator()(SUnit* left, SUnit* right) const; +}; - bool operator()(const SUnit* left, const SUnit* right) const; +// hybrid_ls_rr_sort - Priority function for hybrid scheduler. +struct hybrid_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = true }; - // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism) - // scheduler. - struct ilp_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { - RegReductionPriorityQueue<ilp_ls_rr_sort> *SPQ; - ilp_ls_rr_sort(RegReductionPriorityQueue<ilp_ls_rr_sort> *spq) - : SPQ(spq) {} - ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS) - : SPQ(RHS.SPQ) {} + RegReductionPQBase *SPQ; + hybrid_ls_rr_sort(RegReductionPQBase *spq) + : SPQ(spq) {} + hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS) + : SPQ(RHS.SPQ) {} + + bool isReady(SUnit *SU, unsigned CurCycle) const; - bool operator()(const SUnit* left, const SUnit* right) const; + bool operator()(SUnit* left, SUnit* right) const; +}; + +// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism) +// scheduler. +struct ilp_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = true }; -} // end anonymous namespace + + RegReductionPQBase *SPQ; + ilp_ls_rr_sort(RegReductionPQBase *spq) + : SPQ(spq) {} + ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS) + : SPQ(RHS.SPQ) {} + + bool isReady(SUnit *SU, unsigned CurCycle) const; + + bool operator()(SUnit* left, SUnit* right) const; +}; + +class RegReductionPQBase : public SchedulingPriorityQueue { +protected: + std::vector<SUnit*> Queue; + unsigned CurQueueId; + bool TracksRegPressure; + + // SUnits - The SUnits for the current graph. + std::vector<SUnit> *SUnits; + + MachineFunction &MF; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const TargetLowering *TLI; + ScheduleDAGRRList *scheduleDAG; + + // SethiUllmanNumbers - The SethiUllman number for each node. + std::vector<unsigned> SethiUllmanNumbers; + + /// RegPressure - Tracking current reg pressure per register class. + /// + std::vector<unsigned> RegPressure; + + /// RegLimit - Tracking the number of allocatable registers per register + /// class. 
+ std::vector<unsigned> RegLimit; + +public: + RegReductionPQBase(MachineFunction &mf, + bool hasReadyFilter, + bool tracksrp, + const TargetInstrInfo *tii, + const TargetRegisterInfo *tri, + const TargetLowering *tli) + : SchedulingPriorityQueue(hasReadyFilter), + CurQueueId(0), TracksRegPressure(tracksrp), + MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) { + if (TracksRegPressure) { + unsigned NumRC = TRI->getNumRegClasses(); + RegLimit.resize(NumRC); + RegPressure.resize(NumRC); + std::fill(RegLimit.begin(), RegLimit.end(), 0); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) + RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF); + } + } + + void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { + scheduleDAG = scheduleDag; + } + + ScheduleHazardRecognizer* getHazardRec() { + return scheduleDAG->getHazardRec(); + } + + void initNodes(std::vector<SUnit> &sunits); + + void addNode(const SUnit *SU); + + void updateNode(const SUnit *SU); + + void releaseState() { + SUnits = 0; + SethiUllmanNumbers.clear(); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + } + + unsigned getNodePriority(const SUnit *SU) const; + + unsigned getNodeOrdering(const SUnit *SU) const { + return scheduleDAG->DAG->GetOrdering(SU->getNode()); + } + + bool empty() const { return Queue.empty(); } + + void push(SUnit *U) { + assert(!U->NodeQueueId && "Node in the queue already"); + U->NodeQueueId = ++CurQueueId; + Queue.push_back(U); + } + + void remove(SUnit *SU) { + assert(!Queue.empty() && "Queue is empty!"); + assert(SU->NodeQueueId != 0 && "Not in queue!"); + std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), + SU); + if (I != prior(Queue.end())) + std::swap(*I, Queue.back()); + Queue.pop_back(); + SU->NodeQueueId = 0; + } + + bool tracksRegPressure() const { return TracksRegPressure; } + + void dumpRegPressure() const; + + bool HighRegPressure(const SUnit *SU) const; + + bool MayReduceRegPressure(SUnit *SU); + + void ScheduledNode(SUnit *SU); + + void UnscheduledNode(SUnit *SU); + +protected: + bool canClobber(const SUnit *SU, const SUnit *Op); + void AddPseudoTwoAddrDeps(); + void PrescheduleNodesWithMultipleUses(); + void CalculateSethiUllmanNumbers(); +}; + +template<class SF> +class RegReductionPriorityQueue : public RegReductionPQBase { + static SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker) { + std::vector<SUnit *>::iterator Best = Q.begin(); + for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()), + E = Q.end(); I != E; ++I) + if (Picker(*Best, *I)) + Best = I; + SUnit *V = *Best; + if (Best != prior(Q.end())) + std::swap(*Best, Q.back()); + Q.pop_back(); + return V; + } + + SF Picker; + +public: + RegReductionPriorityQueue(MachineFunction &mf, + bool tracksrp, + const TargetInstrInfo *tii, + const TargetRegisterInfo *tri, + const TargetLowering *tli) + : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli), + Picker(this) {} + + bool isBottomUp() const { return SF::IsBottomUp; } + + bool isReady(SUnit *U) const { + return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle()); + } + + SUnit *pop() { + if (Queue.empty()) return NULL; + + SUnit *V = popFromQueue(Queue, Picker); + V->NodeQueueId = 0; + return V; + } + + void dump(ScheduleDAG *DAG) const { + // Emulate pop() without clobbering NodeQueueIds. 
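popFromQueue above is not a heap: it does an O(n) scan with the Picker comparator, then swaps the winner to the back so removal never shifts elements. The same pattern stands alone below, with ints in place of SUnit pointers and the caller responsible for a non-empty queue, as in the original.

#include <iterator>
#include <utility>
#include <vector>

template <class Picker>
int popBest(std::vector<int> &q, Picker picker) {
  auto best = q.begin();
  for (auto i = std::next(q.begin()), e = q.end(); i != e; ++i)
    if (picker(*best, *i)) // true: current best ranks below *i
      best = i;
  int v = *best;
  if (best != std::prev(q.end()))
    std::swap(*best, q.back()); // swap-with-back removal, no shifting
  q.pop_back();
  return v;
}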
+ std::vector<SUnit*> DumpQueue = Queue; + SF DumpPicker = Picker; + while (!DumpQueue.empty()) { + SUnit *SU = popFromQueue(DumpQueue, DumpPicker); + if (isBottomUp()) + dbgs() << "Height " << SU->getHeight() << ": "; + else + dbgs() << "Depth " << SU->getDepth() << ": "; + SU->dump(DAG); + } + } +}; + +typedef RegReductionPriorityQueue<bu_ls_rr_sort> +BURegReductionPriorityQueue; + +typedef RegReductionPriorityQueue<td_ls_rr_sort> +TDRegReductionPriorityQueue; + +typedef RegReductionPriorityQueue<src_ls_rr_sort> +SrcRegReductionPriorityQueue; + +typedef RegReductionPriorityQueue<hybrid_ls_rr_sort> +HybridBURRPriorityQueue; + +typedef RegReductionPriorityQueue<ilp_ls_rr_sort> +ILPBURRPriorityQueue; +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Static Node Priority for Register Pressure Reduction +//===----------------------------------------------------------------------===// /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. /// Smaller number is the higher priority. @@ -1045,413 +1563,283 @@ CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { if (SethiUllmanNumber == 0) SethiUllmanNumber = 1; - + return SethiUllmanNumber; } -namespace { - template<class SF> - class RegReductionPriorityQueue : public SchedulingPriorityQueue { - std::vector<SUnit*> Queue; - SF Picker; - unsigned CurQueueId; - bool TracksRegPressure; - - protected: - // SUnits - The SUnits for the current graph. - std::vector<SUnit> *SUnits; - - MachineFunction &MF; - const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; - const TargetLowering *TLI; - ScheduleDAGRRList *scheduleDAG; - - // SethiUllmanNumbers - The SethiUllman number for each node. - std::vector<unsigned> SethiUllmanNumbers; - - /// RegPressure - Tracking current reg pressure per register class. - /// - std::vector<unsigned> RegPressure; - - /// RegLimit - Tracking the number of allocatable registers per register - /// class. - std::vector<unsigned> RegLimit; - - public: - RegReductionPriorityQueue(MachineFunction &mf, - bool tracksrp, - const TargetInstrInfo *tii, - const TargetRegisterInfo *tri, - const TargetLowering *tli) - : Picker(this), CurQueueId(0), TracksRegPressure(tracksrp), - MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) { - if (TracksRegPressure) { - unsigned NumRC = TRI->getNumRegClasses(); - RegLimit.resize(NumRC); - RegPressure.resize(NumRC); - std::fill(RegLimit.begin(), RegLimit.end(), 0); - std::fill(RegPressure.begin(), RegPressure.end(), 0); - for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), - E = TRI->regclass_end(); I != E; ++I) - RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF); - } - } - - void initNodes(std::vector<SUnit> &sunits) { - SUnits = &sunits; - // Add pseudo dependency edges for two-address nodes. - AddPseudoTwoAddrDeps(); - // Reroute edges to nodes with multiple uses. - PrescheduleNodesWithMultipleUses(); - // Calculate node priorities. - CalculateSethiUllmanNumbers(); - } - - void addNode(const SUnit *SU) { - unsigned SUSize = SethiUllmanNumbers.size(); - if (SUnits->size() > SUSize) - SethiUllmanNumbers.resize(SUSize*2, 0); - CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); - } - - void updateNode(const SUnit *SU) { - SethiUllmanNumbers[SU->NodeNum] = 0; - CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); - } +/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all +/// scheduling units. 
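For reference, classic Sethi-Ullman numbering on a toy expression tree, the metric this section turns into a static priority: a node needs max(l, r) registers when its subtrees' demands differ and l + 1 when they tie. This is the textbook tree form, not the LLVM routine, which generalizes it to DAGs with the special cases shown below.

#include <algorithm>
#include <cstdio>

struct Expr {
  const Expr *lhs = nullptr, *rhs = nullptr;
};

unsigned sethiUllman(const Expr *e) {
  if (!e->lhs) return 1;               // leaf: one register to hold it
  unsigned l = sethiUllman(e->lhs);
  unsigned r = sethiUllman(e->rhs);
  return l == r ? l + 1 : std::max(l, r);
}

int main() {
  Expr a, b, c, d;
  Expr ab{&a, &b}, cd{&c, &d};
  Expr root{&ab, &cd};
  std::printf("%u\n", sethiUllman(&root)); // 3: balanced trees cost most
}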
+void RegReductionPQBase::CalculateSethiUllmanNumbers() { + SethiUllmanNumbers.assign(SUnits->size(), 0); - void releaseState() { - SUnits = 0; - SethiUllmanNumbers.clear(); - std::fill(RegPressure.begin(), RegPressure.end(), 0); - } + for (unsigned i = 0, e = SUnits->size(); i != e; ++i) + CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); +} - unsigned getNodePriority(const SUnit *SU) const { - assert(SU->NodeNum < SethiUllmanNumbers.size()); - unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; - if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) - // CopyToReg should be close to its uses to facilitate coalescing and - // avoid spilling. - return 0; - if (Opc == TargetOpcode::EXTRACT_SUBREG || - Opc == TargetOpcode::SUBREG_TO_REG || - Opc == TargetOpcode::INSERT_SUBREG) - // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be - // close to their uses to facilitate coalescing. - return 0; - if (SU->NumSuccs == 0 && SU->NumPreds != 0) - // If SU does not have a register use, i.e. it doesn't produce a value - // that would be consumed (e.g. store), then it terminates a chain of - // computation. Give it a large SethiUllman number so it will be - // scheduled right before its predecessors that it doesn't lengthen - // their live ranges. - return 0xffff; - if (SU->NumPreds == 0 && SU->NumSuccs != 0) - // If SU does not have a register def, schedule it close to its uses - // because it does not lengthen any live ranges. - return 0; - return SethiUllmanNumbers[SU->NodeNum]; - } +void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { + SUnits = &sunits; + // Add pseudo dependency edges for two-address nodes. + AddPseudoTwoAddrDeps(); + // Reroute edges to nodes with multiple uses. + if (!TracksRegPressure) + PrescheduleNodesWithMultipleUses(); + // Calculate node priorities. + CalculateSethiUllmanNumbers(); +} - unsigned getNodeOrdering(const SUnit *SU) const { - return scheduleDAG->DAG->GetOrdering(SU->getNode()); - } +void RegReductionPQBase::addNode(const SUnit *SU) { + unsigned SUSize = SethiUllmanNumbers.size(); + if (SUnits->size() > SUSize) + SethiUllmanNumbers.resize(SUSize*2, 0); + CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); +} - bool empty() const { return Queue.empty(); } - - void push(SUnit *U) { - assert(!U->NodeQueueId && "Node in the queue already"); - U->NodeQueueId = ++CurQueueId; - Queue.push_back(U); - } +void RegReductionPQBase::updateNode(const SUnit *SU) { + SethiUllmanNumbers[SU->NodeNum] = 0; + CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); +} - SUnit *pop() { - if (empty()) return NULL; - std::vector<SUnit *>::iterator Best = Queue.begin(); - for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()), - E = Queue.end(); I != E; ++I) - if (Picker(*Best, *I)) - Best = I; - SUnit *V = *Best; - if (Best != prior(Queue.end())) - std::swap(*Best, Queue.back()); - Queue.pop_back(); - V->NodeQueueId = 0; - return V; - } +// Lower priority means schedule further down. For bottom-up scheduling, lower +// priority SUs are scheduled before higher priority SUs. +unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const { + assert(SU->NodeNum < SethiUllmanNumbers.size()); + unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; + if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) + // CopyToReg should be close to its uses to facilitate coalescing and + // avoid spilling. 
+ return 0; + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG || + Opc == TargetOpcode::INSERT_SUBREG) + // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be + // close to their uses to facilitate coalescing. + return 0; + if (SU->NumSuccs == 0 && SU->NumPreds != 0) + // If SU does not have a register use, i.e. it doesn't produce a value + // that would be consumed (e.g. store), then it terminates a chain of + // computation. Give it a large SethiUllman number so it will be + // scheduled right before its predecessors that it doesn't lengthen + // their live ranges. + return 0xffff; + if (SU->NumPreds == 0 && SU->NumSuccs != 0) + // If SU does not have a register def, schedule it close to its uses + // because it does not lengthen any live ranges. + return 0; + return SethiUllmanNumbers[SU->NodeNum]; +} - void remove(SUnit *SU) { - assert(!Queue.empty() && "Queue is empty!"); - assert(SU->NodeQueueId != 0 && "Not in queue!"); - std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), - SU); - if (I != prior(Queue.end())) - std::swap(*I, Queue.back()); - Queue.pop_back(); - SU->NodeQueueId = 0; - } +//===----------------------------------------------------------------------===// +// Register Pressure Tracking +//===----------------------------------------------------------------------===// - bool HighRegPressure(const SUnit *SU) const { - if (!TLI) - return false; +void RegReductionPQBase::dumpRegPressure() const { + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) { + const TargetRegisterClass *RC = *I; + unsigned Id = RC->getID(); + unsigned RP = RegPressure[Id]; + if (!RP) continue; + DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id] + << '\n'); + } +} - for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) - continue; - SUnit *PredSU = I->getSUnit(); - const SDNode *PN = PredSU->getNode(); - if (!PN->isMachineOpcode()) { - if (PN->getOpcode() == ISD::CopyFromReg) { - EVT VT = PN->getValueType(0); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - unsigned Cost = TLI->getRepRegClassCostFor(VT); - if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) - return true; - } - continue; - } - unsigned POpc = PN->getMachineOpcode(); - if (POpc == TargetOpcode::IMPLICIT_DEF) - continue; - if (POpc == TargetOpcode::EXTRACT_SUBREG) { - EVT VT = PN->getOperand(0).getValueType(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - unsigned Cost = TLI->getRepRegClassCostFor(VT); - // Check if this increases register pressure of the specific register - // class to the point where it would cause spills. - if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) - return true; - continue; - } else if (POpc == TargetOpcode::INSERT_SUBREG || - POpc == TargetOpcode::SUBREG_TO_REG) { - EVT VT = PN->getValueType(0); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - unsigned Cost = TLI->getRepRegClassCostFor(VT); - // Check if this increases register pressure of the specific register - // class to the point where it would cause spills. - if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) - return true; - continue; - } - unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); - for (unsigned i = 0; i != NumDefs; ++i) { - EVT VT = PN->getValueType(i); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - if (RegPressure[RCId] >= RegLimit[RCId]) - return true; // Reg pressure already high. 
- unsigned Cost = TLI->getRepRegClassCostFor(VT); - if (!PN->hasAnyUseOfValue(i)) - continue; - // Check if this increases register pressure of the specific register - // class to the point where it would cause spills. - if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) - return true; - } - } +bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { + if (!TLI) + return false; - return false; + for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + SUnit *PredSU = I->getSUnit(); + // NumRegDefsLeft is zero when enough uses of this node have been scheduled + // to cover the number of registers defined (they are all live). + if (PredSU->NumRegDefsLeft == 0) { + continue; + } + for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); + RegDefPos.IsValid(); RegDefPos.Advance()) { + EVT VT = RegDefPos.GetValue(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + unsigned Cost = TLI->getRepRegClassCostFor(VT); + if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) + return true; } + } + return false; +} - void ScheduledNode(SUnit *SU) { - if (!TracksRegPressure) - return; - - const SDNode *N = SU->getNode(); - if (!N->isMachineOpcode()) { - if (N->getOpcode() != ISD::CopyToReg) - return; - } else { - unsigned Opc = N->getMachineOpcode(); - if (Opc == TargetOpcode::EXTRACT_SUBREG || - Opc == TargetOpcode::INSERT_SUBREG || - Opc == TargetOpcode::SUBREG_TO_REG || - Opc == TargetOpcode::REG_SEQUENCE || - Opc == TargetOpcode::IMPLICIT_DEF) - return; - } +bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) { + const SDNode *N = SU->getNode(); - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) - continue; - SUnit *PredSU = I->getSUnit(); - if (PredSU->NumSuccsLeft != PredSU->NumSuccs) - continue; - const SDNode *PN = PredSU->getNode(); - if (!PN->isMachineOpcode()) { - if (PN->getOpcode() == ISD::CopyFromReg) { - EVT VT = PN->getValueType(0); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); - } - continue; - } - unsigned POpc = PN->getMachineOpcode(); - if (POpc == TargetOpcode::IMPLICIT_DEF) - continue; - if (POpc == TargetOpcode::EXTRACT_SUBREG) { - EVT VT = PN->getOperand(0).getValueType(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); - continue; - } else if (POpc == TargetOpcode::INSERT_SUBREG || - POpc == TargetOpcode::SUBREG_TO_REG) { - EVT VT = PN->getValueType(0); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); - continue; - } - unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); - for (unsigned i = 0; i != NumDefs; ++i) { - EVT VT = PN->getValueType(i); - if (!PN->hasAnyUseOfValue(i)) - continue; - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); - } - } + if (!N->isMachineOpcode() || !SU->NumSuccs) + return false; - // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() - // may transfer data dependencies to CopyToReg. 
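The SkipRegDefs bookkeeping that follows, rehearsed on plain vectors: each time a use of a predecessor is scheduled, exactly one of that predecessor's remaining register defs is consumed and its class charged, so n defs get charged across the first n scheduled uses. (class, cost) pairs stand in for RegDefIter results; not the LLVM types.

#include <cstdio>
#include <utility>
#include <vector>

struct Pred {
  std::vector<std::pair<unsigned, unsigned>> defs; // (reg class, cost)
  unsigned numRegDefsLeft;
};

void chargeOneDef(Pred &p, std::vector<unsigned> &pressure) {
  if (p.numRegDefsLeft == 0) return; // all defs already counted as live
  --p.numRegDefsLeft;
  unsigned skip = p.numRegDefsLeft;  // skip the defs still unconsumed...
  for (auto &d : p.defs) {
    if (skip) { --skip; continue; }
    pressure[d.first] += d.second;   // ...charge the next one, then stop
    break;
  }
}

int main() {
  Pred p{{{0, 1}, {1, 2}}, 2};
  std::vector<unsigned> pressure(2, 0);
  chargeOneDef(p, pressure); // consumes the second def (class 1)
  chargeOneDef(p, pressure); // consumes the first def (class 0)
  std::printf("%u %u\n", pressure[0], pressure[1]); // 1 2
}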
-    if (SU->NumSuccs && N->isMachineOpcode()) {
-      unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
-      for (unsigned i = 0; i != NumDefs; ++i) {
-        EVT VT = N->getValueType(i);
-        if (!N->hasAnyUseOfValue(i))
-          continue;
-        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
-        if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
-          // Register pressure tracking is imprecise. This can happen.
-          RegPressure[RCId] = 0;
-        else
-          RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
-      }
-    }
+  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+  for (unsigned i = 0; i != NumDefs; ++i) {
+    EVT VT = N->getValueType(i);
+    if (!N->hasAnyUseOfValue(i))
+      continue;
+    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+    if (RegPressure[RCId] >= RegLimit[RCId])
+      return true;
+  }
+  return false;
+}
+
+void RegReductionPQBase::ScheduledNode(SUnit *SU) {
+  if (!TracksRegPressure)
+    return;

-    dumpRegPressure();
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl())
+      continue;
+    SUnit *PredSU = I->getSUnit();
+    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+    // to cover the number of registers defined (they are all live).
+    if (PredSU->NumRegDefsLeft == 0) {
+      continue;
+    }
+    // FIXME: The ScheduleDAG currently loses information about which of a
+    // node's values is consumed by each dependence. Consequently, if the node
+    // defines multiple register classes, we don't know which to pressurize
+    // here. Instead the following loop consumes the register defs in an
+    // arbitrary order. At least it handles the common case of clustered loads
+    // to the same class. For precise liveness, each SDep needs to indicate the
+    // result number. But that tightly couples the ScheduleDAG with the
+    // SelectionDAG, making updates tricky. A simpler hack would be to attach a
+    // value type or register class to SDep.
+    //
+    // The most important aspect of register tracking is balancing the increase
+    // here with the reduction further below. Note that this SU may use multiple
+    // defs in PredSU. They can't be determined here, but we've already
+    // compensated by reducing NumRegDefsLeft in PredSU during
+    // ScheduleDAGSDNodes::AddSchedEdges.
+    --PredSU->NumRegDefsLeft;
+    unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
+    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+         RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
+      if (SkipRegDefs)
+        continue;
+      EVT VT = RegDefPos.GetValue();
+      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+      break;
+    }
+  }

-  void UnscheduledNode(SUnit *SU) {
-    if (!TracksRegPressure)
-      return;
-
-    const SDNode *N = SU->getNode();
-    if (!N->isMachineOpcode()) {
-      if (N->getOpcode() != ISD::CopyToReg)
-        return;
-    } else {
-      unsigned Opc = N->getMachineOpcode();
-      if (Opc == TargetOpcode::EXTRACT_SUBREG ||
-          Opc == TargetOpcode::INSERT_SUBREG ||
-          Opc == TargetOpcode::SUBREG_TO_REG ||
-          Opc == TargetOpcode::REG_SEQUENCE ||
-          Opc == TargetOpcode::IMPLICIT_DEF)
-        return;
-    }
+  // We should have this assert, but there may be dead SDNodes that never
+  // materialize as SUnits, so they don't appear to generate liveness.
+ //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses"); + int SkipRegDefs = (int)SU->NumRegDefsLeft; + for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG); + RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { + if (SkipRegDefs > 0) + continue; + EVT VT = RegDefPos.GetValue(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) { + // Register pressure tracking is imprecise. This can happen. But we try + // hard not to let it happen because it likely results in poor scheduling. + DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n"); + RegPressure[RCId] = 0; + } + else { + RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); + } + } + dumpRegPressure(); +} - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) - continue; - SUnit *PredSU = I->getSUnit(); - if (PredSU->NumSuccsLeft != PredSU->NumSuccs) - continue; - const SDNode *PN = PredSU->getNode(); - if (!PN->isMachineOpcode()) { - if (PN->getOpcode() == ISD::CopyFromReg) { - EVT VT = PN->getValueType(0); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); - } - continue; - } - unsigned POpc = PN->getMachineOpcode(); - if (POpc == TargetOpcode::IMPLICIT_DEF) - continue; - if (POpc == TargetOpcode::EXTRACT_SUBREG) { - EVT VT = PN->getOperand(0).getValueType(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); - continue; - } else if (POpc == TargetOpcode::INSERT_SUBREG || - POpc == TargetOpcode::SUBREG_TO_REG) { - EVT VT = PN->getValueType(0); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); - continue; - } - unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); - for (unsigned i = 0; i != NumDefs; ++i) { - EVT VT = PN->getValueType(i); - if (!PN->hasAnyUseOfValue(i)) - continue; - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) - // Register pressure tracking is imprecise. This can happen. - RegPressure[RCId] = 0; - else - RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); - } - } +void RegReductionPQBase::UnscheduledNode(SUnit *SU) { + if (!TracksRegPressure) + return; + + const SDNode *N = SU->getNode(); + if (!N->isMachineOpcode()) { + if (N->getOpcode() != ISD::CopyToReg) + return; + } else { + unsigned Opc = N->getMachineOpcode(); + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::INSERT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG || + Opc == TargetOpcode::REG_SEQUENCE || + Opc == TargetOpcode::IMPLICIT_DEF) + return; + } - // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() - // may transfer data dependencies to CopyToReg. - if (SU->NumSuccs && N->isMachineOpcode()) { - unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); - for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { - EVT VT = N->getValueType(i); - if (VT == MVT::Flag || VT == MVT::Other) - continue; - if (!N->hasAnyUseOfValue(i)) - continue; - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); - } + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + SUnit *PredSU = I->getSUnit(); + // NumSuccsLeft counts all deps. 
Don't compare it with NumSuccs which only + // counts data deps. + if (PredSU->NumSuccsLeft != PredSU->Succs.size()) + continue; + const SDNode *PN = PredSU->getNode(); + if (!PN->isMachineOpcode()) { + if (PN->getOpcode() == ISD::CopyFromReg) { + EVT VT = PN->getValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); } - - dumpRegPressure(); + continue; } - - void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { - scheduleDAG = scheduleDag; + unsigned POpc = PN->getMachineOpcode(); + if (POpc == TargetOpcode::IMPLICIT_DEF) + continue; + if (POpc == TargetOpcode::EXTRACT_SUBREG) { + EVT VT = PN->getOperand(0).getValueType(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + continue; + } else if (POpc == TargetOpcode::INSERT_SUBREG || + POpc == TargetOpcode::SUBREG_TO_REG) { + EVT VT = PN->getValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + continue; } - - void dumpRegPressure() const { - for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), - E = TRI->regclass_end(); I != E; ++I) { - const TargetRegisterClass *RC = *I; - unsigned Id = RC->getID(); - unsigned RP = RegPressure[Id]; - if (!RP) continue; - DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id] - << '\n'); - } + unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); + for (unsigned i = 0; i != NumDefs; ++i) { + EVT VT = PN->getValueType(i); + if (!PN->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) + // Register pressure tracking is imprecise. This can happen. + RegPressure[RCId] = 0; + else + RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); } + } - protected: - bool canClobber(const SUnit *SU, const SUnit *Op); - void AddPseudoTwoAddrDeps(); - void PrescheduleNodesWithMultipleUses(); - void CalculateSethiUllmanNumbers(); - }; - - typedef RegReductionPriorityQueue<bu_ls_rr_sort> - BURegReductionPriorityQueue; - - typedef RegReductionPriorityQueue<td_ls_rr_sort> - TDRegReductionPriorityQueue; - - typedef RegReductionPriorityQueue<src_ls_rr_sort> - SrcRegReductionPriorityQueue; - - typedef RegReductionPriorityQueue<hybrid_ls_rr_sort> - HybridBURRPriorityQueue; + // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() + // may transfer data dependencies to CopyToReg. + if (SU->NumSuccs && N->isMachineOpcode()) { + unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); + for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { + EVT VT = N->getValueType(i); + if (VT == MVT::Glue || VT == MVT::Other) + continue; + if (!N->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + } + } - typedef RegReductionPriorityQueue<ilp_ls_rr_sort> - ILPBURRPriorityQueue; + dumpRegPressure(); } +//===----------------------------------------------------------------------===// +// Dynamic Node Priority for Register Pressure Reduction +//===----------------------------------------------------------------------===// + /// closestSucc - Returns the scheduled cycle of the successor which is /// closest to the current cycle. 
 static unsigned closestSucc(const SUnit *SU) {
@@ -1483,9 +1871,123 @@ static unsigned calcMaxScratches(const SUnit *SU) {
   return Scratches;
 }
-template <typename RRSort>
-static bool BURRSort(const SUnit *left, const SUnit *right,
-                     const RegReductionPriorityQueue<RRSort> *SPQ) {
+/// hasOnlyLiveOutUses - Return true if SU has a single value successor that is
+/// a CopyToReg to a virtual register. This SU def is probably a liveout and
+/// it has no other use. It should be scheduled closer to the terminator.
+static bool hasOnlyLiveOutUses(const SUnit *SU) {
+  bool RetVal = false;
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;
+    const SUnit *SuccSU = I->getSUnit();
+    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
+      unsigned Reg =
+        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
+      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+        RetVal = true;
+        continue;
+      }
+    }
+    return false;
+  }
+  return RetVal;
+}
+
+/// UnitsSharePred - Return true if the two scheduling units share a common
+/// data predecessor.
+static bool UnitsSharePred(const SUnit *left, const SUnit *right) {
+  SmallSet<const SUnit*, 4> Preds;
+  for (SUnit::const_pred_iterator I = left->Preds.begin(), E = left->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;  // ignore chain preds
+    Preds.insert(I->getSUnit());
+  }
+  for (SUnit::const_pred_iterator I = right->Preds.begin(), E = right->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;  // ignore chain preds
+    if (Preds.count(I->getSUnit()))
+      return true;
+  }
+  return false;
+}
+
+// Check for either a dependence (latency) or resource (hazard) stall.
+//
+// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
+static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
+  if ((int)SPQ->getCurCycle() < Height) return true;
+  if (SPQ->getHazardRec()->getHazardType(SU, 0)
+      != ScheduleHazardRecognizer::NoHazard)
+    return true;
+  return false;
+}
+
+// Return -1 if left has higher priority, 1 if right has higher priority.
+// Return 0 if latency-based priority is equivalent.
+static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
+                            RegReductionPQBase *SPQ) {
+  // If the two nodes share an operand and one of them has a single
+  // use that is a live out copy, favor the one that is live out. Otherwise
+  // it will be difficult to eliminate the copy if the instruction is a
+  // loop induction variable update. e.g.
+  // BB:
+  // sub r1, r3, #1
+  // str r0, [r2, r3]
+  // mov r3, r1
+  // cmp
+  // bne BB
+  bool SharePred = UnitsSharePred(left, right);
+  // FIXME: Only adjust if BB is a loop back edge.
+  // FIXME: What's the cost of a copy?
+  int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0;
+  int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0;
+  int LHeight = (int)left->getHeight() - LBonus;
+  int RHeight = (int)right->getHeight() - RBonus;
+
+  bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) &&
+                BUHasStall(left, LHeight, SPQ);
+  bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) &&
+                BUHasStall(right, RHeight, SPQ);
+
+  // If scheduling one of the nodes will cause a pipeline stall, delay it.
+  // If scheduling either one of the nodes will cause a pipeline stall, sort
+  // them according to their height.
+  if (LStall) {
+    if (!RStall)
+      return 1;
+    if (LHeight != RHeight)
+      return LHeight > RHeight ? 1 : -1;
+  } else if (RStall)
+    return -1;
+
+  // If either node is scheduling for latency, sort them by height/depth
+  // and latency.
+  if (!checkPref || (left->SchedulingPref == Sched::Latency ||
+                     right->SchedulingPref == Sched::Latency)) {
+    if (DisableSchedCycles) {
+      if (LHeight != RHeight)
+        return LHeight > RHeight ? 1 : -1;
+    }
+    else {
+      // If neither instruction stalls (!LStall && !RStall) then
+      // its height is already covered so only its depth matters. We also reach
+      // this if both stall but have the same height.
+      unsigned LDepth = left->getDepth();
+      unsigned RDepth = right->getDepth();
+      if (LDepth != RDepth) {
+        DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
+              << ") depth " << LDepth << " vs SU (" << right->NodeNum
+              << ") depth " << RDepth << "\n");
+        return LDepth < RDepth ? 1 : -1;
+      }
+    }
+    if (left->Latency != right->Latency)
+      return left->Latency > right->Latency ? 1 : -1;
+  }
+  return 0;
+}
+
+static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
   unsigned LPriority = SPQ->getNodePriority(left);
   unsigned RPriority = SPQ->getNodePriority(right);
   if (LPriority != RPriority)
@@ -1519,24 +2021,31 @@ static bool BURRSort(const SUnit *left, const SUnit *right,
   if (LScratch != RScratch)
     return LScratch > RScratch;
 
-  if (left->getHeight() != right->getHeight())
-    return left->getHeight() > right->getHeight();
-
-  if (left->getDepth() != right->getDepth())
-    return left->getDepth() < right->getDepth();
+  if (!DisableSchedCycles) {
+    int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
+    if (result != 0)
+      return result > 0;
+  }
+  else {
+    if (left->getHeight() != right->getHeight())
+      return left->getHeight() > right->getHeight();
 
-  assert(left->NodeQueueId && right->NodeQueueId &&
+    if (left->getDepth() != right->getDepth())
+      return left->getDepth() < right->getDepth();
+  }
+
+  assert(left->NodeQueueId && right->NodeQueueId &&
          "NodeQueueId cannot be zero");
   return (left->NodeQueueId > right->NodeQueueId);
 }
 
 // Bottom up
-bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
   return BURRSort(left, right, SPQ);
 }
 
 // Source order, otherwise bottom up.
-bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
   unsigned LOrder = SPQ->getNodeOrdering(left);
   unsigned ROrder = SPQ->getNodeOrdering(right);
@@ -1548,49 +2057,69 @@ bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
   return BURRSort(left, right, SPQ);
 }
 
-bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
+// If the time between now and when the instruction will be ready can cover
+// the spill code, then avoid adding it to the ready queue. This gives long
+// stalls highest priority and allows hoisting across calls. It should also
+// speed up processing the available queue.
+bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
+  static const unsigned ReadyDelay = 3;
+
+  if (SPQ->MayReduceRegPressure(SU)) return true;
+
+  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
+
+  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
+      != ScheduleHazardRecognizer::NoHazard)
+    return false;
+
+  return true;
+}
+
+// Return true if right should be scheduled with higher priority than left.
+bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (left->isCall || right->isCall) + // No way to compute latency of calls. + return BURRSort(left, right, SPQ); + bool LHigh = SPQ->HighRegPressure(left); bool RHigh = SPQ->HighRegPressure(right); // Avoid causing spills. If register pressure is high, schedule for // register pressure reduction. - if (LHigh && !RHigh) + if (LHigh && !RHigh) { + DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" + << right->NodeNum << ")\n"); return true; - else if (!LHigh && RHigh) + } + else if (!LHigh && RHigh) { + DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" + << left->NodeNum << ")\n"); return false; + } else if (!LHigh && !RHigh) { - // Low register pressure situation, schedule for latency if possible. - bool LStall = left->SchedulingPref == Sched::Latency && - SPQ->getCurCycle() < left->getHeight(); - bool RStall = right->SchedulingPref == Sched::Latency && - SPQ->getCurCycle() < right->getHeight(); - // If scheduling one of the node will cause a pipeline stall, delay it. - // If scheduling either one of the node will cause a pipeline stall, sort - // them according to their height. - // If neither will cause a pipeline stall, try to reduce register pressure. - if (LStall) { - if (!RStall) - return true; - if (left->getHeight() != right->getHeight()) - return left->getHeight() > right->getHeight(); - } else if (RStall) - return false; - - // If either node is scheduling for latency, sort them by height and latency - // first. - if (left->SchedulingPref == Sched::Latency || - right->SchedulingPref == Sched::Latency) { - if (left->getHeight() != right->getHeight()) - return left->getHeight() > right->getHeight(); - if (left->Latency != right->Latency) - return left->Latency > right->Latency; - } + int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ); + if (result != 0) + return result > 0; } - return BURRSort(left, right, SPQ); } -bool ilp_ls_rr_sort::operator()(const SUnit *left, - const SUnit *right) const { +// Schedule as many instructions in each cycle as possible. So don't make an +// instruction available unless it is ready in the current cycle. +bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { + if (SU->getHeight() > CurCycle) return false; + + if (SPQ->getHazardRec()->getHazardType(SU, 0) + != ScheduleHazardRecognizer::NoHazard) + return false; + + return SU->getHeight() <= CurCycle; +} + +bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (left->isCall || right->isCall) + // No way to compute latency of calls. + return BURRSort(left, right, SPQ); + bool LHigh = SPQ->HighRegPressure(left); bool RHigh = SPQ->HighRegPressure(right); // Avoid causing spills. 
If register pressure is high, schedule for @@ -1611,9 +2140,11 @@ bool ilp_ls_rr_sort::operator()(const SUnit *left, return BURRSort(left, right, SPQ); } -template<class SF> -bool -RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) { +//===----------------------------------------------------------------------===// +// Preschedule for Register Pressure +//===----------------------------------------------------------------------===// + +bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) { if (SU->isTwoAddress) { unsigned Opc = SU->getNode()->getMachineOpcode(); const TargetInstrDesc &TID = TII->get(Opc); @@ -1631,19 +2162,6 @@ RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) { return false; } -/// hasCopyToRegUse - Return true if SU has a value successor that is a -/// CopyToReg node. -static bool hasCopyToRegUse(const SUnit *SU) { - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - if (I->isCtrl()) continue; - const SUnit *SuccSU = I->getSUnit(); - if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) - return true; - } - return false; -} - /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's /// physical register defs. static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, @@ -1654,7 +2172,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); assert(ImpDefs && "Caller should check hasPhysRegDefs"); for (const SDNode *SUNode = SU->getNode(); SUNode; - SUNode = SUNode->getFlaggedNode()) { + SUNode = SUNode->getGluedNode()) { if (!SUNode->isMachineOpcode()) continue; const unsigned *SUImpDefs = @@ -1663,7 +2181,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, return false; for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { EVT VT = N->getValueType(i); - if (VT == MVT::Flag || VT == MVT::Other) + if (VT == MVT::Glue || VT == MVT::Other) continue; if (!N->hasAnyUseOfValue(i)) continue; @@ -1709,8 +2227,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, /// after N, which shortens the U->N live range, reducing /// register pressure. /// -template<class SF> -void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() { +void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { // Visit all the nodes in topological order, working top-down. for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { SUnit *SU = &(*SUnits)[i]; @@ -1748,7 +2265,7 @@ void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() { if (PredSU->NumSuccs == 1) continue; // Avoid prescheduling to copies from virtual registers, which don't behave - // like other nodes from the perspective of scheduling // heuristics. + // like other nodes from the perspective of scheduling heuristics. if (SDNode *N = SU->getNode()) if (N->getOpcode() == ISD::CopyFromReg && TargetRegisterInfo::isVirtualRegister @@ -1802,17 +2319,17 @@ void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() { /// one that has a CopyToReg use (more likely to be a loop induction update). /// If both are two-address, but one is commutable while the other is not /// commutable, favor the one that's not commutable. 
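/// (Illustration, hypothetical operands: if A = op B, C ties its result to B,
/// then emitting A clobbers B's register, so every other reader of B should
/// land before A in the final schedule; otherwise the register allocator must
/// insert a copy to keep B alive. The artificial edges added here push the
/// scheduler toward that ordering.)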
-template<class SF> -void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { +void RegReductionPQBase::AddPseudoTwoAddrDeps() { for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { SUnit *SU = &(*SUnits)[i]; if (!SU->isTwoAddress) continue; SDNode *Node = SU->getNode(); - if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode()) + if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode()) continue; + bool isLiveOut = hasOnlyLiveOutUses(SU); unsigned Opc = Node->getMachineOpcode(); const TargetInstrDesc &TID = TII->get(Opc); unsigned NumRes = TID.getNumDefs(); @@ -1862,7 +2379,7 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { SuccOpc == TargetOpcode::SUBREG_TO_REG) continue; if ((!canClobber(SuccSU, DUSU) || - (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || + (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) || (!SU->isCommutable && SuccSU->isCommutable)) && !scheduleDAG->IsReachable(SuccSU, SU)) { DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #" @@ -1877,20 +2394,10 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { } } -/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all -/// scheduling units. -template<class SF> -void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { - SethiUllmanNumbers.assign(SUnits->size(), 0); - - for (unsigned i = 0, e = SUnits->size(); i != e; ++i) - CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); -} - /// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled /// predecessors of the successors of the SUnit SU. Stop when the provided /// limit is exceeded. -static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, +static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, unsigned Limit) { unsigned Sum = 0; for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); @@ -1942,7 +2449,7 @@ bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { if (left->NumSuccsLeft != right->NumSuccsLeft) return left->NumSuccsLeft > right->NumSuccsLeft; - assert(left->NodeQueueId && right->NodeQueueId && + assert(left->NodeQueueId && right->NodeQueueId && "NodeQueueId cannot be zero"); return (left->NodeQueueId > right->NodeQueueId); } @@ -1952,68 +2459,74 @@ bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { //===----------------------------------------------------------------------===// llvm::ScheduleDAGSDNodes * -llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { const TargetMachine &TM = IS->TM; const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - + BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); - ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); - return SD; + return SD; } llvm::ScheduleDAGSDNodes * -llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { const TargetMachine &TM = IS->TM; const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - + TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); - ScheduleDAGRRList *SD = new 
ScheduleDAGRRList(*IS->MF, false, false, PQ); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; } llvm::ScheduleDAGSDNodes * -llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { const TargetMachine &TM = IS->TM; const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - + SrcRegReductionPriorityQueue *PQ = new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); - ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); - return SD; + return SD; } llvm::ScheduleDAGSDNodes * -llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { const TargetMachine &TM = IS->TM; const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); const TargetLowering *TLI = &IS->getTargetLowering(); - + HybridBURRPriorityQueue *PQ = new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); - ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ); + + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); PQ->setScheduleDAG(SD); - return SD; + return SD; } llvm::ScheduleDAGSDNodes * -llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::createILPListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { const TargetMachine &TM = IS->TM; const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); const TargetLowering *TLI = &IS->getTargetLowering(); - + ILPBURRPriorityQueue *PQ = new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); - ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); PQ->setScheduleDAG(SD); - return SD; + return SD; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index f1bf82a..477c1ff 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -34,8 +34,8 @@ using namespace llvm; STATISTIC(LoadsClustered, "Number of loads clustered together"); ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) - : ScheduleDAG(mf) { -} + : ScheduleDAG(mf), + InstrItins(mf.getTarget().getInstrItineraryData()) {} /// Run - perform scheduling. /// @@ -72,6 +72,7 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { SUnit *SU = NewSUnit(Old->getNode()); SU->OrigNode = Old->OrigNode; SU->Latency = Old->Latency; + SU->isCall = Old->isCall; SU->isTwoAddress = Old->isTwoAddress; SU->isCommutable = Old->isCommutable; SU->hasPhysRegDefs = Old->hasPhysRegDefs; @@ -85,7 +86,7 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { /// a specified operand is a physical register dependency. If so, returns the /// register and the cost of copying the register. 
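/// (Operand 2 of a CopyToReg is the value being copied; operands 0 and 1 are
/// the chain and the destination register, which is why the check below
/// rejects any other operand index. Only a copy into a physical register
/// yields the register/cost pair computed here.)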
 static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
-                                      const TargetRegisterInfo *TRI,
+                                      const TargetRegisterInfo *TRI,
                                       const TargetInstrInfo *TII,
                                       unsigned &PhysReg, int &Cost) {
   if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
@@ -108,29 +109,28 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
   }
 }
 
-static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag,
-                     SelectionDAG *DAG) {
+static void AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
   SmallVector<EVT, 4> VTs;
-  SDNode *FlagDestNode = Flag.getNode();
+  SDNode *GlueDestNode = Glue.getNode();
 
-  // Don't add a flag from a node to itself.
-  if (FlagDestNode == N) return;
+  // Don't add glue from a node to itself.
+  if (GlueDestNode == N) return;
 
-  // Don't add a flag to something which already has a flag.
-  if (N->getValueType(N->getNumValues() - 1) == MVT::Flag) return;
+  // Don't add glue to something which already has glue.
+  if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return;
 
   for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
     VTs.push_back(N->getValueType(I));
 
-  if (AddFlag)
-    VTs.push_back(MVT::Flag);
+  if (AddGlue)
+    VTs.push_back(MVT::Glue);
 
   SmallVector<SDValue, 4> Ops;
   for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I)
     Ops.push_back(N->getOperand(I));
 
-  if (FlagDestNode)
-    Ops.push_back(Flag);
+  if (GlueDestNode)
+    Ops.push_back(Glue);
 
   SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size());
   MachineSDNode::mmo_iterator Begin = 0, End = 0;
@@ -149,9 +149,9 @@ static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag,
   MN->setMemRefs(Begin, End);
 }
 
-/// ClusterNeighboringLoads - Force nearby loads together by "flagging" them.
+/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
 /// This function finds loads of the same base and different offsets. If the
-/// offsets are not far apart (target specific), it add MVT::Flag inputs and
+/// offsets are not far apart (target specific), it adds MVT::Glue inputs and
 /// outputs to ensure they are scheduled together and in order. This
 /// optimization may benefit some targets by improving cache locality.
 void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
@@ -213,20 +213,20 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
   if (NumLoads == 0)
     return;
 
-  // Cluster loads by adding MVT::Flag outputs and inputs. This also
+  // Cluster loads by adding MVT::Glue outputs and inputs. This also
   // ensures they are scheduled in order of increasing addresses.
   SDNode *Lead = Loads[0];
-  AddFlags(Lead, SDValue(0, 0), true, DAG);
+  AddGlue(Lead, SDValue(0, 0), true, DAG);
 
-  SDValue InFlag = SDValue(Lead, Lead->getNumValues() - 1);
+  SDValue InGlue = SDValue(Lead, Lead->getNumValues() - 1);
   for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
-    bool OutFlag = I < E - 1;
+    bool OutGlue = I < E - 1;
     SDNode *Load = Loads[I];
 
-    AddFlags(Load, InFlag, OutFlag, DAG);
+    AddGlue(Load, InGlue, OutGlue, DAG);
 
-    if (OutFlag)
-      InFlag = SDValue(Load, Load->getNumValues() - 1);
+    if (OutGlue)
+      InGlue = SDValue(Load, Load->getNumValues() - 1);
 
     ++LoadsClustered;
   }
@@ -266,68 +266,75 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
   // FIXME: Multiply by 2 because we may clone nodes during scheduling.
   // This is a temporary workaround.
   SUnits.reserve(NumNodes * 2);
-
+
   // Add all nodes in depth first order.
SmallVector<SDNode*, 64> Worklist; SmallPtrSet<SDNode*, 64> Visited; Worklist.push_back(DAG->getRoot().getNode()); Visited.insert(DAG->getRoot().getNode()); - + while (!Worklist.empty()) { SDNode *NI = Worklist.pop_back_val(); - + // Add all operands to the worklist unless they've already been added. for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i) if (Visited.insert(NI->getOperand(i).getNode())) Worklist.push_back(NI->getOperand(i).getNode()); - + if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate. continue; - + // If this node has already been processed, stop now. if (NI->getNodeId() != -1) continue; - + SUnit *NodeSUnit = NewSUnit(NI); - - // See if anything is flagged to this node, if so, add them to flagged - // nodes. Nodes can have at most one flag input and one flag output. Flags - // are required to be the last operand and result of a node. - - // Scan up to find flagged preds. + + // See if anything is glued to this node, if so, add them to glued + // nodes. Nodes can have at most one glue input and one glue output. Glue + // is required to be the last operand and result of a node. + + // Scan up to find glued preds. SDNode *N = NI; while (N->getNumOperands() && - N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) { + N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) { N = N->getOperand(N->getNumOperands()-1).getNode(); assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NodeSUnit->NodeNum); + if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall()) + NodeSUnit->isCall = true; } - - // Scan down to find any flagged succs. + + // Scan down to find any glued succs. N = NI; - while (N->getValueType(N->getNumValues()-1) == MVT::Flag) { - SDValue FlagVal(N, N->getNumValues()-1); - - // There are either zero or one users of the Flag result. - bool HasFlagUse = false; - for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); + while (N->getValueType(N->getNumValues()-1) == MVT::Glue) { + SDValue GlueVal(N, N->getNumValues()-1); + + // There are either zero or one users of the Glue result. + bool HasGlueUse = false; + for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E; ++UI) - if (FlagVal.isOperandOf(*UI)) { - HasFlagUse = true; + if (GlueVal.isOperandOf(*UI)) { + HasGlueUse = true; assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NodeSUnit->NodeNum); N = *UI; + if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall()) + NodeSUnit->isCall = true; break; } - if (!HasFlagUse) break; + if (!HasGlueUse) break; } - - // If there are flag operands involved, N is now the bottom-most node - // of the sequence of nodes that are flagged together. + + // If there are glue operands involved, N is now the bottom-most node + // of the sequence of nodes that are glued together. // Update the SUnit. NodeSUnit->setNode(N); assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NodeSUnit->NodeNum); + // Compute NumRegDefsLeft. This must be done before AddSchedEdges. + InitNumRegDefsLeft(NodeSUnit); + // Assign the Latency field of NodeSUnit using target-provided information. 
ComputeLatency(NodeSUnit); } @@ -343,7 +350,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { SUnit *SU = &SUnits[su]; SDNode *MainNode = SU->getNode(); - + if (MainNode->isMachineOpcode()) { unsigned Opc = MainNode->getMachineOpcode(); const TargetInstrDesc &TID = TII->get(Opc); @@ -356,9 +363,9 @@ void ScheduleDAGSDNodes::AddSchedEdges() { if (TID.isCommutable()) SU->isCommutable = true; } - + // Find all predecessors and successors of the group. - for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) { + for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) { if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).getImplicitDefs()) { SU->hasPhysRegClobbers = true; @@ -368,7 +375,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs()) SU->hasPhysRegDefs = true; } - + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { SDNode *OpN = N->getOperand(i).getNode(); if (isPassiveNode(OpN)) continue; // Not scheduled. @@ -377,7 +384,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { if (OpSU == SU) continue; // In the same group. EVT OpVT = N->getOperand(i).getValueType(); - assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!"); + assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!"); bool isChain = OpVT == MVT::Other; unsigned PhysReg = 0; @@ -403,7 +410,13 @@ void ScheduleDAGSDNodes::AddSchedEdges() { ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep)); } - SU->addPred(dep); + if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 0) { + // Multiple register uses are combined in the same SUnit. For example, + // we could have a set of glued nodes with all their defs consumed by + // another set of glued nodes. Register pressure tracking sees this as + // a single use, so to keep pressure balanced we reduce the defs. + --OpSU->NumRegDefsLeft; + } } } } @@ -412,7 +425,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { /// BuildSchedGraph - Build the SUnit graph from the selection dag that we /// are input. This SUnit graph is similar to the SelectionDAG, but /// excludes nodes that aren't interesting to scheduling, and represents -/// flagged together nodes with a single SUnit. +/// glued together nodes with a single SUnit. void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) { // Cluster certain nodes which should be scheduled together. ClusterNodes(); @@ -422,6 +435,69 @@ void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) { AddSchedEdges(); } +// Initialize NumNodeDefs for the current Node's opcode. +void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() { + if (!Node->isMachineOpcode()) { + if (Node->getOpcode() == ISD::CopyFromReg) + NodeNumDefs = 1; + else + NodeNumDefs = 0; + return; + } + unsigned POpc = Node->getMachineOpcode(); + if (POpc == TargetOpcode::IMPLICIT_DEF) { + // No register need be allocated for this. + NodeNumDefs = 0; + return; + } + unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs(); + // Some instructions define regs that are not represented in the selection DAG + // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues. + NodeNumDefs = std::min(Node->getNumValues(), NRegDefs); + DefIdx = 0; +} + +// Construct a RegDefIter for this SUnit and find the first valid value. 
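// Typical traversal, mirroring InitNumRegDefsLeft() below (visit() is a
// placeholder for whatever the caller does with each def):
//   for (ScheduleDAGSDNodes::RegDefIter I(SU, SchedDAG); I.IsValid();
//        I.Advance())
//     visit(I.GetValue()); // EVT of each used register def, glued nodes too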
+ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU, + const ScheduleDAGSDNodes *SD) + : SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) { + InitNodeNumDefs(); + Advance(); +} + +// Advance to the next valid value defined by the SUnit. +void ScheduleDAGSDNodes::RegDefIter::Advance() { + for (;Node;) { // Visit all glued nodes. + for (;DefIdx < NodeNumDefs; ++DefIdx) { + if (!Node->hasAnyUseOfValue(DefIdx)) + continue; + if (Node->isMachineOpcode() && + Node->getMachineOpcode() == TargetOpcode::EXTRACT_SUBREG) { + // Propagate the incoming (full-register) type. I doubt it's needed. + ValueType = Node->getOperand(0).getValueType(); + } + else { + ValueType = Node->getValueType(DefIdx); + } + ++DefIdx; + return; // Found a normal regdef. + } + Node = Node->getGluedNode(); + if (Node == NULL) { + return; // No values left to visit. + } + InitNodeNumDefs(); + } +} + +void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) { + assert(SU->NumRegDefsLeft == 0 && "expect a new node"); + for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) { + assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected"); + ++SU->NumRegDefsLeft; + } +} + void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { // Check to see if the scheduler cares about latencies. if (ForceUnitLatencies()) { @@ -429,20 +505,17 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { return; } - const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); - if (InstrItins.isEmpty()) { + if (!InstrItins || InstrItins->isEmpty()) { SU->Latency = 1; return; } - + // Compute the latency for the node. We use the sum of the latencies for - // all nodes flagged together into this SUnit. + // all nodes glued together into this SUnit. SU->Latency = 0; - for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) - if (N->isMachineOpcode()) { - SU->Latency += InstrItins. - getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass()); - } + for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) + if (N->isMachineOpcode()) + SU->Latency += TII->getInstrLatency(InstrItins, N); } void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use, @@ -451,32 +524,25 @@ void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use, if (ForceUnitLatencies()) return; - const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); - if (InstrItins.isEmpty()) - return; - if (dep.getKind() != SDep::Data) return; unsigned DefIdx = Use->getOperand(OpIdx).getResNo(); - if (Def->isMachineOpcode()) { - const TargetInstrDesc &II = TII->get(Def->getMachineOpcode()); - if (DefIdx >= II.getNumDefs()) - return; - int DefCycle = InstrItins.getOperandCycle(II.getSchedClass(), DefIdx); - if (DefCycle < 0) - return; - int UseCycle = 1; - if (Use->isMachineOpcode()) { - const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass(); - UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx); - } - if (UseCycle >= 0) { - int Latency = DefCycle - UseCycle + 1; - if (Latency >= 0) - dep.setLatency(Latency); - } + if (Use->isMachineOpcode()) + // Adjust the use operand index by num of defs. + OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs(); + int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx); + if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg && + !BB->succ_empty()) { + unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + // This copy is a liveout value. 
It is likely coalesced, so reduce the
+    // latency so as not to penalize the def.
+    // FIXME: need target specific adjustment here?
+    Latency = (Latency > 1) ? Latency - 1 : 1;
   }
+  if (Latency >= 0)
+    dep.setLatency(Latency);
 }
 
 void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
@@ -487,14 +553,14 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
   SU->getNode()->dump(DAG);
   dbgs() << "\n";
 
-  SmallVector<SDNode *, 4> FlaggedNodes;
-  for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode())
-    FlaggedNodes.push_back(N);
-  while (!FlaggedNodes.empty()) {
+  SmallVector<SDNode *, 4> GluedNodes;
+  for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
+    GluedNodes.push_back(N);
+  while (!GluedNodes.empty()) {
     dbgs() << "    ";
-    FlaggedNodes.back()->dump(DAG);
+    GluedNodes.back()->dump(DAG);
     dbgs() << "\n";
-    FlaggedNodes.pop_back();
+    GluedNodes.pop_back();
   }
 }
 
@@ -507,37 +573,25 @@ namespace {
   };
 }
 
-// ProcessSourceNode - Process nodes with source order numbers. These are added
-// to a vector which EmitSchedule uses to determine how to insert dbg_value
-// instructions in the right order.
-static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
-                              InstrEmitter &Emitter,
-                              DenseMap<SDValue, unsigned> &VRBaseMap,
+/// ProcessSDDbgValues - Process SDDbgValues associated with this node.
+static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG,
+                               InstrEmitter &Emitter,
                     SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
-                              SmallSet<unsigned, 8> &Seen) {
-  unsigned Order = DAG->GetOrdering(N);
-  if (!Order || !Seen.insert(Order))
-    return;
-
-  MachineBasicBlock *BB = Emitter.getBlock();
-  if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) {
-    // Did not insert any instruction.
-    Orders.push_back(std::make_pair(Order, (MachineInstr*)0));
-    return;
-  }
-
-  Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos())));
+                               DenseMap<SDValue, unsigned> &VRBaseMap,
+                               unsigned Order) {
   if (!N->getHasDebugValue())
     return;
+
   // Opportunistically insert immediate dbg_value uses, i.e. those with source
   // order number right after the N.
+  MachineBasicBlock *BB = Emitter.getBlock();
   MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
   SmallVector<SDDbgValue*,2> &DVs = DAG->GetDbgValues(N);
   for (unsigned i = 0, e = DVs.size(); i != e; ++i) {
     if (DVs[i]->isInvalidated())
       continue;
     unsigned DVOrder = DVs[i]->getOrder();
-    if (DVOrder == ++Order) {
+    if (!Order || DVOrder == ++Order) {
       MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap);
       if (DbgMI) {
         Orders.push_back(std::make_pair(DVOrder, DbgMI));
@@ -548,6 +602,33 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
   }
 }
 
+// ProcessSourceNode - Process nodes with source order numbers. These are added
+// to a vector which EmitSchedule uses to determine how to insert dbg_value
+// instructions in the right order.
+static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
+                              InstrEmitter &Emitter,
+                              DenseMap<SDValue, unsigned> &VRBaseMap,
+                    SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
+                              SmallSet<unsigned, 8> &Seen) {
+  unsigned Order = DAG->GetOrdering(N);
+  if (!Order || !Seen.insert(Order)) {
+    // Process any valid SDDbgValues even if node does not have any order
+    // assigned.
+    ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0);
+    return;
+  }
+
+  MachineBasicBlock *BB = Emitter.getBlock();
+  if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) {
+    // Did not insert any instruction.
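    // (The null MachineInstr recorded below still claims this order number;
    // the emission loop in EmitSchedule skips such entries via `if (!MI)
    // continue;`, so no dbg_value gets anchored to a real instruction here.)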
+ Orders.push_back(std::make_pair(Order, (MachineInstr*)0)); + return; + } + + Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos()))); + ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order); +} + /// EmitSchedule - Emit the machine code in scheduled order. MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { @@ -578,25 +659,25 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { } // For pre-regalloc scheduling, create instructions corresponding to the - // SDNode and any flagged SDNodes and append them to the block. + // SDNode and any glued SDNodes and append them to the block. if (!SU->getNode()) { // Emit a copy. EmitPhysRegCopy(SU, CopyVRBaseMap); continue; } - SmallVector<SDNode *, 4> FlaggedNodes; - for (SDNode *N = SU->getNode()->getFlaggedNode(); N; - N = N->getFlaggedNode()) - FlaggedNodes.push_back(N); - while (!FlaggedNodes.empty()) { - SDNode *N = FlaggedNodes.back(); - Emitter.EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned, + SmallVector<SDNode *, 4> GluedNodes; + for (SDNode *N = SU->getNode()->getGluedNode(); N; + N = N->getGluedNode()) + GluedNodes.push_back(N); + while (!GluedNodes.empty()) { + SDNode *N = GluedNodes.back(); + Emitter.EmitNode(GluedNodes.back(), SU->OrigNode != SU, SU->isCloned, VRBaseMap); // Remember the source order of the inserted instruction. if (HasDbg) ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen); - FlaggedNodes.pop_back(); + GluedNodes.pop_back(); } Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, VRBaseMap); @@ -625,16 +706,8 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { // Insert all SDDbgValue's whose order(s) are before "Order". if (!MI) continue; -#ifndef NDEBUG - unsigned LastDIOrder = 0; -#endif for (; DI != DE && (*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) { -#ifndef NDEBUG - assert((*DI)->getOrder() >= LastDIOrder && - "SDDbgValue nodes must be in source order!"); - LastDIOrder = (*DI)->getOrder(); -#endif if ((*DI)->isInvalidated()) continue; MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 842fc8c..cc7310e 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -20,13 +20,13 @@ namespace llvm { /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs. - /// + /// /// Edges between SUnits are initially based on edges in the SelectionDAG, /// and additional edges can be added by the schedulers as heuristics. /// SDNodes such as Constants, Registers, and a few others that are not /// interesting to schedulers are not allocated SUnits. /// - /// SDNodes with MVT::Flag operands are grouped along with the flagged + /// SDNodes with MVT::Glue operands are grouped along with the flagged /// nodes into a single SUnit so that they are scheduled together. /// /// SDNode-based scheduling graphs do not use SDep::Anti or SDep::Output @@ -36,6 +36,7 @@ namespace llvm { class ScheduleDAGSDNodes : public ScheduleDAG { public: SelectionDAG *DAG; // DAG of the current basic block + const InstrItineraryData *InstrItins; explicit ScheduleDAGSDNodes(MachineFunction &mf); @@ -72,13 +73,17 @@ namespace llvm { /// predecessors / successors info nor the temporary scheduling states. /// SUnit *Clone(SUnit *N); - + /// BuildSchedGraph - Build the SUnit graph from the selection dag that we /// are input. 
This SUnit graph is similar to the SelectionDAG, but /// excludes nodes that aren't interesting to scheduling, and represents /// flagged together nodes with a single SUnit. virtual void BuildSchedGraph(AliasAnalysis *AA); + /// InitNumRegDefsLeft - Determine the # of regs defined by this node. + /// + void InitNumRegDefsLeft(SUnit *SU); + /// ComputeLatency - Compute node latency. /// virtual void ComputeLatency(SUnit *SU); @@ -105,6 +110,30 @@ namespace llvm { virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const; + /// RegDefIter - In place iteration over the values defined by an + /// SUnit. This does not need copies of the iterator or any other STLisms. + /// The iterator creates itself, rather than being provided by the SchedDAG. + class RegDefIter { + const ScheduleDAGSDNodes *SchedDAG; + const SDNode *Node; + unsigned DefIdx; + unsigned NodeNumDefs; + EVT ValueType; + public: + RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD); + + bool IsValid() const { return Node != NULL; } + + EVT GetValue() const { + assert(IsValid() && "bad iterator"); + return ValueType; + } + + void Advance(); + private: + void InitNodeNumDefs(); + }; + private: /// ClusterNeighboringLoads - Cluster loads from "near" addresses into /// combined SUnits. diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index ad06ebd..2fb2f2d 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -31,7 +31,6 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSelectionDAGInfo.h" #include "llvm/Target/TargetOptions.h" @@ -44,7 +43,7 @@ #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/System/Mutex.h" +#include "llvm/Support/Mutex.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -111,7 +110,7 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT, /// BUILD_VECTOR where all of the elements are ~0 or undef. bool ISD::isBuildVectorAllOnes(const SDNode *N) { // Look through a bit convert. - if (N->getOpcode() == ISD::BIT_CONVERT) + if (N->getOpcode() == ISD::BITCAST) N = N->getOperand(0).getNode(); if (N->getOpcode() != ISD::BUILD_VECTOR) return false; @@ -152,7 +151,7 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) { /// BUILD_VECTOR where all of the elements are 0 or undef. bool ISD::isBuildVectorAllZeros(const SDNode *N) { // Look through a bit convert. - if (N->getOpcode() == ISD::BIT_CONVERT) + if (N->getOpcode() == ISD::BITCAST) N = N->getOperand(0).getNode(); if (N->getOpcode() != ISD::BUILD_VECTOR) return false; @@ -199,6 +198,8 @@ bool ISD::isScalarToVector(const SDNode *N) { if (N->getOperand(0).getOpcode() == ISD::UNDEF) return false; unsigned NumElems = N->getNumOperands(); + if (NumElems == 1) + return false; for (unsigned i = 1; i < NumElems; ++i) { SDValue V = N->getOperand(i); if (V.getOpcode() != ISD::UNDEF) @@ -489,7 +490,7 @@ encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile, /// doNotCSE - Return true if CSE should not be performed for this node. 
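/// (Glue results can never be CSE'd: glue exists to pin a producer to its
/// single consumer, so sharing one glue-producing node between two consumers
/// would break that pairing. That is why the checks below reject any node
/// with an MVT::Glue result.)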
static bool doNotCSE(SDNode *N) { - if (N->getValueType(0) == MVT::Flag) + if (N->getValueType(0) == MVT::Glue) return true; // Never CSE anything that produces a flag. switch (N->getOpcode()) { @@ -501,7 +502,7 @@ static bool doNotCSE(SDNode *N) { // Check that remaining values produced are not flags. for (unsigned i = 1, e = N->getNumValues(); i != e; ++i) - if (N->getValueType(i) == MVT::Flag) + if (N->getValueType(i) == MVT::Glue) return true; // Never CSE anything that produces a flag. return false; @@ -609,9 +610,6 @@ void SelectionDAG::DeallocateNode(SDNode *N) { bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { bool Erased = false; switch (N->getOpcode()) { - case ISD::EntryToken: - llvm_unreachable("EntryToken should not be in CSEMaps!"); - return false; case ISD::HANDLENODE: return false; // noop. case ISD::CONDCODE: assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] && @@ -641,6 +639,8 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { } default: // Remove it from the CSE Map. + assert(N->getOpcode() != ISD::DELETED_NODE && "DELETED_NODE in CSEMap!"); + assert(N->getOpcode() != ISD::EntryToken && "EntryToken in CSEMap!"); Erased = CSEMap.RemoveNode(N); break; } @@ -648,7 +648,7 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { // Verify that the node was actually in one of the CSE maps, unless it has a // flag result (which cannot be CSE'd) or is one of the special cases that are // not subject to CSE. - if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Flag && + if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Glue && !N->isMachineOpcode() && !doNotCSE(N)) { N->dump(this); dbgs() << "\n"; @@ -743,8 +743,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, return Node; } -/// VerifyNode - Sanity check the given node. Aborts if it is invalid. -void SelectionDAG::VerifyNode(SDNode *N) { +#ifndef NDEBUG +/// VerifyNodeCommon - Sanity check the given node. Aborts if it is invalid. +static void VerifyNodeCommon(SDNode *N) { switch (N->getOpcode()) { default: break; @@ -778,6 +779,44 @@ void SelectionDAG::VerifyNode(SDNode *N) { } } +/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid. +static void VerifySDNode(SDNode *N) { + // The SDNode allocators cannot be used to allocate nodes with fields that are + // not present in an SDNode! 
+  assert(!isa<MemSDNode>(N) && "Bad MemSDNode!");
+  assert(!isa<ShuffleVectorSDNode>(N) && "Bad ShuffleVectorSDNode!");
+  assert(!isa<ConstantSDNode>(N) && "Bad ConstantSDNode!");
+  assert(!isa<ConstantFPSDNode>(N) && "Bad ConstantFPSDNode!");
+  assert(!isa<GlobalAddressSDNode>(N) && "Bad GlobalAddressSDNode!");
+  assert(!isa<FrameIndexSDNode>(N) && "Bad FrameIndexSDNode!");
+  assert(!isa<JumpTableSDNode>(N) && "Bad JumpTableSDNode!");
+  assert(!isa<ConstantPoolSDNode>(N) && "Bad ConstantPoolSDNode!");
+  assert(!isa<BasicBlockSDNode>(N) && "Bad BasicBlockSDNode!");
+  assert(!isa<SrcValueSDNode>(N) && "Bad SrcValueSDNode!");
+  assert(!isa<MDNodeSDNode>(N) && "Bad MDNodeSDNode!");
+  assert(!isa<RegisterSDNode>(N) && "Bad RegisterSDNode!");
+  assert(!isa<BlockAddressSDNode>(N) && "Bad BlockAddressSDNode!");
+  assert(!isa<EHLabelSDNode>(N) && "Bad EHLabelSDNode!");
+  assert(!isa<ExternalSymbolSDNode>(N) && "Bad ExternalSymbolSDNode!");
+  assert(!isa<CondCodeSDNode>(N) && "Bad CondCodeSDNode!");
+  assert(!isa<CvtRndSatSDNode>(N) && "Bad CvtRndSatSDNode!");
+  assert(!isa<VTSDNode>(N) && "Bad VTSDNode!");
+  assert(!isa<MachineSDNode>(N) && "Bad MachineSDNode!");
+
+  VerifyNodeCommon(N);
+}
+
+/// VerifyMachineNode - Sanity check the given MachineNode. Aborts if it is
+/// invalid.
+static void VerifyMachineNode(SDNode *N) {
+  // The MachineNode allocators cannot be used to allocate nodes with fields
+  // that are not present in a MachineNode!
+  // Currently there are no such nodes.
+
+  VerifyNodeCommon(N);
+}
+#endif // NDEBUG
+
 /// getEVTAlignment - Compute the default alignment value for the
 /// given type.
 ///
@@ -1315,7 +1354,7 @@ SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) {
   void *IP = 0;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
-
+
   SDNode *N = new (NodeAllocator) EHLabelSDNode(dl, Root, Label);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
@@ -1365,11 +1404,11 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) {
   FoldingSetNodeID ID;
   AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), 0, 0);
   ID.AddPointer(MD);
-
+
   void *IP = 0;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
-
+
   SDNode *N = new (NodeAllocator) MDNodeSDNode(MD);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
@@ -1613,7 +1652,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
     // Also compute a conservative estimate for high known-0 bits.
     // More trickiness is possible, but this is sufficient for the
     // interesting case of alignment computation.
-    KnownOne.clear();
+    KnownOne.clearAllBits();
     unsigned TrailZ = KnownZero.countTrailingOnes() +
                       KnownZero2.countTrailingOnes();
     unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
@@ -1636,8 +1675,8 @@
                       AllOnes, KnownZero2, KnownOne2, Depth+1);
     unsigned LeadZ = KnownZero2.countLeadingOnes();
-    KnownOne2.clear();
-    KnownZero2.clear();
+    KnownOne2.clearAllBits();
+    KnownZero2.clearAllBits();
     ComputeMaskedBits(Op.getOperand(1),
                       AllOnes, KnownZero2, KnownOne2, Depth+1);
     unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
@@ -1765,7 +1804,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
     // If the sign extended bits are demanded, we know that the sign
     // bit is demanded.
- InSignBit.zext(BitWidth); + InSignBit = InSignBit.zext(BitWidth); if (NewBits.getBoolValue()) InputDemandedBits |= InSignBit; @@ -1792,7 +1831,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::CTPOP: { unsigned LowBits = Log2_32(BitWidth)+1; KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); - KnownOne.clear(); + KnownOne.clearAllBits(); return; } case ISD::LOAD: { @@ -1808,13 +1847,12 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; - APInt InMask = Mask; - InMask.trunc(InBits); - KnownZero.trunc(InBits); - KnownOne.trunc(InBits); + APInt InMask = Mask.trunc(InBits); + KnownZero = KnownZero.trunc(InBits); + KnownOne = KnownOne.trunc(InBits); ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); - KnownZero.zext(BitWidth); - KnownOne.zext(BitWidth); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); KnownZero |= NewBits; return; } @@ -1823,16 +1861,15 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, unsigned InBits = InVT.getScalarType().getSizeInBits(); APInt InSignBit = APInt::getSignBit(InBits); APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; - APInt InMask = Mask; - InMask.trunc(InBits); + APInt InMask = Mask.trunc(InBits); // If any of the sign extended bits are demanded, we know that the sign // bit is demanded. Temporarily set this bit in the mask for our callee. if (NewBits.getBoolValue()) InMask |= InSignBit; - KnownZero.trunc(InBits); - KnownOne.trunc(InBits); + KnownZero = KnownZero.trunc(InBits); + KnownOne = KnownOne.trunc(InBits); ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); // Note if the sign bit is known to be zero or one. @@ -1844,13 +1881,12 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // If the sign bit wasn't actually demanded by our caller, we don't // want it set in the KnownZero and KnownOne result values. Reset the // mask and reapply it to the result values. - InMask = Mask; - InMask.trunc(InBits); + InMask = Mask.trunc(InBits); KnownZero &= InMask; KnownOne &= InMask; - KnownZero.zext(BitWidth); - KnownOne.zext(BitWidth); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); // If the sign bit is known zero or one, the top bits match. 
if (SignBitKnownZero) @@ -1862,26 +1898,24 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::ANY_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); - APInt InMask = Mask; - InMask.trunc(InBits); - KnownZero.trunc(InBits); - KnownOne.trunc(InBits); + APInt InMask = Mask.trunc(InBits); + KnownZero = KnownZero.trunc(InBits); + KnownOne = KnownOne.trunc(InBits); ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); - KnownZero.zext(BitWidth); - KnownOne.zext(BitWidth); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); return; } case ISD::TRUNCATE: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); - APInt InMask = Mask; - InMask.zext(InBits); - KnownZero.zext(InBits); - KnownOne.zext(InBits); + APInt InMask = Mask.zext(InBits); + KnownZero = KnownZero.zext(InBits); + KnownOne = KnownOne.zext(InBits); ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero.trunc(BitWidth); - KnownOne.trunc(BitWidth); + KnownZero = KnownZero.trunc(BitWidth); + KnownOne = KnownOne.trunc(BitWidth); break; } case ISD::AssertZext: { @@ -1921,7 +1955,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, } } // fall through - case ISD::ADD: { + case ISD::ADD: + case ISD::ADDE: { // Output known-0 bits are known if clear or set in both the low clear bits // common to both LHS & RHS. For example, 8+(X<<3) is known to have the // low 3 bits clear. @@ -1936,7 +1971,17 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, KnownZeroOut = std::min(KnownZeroOut, KnownZero2.countTrailingOnes()); - KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut); + if (Op.getOpcode() == ISD::ADD) { + KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut); + return; + } + + // With ADDE, a carry bit may be added in, so we can only use this + // information if we know (at least) that the low two bits are clear. We + // then return to the caller that the low bit is unknown but that other bits + // are known zero. + if (KnownZeroOut >= 2) // ADDE + KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroOut); return; } case ISD::SREM: @@ -1991,10 +2036,19 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, uint32_t Leaders = std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); - KnownOne.clear(); + KnownOne.clearAllBits(); KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask; return; } + case ISD::FrameIndex: + case ISD::TargetFrameIndex: + if (unsigned Align = InferPtrAlignment(Op)) { + // The low bits are known zero if the pointer is aligned. + KnownZero = APInt::getLowBitsSet(BitWidth, Log2_32(Align)); + return; + } + break; + default: // Allow the target to implement this method for its nodes. if (Op.getOpcode() >= ISD::BUILTIN_OP_END) { @@ -2234,6 +2288,25 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros())); } +/// isBaseWithConstantOffset - Return true if the specified operand is an +/// ISD::ADD with a ConstantSDNode on the right-hand side, or if it is an +/// ISD::OR with a ConstantSDNode that is guaranteed to have the same +/// semantics as an ADD. This handles the equivalence: +/// X|Cst == X+Cst iff X&Cst = 0. 
+bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const { + if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) || + !isa<ConstantSDNode>(Op.getOperand(1))) + return false; + + if (Op.getOpcode() == ISD::OR && + !MaskedValueIsZero(Op.getOperand(0), + cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue())) + return false; + + return true; +} + + bool SelectionDAG::isKnownNeverNaN(SDValue Op) const { // If we're told that NaNs won't happen, assume they won't. if (NoNaNsFPMath) @@ -2295,7 +2368,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) { AllNodes.push_back(N); #ifndef NDEBUG - VerifyNode(N); + VerifySDNode(N); #endif return SDValue(N, 0); } @@ -2308,23 +2381,22 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, switch (Opcode) { default: break; case ISD::SIGN_EXTEND: - return getConstant(APInt(Val).sextOrTrunc(VT.getSizeInBits()), VT); + return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT); case ISD::ANY_EXTEND: case ISD::ZERO_EXTEND: case ISD::TRUNCATE: - return getConstant(APInt(Val).zextOrTrunc(VT.getSizeInBits()), VT); + return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT); case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: { - const uint64_t zero[] = {0, 0}; // No compile time operations on ppcf128. if (VT == MVT::ppcf128) break; - APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero)); + APFloat apf(APInt::getNullValue(VT.getSizeInBits())); (void)apf.convertFromAPInt(Val, Opcode==ISD::SINT_TO_FP, APFloat::rmNearestTiesToEven); return getConstantFP(apf, VT); } - case ISD::BIT_CONVERT: + case ISD::BITCAST: if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) return getConstantFP(Val.bitsToFloat(), VT); else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) @@ -2375,7 +2447,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, APInt api(VT.getSizeInBits(), 2, x); return getConstant(api, VT); } - case ISD::BIT_CONVERT: + case ISD::BITCAST: if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT); else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) @@ -2477,13 +2549,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, return Operand.getNode()->getOperand(0); } break; - case ISD::BIT_CONVERT: + case ISD::BITCAST: // Basic sanity checking. assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits() - && "Cannot BIT_CONVERT between types of different sizes!"); + && "Cannot BITCAST between types of different sizes!"); if (VT == Operand.getValueType()) return Operand; // noop conversion. 
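The isBaseWithConstantOffset predicate added above leans on a simple bit-level fact: when no bit of the constant overlaps a possibly-set bit of X, an OR cannot generate a carry, so X|Cst computes exactly X+Cst. A quick standalone check of the identity (helper name is ours):

#include <cassert>
#include <cstdint>
#include <cstdio>

// X|C is equivalent to X+C whenever X and C share no set bits.
bool orActsLikeAdd(uint64_t X, uint64_t C) { return (X & C) == 0; }

int main() {
  uint64_t Base = 0x1000;  // e.g. a 16-byte aligned address
  uint64_t Off  = 0xC;     // offset living entirely in the low bits
  assert(orActsLikeAdd(Base, Off));
  assert((Base | Off) == Base + Off);
  printf("0x%llX + 0x%llX == 0x%llX\n",
         (unsigned long long)Base, (unsigned long long)Off,
         (unsigned long long)(Base | Off));
  return 0;
}

In the DAG the X&Cst==0 side is established with MaskedValueIsZero, which is why code that addresses an aligned base through OR still gets the same offset folds as an ADD.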
- if (OpOpcode == ISD::BIT_CONVERT) // bitconv(bitconv(x)) -> bitconv(x) - return getNode(ISD::BIT_CONVERT, DL, VT, Operand.getOperand(0)); + if (OpOpcode == ISD::BITCAST) // bitconv(bitconv(x)) -> bitconv(x) + return getNode(ISD::BITCAST, DL, VT, Operand.getOperand(0)); if (OpOpcode == ISD::UNDEF) return getUNDEF(VT); break; @@ -2519,7 +2591,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDNode *N; SDVTList VTs = getVTList(VT); - if (VT != MVT::Flag) { // Don't CSE flag producing nodes + if (VT != MVT::Glue) { // Don't CSE flag producing nodes FoldingSetNodeID ID; SDValue Ops[1] = { Operand }; AddNodeIDNode(ID, Opcode, VTs, Ops, 1); @@ -2535,7 +2607,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, AllNodes.push_back(N); #ifndef NDEBUG - VerifyNode(N); + VerifySDNode(N); #endif return SDValue(N, 0); } @@ -2676,6 +2748,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, "Shift operators return type must be the same as their first arg"); assert(VT.isInteger() && N2.getValueType().isInteger() && "Shifts only work on integers"); + // Verify that the shift amount VT is big enough to hold valid shift + // amounts. This catches things like trying to shift an i1024 value by an + // i8, which is easy to fall into in generic code that uses + // TLI.getShiftAmount(). + assert(N2.getValueType().getSizeInBits() >= + Log2_32_Ceil(N1.getValueType().getSizeInBits()) && + "Invalid use of small shift amount with oversized value!"); // Always fold shifts of i1 values so the code generator doesn't need to // handle them. Since we know the size of the shift has to be less than the @@ -2820,11 +2899,30 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, return getConstant(ShiftedVal.trunc(ElementSize), VT); } break; - case ISD::EXTRACT_SUBVECTOR: - if (N1.getValueType() == VT) // Trivial extraction. - return N1; + case ISD::EXTRACT_SUBVECTOR: { + SDValue Index = N2; + if (VT.isSimple() && N1.getValueType().isSimple()) { + assert(VT.isVector() && N1.getValueType().isVector() && + "Extract subvector VTs must be vectors!"); + assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() && + "Extract subvector VTs must have the same element type!"); + assert(VT.getSimpleVT() <= N1.getValueType().getSimpleVT() && + "Extract subvector must be from larger vector to smaller vector!"); + + if (isa<ConstantSDNode>(Index.getNode())) { + assert((VT.getVectorNumElements() + + cast<ConstantSDNode>(Index.getNode())->getZExtValue() + <= N1.getValueType().getVectorNumElements()) + && "Extract subvector overflow!"); + } + + // Trivial extraction. + if (VT.getSimpleVT() == N1.getValueType().getSimpleVT()) + return N1; + } break; } + } if (N1C) { if (N2C) { @@ -2961,7 +3059,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, // Memoize this node if possible.
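The new shift assert earlier in this hunk encodes a small sizing rule: to express every shift of an N-bit value, the amount type needs at least ceil(log2(N)) bits, so an i8 amount is fine for an i64 but not for an i1024. The same computation with standard facilities (Log2_32_Ceil modeled with C++20's std::bit_width):

#include <bit>
#include <cassert>
#include <cstdio>

// Smallest amount width able to represent shifts 0 .. ValueBits-1.
unsigned minShiftAmountBits(unsigned ValueBits) {
  return std::bit_width(ValueBits - 1);  // == Log2_32_Ceil(ValueBits)
}

int main() {
  printf("i64   needs %u-bit shift amounts\n", minShiftAmountBits(64));
  printf("i1024 needs %u-bit shift amounts\n", minShiftAmountBits(1024));
  assert(minShiftAmountBits(1024) > 8 && "an i8 amount would be rejected");
  return 0;
}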
SDNode *N; SDVTList VTs = getVTList(VT); - if (VT != MVT::Flag) { + if (VT != MVT::Glue) { SDValue Ops[] = { N1, N2 }; FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops, 2); @@ -2977,7 +3075,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, AllNodes.push_back(N); #ifndef NDEBUG - VerifyNode(N); + VerifySDNode(N); #endif return SDValue(N, 0); } @@ -3019,7 +3117,31 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, case ISD::VECTOR_SHUFFLE: llvm_unreachable("should use getVectorShuffle constructor!"); break; - case ISD::BIT_CONVERT: + case ISD::INSERT_SUBVECTOR: { + SDValue Index = N3; + if (VT.isSimple() && N1.getValueType().isSimple() + && N2.getValueType().isSimple()) { + assert(VT.isVector() && N1.getValueType().isVector() && + N2.getValueType().isVector() && + "Insert subvector VTs must be vectors"); + assert(VT == N1.getValueType() && + "Dest and insert subvector source types must match!"); + assert(N2.getValueType().getSimpleVT() <= N1.getValueType().getSimpleVT() && + "Insert subvector must be from smaller vector to larger vector!"); + if (isa<ConstantSDNode>(Index.getNode())) { + assert((N2.getValueType().getVectorNumElements() + + cast<ConstantSDNode>(Index.getNode())->getZExtValue() + <= VT.getVectorNumElements()) + && "Insert subvector overflow!"); + } + + // Trivial insertion. + if (VT.getSimpleVT() == N2.getValueType().getSimpleVT()) + return N2; + } + break; + } + case ISD::BITCAST: // Fold bit_convert nodes from a type to themselves. if (N1.getValueType() == VT) return N1; @@ -3029,7 +3151,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, // Memoize node if it doesn't produce a flag. SDNode *N; SDVTList VTs = getVTList(VT); - if (VT != MVT::Flag) { + if (VT != MVT::Glue) { SDValue Ops[] = { N1, N2, N3 }; FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops, 3); @@ -3045,7 +3167,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, AllNodes.push_back(N); #ifndef NDEBUG - VerifyNode(N); + VerifySDNode(N); #endif return SDValue(N, 0); } @@ -3087,6 +3209,17 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) { &ArgChains[0], ArgChains.size()); } +/// SplatByte - Distribute ByteVal over NumBits bits. +static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) { + APInt Val = APInt(NumBits, ByteVal); + unsigned Shift = 8; + for (unsigned i = NumBits; i > 8; i >>= 1) { + Val = (Val << Shift) | Val; + Shift <<= 1; + } + return Val; +} + /// getMemsetValue - Vectorized representation of the memset value /// operand.
static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, @@ -3095,27 +3228,18 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, unsigned NumBits = VT.getScalarType().getSizeInBits(); if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) { - APInt Val = APInt(NumBits, C->getZExtValue() & 255); - unsigned Shift = 8; - for (unsigned i = NumBits; i > 8; i >>= 1) { - Val = (Val << Shift) | Val; - Shift <<= 1; - } + APInt Val = SplatByte(NumBits, C->getZExtValue() & 255); if (VT.isInteger()) return DAG.getConstant(Val, VT); return DAG.getConstantFP(APFloat(Val), VT); } - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value); - unsigned Shift = 8; - for (unsigned i = NumBits; i > 8; i >>= 1) { - Value = DAG.getNode(ISD::OR, dl, VT, - DAG.getNode(ISD::SHL, dl, VT, Value, - DAG.getConstant(Shift, - TLI.getShiftAmountTy())), - Value); - Shift <<= 1; + if (NumBits > 8) { + // Use a multiplication with 0x010101... to extend the input to the + // required length. + APInt Magic = SplatByte(NumBits, 0x01); + Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT)); } return Value; @@ -3131,13 +3255,12 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, if (Str.empty()) { if (VT.isInteger()) return DAG.getConstant(0, VT); - else if (VT.getSimpleVT().SimpleTy == MVT::f32 || - VT.getSimpleVT().SimpleTy == MVT::f64) + else if (VT == MVT::f32 || VT == MVT::f64) return DAG.getConstantFP(0.0, VT); else if (VT.isVector()) { unsigned NumElts = VT.getVectorNumElements(); MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64; - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, + return DAG.getNode(ISD::BITCAST, dl, VT, DAG.getConstant(0, EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts))); } else @@ -3234,15 +3357,6 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, if (VT.bitsGT(LVT)) VT = LVT; } - - // If we're optimizing for size, and there is a limit, bump the maximum number - // of operations inserted down to 4. This is a wild guess that approximates - // the size of a call to memcpy or memset (3 arguments + call). - if (Limit != ~0U) { - const Function *F = DAG.getMachineFunction().getFunction(); - if (F->hasFnAttr(Attribute::OptimizeForSize)) - Limit = 4; - } unsigned NumMemOps = 0; while (Size != 0) { @@ -3276,18 +3390,22 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, SDValue Src, uint64_t Size, unsigned Align, bool isVol, bool AlwaysInline, - const Value *DstSV, uint64_t DstSVOff, - const Value *SrcSV, uint64_t SrcSVOff) { + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { // Turn a memcpy of undef to nop. if (Src.getOpcode() == ISD::UNDEF) return Chain; // Expand memcpy to a series of load and store ops if the size operand falls // below a certain threshold. + // TODO: In the AlwaysInline case, if the size is big then generate a loop + // rather than maybe a humongous number of loads and stores. 
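Two splat strategies meet in the getMemsetValue change above. For a constant byte, SplatByte doubles the populated width each iteration, so a 64-bit pattern costs three shift/or steps instead of seven byte-sized ones. For a runtime byte, the new code multiplies the zero-extended byte by 0x0101...01, which deposits a copy in every byte lane in a single operation, since 255 times 1 per lane can never carry into the next lane. Both are easy to check standalone (a sketch, with uint64_t standing in for APInt/SDValue):

#include <cassert>
#include <cstdint>
#include <cstdio>

// Constant path: double the populated width until NumBits is filled.
uint64_t splatByteConst(unsigned NumBits, uint8_t B) {
  uint64_t Val = B;
  unsigned Shift = 8;
  for (unsigned i = NumBits; i > 8; i >>= 1) {
    Val = (Val << Shift) | Val;
    Shift <<= 1;
  }
  return Val;
}

// Runtime path: one multiply by the 0x01 splat constant.
uint64_t splatByteMul(uint8_t B) {
  return (uint64_t)B * 0x0101010101010101ULL;
}

int main() {
  const uint8_t Bytes[] = {0x00, 0x5A, 0xFF};
  for (uint8_t B : Bytes) {
    assert(splatByteConst(64, B) == splatByteMul(B));
    printf("%02X -> 0x%016llX\n", B, (unsigned long long)splatByteMul(B));
  }
  return 0;
}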
const TargetLowering &TLI = DAG.getTargetLoweringInfo(); std::vector<EVT> MemOps; bool DstAlignCanChange = false; - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -3297,8 +3415,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, std::string Str; bool CopyFromStr = isMemSrcFromString(Src, Str); bool isZeroStr = CopyFromStr && Str.empty(); - unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(); - + unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); + if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), (isZeroStr ? 0 : SrcAlign), @@ -3334,7 +3452,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff); Store = DAG.getStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff, isVol, false, Align); + DstPtrInfo.getWithOffset(DstOff), isVol, + false, Align); } else { // The type might not be legal for the target. This should only happen // if the type is smaller than a legal type, as on PPC, so the right @@ -3343,14 +3462,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, // FIXME does the case above also need this? EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); assert(NVT.bitsGE(VT)); - Value = DAG.getExtLoad(ISD::EXTLOAD, NVT, dl, Chain, + Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, getMemBasePlusOffset(Src, SrcOff, DAG), - SrcSV, SrcSVOff + SrcOff, VT, isVol, false, + SrcPtrInfo.getWithOffset(SrcOff), VT, isVol, false, MinAlign(SrcAlign, SrcOff)); Store = DAG.getTruncStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff, VT, isVol, false, - Align); + DstPtrInfo.getWithOffset(DstOff), VT, isVol, + false, Align); } OutChains.push_back(Store); SrcOff += VTSize; @@ -3366,8 +3485,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, SDValue Src, uint64_t Size, unsigned Align, bool isVol, bool AlwaysInline, - const Value *DstSV, uint64_t DstSVOff, - const Value *SrcSV, uint64_t SrcSVOff) { + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { // Turn a memmove of undef to nop. if (Src.getOpcode() == ISD::UNDEF) return Chain; @@ -3377,14 +3496,16 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); std::vector<EVT> MemOps; bool DstAlignCanChange = false; - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; unsigned SrcAlign = DAG.InferPtrAlignment(Src); if (Align > SrcAlign) SrcAlign = Align; - unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(); + unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize); if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 
0 : Align), @@ -3414,7 +3535,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Value = DAG.getLoad(VT, dl, Chain, getMemBasePlusOffset(Src, SrcOff, DAG), - SrcSV, SrcSVOff + SrcOff, isVol, false, SrcAlign); + SrcPtrInfo.getWithOffset(SrcOff), isVol, + false, SrcAlign); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); SrcOff += VTSize; @@ -3429,7 +3551,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Store = DAG.getStore(Chain, dl, LoadValues[i], getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff, isVol, false, Align); + DstPtrInfo.getWithOffset(DstOff), isVol, false, Align); OutChains.push_back(Store); DstOff += VTSize; } @@ -3442,7 +3564,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, unsigned Align, bool isVol, - const Value *DstSV, uint64_t DstSVOff) { + MachinePointerInfo DstPtrInfo) { // Turn a memset of undef to nop. if (Src.getOpcode() == ISD::UNDEF) return Chain; @@ -3452,13 +3574,15 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); std::vector<EVT> MemOps; bool DstAlignCanChange = false; - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; bool NonScalarIntSafe = isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue(); - if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(), + if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize), Size, (DstAlignCanChange ? 0 : Align), 0, NonScalarIntSafe, false, DAG, TLI)) return SDValue(); @@ -3477,15 +3601,34 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, SmallVector<SDValue, 8> OutChains; uint64_t DstOff = 0; unsigned NumMemOps = MemOps.size(); + + // Find the largest store and generate the bit pattern for it. + EVT LargestVT = MemOps[0]; + for (unsigned i = 1; i < NumMemOps; i++) + if (MemOps[i].bitsGT(LargestVT)) + LargestVT = MemOps[i]; + SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl); + for (unsigned i = 0; i < NumMemOps; i++) { EVT VT = MemOps[i]; - unsigned VTSize = VT.getSizeInBits() / 8; - SDValue Value = getMemsetValue(Src, VT, DAG, dl); + + // If this store is smaller than the largest store see whether we can get + // the smaller value for free with a truncate. 
+ SDValue Value = MemSetValue; + if (VT.bitsLT(LargestVT)) { + if (!LargestVT.isVector() && !VT.isVector() && + TLI.isTruncateFree(LargestVT, VT)) + Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue); + else + Value = getMemsetValue(Src, VT, DAG, dl); + } + assert(Value.getValueType() == VT && "Value with wrong type."); SDValue Store = DAG.getStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff, isVol, false, 0); + DstPtrInfo.getWithOffset(DstOff), + isVol, false, Align); OutChains.push_back(Store); - DstOff += VTSize; + DstOff += VT.getSizeInBits() / 8; } return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, @@ -3495,8 +3638,8 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, - const Value *DstSV, uint64_t DstSVOff, - const Value *SrcSV, uint64_t SrcSVOff) { + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { // Check to see if we should lower the memcpy to loads and stores first. // For cases within the target-specified limits, this is the best choice. @@ -3508,7 +3651,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),Align, - isVol, false, DstSV, DstSVOff, SrcSV, SrcSVOff); + isVol, false, DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; } @@ -3518,7 +3661,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Result = TSI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align, isVol, AlwaysInline, - DstSV, DstSVOff, SrcSV, SrcSVOff); + DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; @@ -3528,7 +3671,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, assert(ConstantSize && "AlwaysInline requires a constant size!"); return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Align, isVol, - true, DstSV, DstSVOff, SrcSV, SrcSVOff); + true, DstPtrInfo, SrcPtrInfo); } // FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc @@ -3559,8 +3702,8 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, - const Value *DstSV, uint64_t DstSVOff, - const Value *SrcSV, uint64_t SrcSVOff) { + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { // Check to see if we should lower the memmove to loads and stores first. // For cases within the target-specified limits, this is the best choice. @@ -3573,7 +3716,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Result = getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Align, isVol, - false, DstSV, DstSVOff, SrcSV, SrcSVOff); + false, DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; } @@ -3582,7 +3725,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, // code. If the target chooses to do this, this is the next best. 
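The memset lowering above now builds the splat once at the largest store type and, when the target reports integer truncation as free, derives each narrower store value from it instead of re-materializing a splat per size. A sketch of that selection over plain integers (the isTruncateFree stand-in is an assumption of this example, not TLI's real hook):

#include <cstdint>
#include <cstdio>

// Stand-in for TLI.isTruncateFree: assume integer truncation costs nothing.
bool isTruncateFree(unsigned FromBits, unsigned ToBits) {
  return ToBits < FromBits;
}

// Value to store at width Bits, given one splat built at the widest type.
uint64_t valueForStore(uint64_t WideSplat, unsigned WideBits, unsigned Bits) {
  if (Bits == WideBits)
    return WideSplat;                      // the widest stores use it directly
  if (isTruncateFree(WideBits, Bits))      // narrower stores truncate it
    return WideSplat & ((1ULL << Bits) - 1);
  return 0;  // otherwise a fresh splat would be built (elided in this sketch)
}

int main() {
  uint64_t Splat = 0xABABABABABABABABULL;   // memset value 0xAB, built once
  for (unsigned Bits : {64u, 32u, 16u, 8u}) // store sizes for a 15-byte tail
    printf("i%-2u store: 0x%llX\n", Bits,
           (unsigned long long)valueForStore(Splat, 64, Bits));
  return 0;
}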
SDValue Result = TSI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol, - DstSV, DstSVOff, SrcSV, SrcSVOff); + DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; @@ -3611,7 +3754,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, - const Value *DstSV, uint64_t DstSVOff) { + MachinePointerInfo DstPtrInfo) { // Check to see if we should lower the memset to stores first. // For cases within the target-specified limits, this is the best choice. @@ -3623,7 +3766,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), - Align, isVol, DstSV, DstSVOff); + Align, isVol, DstPtrInfo); if (Result.getNode()) return Result; @@ -3633,11 +3776,11 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, // code. If the target chooses to do this, this is the next best. SDValue Result = TSI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol, - DstSV, DstSVOff); + DstPtrInfo); if (Result.getNode()) return Result; - // Emit a library call. + // Emit a library call. const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -3669,19 +3812,12 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, } SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, - SDValue Chain, - SDValue Ptr, SDValue Cmp, - SDValue Swp, const Value* PtrVal, + SDValue Chain, SDValue Ptr, SDValue Cmp, + SDValue Swp, MachinePointerInfo PtrInfo, unsigned Alignment) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(MemVT); - // Check if the memory reference references a frame index - if (!PtrVal) - if (const FrameIndexSDNode *FI = - dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) - PtrVal = PseudoSourceValue::getFixedStack(FI->getIndex()); - MachineFunction &MF = getMachineFunction(); unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; @@ -3689,8 +3825,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, Flags |= MachineMemOperand::MOVolatile; MachineMemOperand *MMO = - MF.getMachineMemOperand(PtrVal, Flags, 0, - MemVT.getStoreSize(), Alignment); + MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment); return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO); } @@ -3729,12 +3864,6 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(MemVT); - // Check if the memory reference references a frame index - if (!PtrVal) - if (const FrameIndexSDNode *FI = - dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) - PtrVal = PseudoSourceValue::getFixedStack(FI->getIndex()); - MachineFunction &MF = getMachineFunction(); unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; @@ -3742,7 +3871,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, Flags |= MachineMemOperand::MOVolatile; MachineMemOperand *MMO = - MF.getMachineMemOperand(PtrVal, Flags, 0, + MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags, MemVT.getStoreSize(), Alignment); return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO); @@ -3785,7 +3914,6 @@ SDValue 
SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, } /// getMergeValues - Create a MERGE_VALUES node from the given operands. -/// Allowed to return something different (and simpler) if Simplify is true. SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps, DebugLoc dl) { if (NumOps == 1) @@ -3803,18 +3931,18 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, const EVT *VTs, unsigned NumVTs, const SDValue *Ops, unsigned NumOps, - EVT MemVT, const Value *srcValue, int SVOff, + EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, bool Vol, bool ReadMem, bool WriteMem) { return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps, - MemVT, srcValue, SVOff, Align, Vol, + MemVT, PtrInfo, Align, Vol, ReadMem, WriteMem); } SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, const SDValue *Ops, unsigned NumOps, - EVT MemVT, const Value *srcValue, int SVOff, + EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, bool Vol, bool ReadMem, bool WriteMem) { if (Align == 0) // Ensure that codegen never sees alignment 0 @@ -3829,8 +3957,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, if (Vol) Flags |= MachineMemOperand::MOVolatile; MachineMemOperand *MMO = - MF.getMachineMemOperand(srcValue, Flags, SVOff, - MemVT.getStoreSize(), Align); + MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Align); return getMemIntrinsicNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO); } @@ -3841,13 +3968,14 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, EVT MemVT, MachineMemOperand *MMO) { assert((Opcode == ISD::INTRINSIC_VOID || Opcode == ISD::INTRINSIC_W_CHAIN || + Opcode == ISD::PREFETCH || (Opcode <= INT_MAX && (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) && "Opcode is not a memory-accessing opcode!"); // Memoize the node unless it returns a flag. MemIntrinsicSDNode *N; - if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) { + if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); void *IP = 0; @@ -3867,36 +3995,70 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, return SDValue(N, 0); } +/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a +/// MachinePointerInfo record from it. This is particularly useful because the +/// code generator has many cases where it doesn't bother passing in a +/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". +static MachinePointerInfo InferPointerInfo(SDValue Ptr, int64_t Offset = 0) { + // If this is FI+Offset, we can model it. + if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) + return MachinePointerInfo::getFixedStack(FI->getIndex(), Offset); + + // If this is (FI+Offset1)+Offset2, we can model it. + if (Ptr.getOpcode() != ISD::ADD || + !isa<ConstantSDNode>(Ptr.getOperand(1)) || + !isa<FrameIndexSDNode>(Ptr.getOperand(0))) + return MachinePointerInfo(); + + int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); + return MachinePointerInfo::getFixedStack(FI, Offset+ + cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue()); +} + +/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a +/// MachinePointerInfo record from it. This is particularly useful because the +/// code generator has many cases where it doesn't bother passing in a +/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". 
+static MachinePointerInfo InferPointerInfo(SDValue Ptr, SDValue OffsetOp) { + // If the 'Offset' value isn't a constant, we can't handle this. + if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp)) + return InferPointerInfo(Ptr, OffsetNode->getSExtValue()); + if (OffsetOp.getOpcode() == ISD::UNDEF) + return InferPointerInfo(Ptr); + return MachinePointerInfo(); +} + + SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, SDValue Offset, - const Value *SV, int SVOffset, EVT MemVT, + MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, - unsigned Alignment) { + unsigned Alignment, const MDNode *TBAAInfo) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(VT); - // Check if the memory reference references a frame index - if (!SV) - if (const FrameIndexSDNode *FI = - dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) - SV = PseudoSourceValue::getFixedStack(FI->getIndex()); - - MachineFunction &MF = getMachineFunction(); unsigned Flags = MachineMemOperand::MOLoad; if (isVolatile) Flags |= MachineMemOperand::MOVolatile; if (isNonTemporal) Flags |= MachineMemOperand::MONonTemporal; + + // If we don't have a PtrInfo, infer the trivial frame index case to simplify + // clients. + if (PtrInfo.V == 0) + PtrInfo = InferPointerInfo(Ptr, Offset); + + MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = - MF.getMachineMemOperand(SV, Flags, SVOffset, - MemVT.getStoreSize(), Alignment); + MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment, + TBAAInfo); return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO); } SDValue -SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, +SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, SDValue Offset, EVT MemVT, MachineMemOperand *MMO) { @@ -3943,25 +4105,26 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, - const Value *SV, int SVOffset, + MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, - unsigned Alignment) { + unsigned Alignment, const MDNode *TBAAInfo) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, - SV, SVOffset, VT, isVolatile, isNonTemporal, Alignment); + PtrInfo, VT, isVolatile, isNonTemporal, Alignment, TBAAInfo); } -SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, EVT VT, DebugLoc dl, +SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, SDValue Chain, SDValue Ptr, - const Value *SV, - int SVOffset, EVT MemVT, + MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, - unsigned Alignment) { + unsigned Alignment, const MDNode *TBAAInfo) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, - SV, SVOffset, MemVT, isVolatile, isNonTemporal, Alignment); + PtrInfo, MemVT, isVolatile, isNonTemporal, Alignment, + TBAAInfo); } + SDValue SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM) { @@ -3969,33 +4132,32 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, assert(LD->getOffset().getOpcode() == ISD::UNDEF && "Load is already an indexed load!"); return getLoad(AM,
LD->getExtensionType(), OrigLoad.getValueType(), dl, - LD->getChain(), Base, Offset, LD->getSrcValue(), - LD->getSrcValueOffset(), LD->getMemoryVT(), + LD->getChain(), Base, Offset, LD->getPointerInfo(), + LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); } SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, - SDValue Ptr, const Value *SV, int SVOffset, + SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, - unsigned Alignment) { + unsigned Alignment, const MDNode *TBAAInfo) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(Val.getValueType()); - // Check if the memory reference references a frame index - if (!SV) - if (const FrameIndexSDNode *FI = - dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) - SV = PseudoSourceValue::getFixedStack(FI->getIndex()); - - MachineFunction &MF = getMachineFunction(); unsigned Flags = MachineMemOperand::MOStore; if (isVolatile) Flags |= MachineMemOperand::MOVolatile; if (isNonTemporal) Flags |= MachineMemOperand::MONonTemporal; + + if (PtrInfo.V == 0) + PtrInfo = InferPointerInfo(Ptr); + + MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = - MF.getMachineMemOperand(SV, Flags, SVOffset, - Val.getValueType().getStoreSize(), Alignment); + MF.getMachineMemOperand(PtrInfo, Flags, + Val.getValueType().getStoreSize(), Alignment, + TBAAInfo); return getStore(Chain, dl, Val, Ptr, MMO); } @@ -4024,27 +4186,26 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, } SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, - SDValue Ptr, const Value *SV, - int SVOffset, EVT SVT, - bool isVolatile, bool isNonTemporal, - unsigned Alignment) { + SDValue Ptr, MachinePointerInfo PtrInfo, + EVT SVT,bool isVolatile, bool isNonTemporal, + unsigned Alignment, + const MDNode *TBAAInfo) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(SVT); - // Check if the memory reference references a frame index - if (!SV) - if (const FrameIndexSDNode *FI = - dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) - SV = PseudoSourceValue::getFixedStack(FI->getIndex()); - - MachineFunction &MF = getMachineFunction(); unsigned Flags = MachineMemOperand::MOStore; if (isVolatile) Flags |= MachineMemOperand::MOVolatile; if (isNonTemporal) Flags |= MachineMemOperand::MONonTemporal; + + if (PtrInfo.V == 0) + PtrInfo = InferPointerInfo(Ptr); + + MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = - MF.getMachineMemOperand(SV, Flags, SVOffset, SVT.getStoreSize(), Alignment); + MF.getMachineMemOperand(PtrInfo, Flags, SVT.getStoreSize(), Alignment, + TBAAInfo); return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO); } @@ -4170,7 +4331,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDNode *N; SDVTList VTs = getVTList(VT); - if (VT != MVT::Flag) { + if (VT != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps); void *IP = 0; @@ -4186,7 +4347,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, AllNodes.push_back(N); #ifndef NDEBUG - VerifyNode(N); + VerifySDNode(N); #endif return SDValue(N, 0); } @@ -4236,7 +4397,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, // Memoize the node unless it returns a flag. 
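getStore and getTruncStore above now fall back to InferPointerInfo when the caller passes no MachinePointerInfo, recognizing the shapes FI and FI+Cst so the memory operand still names the right fixed stack slot for alias analysis. A toy version of that matching over a minimal expression type (all types and names here are illustrative, not LLVM's):

#include <cstdint>
#include <cstdio>

struct Node {
  enum { FrameIndex, Constant, Add } Kind;
  int FI = 0;                             // for FrameIndex
  int64_t C = 0;                          // for Constant
  const Node *L = nullptr, *R = nullptr;  // for Add
};

struct PtrInfo { bool Known; int FI; int64_t Offset; };

PtrInfo inferPointerInfo(const Node &Ptr, int64_t Offset = 0) {
  if (Ptr.Kind == Node::FrameIndex)       // plain FI (+ extra Offset)
    return {true, Ptr.FI, Offset};
  if (Ptr.Kind == Node::Add && Ptr.L->Kind == Node::FrameIndex &&
      Ptr.R->Kind == Node::Constant)      // (FI + C1) + Offset
    return {true, Ptr.L->FI, Offset + Ptr.R->C};
  return {false, 0, 0};                   // anything else stays unknown
}

int main() {
  Node FI{Node::FrameIndex};  FI.FI = 3;
  Node C8{Node::Constant};    C8.C = 8;
  Node Add{Node::Add};        Add.L = &FI; Add.R = &C8;
  PtrInfo PI = inferPointerInfo(Add, 4);
  if (PI.Known)
    printf("fixed stack slot %d, offset %lld\n", PI.FI, (long long)PI.Offset);
  return 0;
}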
SDNode *N; - if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) { + if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); void *IP = 0; @@ -4268,7 +4429,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, } AllNodes.push_back(N); #ifndef NDEBUG - VerifyNode(N); + VerifySDNode(N); #endif return SDValue(N, 0); } @@ -4645,7 +4806,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, unsigned NumOps) { // If an identical node already exists, use it. void *IP = 0; - if (VTs.VTs[VTs.NumVTs-1] != MVT::Flag) { + if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, VTs, Ops, NumOps); if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) @@ -4845,9 +5006,9 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, const SDValue *Ops, unsigned NumOps) { - bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Flag; + bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue; MachineSDNode *N; - void *IP; + void *IP = 0; if (DoCSE) { FoldingSetNodeID ID; @@ -4876,7 +5037,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, AllNodes.push_back(N); #ifndef NDEBUG - VerifyNode(N); + VerifyMachineNode(N); #endif return N; } @@ -4907,7 +5068,7 @@ SelectionDAG::getTargetInsertSubreg(int SRIdx, DebugLoc DL, EVT VT, /// else return NULL. SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, const SDValue *Ops, unsigned NumOps) { - if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) { + if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); void *IP = 0; @@ -5340,6 +5501,29 @@ void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) { SD->setHasDebugValue(true); } +/// TransferDbgValues - Transfer SDDbgValues. 
+void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { + if (From == To || !From.getNode()->getHasDebugValue()) + return; + SDNode *FromNode = From.getNode(); + SDNode *ToNode = To.getNode(); + SmallVector<SDDbgValue *, 2> &DVs = GetDbgValues(FromNode); + SmallVector<SDDbgValue *, 2> ClonedDVs; + for (SmallVector<SDDbgValue *, 2>::iterator I = DVs.begin(), E = DVs.end(); + I != E; ++I) { + SDDbgValue *Dbg = *I; + if (Dbg->getKind() == SDDbgValue::SDNODE) { + SDDbgValue *Clone = getDbgValue(Dbg->getMDPtr(), ToNode, To.getResNo(), + Dbg->getOffset(), Dbg->getDebugLoc(), + Dbg->getOrder()); + ClonedDVs.push_back(Clone); + } + } + for (SmallVector<SDDbgValue *, 2>::iterator I = ClonedDVs.begin(), + E = ClonedDVs.end(); I != E; ++I) + AddDbgValue(*I, ToNode, false); +} + //===----------------------------------------------------------------------===// // SDNode Class //===----------------------------------------------------------------------===// @@ -5367,7 +5551,7 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt, } MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, - const SDValue *Ops, unsigned NumOps, EVT memvt, + const SDValue *Ops, unsigned NumOps, EVT memvt, MachineMemOperand *mmo) : SDNode(Opc, dl, VTs, Ops, NumOps), MemoryVT(memvt), MMO(mmo) { @@ -5386,7 +5570,7 @@ void SDNode::Profile(FoldingSetNodeID &ID) const { namespace { struct EVTArray { std::vector<EVT> VTs; - + EVTArray() { VTs.reserve(MVT::LAST_VALUETYPE); for (unsigned i = 0; i < MVT::LAST_VALUETYPE; ++i) @@ -5406,7 +5590,7 @@ const EVT *SDNode::getValueTypeList(EVT VT) { sys::SmartScopedLock<true> Lock(*VTMutex); return &(*EVTs->insert(VT).first); } else { - assert(VT.getSimpleVT().SimpleTy < MVT::LAST_VALUETYPE && + assert(VT.getSimpleVT() < MVT::LAST_VALUETYPE && "Value type out of range!"); return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy]; } @@ -5478,9 +5662,9 @@ bool SDNode::isOperandOf(SDNode *N) const { /// reachesChainWithoutSideEffects - Return true if this operand (which must /// be a chain) reaches the specified operand without crossing any -/// side-effecting instructions. In practice, this looks through token -/// factors and non-volatile loads. In order to remain efficient, this only -/// looks a couple of nodes in, it does not do an exhaustive search. +/// side-effecting instructions on any chain path. In practice, this looks +/// through token factors and non-volatile loads. In order to remain efficient, +/// this only looks a couple of nodes in, it does not do an exhaustive search. bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, unsigned Depth) const { if (*this == Dest) return true; @@ -5490,12 +5674,12 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, if (Depth == 0) return false; // If this is a token factor, all inputs to the TF happen in parallel. If any - // of the operands of the TF reach dest, then we can do the xform. + // of the operands of the TF does not reach dest, then we cannot do the xform. if (getOpcode() == ISD::TokenFactor) { for (unsigned i = 0, e = getNumOperands(); i != e; ++i) - if (getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1)) - return true; - return false; + if (!getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1)) + return false; + return true; } // Loads don't have side effects, look through them. 
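The reachesChainWithoutSideEffects change a few hunks up is a genuine semantic fix, and the reworded comment explains why: a TokenFactor merges parallel chains, so the destination is only provably reached without side effects if every incoming chain reaches it; under the old any-operand rule, a second path containing a store could be silently bypassed. A standalone model of the corrected walk (toy node type, ours):

#include <cstdio>
#include <vector>

struct ChainNode {
  bool IsTokenFactor = false;
  bool HasSideEffects = false;            // e.g. a store or volatile load
  std::vector<const ChainNode *> Ops;     // incoming chain operands
};

bool reachesWithoutSideEffects(const ChainNode *N, const ChainNode *Dest,
                               unsigned Depth = 2) {
  if (N == Dest) return true;
  if (Depth == 0) return false;           // stay cheap, like the DAG version
  if (N->IsTokenFactor) {
    for (const ChainNode *Op : N->Ops)    // ALL parallel paths must reach
      if (!reachesWithoutSideEffects(Op, Dest, Depth - 1))
        return false;
    return true;
  }
  if (!N->HasSideEffects && !N->Ops.empty())  // look through benign nodes
    return reachesWithoutSideEffects(N->Ops[0], Dest, Depth - 1);
  return false;
}

int main() {
  ChainNode Entry, Store, TF;
  Store.HasSideEffects = true; Store.Ops = {&Entry};
  TF.IsTokenFactor = true;     TF.Ops = {&Entry, &Store};
  // One path reaches Entry directly, the other crosses a store:
  printf("reaches = %d\n", reachesWithoutSideEffects(&TF, &Entry));  // 0
  return 0;
}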
@@ -5600,6 +5784,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::EH_RETURN: return "EH_RETURN"; case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; + case ISD::EH_SJLJ_DISPATCHSETUP: return "EH_SJLJ_DISPATCHSETUP"; case ISD::ConstantPool: return "ConstantPool"; case ISD::ExternalSymbol: return "ExternalSymbol"; case ISD::BlockAddress: return "BlockAddress"; @@ -5690,6 +5875,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt"; case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt"; case ISD::CONCAT_VECTORS: return "concat_vectors"; + case ISD::INSERT_SUBVECTOR: return "insert_subvector"; case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; @@ -5723,7 +5909,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::UINT_TO_FP: return "uint_to_fp"; case ISD::FP_TO_SINT: return "fp_to_sint"; case ISD::FP_TO_UINT: return "fp_to_uint"; - case ISD::BIT_CONVERT: return "bit_convert"; + case ISD::BITCAST: return "bit_convert"; case ISD::FP16_TO_FP32: return "fp16_to_fp32"; case ISD::FP32_TO_FP16: return "fp32_to_fp16"; @@ -5935,12 +6121,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << LBB->getName() << " "; OS << (const void*)BBDN->getBasicBlock() << ">"; } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) { - if (G && R->getReg() && - TargetRegisterInfo::isPhysicalRegister(R->getReg())) { - OS << " %" << G->getTarget().getRegisterInfo()->getName(R->getReg()); - } else { - OS << " %reg" << R->getReg(); - } + OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0); } else if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(this)) { OS << "'" << ES->getSymbol() << "'"; @@ -5986,7 +6167,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { const char *AM = getIndexedModeName(ST->getAddressingMode()); if (*AM) OS << ", " << AM; - + OS << ">"; } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) { OS << "<" << *M->getMemOperand() << ">"; @@ -6037,7 +6218,7 @@ void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N, const SelectionDAG *G, unsigned depth, - unsigned indent) + unsigned indent) { if (depth == 0) return; @@ -6058,7 +6239,7 @@ static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N, void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G, unsigned depth) const { printrWithDepthHelper(OS, this, G, depth, 0); -} +} void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const { // Don't print impossibly deep things. @@ -6072,7 +6253,7 @@ void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const { void SDNode::dumprFull(const SelectionDAG *G) const { // Don't print impossibly deep things. 
dumprWithDepth(G, 100); -} +} static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) @@ -6156,10 +6337,10 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { } -/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a -/// location that is 'Dist' units away from the location that the 'Base' load +/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a +/// location that is 'Dist' units away from the location that the 'Base' load /// is loading from. -bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, +bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const { if (LD->getChain() != Base->getChain()) return false; @@ -6180,11 +6361,11 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, if (FS != BFS || FS != (int)Bytes) return false; return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); } - if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) { - ConstantSDNode *V = dyn_cast<ConstantSDNode>(Loc.getOperand(1)); - if (V && (V->getSExtValue() == Dist*Bytes)) - return true; - } + + // Handle X+C + if (isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc && + cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes) + return true; const GlobalValue *GV1 = NULL; const GlobalValue *GV2 = NULL; @@ -6225,15 +6406,14 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { int64_t FrameOffset = 0; if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) { FrameIdx = FI->getIndex(); - } else if (Ptr.getOpcode() == ISD::ADD && - isa<ConstantSDNode>(Ptr.getOperand(1)) && + } else if (isBaseWithConstantOffset(Ptr) && isa<FrameIndexSDNode>(Ptr.getOperand(0))) { + // Handle FI+Cst FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); FrameOffset = Ptr.getConstantOperandVal(1); } if (FrameIdx != (1 << 31)) { - // FIXME: Handle FI+CST. const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo(); unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx), FrameOffset); @@ -6354,7 +6534,7 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, if (OpVal.getOpcode() == ISD::UNDEF) SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize); else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) - SplatValue |= APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize). + SplatValue |= CN->getAPIntValue().zextOrTrunc(EltBitSize). zextOrTrunc(sz) << BitPos; else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos; @@ -6369,10 +6549,10 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, while (sz > 8) { unsigned HalfSize = sz / 2; - APInt HighValue = APInt(SplatValue).lshr(HalfSize).trunc(HalfSize); - APInt LowValue = APInt(SplatValue).trunc(HalfSize); - APInt HighUndef = APInt(SplatUndef).lshr(HalfSize).trunc(HalfSize); - APInt LowUndef = APInt(SplatUndef).trunc(HalfSize); + APInt HighValue = SplatValue.lshr(HalfSize).trunc(HalfSize); + APInt LowValue = SplatValue.trunc(HalfSize); + APInt HighUndef = SplatUndef.lshr(HalfSize).trunc(HalfSize); + APInt LowUndef = SplatUndef.trunc(HalfSize); // If the two halves do not match (ignoring undef bits), stop here. 
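The loop whose comment closes the text above is how isConstantSplat finds the smallest repeating element: split the value in half, compare the halves modulo undef bits (the check just below), and keep descending while they match. The same search over a plain 64-bit value, with the undef masks dropped (a sketch):

#include <cstdint>
#include <cstdio>

// Smallest element size (in bits, down to 8) such that Val is that element
// repeated; returns 64 when no smaller splat structure exists.
unsigned smallestSplatBits(uint64_t Val) {
  unsigned SZ = 64;
  while (SZ > 8) {
    unsigned Half = SZ / 2;
    uint64_t Mask = (1ULL << Half) - 1;
    uint64_t Hi = (Val >> Half) & Mask, Lo = Val & Mask;
    if (Hi != Lo)
      return SZ;        // halves differ: SZ is the splat element size
    SZ = Half;          // halves match: keep splitting the low half
    Val = Lo;
  }
  return SZ;
}

int main() {
  printf("%u\n", smallestSplatBits(0xABABABABABABABABULL));  // 8
  printf("%u\n", smallestSplatBits(0x1234123412341234ULL));  // 16
  printf("%u\n", smallestSplatBits(0x0123456789ABCDEFULL));  // 64
  return 0;
}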
if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) || @@ -6412,7 +6592,7 @@ static void checkForCyclesHelper(const SDNode *N, // If this node has already been checked, don't check it again. if (Checked.count(N)) return; - + // If a node has already been visited on this depth-first walk, reject it as // a cycle. if (!Visited.insert(N)) { @@ -6421,10 +6601,10 @@ static void checkForCyclesHelper(const SDNode *N, errs() << "Detected cycle in SelectionDAG\n"; abort(); } - + for(unsigned i = 0, e = N->getNumOperands(); i != e; ++i) checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked); - + Checked.insert(N); Visited.erase(N); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index e657445..452f561 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -15,6 +15,7 @@ #include "SDNodeDbgValue.h" #include "SelectionDAGBuilder.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" @@ -43,9 +44,8 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Analysis/DebugInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" @@ -70,10 +70,28 @@ LimitFPPrecision("limit-float-precision", cl::location(LimitFloatPrecision), cl::init(0)); +// Limit the width of DAG chains. This is important in general to +// prevent DAG-based analysis from blowing up. For example, alias analysis and +// load clustering may not complete in reasonable time. It is difficult to +// recognize and avoid this situation within each individual analysis, and +// future analyses are likely to have the same behavior. Limiting DAG width is +// the safe approach, and will be especially important with global DAGs. +// +// MaxParallelChains default is arbitrarily high to avoid affecting +// optimization, but could be lowered to improve compile time. Any ld-ld-st-st +// sequence over this should have been converted to llvm.memcpy by the +// frontend. It is easy to induce this behavior with .ll code such as: +// %buffer = alloca [4096 x i8] +// %data = load [4096 x i8]* %argPtr +// store [4096 x i8] %data, [4096 x i8]* %buffer +static cl::opt<unsigned> +MaxParallelChains("dag-chain-limit", cl::desc("Max parallel isel dag chains"), + cl::init(64), cl::Hidden); + static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, const SDValue *Parts, unsigned NumParts, EVT PartVT, EVT ValueVT); - + /// getCopyFromParts - Create a value that contains the specified legal parts /// combined into the value they represent.
If the parts combine to a type /// larger than ValueVT then AssertOp can be used to specify whether the extra @@ -85,7 +103,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, ISD::NodeType AssertOp = ISD::DELETED_NODE) { if (ValueVT.isVector()) return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT); - + assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Val = Parts[0]; @@ -112,8 +130,8 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2, RoundParts / 2, PartVT, HalfVT); } else { - Lo = DAG.getNode(ISD::BIT_CONVERT, DL, HalfVT, Parts[0]); - Hi = DAG.getNode(ISD::BIT_CONVERT, DL, HalfVT, Parts[1]); + Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]); + Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]); } if (TLI.isBigEndian()) @@ -145,8 +163,8 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) && "Unexpected split"); SDValue Lo, Hi; - Lo = DAG.getNode(ISD::BIT_CONVERT, DL, EVT(MVT::f64), Parts[0]); - Hi = DAG.getNode(ISD::BIT_CONVERT, DL, EVT(MVT::f64), Parts[1]); + Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]); + Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]); if (TLI.isBigEndian()) std::swap(Lo, Hi); Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi); @@ -188,7 +206,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, } if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) - return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val); + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); llvm_unreachable("Unknown mismatch!"); return SDValue(); @@ -206,7 +224,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Val = Parts[0]; - + // Handle a multi-element vector. if (NumParts > 1) { EVT IntermediateVT, RegisterVT; @@ -219,7 +237,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); assert(RegisterVT == Parts[0].getValueType() && "Part type doesn't match part!"); - + // Assemble the parts into intermediate operands. SmallVector<SDValue, 8> Ops(NumIntermediates); if (NumIntermediates == NumParts) { @@ -238,20 +256,20 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor, PartVT, IntermediateVT); } - + // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the // intermediate operands. Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL, ValueVT, &Ops[0], NumIntermediates); } - + // There is now one part, held in Val. Correct it to match ValueVT. PartVT = Val.getValueType(); - + if (PartVT == ValueVT) return Val; - + if (PartVT.isVector()) { // If the element type of the source/dest vectors are the same, but the // parts vector has more elements than the value vector, then we have a @@ -262,12 +280,12 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, "Cannot narrow, it would be a lossy transformation"); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, DAG.getIntPtrConstant(0)); - } - + } + // Vector/Vector bitcast.
- return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val); + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); } - + assert(ValueVT.getVectorElementType() == PartVT && ValueVT.getVectorNumElements() == 1 && "Only trivial scalar-to-vector conversions should get here!"); @@ -280,7 +298,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl, SDValue Val, SDValue *Parts, unsigned NumParts, EVT PartVT); - + /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. If the parts contain more bits than Val, then, for /// integers, ExtendKind can be used to specify how to generate the extra bits. @@ -289,11 +307,11 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, EVT PartVT, ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { EVT ValueVT = Val.getValueType(); - + // Handle the vector case separately. if (ValueVT.isVector()) return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT); - + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned PartBits = PartVT.getSizeInBits(); unsigned OrigNumParts = NumParts; @@ -316,14 +334,14 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val); } else { assert(PartVT.isInteger() && ValueVT.isInteger() && - "Unknown mismatch!"); + "Unknown mismatch!"); ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ExtendKind, DL, ValueVT, Val); } } else if (PartBits == ValueVT.getSizeInBits()) { // Different types of the same size. assert(NumParts == 1 && PartVT != ValueVT); - Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val); + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { // If the parts cover less bits than value has, truncate the value. assert(PartVT.isInteger() && ValueVT.isInteger() && @@ -366,7 +384,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, // The number of parts is a power of 2. Repeatedly bisect the value using // EXTRACT_ELEMENT. - Parts[0] = DAG.getNode(ISD::BIT_CONVERT, DL, + Parts[0] = DAG.getNode(ISD::BITCAST, DL, EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()), Val); @@ -384,8 +402,8 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, ThisVT, Part0, DAG.getIntPtrConstant(0)); if (ThisBits == PartBits && ThisVT != PartVT) { - Part0 = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Part0); - Part1 = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Part1); + Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0); + Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1); } } } @@ -403,13 +421,13 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, EVT ValueVT = Val.getValueType(); assert(ValueVT.isVector() && "Not a vector"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - + if (NumParts == 1) { if (PartVT == ValueVT) { // Nothing to do. } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) { // Bitconvert vector->vector case. 
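Earlier in this hunk, getCopyToParts splits a value into a power-of-two number of register parts by bisecting with EXTRACT_ELEMENT: each level halves the type and doubles the part count, and only the leaves get the final bitcast to the part type. A minimal model of the recursion, splitting a 64-bit value into four 16-bit parts, low part first (little-endian ordering assumed):

#include <cstdint>
#include <cstdio>

// Mirror of the EXTRACT_ELEMENT(Val, 0/1) recursion: Levels halvings
// produce 2^Levels parts, written low-to-high into Parts.
void bisect(uint64_t Val, unsigned Bits, unsigned Levels, uint16_t *Parts) {
  if (Levels == 0) { *Parts = (uint16_t)Val; return; }  // leaf: final part
  unsigned Half = Bits / 2;
  uint64_t Lo = Val & ((1ULL << Half) - 1);  // EXTRACT_ELEMENT ..., 0
  uint64_t Hi = Val >> Half;                 // EXTRACT_ELEMENT ..., 1
  bisect(Lo, Half, Levels - 1, Parts);
  bisect(Hi, Half, Levels - 1, Parts + (1u << (Levels - 1)));
}

int main() {
  uint16_t Parts[4];
  bisect(0x1122334455667788ULL, 64, 2, Parts);
  for (int i = 0; i < 4; ++i)
    printf("Parts[%d] = 0x%04X\n", i, Parts[i]);
  // 0x7788, 0x5566, 0x3344, 0x1122
  return 0;
}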
- Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val); + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } else if (PartVT.isVector() && PartVT.getVectorElementType() == ValueVT.getVectorElementType()&& PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) { @@ -420,7 +438,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElementVT, Val, DAG.getIntPtrConstant(i))); - + for (unsigned i = ValueVT.getVectorNumElements(), e = PartVT.getVectorNumElements(); i != e; ++i) Ops.push_back(DAG.getUNDEF(ElementVT)); @@ -428,7 +446,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size()); // FIXME: Use CONCAT for 2x -> 4x. - + //SDValue UndefElts = DAG.getUNDEF(VectorTy); //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts); } else { @@ -439,11 +457,11 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, DAG.getIntPtrConstant(0)); } - + Parts[0] = Val; return; } - + // Handle a multi-element vector. EVT IntermediateVT, RegisterVT; unsigned NumIntermediates; @@ -451,11 +469,11 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, IntermediateVT, NumIntermediates, RegisterVT); unsigned NumElements = ValueVT.getVectorNumElements(); - + assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); - + // Split the vector into intermediate operands. SmallVector<SDValue, 8> Ops(NumIntermediates); for (unsigned i = 0; i != NumIntermediates; ++i) { @@ -467,7 +485,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val, DAG.getIntPtrConstant(i)); } - + // Split the intermediate operands into legal parts. if (NumParts == NumIntermediates) { // If the register was not expanded, promote or copy the value, @@ -618,48 +636,49 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, } Chain = P.getValue(1); + Parts[i] = P; // If the source register was virtual and if we know something about it, // add an assert node. - if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) && - RegisterVT.isInteger() && !RegisterVT.isVector()) { - unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister; - if (FuncInfo.LiveOutRegInfo.size() > SlotNo) { - const FunctionLoweringInfo::LiveOutInfo &LOI = - FuncInfo.LiveOutRegInfo[SlotNo]; - - unsigned RegSize = RegisterVT.getSizeInBits(); - unsigned NumSignBits = LOI.NumSignBits; - unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes(); - - // FIXME: We capture more information than the dag can represent. For - // now, just use the tightest assertzext/assertsext possible. 
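The widening case above pads the extra lanes with UNDEF so a short vector can live in a wider legal part type. A sketch with zeros standing in for undef lanes (the real code emits ISD::UNDEF, which carries no defined value; this function is a stand-in, not the patch's code):

#include <vector>

std::vector<float> widenVector(const std::vector<float> &Val,
                               unsigned PartNumElts) {
  std::vector<float> Ops(Val.begin(), Val.end()); // defined lanes first
  Ops.resize(PartNumElts, 0.0f);                  // padding: "undef" lanes
  return Ops;                                     // feeds the BUILD_VECTOR
}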
- bool isSExt = true; - EVT FromVT(MVT::Other); - if (NumSignBits == RegSize) - isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 - else if (NumZeroBits >= RegSize-1) - isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1 - else if (NumSignBits > RegSize-8) - isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8 - else if (NumZeroBits >= RegSize-8) - isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8 - else if (NumSignBits > RegSize-16) - isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16 - else if (NumZeroBits >= RegSize-16) - isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16 - else if (NumSignBits > RegSize-32) - isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32 - else if (NumZeroBits >= RegSize-32) - isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 - - if (FromVT != MVT::Other) - P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl, - RegisterVT, P, DAG.getValueType(FromVT)); - } - } + if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) || + !RegisterVT.isInteger() || RegisterVT.isVector() || + !FuncInfo.LiveOutRegInfo.inBounds(Regs[Part+i])) + continue; + + const FunctionLoweringInfo::LiveOutInfo &LOI = + FuncInfo.LiveOutRegInfo[Regs[Part+i]]; + + unsigned RegSize = RegisterVT.getSizeInBits(); + unsigned NumSignBits = LOI.NumSignBits; + unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes(); + + // FIXME: We capture more information than the dag can represent. For + // now, just use the tightest assertzext/assertsext possible. + bool isSExt = true; + EVT FromVT(MVT::Other); + if (NumSignBits == RegSize) + isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 + else if (NumZeroBits >= RegSize-1) + isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1 + else if (NumSignBits > RegSize-8) + isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8 + else if (NumZeroBits >= RegSize-8) + isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8 + else if (NumSignBits > RegSize-16) + isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16 + else if (NumZeroBits >= RegSize-16) + isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16 + else if (NumSignBits > RegSize-32) + isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32 + else if (NumZeroBits >= RegSize-32) + isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 + else + continue; - Parts[i] = P; + // Add an assertion node. + assert(FromVT != MVT::Other); + Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl, + RegisterVT, P, DAG.getValueType(FromVT)); } Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), @@ -889,11 +908,8 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, Val.getResNo(), Offset, dl, DbgSDNodeOrder); DAG.AddDbgValue(SDV, Val.getNode(), false); } - } else { - SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()), - Offset, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, 0, false); - } + } else + DEBUG(dbgs() << "Dropping debug info for " << DI); DanglingDebugInfoMap[V] = DanglingDebugInfo(); } } @@ -913,7 +929,9 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { unsigned InReg = It->second; RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); SDValue Chain = DAG.getEntryNode(); - return N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain,NULL); + N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain,NULL); + resolveDanglingDebugInfo(V, N); + return N; } // Otherwise create a new SDValue and remember it. 
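The restructured loop above picks the tightest AssertSext/AssertZext that the live-out sign/zero-bit counts justify. The selection logic, extracted into a standalone sketch that mirrors the if/else chain (returns 0 when no assertion applies, matching the new continue path; assumes RegSize behaves as in the original unsigned arithmetic):

#include <initializer_list>

unsigned tightestAssertWidth(unsigned RegSize, unsigned NumSignBits,
                             unsigned NumZeroBits, bool &isSExt) {
  isSExt = true;
  if (NumSignBits == RegSize)                    // ASSERT SEXT 1
    return 1;
  if (NumZeroBits >= RegSize - 1) {              // ASSERT ZEXT 1
    isSExt = false;
    return 1;
  }
  for (unsigned W : {8u, 16u, 32u}) {
    if (NumSignBits > RegSize - W)               // ASSERT SEXT W
      return W;
    if (NumZeroBits >= RegSize - W) {            // ASSERT ZEXT W
      isSExt = false;
      return W;
    }
  }
  return 0;                                      // no useful assert
}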
@@ -1088,7 +1106,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { Chains[i] = DAG.getStore(Chain, getCurDebugLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), - Add, NULL, Offsets[i], false, false, 0); + // FIXME: better loc info would be nice. + Add, MachinePointerInfo(), false, false, 0); } Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), @@ -1347,7 +1366,7 @@ SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){ if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB) return false; } - + return true; } @@ -1383,6 +1402,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // If this is a series of conditions that are or'd or and'd together, emit // this as a sequence of branches instead of setcc's with and/or operations. + // As long as jumps are not expensive, this should improve performance. // For example, instead of something like: // cmp A, B // C = seteq @@ -1397,7 +1417,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // jle foo // if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { - if (BOp->hasOneUse() && + if (!TLI.isJumpExpensive() && + BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And || BOp->getOpcode() == Instruction::Or)) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, @@ -1502,10 +1523,11 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, MVT::Other, getControlRoot(), Cond, DAG.getBasicBlock(CB.TrueBB)); - // Insert the false branch. - if (CB.FalseBB != NextBlock) - BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, - DAG.getBasicBlock(CB.FalseBB)); + // Insert the false branch. Do this even if it's a fall through branch, + // this makes it easier to do DAG optimizations which require inverting + // the branch condition. + BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, + DAG.getBasicBlock(CB.FalseBB)); DAG.setRoot(BrCond); } @@ -1592,12 +1614,28 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, Sub, DAG.getConstant(B.Range, VT), ISD::SETUGT); - SDValue ShiftOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), - TLI.getPointerTy()); + // Determine the type of the test operands. + bool UsePtrType = false; + if (!TLI.isTypeLegal(VT)) + UsePtrType = true; + else { + for (unsigned i = 0, e = B.Cases.size(); i != e; ++i) + if ((uint64_t)((int64_t)B.Cases[i].Mask >> VT.getSizeInBits()) + 1 >= 2) { + // Switch table case range are encoded into series of masks. + // Just use pointer type, it's guaranteed to fit. + UsePtrType = true; + break; + } + } + if (UsePtrType) { + VT = TLI.getPointerTy(); + Sub = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), VT); + } - B.Reg = FuncInfo.CreateReg(TLI.getPointerTy()); + B.RegVT = VT; + B.Reg = FuncInfo.CreateReg(VT); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), - B.Reg, ShiftOp); + B.Reg, Sub); // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. 
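The UsePtrType decision in the bit-test header above uses a compact idiom: arithmetic-shift the mask right by the type's bit width, and the mask fits only if every discarded high bit is a copy of zero or of the sign extension. A sketch of the predicate (assumes VTBits < 64, since a wider shift is undefined in C++):

#include <cstdint>

bool masksFitInVT(const int64_t *Masks, unsigned NumMasks, unsigned VTBits) {
  for (unsigned i = 0; i != NumMasks; ++i) {
    // High bits must be all zero (shift yields 0) or all one (yields -1);
    // the "+ 1 >= 2" trick from the patch rejects everything else.
    if ((uint64_t)(Masks[i] >> VTBits) + 1 >= 2)
      return false;               // fall back to the pointer type
  }
  return true;
}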
@@ -1623,36 +1661,34 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, } /// visitBitTestCase - this function produces one "bit test" -void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB, +void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, + MachineBasicBlock* NextMBB, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB) { - SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg, - TLI.getPointerTy()); + EVT VT = BB.RegVT; + SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), + Reg, VT); SDValue Cmp; if (CountPopulation_64(B.Mask) == 1) { // Testing for a single bit; just compare the shift count with what it // would need to be to shift a 1 bit in that position. Cmp = DAG.getSetCC(getCurDebugLoc(), - TLI.getSetCCResultType(ShiftOp.getValueType()), + TLI.getSetCCResultType(VT), ShiftOp, - DAG.getConstant(CountTrailingZeros_64(B.Mask), - TLI.getPointerTy()), + DAG.getConstant(CountTrailingZeros_64(B.Mask), VT), ISD::SETEQ); } else { // Make desired shift - SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), - TLI.getPointerTy(), - DAG.getConstant(1, TLI.getPointerTy()), - ShiftOp); + SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), VT, + DAG.getConstant(1, VT), ShiftOp); // Emit bit tests and jumps SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(), - TLI.getPointerTy(), SwitchVal, - DAG.getConstant(B.Mask, TLI.getPointerTy())); + VT, SwitchVal, DAG.getConstant(B.Mask, VT)); Cmp = DAG.getSetCC(getCurDebugLoc(), - TLI.getSetCCResultType(AndOp.getValueType()), - AndOp, DAG.getConstant(0, TLI.getPointerTy()), + TLI.getSetCCResultType(VT), + AndOp, DAG.getConstant(0, VT), ISD::SETNE); } @@ -1732,10 +1768,56 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; - // TODO: If any two of the cases has the same destination, and if one value + // If any two of the cases has the same destination, and if one value // is the same as the other, but has one bit unset that the other has set, // use bit manipulation to do two compares at once. For example: // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)" + // TODO: This could be extended to merge any 2 cases in switches with 3 cases. + // TODO: Handle cases where CR.CaseBB != SwitchBB. + if (Size == 2 && CR.CaseBB == SwitchBB) { + Case &Small = *CR.Range.first; + Case &Big = *(CR.Range.second-1); + + if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) { + const APInt& SmallValue = cast<ConstantInt>(Small.Low)->getValue(); + const APInt& BigValue = cast<ConstantInt>(Big.Low)->getValue(); + + // Check that there is only one bit different. + if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 && + (SmallValue | BigValue) == BigValue) { + // Isolate the common bit. + APInt CommonBit = BigValue & ~SmallValue; + assert((SmallValue | CommonBit) == BigValue && + CommonBit.countPopulation() == 1 && "Not a common bit?"); + + SDValue CondLHS = getValue(SV); + EVT VT = CondLHS.getValueType(); + DebugLoc DL = getCurDebugLoc(); + + SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS, + DAG.getConstant(CommonBit, VT)); + SDValue Cond = DAG.getSetCC(DL, MVT::i1, + Or, DAG.getConstant(BigValue, VT), + ISD::SETEQ); + + // Update successor info. + SwitchBB->addSuccessor(Small.BB); + SwitchBB->addSuccessor(Default); + + // Insert the true branch. 
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other, + getControlRoot(), Cond, + DAG.getBasicBlock(Small.BB)); + + // Insert the false branch. + BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond, + DAG.getBasicBlock(Default)); + + DAG.setRoot(BrCond); + return true; + } + } + } // Rearrange the case blocks so that the last one falls through if possible. if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) { @@ -1800,9 +1882,8 @@ static inline bool areJTsAllowed(const TargetLowering &TLI) { } static APInt ComputeRange(const APInt &First, const APInt &Last) { - APInt LastExt(Last), FirstExt(First); uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1; - LastExt.sext(BitWidth); FirstExt.sext(BitWidth); + APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth); return (LastExt - FirstExt + 1ULL); } @@ -2151,7 +2232,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, } BitTestBlock BTB(lowBound, cmpRange, SV, - -1U, (CR.CaseBB == SwitchBB), + -1U, MVT::Other, (CR.CaseBB == SwitchBB), CR.CaseBB, Default, BTC); if (CR.CaseBB == SwitchBB) @@ -2180,7 +2261,8 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, if (Cases.size() >= 2) // Must recompute end() each iteration because it may be // invalidated by erase if we hold on to it - for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) { + for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin()); + J != Cases.end(); ) { const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue(); const APInt& currentValue = cast<ConstantInt>(I->High)->getValue(); MachineBasicBlock* nextBB = J->BB; @@ -2205,6 +2287,19 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, return numCmps; } +void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, + MachineBasicBlock *Last) { + // Update JTCases. + for (unsigned i = 0, e = JTCases.size(); i != e; ++i) + if (JTCases[i].first.HeaderBB == First) + JTCases[i].first.HeaderBB = Last; + + // Update BitTestCases. 
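The transform completed above merges two single-value cases that target the same block and differ in exactly one bit, turning (X == 4 || X == 6) into ((X | 2) == 6). The feasibility test and common-bit isolation, sketched with uint64_t in place of APInt (std::popcount assumes C++20):

#include <bit>
#include <cstdint>

bool canMergeCases(uint64_t Small, uint64_t Big, uint64_t &CommonBit) {
  if (std::popcount(Big) != std::popcount(Small) + 1 ||
      (Small | Big) != Big)
    return false;                 // more than one differing bit
  CommonBit = Big & ~Small;       // the single bit Big has and Small lacks
  // Lowered as: if ((X | CommonBit) == Big) goto CaseBB; else goto Default;
  return true;
}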
+ for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i) + if (BitTestCases[i].Parent == First) + BitTestCases[i].Parent = Last; +} + void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { MachineBasicBlock *SwitchMBB = FuncInfo.MBB; @@ -2292,30 +2387,14 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { void SelectionDAGBuilder::visitFSub(const User &I) { // -0.0 - X --> fneg const Type *Ty = I.getType(); - if (Ty->isVectorTy()) { - if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) { - const VectorType *DestTy = cast<VectorType>(I.getType()); - const Type *ElTy = DestTy->getElementType(); - unsigned VL = DestTy->getNumElements(); - std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy)); - Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size()); - if (CV == CNZ) { - SDValue Op2 = getValue(I.getOperand(1)); - setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), - Op2.getValueType(), Op2)); - return; - } - } + if (isa<Constant>(I.getOperand(0)) && + I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) { + SDValue Op2 = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), + Op2.getValueType(), Op2)); + return; } - if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0))) - if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) { - SDValue Op2 = getValue(I.getOperand(1)); - setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), - Op2.getValueType(), Op2)); - return; - } - visitBinary(I, ISD::FSUB); } @@ -2329,31 +2408,29 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - if (!I.getType()->isVectorTy() && - Op2.getValueType() != TLI.getShiftAmountTy()) { + + MVT ShiftTy = TLI.getShiftAmountTy(); + + // Coerce the shift amount to the right type if we can. + if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { + unsigned ShiftSize = ShiftTy.getSizeInBits(); + unsigned Op2Size = Op2.getValueType().getSizeInBits(); + DebugLoc DL = getCurDebugLoc(); + // If the operand is smaller than the shift count type, promote it. - EVT PTy = TLI.getPointerTy(); - EVT STy = TLI.getShiftAmountTy(); - if (STy.bitsGT(Op2.getValueType())) - Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(), - TLI.getShiftAmountTy(), Op2); + if (ShiftSize > Op2Size) + Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2); + // If the operand is larger than the shift count type but the shift // count type has enough bits to represent any shift value, truncate // it now. This is a common case and it exposes the truncate to // optimization early. - else if (STy.getSizeInBits() >= - Log2_32_Ceil(Op2.getValueType().getSizeInBits())) - Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), - TLI.getShiftAmountTy(), Op2); - // Otherwise we'll need to temporarily settle for some other - // convenient type; type legalization will make adjustments as - // needed. - else if (PTy.bitsLT(Op2.getValueType())) - Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), - TLI.getPointerTy(), Op2); - else if (PTy.bitsGT(Op2.getValueType())) - Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(), - TLI.getPointerTy(), Op2); + else if (ShiftSize >= Log2_32_Ceil(Op2.getValueType().getSizeInBits())) + Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2); + // Otherwise we'll need to temporarily settle for some other convenient + // type. 
Type legalization will make adjustments once the shiftee is split. + else + Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32); } setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), @@ -2499,9 +2576,9 @@ void SelectionDAGBuilder::visitBitCast(const User &I) { EVT DestVT = TLI.getValueType(I.getType()); // BitCast assures us that source and destination are the same size so this is - // either a BIT_CONVERT or a no-op. + // either a BITCAST or a no-op. if (DestVT != N.getValueType()) - setValue(&I, DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), + setValue(&I, DAG.getNode(ISD::BITCAST, getCurDebugLoc(), DestVT, N)); // convert types. else setValue(&I, N); // noop cast. @@ -2650,7 +2727,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { } else { StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts; if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && - StartIdx[Input] + MaskNumElts < SrcNumElts) + StartIdx[Input] + MaskNumElts <= SrcNumElts) RangeUse[Input] = 1; // Extract from a multiple of the mask length. } } @@ -2726,8 +2803,7 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { bool IntoUndef = isa<UndefValue>(Op0); bool FromUndef = isa<UndefValue>(Op1); - unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy, - I.idx_begin(), I.idx_end()); + unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end()); SmallVector<EVT, 4> AggValueVTs; ComputeValueVTs(TLI, AggTy, AggValueVTs); @@ -2765,8 +2841,7 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { const Type *ValTy = I.getType(); bool OutOfUndef = isa<UndefValue>(Op0); - unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy, - I.idx_begin(), I.idx_end()); + unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end()); SmallVector<EVT, 4> ValValueVTs; ComputeValueVTs(TLI, ValTy, ValValueVTs); @@ -2884,7 +2959,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // Handle alignment. If the requested alignment is less than or equal to // the stack alignment, ignore it. If the size is greater than or equal to // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. - unsigned StackAlign = TM.getFrameInfo()->getStackAlignment(); + unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); if (Align <= StackAlign) Align = 0; @@ -2920,6 +2995,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata("nontemporal") != 0; unsigned Alignment = I.getAlignment(); + const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; @@ -2930,10 +3006,11 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { SDValue Root; bool ConstantMemory = false; - if (I.isVolatile()) + if (I.isVolatile() || NumValues > MaxParallelChains) // Serialize volatile loads with other side effects. Root = getRoot(); - else if (AA->pointsToConstantMemory(SV)) { + else if (AA->pointsToConstantMemory( + AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), TBAAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. 
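visitLoad, reworked above, now keys its dependence root on three conditions, and the constant-memory query carries the TBAA tag through an AliasAnalysis::Location. A sketch of just the root-selection policy (the enum and names are illustrative stand-ins for the SDValue roots, not the patch's API):

enum class LoadRoot { SideEffects, Entry, Pending };

LoadRoot chooseLoadRoot(bool Volatile, bool TooManyValues,
                        bool IsConstantMemory) {
  if (Volatile || TooManyValues)
    return LoadRoot::SideEffects;  // serialize with other side effects
  if (IsConstantMemory)
    return LoadRoot::Entry;        // nothing clobbers it: no ordering needed
  return LoadRoot::Pending;        // ordinary load, token-factored later
}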
Root = DAG.getEntryNode(); ConstantMemory = true; @@ -2943,23 +3020,38 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { } SmallVector<SDValue, 4> Values(NumValues); - SmallVector<SDValue, 4> Chains(NumValues); + SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains), + NumValues)); EVT PtrVT = Ptr.getValueType(); - for (unsigned i = 0; i != NumValues; ++i) { + unsigned ChainI = 0; + for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { + // Serializing loads here may result in excessive register pressure, and + // TokenFactor places arbitrary choke points on the scheduler. SD scheduling + // could recover a bit by hoisting nodes upward in the chain by recognizing + // they are side-effect free or do not alias. The optimizer should really + // avoid this case by converting large object/array copies to llvm.memcpy + // (MaxParallelChains should always remain as failsafe). + if (ChainI == MaxParallelChains) { + assert(PendingLoads.empty() && "PendingLoads must be serialized first"); + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], ChainI); + Root = Chain; + ChainI = 0; + } SDValue A = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr, DAG.getConstant(Offsets[i], PtrVT)); SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root, - A, SV, Offsets[i], isVolatile, - isNonTemporal, Alignment); + A, MachinePointerInfo(SV, Offsets[i]), isVolatile, + isNonTemporal, Alignment, TBAAInfo); Values[i] = L; - Chains[i] = L.getValue(1); + Chains[ChainI] = L.getValue(1); } if (!ConstantMemory) { SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), - MVT::Other, &Chains[0], NumValues); + MVT::Other, &Chains[0], ChainI); if (isVolatile) DAG.setRoot(Chain); else @@ -2989,23 +3081,37 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { SDValue Ptr = getValue(PtrV); SDValue Root = getRoot(); - SmallVector<SDValue, 4> Chains(NumValues); + SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains), + NumValues)); EVT PtrVT = Ptr.getValueType(); bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata("nontemporal") != 0; unsigned Alignment = I.getAlignment(); - - for (unsigned i = 0; i != NumValues; ++i) { + const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); + + unsigned ChainI = 0; + for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { + // See visitLoad comments. 
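The MaxParallelChains machinery above caps how many loads hang off a single TokenFactor by flushing the accumulated chains and restarting from the merged node. The control shape, sketched with ints standing in for chain SDValues (tokenFactor is a trivial stand-in for ISD::TokenFactor):

#include <vector>

static int tokenFactor(const std::vector<int> &Chains) {
  return Chains.empty() ? 0 : Chains.back();  // stand-in merge of all chains
}

int emitChunkedLoads(unsigned NumValues, unsigned MaxChains) {
  std::vector<int> Chains;
  int Root = 0;
  for (unsigned i = 0; i != NumValues; ++i) {
    if (Chains.size() == MaxChains) {   // choke point: flush and restart
      Root = tokenFactor(Chains);
      Chains.clear();
    }
    Chains.push_back(Root + 1);         // this load, chained on Root
  }
  return tokenFactor(Chains);           // final merge becomes the new root
}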
+ if (ChainI == MaxParallelChains) { + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], ChainI); + Root = Chain; + ChainI = 0; + } SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr, DAG.getConstant(Offsets[i], PtrVT)); - Chains[i] = DAG.getStore(Root, getCurDebugLoc(), - SDValue(Src.getNode(), Src.getResNo() + i), - Add, PtrV, Offsets[i], isVolatile, - isNonTemporal, Alignment); - } - - DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), - MVT::Other, &Chains[0], NumValues)); + SDValue St = DAG.getStore(Root, getCurDebugLoc(), + SDValue(Src.getNode(), Src.getResNo() + i), + Add, MachinePointerInfo(PtrV, Offsets[i]), + isVolatile, isNonTemporal, Alignment, TBAAInfo); + Chains[ChainI] = St; + } + + SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], ChainI); + ++SDNodeOrder; + AssignOrderingToNode(StoreNode.getNode()); + DAG.setRoot(StoreNode); } /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC @@ -3031,7 +3137,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic); // Add the intrinsic ID as an integer operand if it's not a target intrinsic. - if (!IsTgtIntrinsic) + if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || + Info.opc == ISD::INTRINSIC_W_CHAIN) Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy())); // Add all operands of the call to the operand list. @@ -3062,7 +3169,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // This is target intrinsic that touches memory Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(), VTs, &Ops[0], Ops.size(), - Info.memVT, Info.ptrVal, Info.offset, + Info.memVT, + MachinePointerInfo(Info.ptrVal, Info.offset), Info.align, Info.vol, Info.readMem, Info.writeMem); } else if (!HasChain) { @@ -3087,7 +3195,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, if (!I.getType()->isVoidTy()) { if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) { EVT VT = TLI.getValueType(PTy); - Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result); + Result = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), VT, Result); } setValue(&I, Result); @@ -3106,7 +3214,7 @@ GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) { DAG.getConstant(0x007fffff, MVT::i32)); SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1, DAG.getConstant(0x3f800000, MVT::i32)); - return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t2); + return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2); } /// GetExponent - Get the exponent: @@ -3205,13 +3313,13 @@ SelectionDAGBuilder::visitExp(const CallInst &I) { SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f7f5e7e)); - SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t5); + SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t5); // Add the exponent into the result in integer domain. 
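The limited-precision exp expansions evaluate a polynomial for the fractional power and then splice the (pre-shifted) integer part of x straight into the f32 exponent field, which is what the BITCAST/ADD/BITCAST sequence does. A self-contained sketch of the trick; the quadratic is a deliberately crude stand-in for the patch's 0x3f7f5e7e-series coefficients, and there is no overflow or denormal handling, matching the DAG expansion's assumptions:

#include <cmath>
#include <cstdint>
#include <cstring>

float exp2Approx(float x) {
  float FloorX = std::floor(x);
  float f = x - FloorX;                 // fractional part in [0, 1)
  int n = (int)FloorX;                  // IntegerPartOfX
  float Poly = 1.0f + f * (0.6931f + f * 0.2416f);  // rough 2^f, in [1, 2)
  uint32_t Bits;
  std::memcpy(&Bits, &Poly, sizeof Bits);           // BITCAST f32 -> i32
  Bits += (uint32_t)n << 23;            // add n into the exponent field
  float Result;
  std::memcpy(&Result, &Bits, sizeof Result);       // BITCAST i32 -> f32
  return Result;
}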
SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32, TwoToFracPartOfX, IntegerPartOfX); - result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t6); + result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t6); } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -3231,13 +3339,13 @@ SelectionDAGBuilder::visitExp(const CallInst &I) { SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, getF32Constant(DAG, 0x3f7ff8fd)); - SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t7); + SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t7); // Add the exponent into the result in integer domain. SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32, TwoToFracPartOfX, IntegerPartOfX); - result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t8); + result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t8); } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -3269,14 +3377,14 @@ SelectionDAGBuilder::visitExp(const CallInst &I) { SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, getF32Constant(DAG, 0x3f800000)); - SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl, + SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13); // Add the exponent into the result in integer domain. SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32, TwoToFracPartOfX, IntegerPartOfX); - result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t14); + result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t14); } } else { // No special expansion. @@ -3298,7 +3406,7 @@ SelectionDAGBuilder::visitLog(const CallInst &I) { if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op = getValue(I.getArgOperand(0)); - SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); + SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); // Scale the exponent by log(2) [0.69314718f]. SDValue Exp = GetExponent(DAG, Op1, TLI, dl); @@ -3408,7 +3516,7 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) { if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op = getValue(I.getArgOperand(0)); - SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); + SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); // Get the exponent. SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl); @@ -3517,7 +3625,7 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) { if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op = getValue(I.getArgOperand(0)); - SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); + SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); // Scale the exponent by log10(2) [0.30102999f]. 
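All of the log variants above lean on GetSignificand and GetExponent, which are pure bit manipulations on the f32 representation. Their scalar equivalents, assuming the standard IEEE-754 single-precision layout:

#include <cstdint>
#include <cstring>

static uint32_t floatBits(float F) {
  uint32_t B;
  std::memcpy(&B, &F, sizeof B);
  return B;
}

float getSignificand(float Op) {   // result in [1.0, 2.0)
  uint32_t B = (floatBits(Op) & 0x007fffff) | 0x3f800000;
  float F;
  std::memcpy(&F, &B, sizeof F);
  return F;
}

int getExponent(float Op) {        // unbiased exponent
  return (int)((floatBits(Op) >> 23) & 0xff) - 127;
}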
SDValue Exp = GetExponent(DAG, Op1, TLI, dl); @@ -3645,11 +3753,11 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) { SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f7f5e7e)); - SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5); + SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5); SDValue TwoToFractionalPartOfX = DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX); - result = DAG.getNode(ISD::BIT_CONVERT, dl, + result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, TwoToFractionalPartOfX); } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { // For floating-point precision of 12: @@ -3670,11 +3778,11 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) { SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, getF32Constant(DAG, 0x3f7ff8fd)); - SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7); + SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7); SDValue TwoToFractionalPartOfX = DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX); - result = DAG.getNode(ISD::BIT_CONVERT, dl, + result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, TwoToFractionalPartOfX); } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 // For floating-point precision of 18: @@ -3706,11 +3814,11 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) { SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, getF32Constant(DAG, 0x3f800000)); - SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13); + SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13); SDValue TwoToFractionalPartOfX = DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX); - result = DAG.getNode(ISD::BIT_CONVERT, dl, + result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, TwoToFractionalPartOfX); } } else { @@ -3778,11 +3886,11 @@ SelectionDAGBuilder::visitPow(const CallInst &I) { SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f7f5e7e)); - SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5); + SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5); SDValue TwoToFractionalPartOfX = DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX); - result = DAG.getNode(ISD::BIT_CONVERT, dl, + result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, TwoToFractionalPartOfX); } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { // For floating-point precision of 12: @@ -3803,11 +3911,11 @@ SelectionDAGBuilder::visitPow(const CallInst &I) { SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, getF32Constant(DAG, 0x3f7ff8fd)); - SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7); + SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7); SDValue TwoToFractionalPartOfX = DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX); - result = DAG.getNode(ISD::BIT_CONVERT, dl, + result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, TwoToFractionalPartOfX); } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 // For floating-point precision of 18: @@ -3839,11 +3947,11 @@ SelectionDAGBuilder::visitPow(const CallInst &I) { SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, getF32Constant(DAG, 0x3f800000)); - SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, 
MVT::i32, t13); + SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13); SDValue TwoToFractionalPartOfX = DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX); - result = DAG.getNode(ISD::BIT_CONVERT, dl, + result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, TwoToFractionalPartOfX); } } else { @@ -3915,13 +4023,16 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS, /// At the end of instruction selection, they will be inserted to the entry BB. bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, - int64_t Offset, + int64_t Offset, const SDValue &N) { const Argument *Arg = dyn_cast<Argument>(V); if (!Arg) return false; MachineFunction &MF = DAG.getMachineFunction(); + const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo(); + const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); + // Ignore inlined function arguments here. DIVariable DV(Variable); if (DV.isInlinedFnArgument(MF.getFunction())) @@ -3935,14 +4046,16 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, if (Arg->hasByValAttr()) { // Byval arguments' frame index is recorded during argument lowering. // Use this info directly. - const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); Reg = TRI->getFrameRegister(MF); Offset = FuncInfo.getByValArgumentFrameIndex(Arg); + // If byval argument ofset is not recorded then ignore this. + if (!Offset) + Reg = 0; } if (N.getNode() && N.getOpcode() == ISD::CopyFromReg) { Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg(); - if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { + if (TargetRegisterInfo::isVirtualRegister(Reg)) { MachineRegisterInfo &RegInfo = MF.getRegInfo(); unsigned PR = RegInfo.getLiveInPhysReg(Reg); if (PR) @@ -3951,13 +4064,25 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, } if (!Reg) { + // Check if ValueMap has reg number. DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); - if (VMI == FuncInfo.ValueMap.end()) - return false; - Reg = VMI->second; + if (VMI != FuncInfo.ValueMap.end()) + Reg = VMI->second; } - const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo(); + if (!Reg && N.getNode()) { + // Check if frame index is available. + if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode())) + if (FrameIndexSDNode *FINode = + dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) { + Reg = TRI->getFrameRegister(MF); + Offset = FINode->getIndex(); + } + } + + if (!Reg) + return false; + MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE)) .addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable); @@ -3966,9 +4091,11 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, } // VisualStudio defines setjmp as _setjmp -#if defined(_MSC_VER) && defined(setjmp) -#define setjmp_undefined_for_visual_studio -#undef setjmp +#if defined(_MSC_VER) && defined(setjmp) && \ + !defined(setjmp_undefined_for_msvc) +# pragma push_macro("setjmp") +# undef setjmp +# define setjmp_undefined_for_msvc #endif /// visitIntrinsicCall - Lower the call to the specified intrinsic function. 
If @@ -4013,7 +4140,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false, - I.getArgOperand(0), 0, I.getArgOperand(1), 0)); + MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1)))); return 0; } case Intrinsic::memset: { @@ -4028,7 +4156,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol, - I.getArgOperand(0), 0)); + MachinePointerInfo(I.getArgOperand(0)))); return 0; } case Intrinsic::memmove: { @@ -4044,22 +4172,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Op3 = getValue(I.getArgOperand(2)); unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); - - // If the source and destination are known to not be aliases, we can - // lower memmove as memcpy. - uint64_t Size = -1ULL; - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3)) - Size = C->getZExtValue(); - if (AA->alias(I.getArgOperand(0), Size, I.getArgOperand(1), Size) == - AliasAnalysis::NoAlias) { - DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, - false, I.getArgOperand(0), 0, - I.getArgOperand(1), 0)); - return 0; - } - DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol, - I.getArgOperand(0), 0, I.getArgOperand(1), 0)); + MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1)))); return 0; } case Intrinsic::dbg_declare: { @@ -4078,10 +4193,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Check if address has undef value. if (isa<UndefValue>(Address) || (Address->use_empty() && !isa<Argument>(Address))) { - SDDbgValue*SDV = - DAG.getDbgValue(Variable, UndefValue::get(Address->getType()), - 0, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, 0, false); + DEBUG(dbgs() << "Dropping debug info for " << DI); return 0; } @@ -4092,7 +4204,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDDbgValue *SDV; if (N.getNode()) { // Parameters are handled specially. - bool isParameter = + bool isParameter = DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable; if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) Address = BCI->getOperand(0); @@ -4104,25 +4216,40 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Byval parameter. We have a frame index at this point. SDV = DAG.getDbgValue(Variable, FINode->getIndex(), 0, dl, SDNodeOrder); - else + else { // Can't do anything with other non-AI cases yet. This might be a // parameter of a callee function that got inlined, for example. + DEBUG(dbgs() << "Dropping debug info for " << DI); return 0; + } } else if (AI) SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(), 0, dl, SDNodeOrder); - else + else { // Can't do anything with other non-AI cases yet. 
+ DEBUG(dbgs() << "Dropping debug info for " << DI); return 0; + } DAG.AddDbgValue(SDV, N.getNode(), isParameter); } else { - // If Address is an arugment then try to emits its dbg value using - // virtual register info from the FuncInfo.ValueMap. Otherwise add undef - // to help track missing debug info. + // If Address is an argument then try to emit its dbg value using + // virtual register info from the FuncInfo.ValueMap. if (!EmitFuncArgumentDbgValue(Address, Variable, 0, N)) { - SDV = DAG.getDbgValue(Variable, UndefValue::get(Address->getType()), - 0, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, 0, false); + // If variable is pinned by a alloca in dominating bb then + // use StaticAllocaMap. + if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) { + if (AI->getParent() != DI.getParent()) { + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) { + SDV = DAG.getDbgValue(Variable, SI->second, + 0, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, 0, false); + return 0; + } + } + } + DEBUG(dbgs() << "Dropping debug info for " << DI); } } return 0; @@ -4160,17 +4287,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { N.getResNo(), Offset, dl, SDNodeOrder); DAG.AddDbgValue(SDV, N.getNode(), false); } - } else if (isa<PHINode>(V) && !V->use_empty() ) { + } else if (!V->use_empty() ) { // Do not call getValue(V) yet, as we don't want to generate code. // Remember it for later. DanglingDebugInfo DDI(&DI, dl, SDNodeOrder); DanglingDebugInfoMap[V] = DDI; } else { // We may expand this to cover more cases. One case where we have no - // data available is an unreferenced parameter; we need this fallback. - SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()), - Offset, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, 0, false); + // data available is an unreferenced parameter. + DEBUG(dbgs() << "Dropping debug info for " << DI); } } @@ -4186,7 +4311,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (SI == FuncInfo.StaticAllocaMap.end()) return 0; // VLAs. 
int FI = SI->second; - + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo()) MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc()); @@ -4282,11 +4407,75 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_sjlj_longjmp: { DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other, - getRoot(), - getValue(I.getArgOperand(0)))); + getRoot(), getValue(I.getArgOperand(0)))); + return 0; + } + case Intrinsic::eh_sjlj_dispatch_setup: { + DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other, + getRoot(), getValue(I.getArgOperand(0)))); return 0; } + case Intrinsic::x86_mmx_pslli_w: + case Intrinsic::x86_mmx_pslli_d: + case Intrinsic::x86_mmx_pslli_q: + case Intrinsic::x86_mmx_psrli_w: + case Intrinsic::x86_mmx_psrli_d: + case Intrinsic::x86_mmx_psrli_q: + case Intrinsic::x86_mmx_psrai_w: + case Intrinsic::x86_mmx_psrai_d: { + SDValue ShAmt = getValue(I.getArgOperand(1)); + if (isa<ConstantSDNode>(ShAmt)) { + visitTargetIntrinsic(I, Intrinsic); + return 0; + } + unsigned NewIntrinsic = 0; + EVT ShAmtVT = MVT::v2i32; + switch (Intrinsic) { + case Intrinsic::x86_mmx_pslli_w: + NewIntrinsic = Intrinsic::x86_mmx_psll_w; + break; + case Intrinsic::x86_mmx_pslli_d: + NewIntrinsic = Intrinsic::x86_mmx_psll_d; + break; + case Intrinsic::x86_mmx_pslli_q: + NewIntrinsic = Intrinsic::x86_mmx_psll_q; + break; + case Intrinsic::x86_mmx_psrli_w: + NewIntrinsic = Intrinsic::x86_mmx_psrl_w; + break; + case Intrinsic::x86_mmx_psrli_d: + NewIntrinsic = Intrinsic::x86_mmx_psrl_d; + break; + case Intrinsic::x86_mmx_psrli_q: + NewIntrinsic = Intrinsic::x86_mmx_psrl_q; + break; + case Intrinsic::x86_mmx_psrai_w: + NewIntrinsic = Intrinsic::x86_mmx_psra_w; + break; + case Intrinsic::x86_mmx_psrai_d: + NewIntrinsic = Intrinsic::x86_mmx_psra_d; + break; + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + } + + // The vector shift intrinsics with scalars uses 32b shift amounts but + // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits + // to be zero. + // We must do this early because v2i32 is not a legal type. + DebugLoc dl = getCurDebugLoc(); + SDValue ShOps[2]; + ShOps[0] = ShAmt; + ShOps[1] = DAG.getConstant(0, MVT::i32); + ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2); + EVT DestVT = TLI.getValueType(I.getType()); + ShAmt = DAG.getNode(ISD::BITCAST, dl, DestVT, ShAmt); + Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, + DAG.getConstant(NewIntrinsic, MVT::i32), + getValue(I.getArgOperand(0)), ShAmt); + setValue(&I, Res); + return 0; + } case Intrinsic::convertff: case Intrinsic::convertfsi: case Intrinsic::convertfui: @@ -4430,8 +4619,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Store the stack protector onto the stack. 
Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN, - PseudoSourceValue::getFixedStack(FI), - 0, true, false, 0); + MachinePointerInfo::getFixedStack(FI), + true, false, 0); setValue(&I, Res); DAG.setRoot(Res); return 0; @@ -4510,14 +4699,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::prefetch: { SDValue Ops[4]; + unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); Ops[0] = getRoot(); Ops[1] = getValue(I.getArgOperand(0)); Ops[2] = getValue(I.getArgOperand(1)); Ops[3] = getValue(I.getArgOperand(2)); - DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4)); + DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl, + DAG.getVTList(MVT::Other), + &Ops[0], 4, + EVT::getIntegerVT(*Context, 8), + MachinePointerInfo(I.getArgOperand(0)), + 0, /* align */ + false, /* volatile */ + rw==0, /* read */ + rw==1)); /* write */ return 0; } - case Intrinsic::memory_barrier: { SDValue Ops[6]; Ops[0] = getRoot(); @@ -4536,7 +4733,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)), - I.getArgOperand(0)); + MachinePointerInfo(I.getArgOperand(0))); setValue(&I, L); DAG.setRoot(L.getValue(1)); return 0; @@ -4599,6 +4796,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, FTy->isVarArg(), Outs, FTy->getContext()); SDValue DemoteStackSlot; + int DemoteStackIdx = -100; if (!CanLowerReturn) { uint64_t TySize = TLI.getTargetData()->getTypeAllocSize( @@ -4606,10 +4804,10 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, unsigned Align = TLI.getTargetData()->getPrefTypeAlignment( FTy->getReturnType()); MachineFunction &MF = DAG.getMachineFunction(); - int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); + DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType()); - DemoteStackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); + DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI.getPointerTy()); Entry.Node = DemoteStackSlot; Entry.Ty = StackSlotPtrType; Entry.isSExt = false; @@ -4703,7 +4901,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, DemoteStackSlot, DAG.getConstant(Offsets[i], PtrVT)); SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second, - Add, NULL, Offsets[i], false, false, 1); + Add, + MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), + false, false, 1); Values[i] = L; Chains[i] = L.getValue(1); } @@ -4711,7 +4911,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, &Chains[0], NumValues); PendingLoads.push_back(Chain); - + // Collect the legal value parts into potentially illegal values // that correspond to the original function's return values. 
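The CanLowerReturn == false path above demotes an unlowerable aggregate return to a hidden stack slot: the caller creates DemoteStackIdx, passes its address as an extra leading argument, and reloads the pieces at Offsets[i] afterwards. In C-level terms (types and names purely illustrative):

struct Big { long a, b, c, d; };

// The callee as lowered: result written through a hidden sret pointer.
static void calleeLowered(Big *Sret) { *Sret = Big{1, 2, 3, 4}; }

Big callWithDemotion() {
  Big Slot;               // DemoteStackIdx: CreateStackObject(TySize, Align)
  calleeLowered(&Slot);   // hidden first argument points at the slot
  return Slot;            // the loads at Offsets[i] rebuild the value
}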
SmallVector<EVT, 4> RetTys; @@ -4724,7 +4924,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, EVT VT = RetTys[I]; EVT RegisterVT = TLI.getRegisterType(RetTy->getContext(), VT); unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT); - + SDValue ReturnValue = getCopyFromParts(DAG, getCurDebugLoc(), &Values[CurReg], NumRegs, RegisterVT, VT, AssertOp); @@ -4806,7 +5006,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, SDValue Ptr = Builder.getValue(PtrVal); SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root, - Ptr, PtrVal /*SrcValue*/, 0/*SVOffset*/, + Ptr, MachinePointerInfo(PtrVal), false /*volatile*/, false /*nontemporal*/, 1 /* align=1 */); @@ -4902,7 +5102,25 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { visitInlineAsm(&I); return; } - + + // See if any floating point values are being passed to this function. This is + // used to emit an undefined reference to fltused on Windows. + const FunctionType *FT = + cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0)); + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); + if (FT->isVarArg() && + !MMI.callsExternalVAFunctionWithFloatingPointArguments()) { + for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { + const Type* T = I.getArgOperand(i)->getType(); + for (po_iterator<const Type*> i = po_begin(T), e = po_end(T); + i != e; ++i) { + if (!i->isFloatingPointTy()) continue; + MMI.setCallsExternalVAFunctionWithFloatingPointArguments(true); + break; + } + } + } + const char *RenameFn = 0; if (Function *F = I.getCalledFunction()) { if (F->isDeclaration()) { @@ -4980,7 +5198,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { } } } - + SDValue Callee; if (!RenameFn) Callee = getValue(I.getCalledValue()); @@ -5008,7 +5226,7 @@ public: /// contains the set of register corresponding to the operand. RegsForValue AssignedRegs; - explicit SDISelAsmOperandInfo(const InlineAsm::ConstraintInfo &info) + explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info) : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) { } @@ -5083,6 +5301,8 @@ private: } }; +typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; + } // end llvm namespace. /// isAllocatableRegister - If the specified register is safe to allocate, @@ -5192,7 +5412,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, // vector types). EVT RegVT = *PhysReg.second->vt_begin(); if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) { - OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), + OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) { @@ -5202,7 +5422,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, // machine. RegVT = EVT::getIntegerVT(Context, OpInfo.ConstraintVT.getSizeInBits()); - OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), + OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; } @@ -5320,30 +5540,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); /// ConstraintOperands - Information about all of the constraints. 
- std::vector<SDISelAsmOperandInfo> ConstraintOperands; + SDISelAsmOperandInfoVector ConstraintOperands; std::set<unsigned> OutputRegs, InputRegs; - // Do a prepass over the constraints, canonicalizing them, and building up the - // ConstraintOperands list. - std::vector<InlineAsm::ConstraintInfo> - ConstraintInfos = IA->ParseConstraints(); - - bool hasMemory = hasInlineAsmMemConstraint(ConstraintInfos, TLI); - - SDValue Chain, Flag; - - // We won't need to flush pending loads if this asm doesn't touch - // memory and is nonvolatile. - if (hasMemory || IA->hasSideEffects()) - Chain = getRoot(); - else - Chain = DAG.getRoot(); + TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(CS); + bool hasMemory = false; unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. unsigned ResNo = 0; // ResNo - The result number of the next output. - for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) { - ConstraintOperands.push_back(SDISelAsmOperandInfo(ConstraintInfos[i])); + for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { + ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i])); SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); EVT OpVT = MVT::Other; @@ -5380,9 +5587,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If this is an input or an indirect output, process the call argument. // BasicBlocks are labels, currently appearing only in asm's. if (OpInfo.CallOperandVal) { - // Strip bitcasts, if any. This mostly comes up for functions. - OpInfo.CallOperandVal = OpInfo.CallOperandVal->stripPointerCasts(); - if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); } else { @@ -5393,11 +5597,33 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } OpInfo.ConstraintVT = OpVT; + + // Indirect operand accesses access memory. + if (OpInfo.isIndirect) + hasMemory = true; + else { + for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) { + TargetLowering::ConstraintType CType = TLI.getConstraintType(OpInfo.Codes[j]); + if (CType == TargetLowering::C_Memory) { + hasMemory = true; + break; + } + } + } } + SDValue Chain, Flag; + + // We won't need to flush pending loads if this asm doesn't touch + // memory and is nonvolatile. + if (hasMemory || IA->hasSideEffects()) + Chain = getRoot(); + else + Chain = DAG.getRoot(); + // Second pass over the constraints: compute which constraint option to use // and assign registers to constraints that want a specific physreg. - for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) { + for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; // If this is an output operand with a matching input operand, look up the @@ -5406,7 +5632,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // error. if (OpInfo.hasMatchingInput()) { SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; - + if (OpInfo.ConstraintVT != Input.ConstraintVT) { if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || @@ -5427,7 +5653,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // need to to provide an address for the memory input. 
if (OpInfo.ConstraintType == TargetLowering::C_Memory && !OpInfo.isIndirect) { - assert(OpInfo.Type == InlineAsm::isInput && + assert((OpInfo.isMultipleAlternative || (OpInfo.Type == InlineAsm::isInput)) && "Can only indirectify direct input operands!"); // Memory operands really want the address of the value. If we don't have @@ -5451,7 +5677,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); Chain = DAG.getStore(Chain, getCurDebugLoc(), - OpInfo.CallOperand, StackSlot, NULL, 0, + OpInfo.CallOperand, StackSlot, + MachinePointerInfo::getFixedStack(SSFI), false, false, 0); OpInfo.CallOperand = StackSlot; } @@ -5469,8 +5696,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { GetRegistersForValue(OpInfo, OutputRegs, InputRegs); } - ConstraintInfos.clear(); - // Second pass - Loop over all of the operands, assigning virtual or physregs // to register class operands. for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { @@ -5495,9 +5720,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc)); - // Remember the AlignStack bit as operand 3. - AsmNodeOperands.push_back(DAG.getTargetConstant(IA->isAlignStack() ? 1 : 0, - MVT::i1)); + // Remember the HasSideEffect and AlignStack bits as operand 3. + unsigned ExtraInfo = 0; + if (IA->hasSideEffects()) + ExtraInfo |= InlineAsm::Extra_HasSideEffects; + if (IA->isAlignStack()) + ExtraInfo |= InlineAsm::Extra_IsAlignStack; + AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, + TLI.getPointerTy())); // Loop over all of the inputs, copying the operand values into the // appropriate registers and processing the output regs. @@ -5588,7 +5818,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { " don't know how to handle tied " "indirect register inputs"); } - + RegsForValue MatchedRegs; MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); EVT RegVT = AsmNodeOperands[CurOp+1].getValueType(); @@ -5607,7 +5837,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { DAG, AsmNodeOperands); break; } - + assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!"); assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 && "Unexpected number of operands"); @@ -5622,8 +5852,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // Treat indirect 'X' constraint as memory. 
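Operand 3 of the INLINEASM node grows from a single AlignStack bit to a small flag word, as the hunk above shows. The packing, with the two Extra_* values assumed to be 1 and 2 as in the InlineAsm definitions of this era:

enum { ExtraHasSideEffects = 1, ExtraIsAlignStack = 2 }; // assumed values

unsigned packExtraInfo(bool HasSideEffects, bool IsAlignStack) {
  unsigned ExtraInfo = 0;
  if (HasSideEffects)
    ExtraInfo |= ExtraHasSideEffects;
  if (IsAlignStack)
    ExtraInfo |= ExtraIsAlignStack;
  return ExtraInfo;  // emitted as a target constant in pointer type
}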
- if (OpInfo.ConstraintType == TargetLowering::C_Other && - OpInfo.isIndirect) + if (OpInfo.ConstraintType == TargetLowering::C_Other && + OpInfo.isIndirect) OpInfo.ConstraintType = TargetLowering::C_Memory; if (OpInfo.ConstraintType == TargetLowering::C_Other) { @@ -5642,7 +5872,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); break; } - + if (OpInfo.ConstraintType == TargetLowering::C_Memory) { assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); assert(InOperandVal.getValueType() == TLI.getPointerTy() && @@ -5693,7 +5923,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (Flag.getNode()) AsmNodeOperands.push_back(Flag); Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(), - DAG.getVTList(MVT::Other, MVT::Flag), + DAG.getVTList(MVT::Other, MVT::Glue), &AsmNodeOperands[0], AsmNodeOperands.size()); Flag = Chain.getValue(1); @@ -5713,7 +5943,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // not have the same VT as was expected. Convert it to the right type // with bit_convert. if (ResultType != Val.getValueType() && Val.getValueType().isVector()) { - Val = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), + Val = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), ResultType, Val); } else if (ResultType != Val.getValueType() && @@ -5751,7 +5981,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDValue Val = DAG.getStore(Chain, getCurDebugLoc(), StoresToEmit[i].first, getValue(StoresToEmit[i].second), - StoresToEmit[i].second, 0, + MachinePointerInfo(StoresToEmit[i].second), false, false, 0); OutChains.push_back(Val); } @@ -5888,7 +6118,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags; - MyFlags.VT = RegisterVT; + MyFlags.VT = RegisterVT.getSimpleVT(); MyFlags.Used = isReturnValueUsed; if (RetSExt) MyFlags.Flags.setSExt(); @@ -5924,7 +6154,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) { assert(InVals[i].getNode() && "LowerCall emitted a null value!"); - assert(Ins[i].VT == InVals[i].getValueType() && + assert(EVT(Ins[i].VT) == InVals[i].getValueType() && "LowerCall emitted a value with the wrong type!"); }); @@ -6085,7 +6315,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { for (unsigned i = 0, e = Ins.size(); i != e; ++i) { assert(InVals[i].getNode() && "LowerFormalArguments emitted a null value!"); - assert(Ins[i].VT == InVals[i].getValueType() && + assert(EVT(Ins[i].VT) == InVals[i].getValueType() && "LowerFormalArguments emitted a value with the wrong type!"); } }); @@ -6154,7 +6384,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { // Note down frame index for byval arguments. 
if (I->hasByValAttr() && !ArgValues.empty()) - if (FrameIndexSDNode *FI = + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode())) FuncInfo->setByValArgumentFrameIndex(I, FI->getIndex()); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 5f400e9..a1a70c3 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -258,15 +258,16 @@ private: struct BitTestBlock { BitTestBlock(APInt F, APInt R, const Value* SV, - unsigned Rg, bool E, + unsigned Rg, EVT RgVT, bool E, MachineBasicBlock* P, MachineBasicBlock* D, const BitTestInfo& C): - First(F), Range(R), SValue(SV), Reg(Rg), Emitted(E), + First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E), Parent(P), Default(D), Cases(C) { } APInt First; APInt Range; const Value *SValue; unsigned Reg; + EVT RegVT; bool Emitted; MachineBasicBlock *Parent; MachineBasicBlock *Default; @@ -347,7 +348,7 @@ public: SDValue getControlRoot(); DebugLoc getCurDebugLoc() const { return CurDebugLoc; } - + void setCurDebugLoc(DebugLoc dl){ CurDebugLoc = dl; } unsigned getSDNodeOrder() const { return SDNodeOrder; } void CopyValueToVirtualRegister(const Value *V, unsigned Reg); @@ -398,6 +399,10 @@ public: void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall, MachineBasicBlock *LandingPad = NULL); + /// UpdateSplitBlock - When an MBB was split during scheduling, update the + /// references that need to refer to the last resulting block. + void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last); + private: // Terminator instructions. void visitRet(const ReturnInst &I); @@ -431,7 +436,8 @@ public: void visitSwitchCase(CaseBlock &CB, MachineBasicBlock *SwitchBB); void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB); - void visitBitTestCase(MachineBasicBlock* NextMBB, + void visitBitTestCase(BitTestBlock &BB, + MachineBasicBlock* NextMBB, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 66cb5ce..62ebc81 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -43,6 +43,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -53,8 +54,17 @@ using namespace llvm; STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on"); +STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel"); +STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG"); STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path"); +#ifndef NDEBUG +STATISTIC(NumBBWithOutOfOrderLineInfo, + "Number of blocks with out of order line number info"); +STATISTIC(NumMBBWithOutOfOrderLineInfo, + "Number of machine blocks with out of order line number info"); +#endif + static cl::opt<bool> EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, cl::desc("Enable verbose messages in the \"fast\" " @@ -170,15 +180,18 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // SelectionDAGISel code
//===----------------------------------------------------------------------===// -SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) : +SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, + CodeGenOpt::Level OL) : MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()), FuncInfo(new FunctionLoweringInfo(TLI)), CurDAG(new SelectionDAG(tm)), SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), GFI(), OptLevel(OL), - DAGSize(0) -{} + DAGSize(0) { + initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); + initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry()); + } SelectionDAGISel::~SelectionDAGISel() { delete SDB; @@ -202,6 +215,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { static bool FunctionCallsSetJmp(const Function *F) { const Module *M = F->getParent(); static const char *ReturnsTwiceFns[] = { + "_setjmp", "setjmp", "sigsetjmp", "setjmp_syscall", @@ -227,6 +241,44 @@ static bool FunctionCallsSetJmp(const Function *F) { #undef NUM_RETURNS_TWICE_FNS } +/// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that +/// may trap on it. In this case we have to split the edge so that the path +/// through the predecessor block that doesn't go to the phi block doesn't +/// execute the possibly trapping instruction. +/// +/// This is required for correctness, so it must be done at -O0. +/// +static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) { + // Loop for blocks with phi nodes. + for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { + PHINode *PN = dyn_cast<PHINode>(BB->begin()); + if (PN == 0) continue; + + ReprocessBlock: + // For each block with a PHI node, check to see if any of the input values + // are potentially trapping constant expressions. Constant expressions are + // the only potentially trapping value that can occur as the argument to a + // PHI. + for (BasicBlock::iterator I = BB->begin(); (PN = dyn_cast<PHINode>(I)); ++I) + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i)); + if (CE == 0 || !CE->canTrap()) continue; + + // The only case we have to worry about is when the edge is critical. + // Since this block has a PHI Node, we assume it has multiple input + // edges: check to see if the pred has multiple successors. + BasicBlock *Pred = PN->getIncomingBlock(i); + if (Pred->getTerminator()->getNumSuccessors() == 1) + continue; + + // Okay, we have to split this edge. + SplitCriticalEdge(Pred->getTerminator(), + GetSuccessorNumber(Pred, BB), SDISel, true); + goto ReprocessBlock; + } + } +} + bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // Do some sanity-checking on the command-line options. assert((!EnableFastISelVerbose || EnableFastISel) && @@ -245,6 +297,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); + SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this); + CurDAG->init(*MF); FuncInfo->set(Fn, *MF); SDB->init(GFI, *AA); @@ -261,7 +315,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { if (!FuncInfo->ArgDbgValues.empty()) for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(), E = RegInfo->livein_end(); LI != E; ++LI) - if (LI->second) + if (LI->second) LiveInMap.insert(std::make_pair(LI->first, LI->second)); // Insert DBG_VALUE instructions for function arguments to the entry block. 
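SplitCriticalSideEffectEdges above only has to act when the offending edge is critical: the predecessor branches to several successors and the PHI block has several predecessors, so no existing block on that edge can host a conditional evaluation of the trapping constant expression. A standalone, graph-level sketch of the criticality test (plain adjacency lists, not the LLVM CFG API):

    #include <cstddef>
    #include <map>
    #include <string>
    #include <vector>

    typedef std::map<std::string, std::vector<std::string> > Cfg; // block -> successors

    static unsigned predCount(const Cfg &G, const std::string &B) {
      unsigned N = 0;
      for (Cfg::const_iterator I = G.begin(), E = G.end(); I != E; ++I)
        for (size_t i = 0, e = I->second.size(); i != e; ++i)
          if (I->second[i] == B) ++N;
      return N;
    }

    // An edge Pred->Succ is critical iff Pred has multiple successors and
    // Succ has multiple predecessors; splitting it inserts a fresh block on
    // the edge where the possibly-trapping PHI input can run safely.
    static bool isCriticalEdge(const Cfg &G, const std::string &Pred,
                               const std::string &Succ) {
      Cfg::const_iterator I = G.find(Pred);
      return I != G.end() && I->second.size() > 1 && predCount(G, Succ) > 1;
    }

In the pass above the successor starts with a PHI and is assumed to have multiple predecessors, which is why only the predecessor's successor count is checked before calling SplitCriticalEdge.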
@@ -282,14 +336,37 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { if (LDI != LiveInMap.end()) { MachineInstr *Def = RegInfo->getVRegDef(LDI->second); MachineBasicBlock::iterator InsertPos = Def; - const MDNode *Variable = + const MDNode *Variable = MI->getOperand(MI->getNumOperands()-1).getMetadata(); unsigned Offset = MI->getOperand(1).getImm(); // Def is never a terminator here, so it is ok to increment InsertPos. - BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(), + BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)) .addReg(LDI->second, RegState::Debug) .addImm(Offset).addMetadata(Variable); + + // If this vreg is directly copied into an exported register then + // that COPY instructions also need DBG_VALUE, if it is the only + // user of LDI->second. + MachineInstr *CopyUseMI = NULL; + for (MachineRegisterInfo::use_iterator + UI = RegInfo->use_begin(LDI->second); + MachineInstr *UseMI = UI.skipInstruction();) { + if (UseMI->isDebugValue()) continue; + if (UseMI->isCopy() && !CopyUseMI && UseMI->getParent() == EntryMBB) { + CopyUseMI = UseMI; continue; + } + // Otherwise this is another use or second copy use. + CopyUseMI = NULL; break; + } + if (CopyUseMI) { + MachineInstr *NewMI = + BuildMI(*MF, CopyUseMI->getDebugLoc(), + TII.get(TargetOpcode::DBG_VALUE)) + .addReg(CopyUseMI->getOperand(0).getReg(), RegState::Debug) + .addImm(Offset).addMetadata(Variable); + EntryMBB->insertAfter(CopyUseMI, NewMI); + } } } @@ -303,10 +380,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { II = MBB->begin(), IE = MBB->end(); II != IE; ++II) { const TargetInstrDesc &TID = TM.getInstrInfo()->get(II->getOpcode()); - // Operand 1 of an inline asm instruction indicates whether the asm - // needs stack or not. - if ((II->isInlineAsm() && II->getOperand(1).getImm()) || - (TID.isCall() && !TID.isReturn())) { + if ((TID.isCall() && !TID.isReturn()) || + II->isStackAligningInlineAsm()) { MFI->setHasCalls(true); goto done; } @@ -362,6 +437,7 @@ SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, // Final step, emit the lowered DAG as machine code. CodeGenAndEmitDAG(); + return; } void SelectionDAGISel::ComputeLiveOutVRegInfo() { @@ -406,9 +482,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { // Only install this information if it tells us something. if (NumSignBits != 1 || KnownZero != 0 || KnownOne != 0) { - DestReg -= TargetRegisterInfo::FirstVirtualRegister; - if (DestReg >= FuncInfo->LiveOutRegInfo.size()) - FuncInfo->LiveOutRegInfo.resize(DestReg+1); + FuncInfo->LiveOutRegInfo.grow(DestReg); FunctionLoweringInfo::LiveOutInfo &LOI = FuncInfo->LiveOutRegInfo[DestReg]; LOI.NumSignBits = NumSignBits; @@ -541,13 +615,19 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Emit machine code to BB. This can change 'BB' to the last block being // inserted into. + MachineBasicBlock *FirstMBB = FuncInfo->MBB, *LastMBB; { NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled); - FuncInfo->MBB = Scheduler->EmitSchedule(); + LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(); FuncInfo->InsertPt = Scheduler->InsertPos; } + // If the block was split, make sure we update any references that are used to + // update PHI nodes later on. + if (FirstMBB != LastMBB) + SDB->UpdateSplitBlock(FirstMBB, LastMBB); + // Free the scheduler state. 
{ NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName, @@ -563,19 +643,19 @@ void SelectionDAGISel::DoInstructionSelection() { DEBUG(errs() << "===== Instruction selection begins:\n"); PreprocessISelDAG(); - + // Select target instructions for the DAG. { // Number all nodes with a topological order and set DAGSize. DAGSize = CurDAG->AssignTopologicalOrder(); - + // Create a dummy node (which is not added to allnodes), that adds // a reference to the root node, preventing it from being deleted, // and tracking any changes of the root. HandleSDNode Dummy(CurDAG->getRoot()); ISelPosition = SelectionDAG::allnodes_iterator(CurDAG->getRoot().getNode()); ++ISelPosition; - + // The AllNodes list is now topological-sorted. Visit the // nodes by starting at the end of the list (the root of the // graph) and preceding back toward the beginning (the entry @@ -587,19 +667,19 @@ void SelectionDAGISel::DoInstructionSelection() { // makes it theoretically possible to disable the DAGCombiner. if (Node->use_empty()) continue; - + SDNode *ResNode = Select(Node); - + // FIXME: This is pretty gross. 'Select' should be changed to not return // anything at all and this code should be nuked with a tactical strike. - + // If node should not be replaced, continue with the next one. if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE) continue; // Replace node. if (ResNode) ReplaceUses(Node, ResNode); - + // If after the replacement this node is not used any more, // remove this dead node. if (Node->use_empty()) { // Don't delete EntryToken, etc. @@ -607,9 +687,9 @@ void SelectionDAGISel::DoInstructionSelection() { CurDAG->RemoveDeadNode(Node, &ISU); } } - + CurDAG->setRoot(Dummy.getValue()); - } + } DEBUG(errs() << "===== Instruction selection ends:\n"); @@ -661,6 +741,90 @@ void SelectionDAGISel::PrepareEHLandingPad() { } } + + + +bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI, + FastISel *FastIS) { + // Don't try to fold volatile loads. Target has to deal with alignment + // constraints. + if (LI->isVolatile()) return false; + + // Figure out which vreg this is going into. + unsigned LoadReg = FastIS->getRegForValue(LI); + assert(LoadReg && "Load isn't already assigned a vreg? "); + + // Check to see what the uses of this vreg are. If it has no uses, or more + // than one use (at the machine instr level) then we can't fold it. + MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(LoadReg); + if (RI == RegInfo->reg_end()) + return false; + + // See if there is exactly one use of the vreg. If there are multiple uses, + // then the instruction got lowered to multiple machine instructions or the + // use of the loaded value ended up being multiple operands of the result, in + // either case, we can't fold this. + MachineRegisterInfo::reg_iterator PostRI = RI; ++PostRI; + if (PostRI != RegInfo->reg_end()) + return false; + + assert(RI.getOperand().isUse() && + "The only use of the vreg must be a use, we haven't emitted the def!"); + + MachineInstr *User = &*RI; + + // Set the insertion point properly. Folding the load can cause generation of + // other random instructions (like sign extends) for addressing modes, make + // sure they get inserted in a logical place before the new instruction. + FuncInfo->InsertPt = User; + FuncInfo->MBB = User->getParent(); + + // Ask the target to try folding the load. + return FastIS->TryToFoldLoad(User, RI.getOperandNo(), LI); +} + +#ifndef NDEBUG +/// CheckLineNumbers - Check if basic block instructions follow source order +/// or not. 
+static void CheckLineNumbers(const BasicBlock *BB) { + unsigned Line = 0; + unsigned Col = 0; + for (BasicBlock::const_iterator BI = BB->begin(), + BE = BB->end(); BI != BE; ++BI) { + const DebugLoc DL = BI->getDebugLoc(); + if (DL.isUnknown()) continue; + unsigned L = DL.getLine(); + unsigned C = DL.getCol(); + if (L < Line || (L == Line && C < Col)) { + ++NumBBWithOutOfOrderLineInfo; + return; + } + Line = L; + Col = C; + } +} + +/// CheckLineNumbers - Check if machine basic block instructions follow source +/// order or not. +static void CheckLineNumbers(const MachineBasicBlock *MBB) { + unsigned Line = 0; + unsigned Col = 0; + for (MachineBasicBlock::const_iterator MBI = MBB->begin(), + MBE = MBB->end(); MBI != MBE; ++MBI) { + const DebugLoc DL = MBI->getDebugLoc(); + if (DL.isUnknown()) continue; + unsigned L = DL.getLine(); + unsigned C = DL.getCol(); + if (L < Line || (L == Line && C < Col)) { + ++NumMBBWithOutOfOrderLineInfo; + return; + } + Line = L; + Col = C; + } +} +#endif + void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = 0; @@ -670,6 +834,9 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Iterate over all basic blocks in the function. for (Function::const_iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { const BasicBlock *LLVMBB = &*I; +#ifndef NDEBUG + CheckLineNumbers(LLVMBB); +#endif FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB]; FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); @@ -682,10 +849,19 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Setup an EH landing-pad block. if (FuncInfo->MBB->isLandingPad()) PrepareEHLandingPad(); - + // Lower any arguments needed in this block if this is the entry block. - if (LLVMBB == &Fn.getEntryBlock()) + if (LLVMBB == &Fn.getEntryBlock()) { + for (BasicBlock::const_iterator DBI = LLVMBB->begin(), DBE = LLVMBB->end(); + DBI != DBE; ++DBI) { + if (const DbgInfoIntrinsic *DI = dyn_cast<DbgInfoIntrinsic>(DBI)) { + const DebugLoc DL = DI->getDebugLoc(); + SDB->setCurDebugLoc(DL); + break; + } + } LowerArguments(LLVMBB); + } // Before doing SelectionDAG ISel, see if FastISel has been requested. if (FastIS) { @@ -723,8 +899,19 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastIS->recomputeInsertPt(); // Try to select the instruction with FastISel. - if (FastIS->SelectInstruction(Inst)) + if (FastIS->SelectInstruction(Inst)) { + // If fast isel succeeded, check to see if there is a single-use + // non-volatile load right before the selected instruction, and see if + // the load is used by the instruction. If so, try to fold it. + const Instruction *BeforeInst = 0; + if (Inst != Begin) + BeforeInst = llvm::prior(llvm::prior(BI)); + if (BeforeInst && isa<LoadInst>(BeforeInst) && + BeforeInst->hasOneUse() && *BeforeInst->use_begin() == Inst && + TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), FastIS)) + --BI; // If we succeeded, don't re-select the load. continue; + } // Then handle certain instructions as single-LLVM-Instruction blocks. if (isa<CallInst>(Inst)) { @@ -771,6 +958,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastIS->recomputeInsertPt(); } + if (Begin != BI) + ++NumDAGBlocks; + else + ++NumFastIselBlocks; + // Run SelectionDAG instruction selection on the remainder of the block // not handled by FastISel. If FastISel is not run, this is the entire // block. 
@@ -782,6 +974,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } delete FastIS; +#ifndef NDEBUG + for (MachineFunction::const_iterator MBI = MF->begin(), MBE = MF->end(); + MBI != MBE; ++MBI) + CheckLineNumbers(MBI); +#endif } void @@ -831,12 +1028,14 @@ SelectionDAGISel::FinishBasicBlock() { FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code if (j+1 != ej) - SDB->visitBitTestCase(SDB->BitTestCases[i].Cases[j+1].ThisBB, + SDB->visitBitTestCase(SDB->BitTestCases[i], + SDB->BitTestCases[i].Cases[j+1].ThisBB, SDB->BitTestCases[i].Reg, SDB->BitTestCases[i].Cases[j], FuncInfo->MBB); else - SDB->visitBitTestCase(SDB->BitTestCases[i].Default, + SDB->visitBitTestCase(SDB->BitTestCases[i], + SDB->BitTestCases[i].Default, SDB->BitTestCases[i].Reg, SDB->BitTestCases[i].Cases[j], FuncInfo->MBB); @@ -951,7 +1150,7 @@ SelectionDAGISel::FinishBasicBlock() { // additional DAGs necessary. for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) { // Set the current basic block to the mbb we wish to insert the code into - MachineBasicBlock *ThisBB = FuncInfo->MBB = SDB->SwitchCases[i].ThisBB; + FuncInfo->MBB = SDB->SwitchCases[i].ThisBB; FuncInfo->InsertPt = FuncInfo->MBB->end(); // Determine the unique successors. @@ -960,13 +1159,15 @@ SelectionDAGISel::FinishBasicBlock() { if (SDB->SwitchCases[i].TrueBB != SDB->SwitchCases[i].FalseBB) Succs.push_back(SDB->SwitchCases[i].FalseBB); - // Emit the code. Note that this could result in ThisBB being split, so - // we need to check for updates. + // Emit the code. Note that this could result in FuncInfo->MBB being split. SDB->visitSwitchCase(SDB->SwitchCases[i], FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); CodeGenAndEmitDAG(); - ThisBB = FuncInfo->MBB; + + // Remember the last block, now that any splitting is done, for use in + // populating PHI nodes in successors. + MachineBasicBlock *ThisBB = FuncInfo->MBB; // Handle any PHI nodes in successors of this chunk, as if we were coming // from the original BB before switch expansion. Note that PHI nodes can @@ -1016,10 +1217,6 @@ ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() { return Ctor(this, OptLevel); } -ScheduleHazardRecognizer *SelectionDAGISel::CreateTargetHazardRecognizer() { - return new ScheduleHazardRecognizer(); -} - //===----------------------------------------------------------------------===// // Helper functions used by the generated instruction selector. //===----------------------------------------------------------------------===// @@ -1099,11 +1296,11 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) { Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0 Ops.push_back(InOps[InlineAsm::Op_AsmString]); // 1 Ops.push_back(InOps[InlineAsm::Op_MDNode]); // 2, !srcloc - Ops.push_back(InOps[InlineAsm::Op_IsAlignStack]); // 3 + Ops.push_back(InOps[InlineAsm::Op_ExtraInfo]); // 3 (SideEffect, AlignStack) unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size(); - if (InOps[e-1].getValueType() == MVT::Flag) - --e; // Don't process a flag operand if it is here. + if (InOps[e-1].getValueType() == MVT::Glue) + --e; // Don't process a glue operand if it is here. while (i != e) { unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue(); @@ -1130,15 +1327,15 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) { } } - // Add the flag input back if present. + // Add the glue input back if present. 
if (e != InOps.size()) Ops.push_back(InOps.back()); } -/// findFlagUse - Return use of EVT::Flag value produced by the specified /// SDNode. /// -static SDNode *findFlagUse(SDNode *N) { +static SDNode *findGlueUse(SDNode *N) { unsigned FlagResNo = N->getNumValues()-1; for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { SDUse &Use = I.getUse(); @@ -1160,11 +1357,11 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, // never find it. // // The Use may be -1 (unassigned) if it is a newly allocated node. This can - // happen because we scan down to newly selected nodes in the case of flag + // happen because we scan down to newly selected nodes in the case of glue // uses. if ((Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1)) return false; - + // Don't revisit nodes if we already scanned it and didn't fail, we know we // won't fail if we scan it again. if (!Visited.insert(Use)) @@ -1174,7 +1371,7 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, // Ignore chain uses, they are validated by HandleMergeInputChains. if (Use->getOperand(i).getValueType() == MVT::Other && IgnoreChains) continue; - + SDNode *N = Use->getOperand(i).getNode(); if (N == Def) { if (Use == ImmedUse || Use == Root) @@ -1221,8 +1418,8 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, // // * indicates nodes to be folded together. // - // If Root produces a flag, then it gets (even more) interesting. Since it - // will be "glued" together with its flag use in the scheduler, we need to + // If Root produces glue, then it gets (even more) interesting. Since it + // will be "glued" together with its glue use in the scheduler, we need to // check if it might reach N. // // [N*] // @@ -1240,30 +1437,30 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, // ^ / // // f / // // | / // - // [FU] // + // [GU] // // - // If FU (flag use) indirectly reaches N (the load), and Root folds N - // (call it Fold), then X is a predecessor of FU and a successor of - // Fold. But since Fold and FU are flagged together, this will create + // If GU (glue use) indirectly reaches N (the load), and Root folds N + // (call it Fold), then X is a predecessor of GU and a successor of + // Fold. But since Fold and GU are glued together, this will create // a cycle in the scheduling graph. - // If the node has flags, walk down the graph to the "lowest" node in the - // flagged set. + // If the node has glue, walk down the graph to the "lowest" node in the + // glued set. EVT VT = Root->getValueType(Root->getNumValues()-1); - while (VT == MVT::Flag) { - SDNode *FU = findFlagUse(Root); - if (FU == NULL) + while (VT == MVT::Glue) { + SDNode *GU = findGlueUse(Root); + if (GU == NULL) break; - Root = FU; + Root = GU; VT = Root->getValueType(Root->getNumValues()-1); - - // If our query node has a flag result with a use, we've walked up it. If + + // If our query node has a glue result with a use, we've walked up it. If // the user (which has already been selected) has a chain or indirectly uses // the chain, our WalkChainUsers predicate will not consider it. Because of // this, we cannot ignore chains in this predicate.
IgnoreChains = false; } - + SmallPtrSet<SDNode*, 16> Visited; return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains); @@ -1272,10 +1469,10 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { std::vector<SDValue> Ops(N->op_begin(), N->op_end()); SelectInlineAsmMemoryOperands(Ops); - + std::vector<EVT> VTs; VTs.push_back(MVT::Other); - VTs.push_back(MVT::Flag); + VTs.push_back(MVT::Glue); SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(), VTs, &Ops[0], Ops.size()); New->setNodeId(-1); @@ -1287,11 +1484,11 @@ SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) { } /// GetVBR - decode a vbr encoding whose top bit is set. -ALWAYS_INLINE static uint64_t +LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { assert(Val >= 128 && "Not a VBR"); Val &= 127; // Remove first vbr bit. - + unsigned Shift = 7; uint64_t NextBits; do { @@ -1299,25 +1496,25 @@ GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { Val |= (NextBits&127) << Shift; Shift += 7; } while (NextBits & 128); - + return Val; } -/// UpdateChainsAndFlags - When a match is complete, this method updates uses of -/// interior flag and chain results to use the new flag and chain results. +/// UpdateChainsAndGlue - When a match is complete, this method updates uses of +/// interior glue and chain results to use the new glue and chain results. void SelectionDAGISel:: -UpdateChainsAndFlags(SDNode *NodeToMatch, SDValue InputChain, - const SmallVectorImpl<SDNode*> &ChainNodesMatched, - SDValue InputFlag, - const SmallVectorImpl<SDNode*> &FlagResultNodesMatched, - bool isMorphNodeTo) { +UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain, + const SmallVectorImpl<SDNode*> &ChainNodesMatched, + SDValue InputGlue, + const SmallVectorImpl<SDNode*> &GlueResultNodesMatched, + bool isMorphNodeTo) { SmallVector<SDNode*, 4> NowDeadNodes; - + ISelUpdater ISU(ISelPosition); // Now that all the normal results are replaced, we replace the chain and - // flag results if present. + // glue results if present. if (!ChainNodesMatched.empty()) { assert(InputChain.getNode() != 0 && "Matched input chains but didn't produce a chain"); @@ -1325,55 +1522,55 @@ UpdateChainsAndFlags(SDNode *NodeToMatch, SDValue InputChain, // Replace all the chain results with the final chain we ended up with. for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { SDNode *ChainNode = ChainNodesMatched[i]; - + // If this node was already deleted, don't look at it. if (ChainNode->getOpcode() == ISD::DELETED_NODE) continue; - + // Don't replace the results of the root node if we're doing a // MorphNodeTo. if (ChainNode == NodeToMatch && isMorphNodeTo) continue; - + SDValue ChainVal = SDValue(ChainNode, ChainNode->getNumValues()-1); - if (ChainVal.getValueType() == MVT::Flag) + if (ChainVal.getValueType() == MVT::Glue) ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2); assert(ChainVal.getValueType() == MVT::Other && "Not a chain?"); CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain, &ISU); - + // If the node became dead and we haven't already seen it, delete it. if (ChainNode->use_empty() && !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), ChainNode)) NowDeadNodes.push_back(ChainNode); } } - - // If the result produces a flag, update any flag results in the matched - // pattern with the flag result. 
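GetVBR above decodes the matcher table's variable-width integers: each byte carries seven payload bits, bit 7 means another byte follows, and the caller has already consumed the first byte, which is why the value arrives with its continuation bit still set (Val &= 127 strips it). A standalone encoder/decoder pair for the same format; the encoder is an assumption about what TableGen's matcher emitter produces, shown only to make the layout concrete:

    #include <cassert>
    #include <stdint.h>
    #include <vector>

    // Encode Val in 7-bit groups, least significant first; bit 7 marks "more".
    static void EncodeVBR(uint64_t Val, std::vector<unsigned char> &Out) {
      do {
        unsigned char Byte = Val & 127;
        Val >>= 7;
        if (Val) Byte |= 128;
        Out.push_back(Byte);
      } while (Val);
    }

    // Mirror of GetVBR: the caller has already read the first byte (with its
    // continuation bit set) into Val and advanced Idx past it.
    static uint64_t DecodeVBR(uint64_t Val, const unsigned char *Table,
                              unsigned &Idx) {
      assert(Val >= 128 && "not a VBR-encoded value");
      Val &= 127;
      unsigned Shift = 7;
      uint64_t NextBits;
      do {
        NextBits = Table[Idx++];
        Val |= (NextBits & 127) << Shift;
        Shift += 7;
      } while (NextBits & 128);
      return Val;
    }

    int main() {
      std::vector<unsigned char> Buf;
      EncodeVBR(300, Buf);           // two bytes: 172 (=44|128), then 2
      unsigned Idx = 1;              // caller consumed Buf[0] already
      assert(DecodeVBR(Buf[0], &Buf[0], Idx) == 300);
      return 0;
    }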
- if (InputFlag.getNode() != 0) { + + // If the result produces glue, update any glue results in the matched + // pattern with the glue result. + if (InputGlue.getNode() != 0) { // Handle any interior nodes explicitly marked. - for (unsigned i = 0, e = FlagResultNodesMatched.size(); i != e; ++i) { - SDNode *FRN = FlagResultNodesMatched[i]; - + for (unsigned i = 0, e = GlueResultNodesMatched.size(); i != e; ++i) { + SDNode *FRN = GlueResultNodesMatched[i]; + // If this node was already deleted, don't look at it. if (FRN->getOpcode() == ISD::DELETED_NODE) continue; - - assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Flag && - "Doesn't have a flag result"); + + assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Glue && + "Doesn't have a glue result"); CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1), - InputFlag, &ISU); - + InputGlue, &ISU); + // If the node became dead and we haven't already seen it, delete it. if (FRN->use_empty() && !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), FRN)) NowDeadNodes.push_back(FRN); } } - + if (!NowDeadNodes.empty()) CurDAG->RemoveDeadNodes(NowDeadNodes, &ISU); - + DEBUG(errs() << "ISEL: Match complete!\n"); } @@ -1392,17 +1589,17 @@ enum ChainResult { /// /// The walk we do here is guaranteed to be small because we quickly get down to /// already selected nodes "below" us. -static ChainResult +static ChainResult WalkChainUsers(SDNode *ChainedNode, SmallVectorImpl<SDNode*> &ChainedNodesInPattern, SmallVectorImpl<SDNode*> &InteriorChainedNodes) { ChainResult Result = CR_Simple; - + for (SDNode::use_iterator UI = ChainedNode->use_begin(), E = ChainedNode->use_end(); UI != E; ++UI) { // Make sure the use is of the chain, not some other value we produce. if (UI.getUse().getValueType() != MVT::Other) continue; - + SDNode *User = *UI; // If we see an already-selected machine node, then we've gone beyond the @@ -1411,7 +1608,7 @@ WalkChainUsers(SDNode *ChainedNode, if (User->isMachineOpcode() || User->getOpcode() == ISD::HANDLENODE) // Root of the graph. continue; - + if (User->getOpcode() == ISD::CopyToReg || User->getOpcode() == ISD::CopyFromReg || User->getOpcode() == ISD::INLINEASM || @@ -1437,7 +1634,7 @@ WalkChainUsers(SDNode *ChainedNode, if (!std::count(ChainedNodesInPattern.begin(), ChainedNodesInPattern.end(), User)) return CR_InducesCycle; - + // Otherwise we found a node that is part of our pattern. For example in: // x = load ptr // y = x+4 @@ -1449,7 +1646,7 @@ WalkChainUsers(SDNode *ChainedNode, InteriorChainedNodes.push_back(User); continue; } - + // If we found a TokenFactor, there are two cases to consider: first if the // TokenFactor is just hanging "below" the pattern we're matching (i.e. no // uses of the TF are in our pattern) we just want to ignore it. Second, @@ -1486,7 +1683,7 @@ WalkChainUsers(SDNode *ChainedNode, case CR_LeadsToInteriorNode: break; // Otherwise, keep processing. } - + // Okay, we know we're in the interesting interior case. The TokenFactor // is now going to be considered part of the pattern so that we rewrite its // uses (it may have uses that are not part of the pattern) with the @@ -1497,7 +1694,7 @@ WalkChainUsers(SDNode *ChainedNode, InteriorChainedNodes.push_back(User); continue; } - + return Result; } @@ -1519,7 +1716,7 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched, InteriorChainedNodes) == CR_InducesCycle) return SDValue(); // Would induce a cycle. 
} - + // Okay, we have walked all the matched nodes and collected TokenFactor nodes // that we are interested in. Form our input TokenFactor node. SmallVector<SDValue, 3> InputChains; @@ -1530,14 +1727,14 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched, if (N->getOpcode() != ISD::TokenFactor) { if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N)) continue; - + // Otherwise, add the input chain. SDValue InChain = ChainNodesMatched[i]->getOperand(0); assert(InChain.getValueType() == MVT::Other && "Not a chain"); InputChains.push_back(InChain); continue; } - + // If we have a token factor, we want to add all inputs of the token factor // that are not part of the pattern we're matching. for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) { @@ -1546,13 +1743,13 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched, InputChains.push_back(N->getOperand(op)); } } - + SDValue Res; if (InputChains.size() == 1) return InputChains[0]; return CurDAG->getNode(ISD::TokenFactor, ChainNodesMatched[0]->getDebugLoc(), MVT::Other, &InputChains[0], InputChains.size()); -} +} /// MorphNode - Handle morphing a node in place for the selector. SDNode *SelectionDAGISel:: @@ -1560,15 +1757,15 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo) { // It is possible we're using MorphNodeTo to replace a node with no // normal results with one that has a normal result (or we could be - // adding a chain) and the input could have flags and chains as well. + // adding a chain) and the input could have glue and chains as well. // In this case we need to shift the operands down. // FIXME: This is a horrible hack and broken in obscure cases, no worse // than the old isel though. - int OldFlagResultNo = -1, OldChainResultNo = -1; + int OldGlueResultNo = -1, OldChainResultNo = -1; unsigned NTMNumResults = Node->getNumValues(); - if (Node->getValueType(NTMNumResults-1) == MVT::Flag) { - OldFlagResultNo = NTMNumResults-1; + if (Node->getValueType(NTMNumResults-1) == MVT::Glue) { + OldGlueResultNo = NTMNumResults-1; if (NTMNumResults != 1 && Node->getValueType(NTMNumResults-2) == MVT::Other) OldChainResultNo = NTMNumResults-2; @@ -1589,54 +1786,55 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, } unsigned ResNumResults = Res->getNumValues(); - // Move the flag if needed. - if ((EmitNodeInfo & OPFL_FlagOutput) && OldFlagResultNo != -1 && - (unsigned)OldFlagResultNo != ResNumResults-1) - CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldFlagResultNo), + // Move the glue if needed. + if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 && + (unsigned)OldGlueResultNo != ResNumResults-1) + CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldGlueResultNo), SDValue(Res, ResNumResults-1)); - if ((EmitNodeInfo & OPFL_FlagOutput) != 0) + if ((EmitNodeInfo & OPFL_GlueOutput) != 0) --ResNumResults; // Move the chain reference if needed. if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 && (unsigned)OldChainResultNo != ResNumResults-1) - CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo), + CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo), SDValue(Res, ResNumResults-1)); // Otherwise, no replacement happened because the node already exists. Replace // Uses of the old node with the new one. if (Res != Node) CurDAG->ReplaceAllUsesWith(Node, Res); - + return Res; } /// CheckPatternPredicate - Implements OP_CheckPatternPredicate. 
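HandleMergeInputChains, finished above, reduces to: gather the chain operands of every matched chained node that come from outside the pattern (looking through TokenFactors interior to the match), then reuse a single input directly or join several. A trimmed restatement of the final step from that hunk, with the rationale as comments:

    // A TokenFactor takes N chains and yields one chain that depends on all
    // of them, so the selected node is ordered after every external side
    // effect without imposing an order among the inputs themselves.
    SDValue MergedChain;
    if (InputChains.size() == 1)
      MergedChain = InputChains[0];   // nothing to merge
    else
      MergedChain = CurDAG->getNode(ISD::TokenFactor,
                                    ChainNodesMatched[0]->getDebugLoc(),
                                    MVT::Other, &InputChains[0],
                                    InputChains.size());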
-ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SDValue N, const SmallVectorImpl<SDValue> &RecordedNodes) { + SDValue N, + const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) { // Accept if it is exactly the same as a previously recorded node. unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); - return N == RecordedNodes[RecNo]; + return N == RecordedNodes[RecNo].first; } - + /// CheckPatternPredicate - Implements OP_CheckPatternPredicate. -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, SelectionDAGISel &SDISel) { return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]); } /// CheckNodePredicate - Implements OP_CheckNodePredicate. -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, SelectionDAGISel &SDISel, SDNode *N) { return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]); } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDNode *N) { uint16_t Opc = MatcherTable[MatcherIndex++]; @@ -1644,17 +1842,17 @@ CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex, return N->getOpcode() == Opc; } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering &TLI) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; if (N.getValueType() == VT) return true; - + // Handle the case when VT is iPTR. return VT == MVT::iPTR && N.getValueType() == TLI.getPointerTy(); } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering &TLI, unsigned ChildNo) { @@ -1664,57 +1862,57 @@ CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N) { return cast<CondCodeSDNode>(N)->get() == (ISD::CondCode)MatcherTable[MatcherIndex++]; } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering &TLI) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; if (cast<VTSDNode>(N)->getVT() == VT) return true; - + // Handle the case when VT is iPTR. 
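Every Check* helper in this run of hunks follows the same byte-stream protocol: read this operation's immediates at MatcherIndex, advance the index, and report pass/fail, so the interpreter loop never needs to know how wide an operation is. A toy standalone model of the protocol; the opcode numbering is invented for illustration and is not the real OPC_* encoding:

    #include <cassert>

    enum ToyOpcode { TOY_CheckOpcode = 1, TOY_CheckType = 2 };

    struct ToyNode { unsigned Opcode; unsigned VT; };

    // Each helper consumes its own immediates and advances Idx.
    static bool toyCheckOpcode(const unsigned char *Table, unsigned &Idx,
                               const ToyNode &N) {
      return N.Opcode == Table[Idx++];
    }
    static bool toyCheckType(const unsigned char *Table, unsigned &Idx,
                             const ToyNode &N) {
      return N.VT == Table[Idx++];
    }

    int main() {
      static const unsigned char Table[] = { TOY_CheckOpcode, 13,
                                             TOY_CheckType,    7 };
      ToyNode N = { 13, 7 };
      unsigned Idx = 0;
      while (Idx != sizeof(Table)) {
        bool OK = false;
        switch (Table[Idx++]) {
        case TOY_CheckOpcode: OK = toyCheckOpcode(Table, Idx, N); break;
        case TOY_CheckType:   OK = toyCheckType(Table, Idx, N);   break;
        }
        if (!OK) return 1; // match failed
      }
      return 0; // match succeeded
    }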
return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI.getPointerTy(); } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N) { int64_t Val = MatcherTable[MatcherIndex++]; if (Val & 128) Val = GetVBR(Val, MatcherTable, MatcherIndex); - + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N); return C != 0 && C->getSExtValue() == Val; } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, SelectionDAGISel &SDISel) { int64_t Val = MatcherTable[MatcherIndex++]; if (Val & 128) Val = GetVBR(Val, MatcherTable, MatcherIndex); - + if (N->getOpcode() != ISD::AND) return false; - + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); return C != 0 && SDISel.CheckAndMask(N.getOperand(0), C, Val); } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, SelectionDAGISel &SDISel) { int64_t Val = MatcherTable[MatcherIndex++]; if (Val & 128) Val = GetVBR(Val, MatcherTable, MatcherIndex); - + if (N->getOpcode() != ISD::OR) return false; - + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); return C != 0 && SDISel.CheckOrMask(N.getOperand(0), C, Val); } @@ -1724,11 +1922,11 @@ CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, /// fail, set Result=true and return anything. If the current predicate is /// known to pass, set Result=false and return the MatcherIndex to continue /// with. If the current predicate is unknown, set Result=false and return the -/// MatcherIndex to continue with. +/// MatcherIndex to continue with. static unsigned IsPredicateKnownToFail(const unsigned char *Table, unsigned Index, SDValue N, bool &Result, SelectionDAGISel &SDISel, - SmallVectorImpl<SDValue> &RecordedNodes){ + SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) { switch (Table[Index++]) { default: Result = false; @@ -1782,21 +1980,21 @@ namespace { struct MatchScope { /// FailIndex - If this match fails, this is the index to continue with. unsigned FailIndex; - + /// NodeStack - The node stack when the scope was formed. SmallVector<SDValue, 4> NodeStack; - + /// NumRecordedNodes - The number of recorded nodes when the scope was formed. unsigned NumRecordedNodes; - + /// NumMatchedMemRefs - The number of matched memref entries. unsigned NumMatchedMemRefs; - - /// InputChain/InputFlag - The current chain/flag - SDValue InputChain, InputFlag; + + /// InputChain/InputGlue - The current chain/glue + SDValue InputChain, InputGlue; /// HasChainNodesMatched - True if the ChainNodesMatched list is non-empty. - bool HasChainNodesMatched, HasFlagResultNodesMatched; + bool HasChainNodesMatched, HasGlueResultNodesMatched; }; } @@ -1838,7 +2036,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch); case ISD::UNDEF: return Select_UNDEF(NodeToMatch); } - + assert(!NodeToMatch->isMachineOpcode() && "Node already selected!"); // Set up the node stack with NodeToMatch as the only node on the stack. @@ -1849,37 +2047,38 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // MatchScopes - Scopes used when matching, if a match failure happens, this // indicates where to continue checking. 
SmallVector<MatchScope, 8> MatchScopes; - + // RecordedNodes - This is the set of nodes that have been recorded by the - // state machine. - SmallVector<SDValue, 8> RecordedNodes; - + // state machine. The second value is the parent of the node, or null if the + // root is recorded. + SmallVector<std::pair<SDValue, SDNode*>, 8> RecordedNodes; + // MatchedMemRefs - This is the set of MemRef's we've seen in the input // pattern. SmallVector<MachineMemOperand*, 2> MatchedMemRefs; - - // These are the current input chain and flag for use when generating nodes. + + // These are the current input chain and glue for use when generating nodes. // Various Emit operations change these. For example, emitting a copytoreg // uses and updates these. - SDValue InputChain, InputFlag; - + SDValue InputChain, InputGlue; + // ChainNodesMatched - If a pattern matches nodes that have input/output // chains, the OPC_EmitMergeInputChains operation is emitted which indicates // which ones they are. The result is captured into this list so that we can // update the chain results when the pattern is complete. SmallVector<SDNode*, 3> ChainNodesMatched; - SmallVector<SDNode*, 3> FlagResultNodesMatched; - + SmallVector<SDNode*, 3> GlueResultNodesMatched; + DEBUG(errs() << "ISEL: Starting pattern match on root node: "; NodeToMatch->dump(CurDAG); errs() << '\n'); - + // Determine where to start the interpreter. Normally we start at opcode #0, // but if the state machine starts with an OPC_SwitchOpcode, then we // accelerate the first lookup (which is guaranteed to be hot) with the // OpcodeOffset table. unsigned MatcherIndex = 0; - + if (!OpcodeOffset.empty()) { // Already computed the OpcodeOffset table, just index into it. if (N.getOpcode() < OpcodeOffset.size()) @@ -1911,7 +2110,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (N.getOpcode() < OpcodeOffset.size()) MatcherIndex = OpcodeOffset[N.getOpcode()]; } - + while (1) { assert(MatcherIndex < TableSize && "Invalid index"); #ifndef NDEBUG @@ -1926,7 +2125,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // determine immediately that the first check (or first several) will // immediately fail, don't even bother pushing a scope for them. unsigned FailIndex; - + while (1) { unsigned NumToSkip = MatcherTable[MatcherIndex++]; if (NumToSkip & 128) @@ -1936,12 +2135,12 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, FailIndex = 0; break; } - + FailIndex = MatcherIndex+NumToSkip; - + unsigned MatcherIndexOfPredicate = MatcherIndex; (void)MatcherIndexOfPredicate; // silence warning. - + // If we can't evaluate this predicate without pushing a scope (e.g. if // it is a 'MoveParent') or if the predicate succeeds on this node, we // push the scope and evaluate the full predicate chain. @@ -1950,20 +2149,20 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, Result, *this, RecordedNodes); if (!Result) break; - + DEBUG(errs() << " Skipped scope entry (due to false predicate) at " << "index " << MatcherIndexOfPredicate << ", continuing at " << FailIndex << "\n"); ++NumDAGIselRetries; - + // Otherwise, we know that this case of the Scope is guaranteed to fail, // move to the next case. MatcherIndex = FailIndex; } - + // If the whole scope failed to match, bail. if (FailIndex == 0) break; - + // Push a MatchScope which indicates where to go if the first child fails // to match. 
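The OpcodeOffset table consulted above is the fast path for the initial OPC_SwitchOpcode: the per-opcode case offsets are computed once per matcher table (elsewhere in this function), after which every Select() for a known root opcode jumps straight to its case. A standalone sketch of the lookup contract:

    #include <cassert>
    #include <vector>

    // OpcodeOffset[opc] caches the matcher-table index of the switch case
    // handling 'opc'; 0 means "no cached case, interpret from the top".
    static unsigned startIndexFor(unsigned RootOpcode,
                                  const std::vector<unsigned> &OpcodeOffset) {
      return RootOpcode < OpcodeOffset.size() ? OpcodeOffset[RootOpcode] : 0;
    }

    int main() {
      std::vector<unsigned> OpcodeOffset(32, 0);
      OpcodeOffset[17] = 412;   // assumed: case for opcode 17 at index 412
      assert(startIndexFor(17, OpcodeOffset) == 412);
      assert(startIndexFor(99, OpcodeOffset) == 0); // unknown: full walk
      return 0;
    }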
MatchScope NewEntry; @@ -1972,17 +2171,21 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, NewEntry.NumRecordedNodes = RecordedNodes.size(); NewEntry.NumMatchedMemRefs = MatchedMemRefs.size(); NewEntry.InputChain = InputChain; - NewEntry.InputFlag = InputFlag; + NewEntry.InputGlue = InputGlue; NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty(); - NewEntry.HasFlagResultNodesMatched = !FlagResultNodesMatched.empty(); + NewEntry.HasGlueResultNodesMatched = !GlueResultNodesMatched.empty(); MatchScopes.push_back(NewEntry); continue; } - case OPC_RecordNode: + case OPC_RecordNode: { // Remember this node, it may end up being an operand in the pattern. - RecordedNodes.push_back(N); + SDNode *Parent = 0; + if (NodeStack.size() > 1) + Parent = NodeStack[NodeStack.size()-2].getNode(); + RecordedNodes.push_back(std::make_pair(N, Parent)); continue; - + } + case OPC_RecordChild0: case OPC_RecordChild1: case OPC_RecordChild2: case OPC_RecordChild3: case OPC_RecordChild4: case OPC_RecordChild5: @@ -1991,20 +2194,21 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (ChildNo >= N.getNumOperands()) break; // Match fails if out of range child #. - RecordedNodes.push_back(N->getOperand(ChildNo)); + RecordedNodes.push_back(std::make_pair(N->getOperand(ChildNo), + N.getNode())); continue; } case OPC_RecordMemRef: MatchedMemRefs.push_back(cast<MemSDNode>(N)->getMemOperand()); continue; - - case OPC_CaptureFlagInput: - // If the current node has an input flag, capture it in InputFlag. + + case OPC_CaptureGlueInput: + // If the current node has an input glue, capture it in InputGlue. if (N->getNumOperands() != 0 && - N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) - InputFlag = N->getOperand(N->getNumOperands()-1); + N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) + InputGlue = N->getOperand(N->getNumOperands()-1); continue; - + case OPC_MoveChild: { unsigned ChildNo = MatcherTable[MatcherIndex++]; if (ChildNo >= N.getNumOperands()) @@ -2013,14 +2217,14 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, NodeStack.push_back(N); continue; } - + case OPC_MoveParent: // Pop the current node off the NodeStack. NodeStack.pop_back(); assert(!NodeStack.empty() && "Node stack imbalance!"); - N = NodeStack.back(); + N = NodeStack.back(); continue; - + case OPC_CheckSame: if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break; continue; @@ -2036,7 +2240,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, unsigned CPNum = MatcherTable[MatcherIndex++]; unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat"); - if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo], CPNum, + if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second, + RecordedNodes[RecNo].first, CPNum, RecordedNodes)) break; continue; @@ -2044,11 +2249,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_CheckOpcode: if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())) break; continue; - + case OPC_CheckType: if (!::CheckType(MatcherTable, MatcherIndex, N, TLI)) break; continue; - + case OPC_SwitchOpcode: { unsigned CurNodeOpcode = N.getOpcode(); unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart; @@ -2066,22 +2271,22 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // If the opcode matches, then we will execute this case. 
if (CurNodeOpcode == Opc) break; - + // Otherwise, skip over this case. MatcherIndex += CaseSize; } - + // If no cases matched, bail out. if (CaseSize == 0) break; - + // Otherwise, execute the case we found. DEBUG(errs() << " OpcodeSwitch from " << SwitchStart << " to " << MatcherIndex << "\n"); continue; } - + case OPC_SwitchType: { - MVT::SimpleValueType CurNodeVT = N.getValueType().getSimpleVT().SimpleTy; + MVT CurNodeVT = N.getValueType().getSimpleVT(); unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart; unsigned CaseSize; while (1) { @@ -2090,23 +2295,22 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (CaseSize & 128) CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex); if (CaseSize == 0) break; - - MVT::SimpleValueType CaseVT = - (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + + MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; if (CaseVT == MVT::iPTR) - CaseVT = TLI.getPointerTy().SimpleTy; - + CaseVT = TLI.getPointerTy(); + // If the VT matches, then we will execute this case. if (CurNodeVT == CaseVT) break; - + // Otherwise, skip over this case. MatcherIndex += CaseSize; } - + // If no cases matched, bail out. if (CaseSize == 0) break; - + // Otherwise, execute the case we found. DEBUG(errs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString() << "] from " << SwitchStart << " to " << MatcherIndex<<'\n'); @@ -2135,7 +2339,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_CheckOrImm: if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break; continue; - + case OPC_CheckFoldableChainNode: { assert(NodeStack.size() != 1 && "No parent node"); // Verify that all intermediate nodes between the root and this one have @@ -2156,7 +2360,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, NodeToMatch, OptLevel, true/*We validate our own chains*/)) break; - + continue; } case OPC_EmitInteger: { @@ -2165,22 +2369,24 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, int64_t Val = MatcherTable[MatcherIndex++]; if (Val & 128) Val = GetVBR(Val, MatcherTable, MatcherIndex); - RecordedNodes.push_back(CurDAG->getTargetConstant(Val, VT)); + RecordedNodes.push_back(std::pair<SDValue, SDNode*>( + CurDAG->getTargetConstant(Val, VT), (SDNode*)0)); continue; } case OPC_EmitRegister: { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; unsigned RegNo = MatcherTable[MatcherIndex++]; - RecordedNodes.push_back(CurDAG->getRegister(RegNo, VT)); + RecordedNodes.push_back(std::pair<SDValue, SDNode*>( + CurDAG->getRegister(RegNo, VT), (SDNode*)0)); continue; } - + case OPC_EmitConvertToTarget: { // Convert from IMM/FPIMM to target version. 
unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); - SDValue Imm = RecordedNodes[RecNo]; + SDValue Imm = RecordedNodes[RecNo].first; if (Imm->getOpcode() == ISD::Constant) { int64_t Val = cast<ConstantSDNode>(Imm)->getZExtValue(); @@ -2189,11 +2395,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue(); Imm = CurDAG->getTargetConstantFP(*Val, Imm.getValueType()); } - - RecordedNodes.push_back(Imm); + + RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second)); continue; } - + case OPC_EmitMergeInputChains1_0: // OPC_EmitMergeInputChains, 1, 0 case OPC_EmitMergeInputChains1_1: { // OPC_EmitMergeInputChains, 1, 1 // These are space-optimized forms of OPC_EmitMergeInputChains. @@ -2201,28 +2407,28 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, "EmitMergeInputChains should be the first chain producing node"); assert(ChainNodesMatched.empty() && "Should only have one EmitMergeInputChains per match"); - + // Read all of the chained nodes. unsigned RecNo = Opcode == OPC_EmitMergeInputChains1_1; assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); - ChainNodesMatched.push_back(RecordedNodes[RecNo].getNode()); - + ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); + // FIXME: What if other value results of the node have uses not matched // by this pattern? if (ChainNodesMatched.back() != NodeToMatch && - !RecordedNodes[RecNo].hasOneUse()) { + !RecordedNodes[RecNo].first.hasOneUse()) { ChainNodesMatched.clear(); break; } - + // Merge the input chains if they are not intra-pattern references. InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); - + if (InputChain.getNode() == 0) break; // Failed to merge. continue; } - + case OPC_EmitMergeInputChains: { assert(InputChain.getNode() == 0 && "EmitMergeInputChains should be the first chain producing node"); @@ -2242,54 +2448,55 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, for (unsigned i = 0; i != NumChains; ++i) { unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); - ChainNodesMatched.push_back(RecordedNodes[RecNo].getNode()); - + ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); + // FIXME: What if other value results of the node have uses not matched // by this pattern? if (ChainNodesMatched.back() != NodeToMatch && - !RecordedNodes[RecNo].hasOneUse()) { + !RecordedNodes[RecNo].first.hasOneUse()) { ChainNodesMatched.clear(); break; } } - + // If the inner loop broke out, the match fails. if (ChainNodesMatched.empty()) break; // Merge the input chains if they are not intra-pattern references. InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); - + if (InputChain.getNode() == 0) break; // Failed to merge. 
continue; } - + case OPC_EmitCopyToReg: { unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); unsigned DestPhysReg = MatcherTable[MatcherIndex++]; - + if (InputChain.getNode() == 0) InputChain = CurDAG->getEntryNode(); - + InputChain = CurDAG->getCopyToReg(InputChain, NodeToMatch->getDebugLoc(), - DestPhysReg, RecordedNodes[RecNo], - InputFlag); - - InputFlag = InputChain.getValue(1); + DestPhysReg, RecordedNodes[RecNo].first, + InputGlue); + + InputGlue = InputChain.getValue(1); continue; } - + case OPC_EmitNodeXForm: { unsigned XFormNo = MatcherTable[MatcherIndex++]; unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); - RecordedNodes.push_back(RunSDNodeXForm(RecordedNodes[RecNo], XFormNo)); + SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo); + RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, (SDNode*) 0)); continue; } - + case OPC_EmitNode: case OPC_MorphNodeTo: { uint16_t TargetOpc = MatcherTable[MatcherIndex++]; @@ -2304,12 +2511,12 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (VT == MVT::iPTR) VT = TLI.getPointerTy().SimpleTy; VTs.push_back(VT); } - + if (EmitNodeInfo & OPFL_Chain) VTs.push_back(MVT::Other); - if (EmitNodeInfo & OPFL_FlagOutput) - VTs.push_back(MVT::Flag); - + if (EmitNodeInfo & OPFL_GlueOutput) + VTs.push_back(MVT::Glue); + // This is hot code, so optimize the two most common cases of 1 and 2 // results. SDVTList VTList; @@ -2327,11 +2534,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, unsigned RecNo = MatcherTable[MatcherIndex++]; if (RecNo & 128) RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex); - + assert(RecNo < RecordedNodes.size() && "Invalid EmitNode"); - Ops.push_back(RecordedNodes[RecNo]); + Ops.push_back(RecordedNodes[RecNo].first); } - + // If there are variadic operands to add, handle them now. if (EmitNodeInfo & OPFL_VariadicInfo) { // Determine the start index to copy from. @@ -2339,22 +2546,22 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, FirstOpToCopy += (EmitNodeInfo & OPFL_Chain) ? 1 : 0; assert(NodeToMatch->getNumOperands() >= FirstOpToCopy && "Invalid variadic node"); - // Copy all of the variadic operands, not including a potential flag + // Copy all of the variadic operands, not including a potential glue // input. for (unsigned i = FirstOpToCopy, e = NodeToMatch->getNumOperands(); i != e; ++i) { SDValue V = NodeToMatch->getOperand(i); - if (V.getValueType() == MVT::Flag) break; + if (V.getValueType() == MVT::Glue) break; Ops.push_back(V); } } - - // If this has chain/flag inputs, add them. + + // If this has chain/glue inputs, add them. if (EmitNodeInfo & OPFL_Chain) Ops.push_back(InputChain); - if ((EmitNodeInfo & OPFL_FlagInput) && InputFlag.getNode() != 0) - Ops.push_back(InputFlag); - + if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != 0) + Ops.push_back(InputGlue); + // Create the node. SDNode *Res = 0; if (Opcode != OPC_MorphNodeTo) { @@ -2362,28 +2569,29 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // add the results to the RecordedNodes list. Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(), VTList, Ops.data(), Ops.size()); - - // Add all the non-flag/non-chain results to the RecordedNodes list. + + // Add all the non-glue/non-chain results to the RecordedNodes list. 
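OPC_EmitCopyToReg above shows the central idiom of the Flag-to-Glue rename: each copy-to-physreg is threaded onto both the chain and the glue, and the glue result (value #1 of the copy) feeds the next glued node so the scheduler cannot separate them. A sketch of the pattern using only the calls visible in this hunk; dl, PhysReg0/1, and Val0/1 are placeholder parameters:

    static SDValue emitGluedCopies(SelectionDAG *CurDAG, DebugLoc dl,
                                   unsigned PhysReg0, SDValue Val0,
                                   unsigned PhysReg1, SDValue Val1) {
      SDValue Chain = CurDAG->getEntryNode();
      SDValue Glue;   // null until the first copy produces one
      // Each copy returns (chain, glue); feeding the glue into the next
      // copy forbids the scheduler from separating the two.
      Chain = CurDAG->getCopyToReg(Chain, dl, PhysReg0, Val0, Glue);
      Glue  = Chain.getValue(1);
      Chain = CurDAG->getCopyToReg(Chain, dl, PhysReg1, Val1, Glue);
      Glue  = Chain.getValue(1);
      return Chain;
    }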
for (unsigned i = 0, e = VTs.size(); i != e; ++i) { - if (VTs[i] == MVT::Other || VTs[i] == MVT::Flag) break; - RecordedNodes.push_back(SDValue(Res, i)); + if (VTs[i] == MVT::Other || VTs[i] == MVT::Glue) break; + RecordedNodes.push_back(std::pair<SDValue,SDNode*>(SDValue(Res, i), + (SDNode*) 0)); } - + } else { Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops.data(), Ops.size(), EmitNodeInfo); } - - // If the node had chain/flag results, update our notion of the current - // chain and flag. - if (EmitNodeInfo & OPFL_FlagOutput) { - InputFlag = SDValue(Res, VTs.size()-1); + + // If the node had chain/glue results, update our notion of the current + // chain and glue. + if (EmitNodeInfo & OPFL_GlueOutput) { + InputGlue = SDValue(Res, VTs.size()-1); if (EmitNodeInfo & OPFL_Chain) InputChain = SDValue(Res, VTs.size()-2); } else if (EmitNodeInfo & OPFL_Chain) InputChain = SDValue(Res, VTs.size()-1); - // If the OPFL_MemRefs flag is set on this node, slap all of the + // If the OPFL_MemRefs glue is set on this node, slap all of the // accumulated memrefs onto it. // // FIXME: This is vastly incorrect for patterns with multiple outputs @@ -2396,37 +2604,37 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, cast<MachineSDNode>(Res) ->setMemRefs(MemRefs, MemRefs + MatchedMemRefs.size()); } - + DEBUG(errs() << " " << (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created") << " node: "; Res->dump(CurDAG); errs() << "\n"); - + // If this was a MorphNodeTo then we're completely done! if (Opcode == OPC_MorphNodeTo) { - // Update chain and flag uses. - UpdateChainsAndFlags(NodeToMatch, InputChain, ChainNodesMatched, - InputFlag, FlagResultNodesMatched, true); + // Update chain and glue uses. + UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched, + InputGlue, GlueResultNodesMatched, true); return Res; } - + continue; } - - case OPC_MarkFlagResults: { + + case OPC_MarkGlueResults: { unsigned NumNodes = MatcherTable[MatcherIndex++]; - - // Read and remember all the flag-result nodes. + + // Read and remember all the glue-result nodes. for (unsigned i = 0; i != NumNodes; ++i) { unsigned RecNo = MatcherTable[MatcherIndex++]; if (RecNo & 128) RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex); assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); - FlagResultNodesMatched.push_back(RecordedNodes[RecNo].getNode()); + GlueResultNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); } continue; } - + case OPC_CompleteMatch: { // The match has been completed, and any new nodes (if any) have been // created. 
Patch up references to the matched dag to use the newly @@ -2437,13 +2645,13 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, unsigned ResSlot = MatcherTable[MatcherIndex++]; if (ResSlot & 128) ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex); - + assert(ResSlot < RecordedNodes.size() && "Invalid CheckSame"); - SDValue Res = RecordedNodes[ResSlot]; - + SDValue Res = RecordedNodes[ResSlot].first; + assert(i < NodeToMatch->getNumValues() && NodeToMatch->getValueType(i) != MVT::Other && - NodeToMatch->getValueType(i) != MVT::Flag && + NodeToMatch->getValueType(i) != MVT::Glue && "Invalid number of results to complete!"); assert((NodeToMatch->getValueType(i) == Res.getValueType() || NodeToMatch->getValueType(i) == MVT::iPTR || @@ -2454,24 +2662,23 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res); } - // If the root node defines a flag, add it to the flag nodes to update - // list. - if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Flag) - FlagResultNodesMatched.push_back(NodeToMatch); - - // Update chain and flag uses. - UpdateChainsAndFlags(NodeToMatch, InputChain, ChainNodesMatched, - InputFlag, FlagResultNodesMatched, false); - + // If the root node defines glue, add it to the glue nodes to update list. + if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Glue) + GlueResultNodesMatched.push_back(NodeToMatch); + + // Update chain and glue uses. + UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched, + InputGlue, GlueResultNodesMatched, false); + assert(NodeToMatch->use_empty() && "Didn't replace all uses of the node?"); - + // FIXME: We just return here, which interacts correctly with SelectRoot // above. We should fix this to not return an SDNode* anymore. return 0; } } - + // If the code reached this point, then the match failed. See if there is // another child to try in the current 'Scope', otherwise pop it until we // find a case to check. @@ -2494,15 +2701,15 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (LastScope.NumMatchedMemRefs != MatchedMemRefs.size()) MatchedMemRefs.resize(LastScope.NumMatchedMemRefs); MatcherIndex = LastScope.FailIndex; - + DEBUG(errs() << " Continuing at " << MatcherIndex << "\n"); - + InputChain = LastScope.InputChain; - InputFlag = LastScope.InputFlag; + InputGlue = LastScope.InputGlue; if (!LastScope.HasChainNodesMatched) ChainNodesMatched.clear(); - if (!LastScope.HasFlagResultNodesMatched) - FlagResultNodesMatched.clear(); + if (!LastScope.HasGlueResultNodesMatched) + GlueResultNodesMatched.clear(); // Check to see what the offset is at the new MatcherIndex. If it is zero // we have reached the end of this scope, otherwise we have another child @@ -2517,21 +2724,21 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, LastScope.FailIndex = MatcherIndex+NumToSkip; break; } - + // End of this scope, pop it and try the next child in the containing // scope. 
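The failure path restores interpreter state from the innermost saved scope before retrying the next alternative. Roughly, each scope snapshot carries the following (a paraphrase of the MatchScope struct defined earlier in this file, not the verbatim definition):

    struct MatchScopeSketch {          // state a popped scope restores
      unsigned FailIndex;              // MatcherTable index of the next child
      unsigned NumRecordedNodes;       // truncate RecordedNodes back to this
      unsigned NumMatchedMemRefs;      // truncate MatchedMemRefs back to this
      SDValue InputChain, InputGlue;   // chain/glue in effect when pushed
      bool HasChainNodesMatched;       // keep or clear ChainNodesMatched
      bool HasGlueResultNodesMatched;  // keep or clear GlueResultNodesMatched
    };
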
MatchScopes.pop_back(); } } } - + void SelectionDAGISel::CannotYetSelect(SDNode *N) { std::string msg; raw_string_ostream Msg(msg); - Msg << "Cannot yet select: "; - + Msg << "Cannot select: "; + if (N->getOpcode() != ISD::INTRINSIC_W_CHAIN && N->getOpcode() != ISD::INTRINSIC_WO_CHAIN && N->getOpcode() != ISD::INTRINSIC_VOID) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 8313de5..76eb945 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -93,7 +93,7 @@ namespace llvm { static std::string getEdgeAttributes(const void *Node, EdgeIter EI) { SDValue Op = EI.getNode()->getOperand(EI.getOperand()); EVT VT = Op.getValueType(); - if (VT == MVT::Flag) + if (VT == MVT::Glue) return "color=red,style=bold"; else if (VT == MVT::Other) return "color=blue,style=dashed"; @@ -273,14 +273,14 @@ std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const { raw_string_ostream O(s); O << "SU(" << SU->NodeNum << "): "; if (SU->getNode()) { - SmallVector<SDNode *, 4> FlaggedNodes; - for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) - FlaggedNodes.push_back(N); - while (!FlaggedNodes.empty()) { + SmallVector<SDNode *, 4> GluedNodes; + for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) + GluedNodes.push_back(N); + while (!GluedNodes.empty()) { O << DOTGraphTraits<SelectionDAG*> - ::getSimpleNodeLabel(FlaggedNodes.back(), DAG); - FlaggedNodes.pop_back(); - if (!FlaggedNodes.empty()) + ::getSimpleNodeLabel(GluedNodes.back(), DAG); + GluedNodes.pop_back(); + if (!GluedNodes.empty()) O << "\n "; } } else { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index b74f600..691390e 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -28,6 +28,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include <cctype> using namespace llvm; namespace llvm { @@ -530,7 +531,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm, setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand); setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand); } - + // These operations default to expand. setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand); @@ -538,8 +539,8 @@ TargetLowering::TargetLowering(const TargetMachine &tm, // Most targets ignore the @llvm.prefetch intrinsic. setOperationAction(ISD::PREFETCH, MVT::Other, Expand); - - // ConstantFP nodes default to expand. Targets can either change this to + + // ConstantFP nodes default to expand. Targets can either change this to // Legal, in which case all fp constants are legal, or use isFPImmLegal() // to optimize expansions for certain constants. setOperationAction(ISD::ConstantFP, MVT::f32, Expand); @@ -560,18 +561,21 @@ TargetLowering::TargetLowering(const TargetMachine &tm, // Default ISD::TRAP to expand (which turns it into abort). 
setOperationAction(ISD::TRAP, MVT::Other, Expand); - + IsLittleEndian = TD->isLittleEndian(); ShiftAmountTy = PointerTy = MVT::getIntegerVT(8*TD->getPointerSize()); memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8; + maxStoresPerMemsetOptSize = maxStoresPerMemcpyOptSize + = maxStoresPerMemmoveOptSize = 4; benefitFromCodePlacementOpt = false; UseUnderscoreSetJmp = false; UseUnderscoreLongJmp = false; SelectIsExpensive = false; IntDivIsCheap = false; Pow2DivIsCheap = false; + JumpIsExpensive = false; StackPointerRegisterToSaveRestore = 0; ExceptionPointerRegister = 0; ExceptionSelectorRegister = 0; @@ -617,16 +621,16 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, // Figure out the right, legal destination reg to copy into. unsigned NumElts = VT.getVectorNumElements(); MVT EltTy = VT.getVectorElementType(); - + unsigned NumVectorRegs = 1; - - // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we + + // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we // could break down into LHS/RHS like LegalizeDAG does. if (!isPowerOf2_32(NumElts)) { NumVectorRegs = NumElts; NumElts = 1; } - + // Divide the input until we get to a supported size. This will always // end with a scalar if the target doesn't support vectors. while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) { @@ -635,7 +639,7 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, } NumIntermediates = NumVectorRegs; - + MVT NewVT = MVT::getVectorVT(EltTy, NumElts); if (!TLI->isTypeLegal(NewVT)) NewVT = EltTy; @@ -645,7 +649,7 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, RegisterVT = DestVT; if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); - + // Otherwise, promotion or legal types use the same number of registers as // the vector decimated to the appropriate level. return NumVectorRegs; @@ -750,7 +754,7 @@ void TargetLowering::computeRegisterProperties() { RegisterTypeForVT[MVT::ppcf128] = MVT::f64; TransformToType[MVT::ppcf128] = MVT::f64; ValueTypeActions.setTypeAction(MVT::ppcf128, Expand); - } + } // Decide how to handle f64. If the target does not have native f64 support, // expand it to i64 and we will be generating soft float library calls. @@ -776,13 +780,13 @@ void TargetLowering::computeRegisterProperties() { ValueTypeActions.setTypeAction(MVT::f32, Expand); } } - + // Loop over all of the vector value types to see which need transformations. for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { MVT VT = (MVT::SimpleValueType)i; if (isTypeLegal(VT)) continue; - + // Determine if there is a legal wider type. If so, we should promote to // that wider vector type. 
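To make the breakdown loop in getVectorTypeBreakdownMVT concrete: an illegal vector type is scalarized outright if its element count is not a power of two, and otherwise halved until a legal type appears. A standalone sketch, with isLegalVT standing in for TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts)) and isPowerOf2_32 from llvm/Support/MathExtras.h; on a hypothetical target whose widest legal vector is v4i32, v3f32 becomes 3 x f32 and v8i32 becomes 2 x v4i32:

    unsigned breakDownSketch(unsigned NumElts,
                             bool (*isLegalVT)(unsigned NumElts)) {
      unsigned NumVectorRegs = 1;
      if (!isPowerOf2_32(NumElts)) {   // non-power-of-2: scalarize outright
        NumVectorRegs = NumElts;
        NumElts = 1;
      }
      while (NumElts > 1 && !isLegalVT(NumElts)) {
        NumElts >>= 1;                 // halve until legal (or scalar)
        NumVectorRegs <<= 1;
      }
      return NumVectorRegs;            // number of intermediate values
    }
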
EVT EltVT = VT.getVectorElementType(); @@ -792,8 +796,8 @@ void TargetLowering::computeRegisterProperties() { for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { EVT SVT = (MVT::SimpleValueType)nVT; if (SVT.getVectorElementType() == EltVT && - SVT.getVectorNumElements() > NElts && - isTypeSynthesizable(SVT)) { + SVT.getVectorNumElements() > NElts && + isTypeLegal(SVT)) { TransformToType[i] = SVT; RegisterTypeForVT[i] = SVT; NumRegistersForVT[i] = 1; @@ -804,7 +808,7 @@ void TargetLowering::computeRegisterProperties() { } if (IsLegalWiderType) continue; } - + MVT IntermediateVT; EVT RegisterVT; unsigned NumIntermediates; @@ -812,7 +816,7 @@ void TargetLowering::computeRegisterProperties() { getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates, RegisterVT, this); RegisterTypeForVT[i] = RegisterVT; - + EVT NVT = VT.getPow2VectorType(); if (NVT == VT) { // Type is already a power of 2. The default action is to split. @@ -865,7 +869,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, unsigned &NumIntermediates, EVT &RegisterVT) const { unsigned NumElts = VT.getVectorNumElements(); - + // If there is a wider vector type with the same element type as this one, // we should widen to that legal vector type. This handles things like // <2 x float> -> <4 x float>. @@ -877,19 +881,19 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, return 1; } } - + // Figure out the right, legal destination reg to copy into. EVT EltTy = VT.getVectorElementType(); - + unsigned NumVectorRegs = 1; - - // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we + + // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we // could break down into LHS/RHS like LegalizeDAG does. if (!isPowerOf2_32(NumElts)) { NumVectorRegs = NumElts; NumElts = 1; } - + // Divide the input until we get to a supported size. This will always // end with a scalar if the target doesn't support vectors. while (NumElts > 1 && !isTypeLegal( @@ -899,7 +903,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, } NumIntermediates = NumVectorRegs; - + EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts); if (!isTypeLegal(NewVT)) NewVT = EltTy; @@ -909,13 +913,13 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, RegisterVT = DestVT; if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); - + // Otherwise, promotion or legal types use the same number of registers as // the vector decimated to the appropriate level. return NumVectorRegs; } -/// Get the EVTs and ArgFlags collections that represent the legalized return +/// Get the EVTs and ArgFlags collections that represent the legalized return /// type of the given function. This does not require a DAG or a return value, /// and is suitable for use before any DAGs for the function are constructed. /// TODO: Move this out of TargetLowering.cpp. @@ -988,11 +992,11 @@ unsigned TargetLowering::getJumpTableEncoding() const { // In non-pic modes, just use the address of a block. if (getTargetMachine().getRelocationModel() != Reloc::PIC_) return MachineJumpTableInfo::EK_BlockAddress; - + // In PIC mode, if the target supports a GPRel32 directive, use it. if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != 0) return MachineJumpTableInfo::EK_GPRel32BlockAddress; - + // Otherwise, use a label difference. 
return MachineJumpTableInfo::EK_LabelDifference32; } @@ -1036,11 +1040,11 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // Optimization Methods //===----------------------------------------------------------------------===// -/// ShrinkDemandedConstant - Check to see if the specified operand of the +/// ShrinkDemandedConstant - Check to see if the specified operand of the /// specified instruction is a constant integer. If so, check to see if there /// are any bits set in the constant that are not demanded. If so, shrink the /// constant and return true. -bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, +bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded) { DebugLoc dl = Op.getDebugLoc(); @@ -1062,7 +1066,7 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, EVT VT = Op.getValueType(); SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0), DAG.getConstant(Demanded & - C->getAPIntValue(), + C->getAPIntValue(), VT)); return CombineTo(Op, New); } @@ -1139,9 +1143,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownZero = KnownOne = APInt(BitWidth, 0); // Other users may use these bits. - if (!Op.getNode()->hasOneUse()) { + if (!Op.getNode()->hasOneUse()) { if (Depth != 0) { - // If not at the root, Just compute the KnownZero/KnownOne bits to + // If not at the root, Just compute the KnownZero/KnownOne bits to // simplify things downstream. TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth); return false; @@ -1149,7 +1153,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If this is the root being simplified, allow it to have multiple uses, // just set the NewMask to all bits. NewMask = APInt::getAllOnesValue(BitWidth); - } else if (DemandedMask == 0) { + } else if (DemandedMask == 0) { // Not demanding any bits from Op. if (Op.getOpcode() != ISD::UNDEF) return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType())); @@ -1172,8 +1176,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // the RHS. if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { APInt LHSZero, LHSOne; + // Do not increment Depth here; that can cause an infinite loop. TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask, - LHSZero, LHSOne, Depth+1); + LHSZero, LHSOne, Depth); // If the LHS already has zeros where RHSC does, this and is dead. if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) return TLO.CombineTo(Op, Op.getOperand(0)); @@ -1182,16 +1187,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask)) return true; } - + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, KnownOne, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask, KnownZero2, KnownOne2, TLO, Depth+1)) return true; - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + // If all of the demanded bits are known one on one side, return the other. // These bits cannot contribute to the result of the 'and'. 
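The ISD::AND case above leans on known-bits reasoning: if the left operand is already known to be zero everywhere the constant mask is zero, the AND cannot change any demanded bit and is deleted. The same test, restated standalone (APInt from llvm/ADT/APInt.h):

    #include "llvm/ADT/APInt.h"
    using llvm::APInt;
    // (and x, C) is redundant under demanded mask M when x's known-zero bits
    // cover every bit that C clears, within M:
    bool andIsDead(const APInt &LHSKnownZero, const APInt &C, const APInt &M) {
      return (LHSKnownZero & M) == (~C & M);
    }
    // Example, 8 bits, M = 0xFF: x known zero in the top five bits
    // (LHSKnownZero = 0xF8) and C = 0x07 (~C = 0xF8) -> (and x, 7) == x.
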
if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) @@ -1214,15 +1219,15 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownZero |= KnownZero2; break; case ISD::OR: - if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, KnownOne, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask, KnownZero2, KnownOne2, TLO, Depth+1)) return true; - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'or'. if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask)) @@ -1248,15 +1253,15 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownOne |= KnownOne2; break; case ISD::XOR: - if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, KnownOne, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2, KnownOne2, TLO, Depth+1)) return true; - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'xor'. if ((KnownZero & NewMask) == NewMask) @@ -1274,12 +1279,12 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(), Op.getOperand(0), Op.getOperand(1))); - + // Output known-0 bits are known if clear or set in both the LHS & RHS. KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); // Output known-1 are known to be set if set in only one of the LHS, RHS. KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); - + // If all of the demanded bits on one side are known, and all of the set // bits on that side are also known to be set on the other side, turn this // into an AND, as we know the bits will be cleared. @@ -1288,11 +1293,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if ((KnownOne & KnownOne2) == KnownOne) { EVT VT = Op.getValueType(); SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), ANDC)); } } - + // If the RHS is a constant, see if we can simplify it. // for XOR, we prefer to force bits to 1 if they will make a -1. 
// if we can't force bits, try to shrink constant @@ -1317,37 +1322,37 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownOne = KnownOneOut; break; case ISD::SELECT: - if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero, + if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero, KnownOne, TLO, Depth+1)) return true; if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2, KnownOne2, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + // If the operands are constants, see if we can simplify them. if (TLO.ShrinkDemandedConstant(Op, NewMask)) return true; - + // Only known if known in both the LHS and RHS. KnownOne &= KnownOne2; KnownZero &= KnownZero2; break; case ISD::SELECT_CC: - if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero, + if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero, KnownOne, TLO, Depth+1)) return true; if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2, KnownOne2, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + // If the operands are constants, see if we can simplify them. if (TLO.ShrinkDemandedConstant(Op, NewMask)) return true; - + // Only known if known in both the LHS and RHS. KnownOne &= KnownOne2; KnownZero &= KnownZero2; @@ -1373,16 +1378,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (Diff < 0) { Diff = -Diff; Opc = ISD::SRL; - } - - SDValue NewSA = + } + + SDValue NewSA = TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType()); EVT VT = Op.getValueType(); return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, InOp.getOperand(0), NewSA)); } - } - + } + if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt), KnownZero, KnownOne, TLO, Depth+1)) return true; @@ -1421,7 +1426,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, unsigned ShAmt = SA->getZExtValue(); unsigned VTSize = VT.getSizeInBits(); SDValue InOp = Op.getOperand(0); - + // If the shift count is an invalid immediate, don't do anything. if (ShAmt >= BitWidth) break; @@ -1438,20 +1443,20 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (Diff < 0) { Diff = -Diff; Opc = ISD::SHL; - } - + } + SDValue NewSA = TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType()); return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, InOp.getOperand(0), NewSA)); } - } - + } + // Compute the new bits that are at the top now. if (SimplifyDemandedBits(InOp, (NewMask << ShAmt), KnownZero, KnownOne, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = KnownZero.lshr(ShAmt); KnownOne = KnownOne.lshr(ShAmt); @@ -1472,7 +1477,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { EVT VT = Op.getValueType(); unsigned ShAmt = SA->getZExtValue(); - + // If the shift count is an invalid immediate, don't do anything. 
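The SHL/SRL cases in this stretch merge a shift with an inner shift of the opposite direction into one shift by the difference, when the bits that differ are not demanded. The underlying identity for 32-bit values, as a plain-C++ sketch:

    #include <cstdint>
    // ((x >> 8) << 3) == ((x >> 5) & ~7u) for uint32_t x, so when the low
    // 3 bits are not demanded the pair collapses to a single 'srl x, 5';
    // a negative difference flips the direction to shl instead.
    uint32_t original(uint32_t x) { return (x >> 8) << 3; }
    uint32_t merged(uint32_t x)   { return (x >> 5) & ~7u; }
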
if (ShAmt >= BitWidth) break; @@ -1484,21 +1489,21 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); if (HighBits.intersects(NewMask)) InDemandedMask |= APInt::getSignBit(VT.getScalarType().getSizeInBits()); - + if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = KnownZero.lshr(ShAmt); KnownOne = KnownOne.lshr(ShAmt); - + // Handle the sign bit, adjusted to where it is now in the mask. APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt); - + // If the input sign bit is known to be zero, or if none of the top bits // are demanded, turn this into an unsigned shift right. if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) { - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0), Op.getOperand(1))); } else if (KnownOne.intersects(SignBit)) { // New bits are known one. @@ -1509,23 +1514,23 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::SIGN_EXTEND_INREG: { EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); - // Sign extension. Compute the demanded bits in the result that are not + // Sign extension. Compute the demanded bits in the result that are not // present in the input. APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EVT.getScalarType().getSizeInBits()); - + // If none of the extended bits are demanded, eliminate the sextinreg. if ((NewBits & NewMask) == 0) return TLO.CombineTo(Op, Op.getOperand(0)); - APInt InSignBit = APInt::getSignBit(EVT.getScalarType().getSizeInBits()); - InSignBit.zext(BitWidth); + APInt InSignBit = + APInt::getSignBit(EVT.getScalarType().getSizeInBits()).zext(BitWidth); APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EVT.getScalarType().getSizeInBits()) & NewMask; - + // Since the sign extended bits are demanded, we know that the sign // bit is demanded. InputDemandedBits |= InSignBit; @@ -1533,16 +1538,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits, KnownZero, KnownOne, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); // If the sign bit of the input is known set or clear, then we know the // top bits of the result. - + // If the input sign bit is known zero, convert this into a zero extension. if (KnownZero.intersects(InSignBit)) - return TLO.CombineTo(Op, + return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT)); - + if (KnownOne.intersects(InSignBit)) { // Input sign bit known set KnownOne |= NewBits; KnownZero &= ~NewBits; @@ -1555,23 +1560,22 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::ZERO_EXTEND: { unsigned OperandBitWidth = Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); - APInt InMask = NewMask; - InMask.trunc(OperandBitWidth); - + APInt InMask = NewMask.trunc(OperandBitWidth); + // If none of the top bits are demanded, convert this into an any_extend. 
APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask; if (!NewBits.intersects(NewMask)) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, - Op.getValueType(), + Op.getValueType(), Op.getOperand(0))); - + if (SimplifyDemandedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero.zext(BitWidth); - KnownOne.zext(BitWidth); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); KnownZero |= NewBits; break; } @@ -1581,31 +1585,31 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt InMask = APInt::getLowBitsSet(BitWidth, InBits); APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits); APInt NewBits = ~InMask & NewMask; - + // If none of the top bits are demanded, convert this into an any_extend. if (NewBits == 0) return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), Op.getOperand(0))); - + // Since some of the sign extended bits are demanded, we know that the sign // bit is demanded. APInt InDemandedBits = InMask & NewMask; InDemandedBits |= InSignBit; - InDemandedBits.trunc(InBits); - - if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero, + InDemandedBits = InDemandedBits.trunc(InBits); + + if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero, KnownOne, TLO, Depth+1)) return true; - KnownZero.zext(BitWidth); - KnownOne.zext(BitWidth); - + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); + // If the sign bit is known zero, convert this to a zero extend. if (KnownZero.intersects(InSignBit)) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, - Op.getValueType(), + Op.getValueType(), Op.getOperand(0))); - + // If the sign bit is known one, the top bits match. if (KnownOne.intersects(InSignBit)) { KnownOne |= NewBits; @@ -1619,14 +1623,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::ANY_EXTEND: { unsigned OperandBitWidth = Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); - APInt InMask = NewMask; - InMask.trunc(OperandBitWidth); + APInt InMask = NewMask.trunc(OperandBitWidth); if (SimplifyDemandedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero.zext(BitWidth); - KnownOne.zext(BitWidth); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); break; } case ISD::TRUNCATE: { @@ -1634,14 +1637,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // zero/one bits live out. unsigned OperandBitWidth = Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); - APInt TruncMask = NewMask; - TruncMask.zext(OperandBitWidth); + APInt TruncMask = NewMask.zext(OperandBitWidth); if (SimplifyDemandedBits(Op.getOperand(0), TruncMask, KnownZero, KnownOne, TLO, Depth+1)) return true; - KnownZero.trunc(BitWidth); - KnownOne.trunc(BitWidth); - + KnownZero = KnownZero.trunc(BitWidth); + KnownOne = KnownOne.trunc(BitWidth); + // If the input is only used by this truncate, see if we can shrink it based // on the known demanded bits. 
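A pattern worth noting in these extend/truncate hunks: every in-place `KnownZero.zext(BitWidth)` becomes `KnownZero = KnownZero.zext(BitWidth)`. This tracks an APInt API change in which zext/trunc return the resized value rather than mutating the receiver (an inference from the shape of the edits, not stated in the patch itself). Concretely:

    #include "llvm/ADT/APInt.h"
    using llvm::APInt;
    // With the value-returning API, forgetting the assignment silently
    // discards the resize:
    APInt KZ(64, 0xFFULL);
    KZ = KZ.trunc(32);   // now a 32-bit value holding 0xFF
    KZ = KZ.zext(64);    // back to 64 bits; the high 32 bits are zero
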
if (Op.getOperand(0).getNode()->hasOneUse()) { @@ -1661,25 +1663,24 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth); - HighBits = HighBits.lshr(ShAmt->getZExtValue()); - HighBits.trunc(BitWidth); + HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth); if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) { // None of the shifted in bits are needed. Add a truncate of the // shift input, then shift it. SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, - Op.getValueType(), + Op.getValueType(), In.getOperand(0)); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), - NewTrunc, + NewTrunc, In.getOperand(1))); } break; } } - - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); break; } case ISD::AssertZext: { @@ -1689,7 +1690,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero, KnownOne, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); APInt InMask = APInt::getLowBitsSet(BitWidth, @@ -1697,7 +1698,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownZero |= ~InMask & NewMask; break; } - case ISD::BIT_CONVERT: + case ISD::BITCAST: #if 0 // If this is an FP->Int bitcast and if the sign bit is the only thing that // is demanded, turn this into a FGETSIGN. @@ -1709,7 +1710,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, isOperationLegal(ISD::FGETSIGN, Op.getValueType())) { // Make a FGETSIGN + SHL to move the sign bit into the appropriate // place. We expect the SHL to be eliminated by other optimizations. - SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(), + SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(), Op.getOperand(0)); unsigned ShVal = Op.getValueType().getSizeInBits()-1; SDValue ShAmt = TLO.DAG.getConstant(ShVal, getShiftAmountTy()); @@ -1742,21 +1743,21 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth); break; } - + // If we know the value of all of the demanded bits, return this as a // constant. if ((NewMask & (KnownZero|KnownOne)) == NewMask) return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType())); - + return false; } -/// computeMaskedBitsForTargetNode - Determine which of the bits specified -/// in Mask are known to be either zero or one and return them in the +/// computeMaskedBitsForTargetNode - Determine which of the bits specified +/// in Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. -void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, +void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, const APInt &Mask, - APInt &KnownZero, + APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { @@ -1817,7 +1818,7 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { (KnownOne.countPopulation() == 1); } -/// SimplifySetCC - Try to simplify a setcc built with the specified operands +/// SimplifySetCC - Try to simplify a setcc built with the specified operands /// and cc. If it is unable to simplify it, return a null SDValue. 
SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, @@ -1869,6 +1870,30 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } + SDValue CTPOP = N0; + // Look through truncs that don't change the value of a ctpop. + if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE) + CTPOP = N0.getOperand(0); + + if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP && + (N0 == CTPOP || N0.getValueType().getSizeInBits() > + Log2_32_Ceil(CTPOP.getValueType().getSizeInBits()))) { + EVT CTVT = CTPOP.getValueType(); + SDValue CTOp = CTPOP.getOperand(0); + + // (ctpop x) u< 2 -> (x & x-1) == 0 + // (ctpop x) u> 1 -> (x & x-1) != 0 + if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){ + SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp, + DAG.getConstant(1, CTVT)); + SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub); + ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE; + return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, CTVT), CC); + } + + // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal. + } + // If the LHS is '(and load, const)', the RHS is 0, // the test is for equality or unsigned, and all 1 bits of the const are // in the same partial word, see if we can shorten the load. @@ -1884,7 +1909,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (!Lod->isVolatile() && Lod->isUnindexed()) { unsigned origWidth = N0.getValueType().getSizeInBits(); unsigned maskWidth = origWidth; - // We can narrow (e.g.) 16-bit extending loads on 32-bit target to + // We can narrow (e.g.) 16-bit extending loads on 32-bit target to // 8 bits, but have to be careful... if (Lod->getExtensionType() != ISD::NON_EXTLOAD) origWidth = Lod->getMemoryVT().getSizeInBits(); @@ -1916,10 +1941,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, DAG.getConstant(bestOffset, PtrType)); unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, - Lod->getSrcValue(), - Lod->getSrcValueOffset() + bestOffset, + Lod->getPointerInfo().getWithOffset(bestOffset), false, false, NewAlign); - return DAG.getSetCC(dl, VT, + return DAG.getSetCC(dl, VT, DAG.getNode(ISD::AND, dl, newVT, NewLoad, DAG.getConstant(bestMask.trunc(bestWidth), newVT)), @@ -1969,7 +1993,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, (isOperationLegal(ISD::SETCC, newVT) && getCondCodeAction(Cond, newVT)==Legal)) return DAG.getSetCC(dl, VT, N0.getOperand(0), - DAG.getConstant(APInt(C1).trunc(InSize), newVT), + DAG.getConstant(C1.trunc(InSize), newVT), Cond); break; } @@ -1987,7 +2011,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // the sign extension, it is impossible for both sides to be equal. if (C1.getMinSignedBits() > ExtSrcTyBits) return DAG.getConstant(Cond == ISD::SETNE, VT); - + SDValue ZextOp; EVT Op0Ty = N0.getOperand(0).getValueType(); if (Op0Ty == ExtSrcTy) { @@ -2000,10 +2024,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(ZextOp.getNode()); // Otherwise, make this a use of a zext. 
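The new ctpop fold above is the classic clear-lowest-set-bit trick: x has at most one bit set exactly when x & (x-1) is zero. A plain-C++ statement of the two rewritten comparisons:

    #include <cstdint>
    // (ctpop x) u< 2  ->  (x & (x-1)) == 0   (zero or one bit set)
    // (ctpop x) u> 1  ->  (x & (x-1)) != 0   (two or more bits set)
    bool atMostOneBitSet(uint32_t x)  { return (x & (x - 1)) == 0; }
    bool twoOrMoreBitsSet(uint32_t x) { return (x & (x - 1)) != 0; }
    // e.g. x = 0b101000: x-1 = 0b100111, x & (x-1) = 0b100000 -> >= 2 bits.
    // x = 0 also works: 0 & 0xFFFFFFFF == 0, and ctpop(0) = 0 < 2.
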
- return DAG.getSetCC(dl, VT, ZextOp, + return DAG.getSetCC(dl, VT, ZextOp, DAG.getConstant(C1 & APInt::getLowBitsSet( ExtDstTyBits, - ExtSrcTyBits), + ExtSrcTyBits), ExtDstTy), Cond); } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) && @@ -2013,16 +2037,16 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) { bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1); if (TrueWhenTrue) - return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); + return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); // Invert the condition. ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); - CC = ISD::getSetCCInverse(CC, + CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType().isInteger()); return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); } if ((N0.getOpcode() == ISD::XOR || - (N0.getOpcode() == ISD::AND && + (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::XOR && N0.getOperand(1) == N0.getOperand(0).getOperand(1))) && isa<ConstantSDNode>(N0.getOperand(1)) && @@ -2038,7 +2062,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (N0.getOpcode() == ISD::XOR) Val = N0.getOperand(0); else { - assert(N0.getOpcode() == ISD::AND && + assert(N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::XOR); // ((X^1)&1)^1 -> X & 1 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(), @@ -2082,7 +2106,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } } - + APInt MinVal, MaxVal; unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits(); if (ISD::isSignedIntSetCC(Cond)) { @@ -2097,7 +2121,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (Cond == ISD::SETGE || Cond == ISD::SETUGE) { if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true // X >= C0 --> X > (C0-1) - return DAG.getSetCC(dl, VT, N0, + return DAG.getSetCC(dl, VT, N0, DAG.getConstant(C1-1, N1.getValueType()), (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT); } @@ -2105,7 +2129,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (Cond == ISD::SETLE || Cond == ISD::SETULE) { if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true // X <= C0 --> X < (C0+1) - return DAG.getSetCC(dl, VT, N0, + return DAG.getSetCC(dl, VT, N0, DAG.getConstant(C1+1, N1.getValueType()), (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT); } @@ -2128,12 +2152,12 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // If we have setult X, 1, turn it into seteq X, 0 if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(MinVal, N0.getValueType()), + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(MinVal, N0.getValueType()), ISD::SETEQ); // If we have setugt X, Max-1, turn it into seteq X, Max else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1) - return DAG.getSetCC(dl, VT, N0, + return DAG.getSetCC(dl, VT, N0, DAG.getConstant(MaxVal, N0.getValueType()), ISD::SETEQ); @@ -2141,9 +2165,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // by changing cc. 
// SETUGT X, SINTMAX -> SETLT X, 0 - if (Cond == ISD::SETUGT && + if (Cond == ISD::SETUGT && C1 == APInt::getSignedMaxValue(OperandBitSize)) - return DAG.getSetCC(dl, VT, N0, + return DAG.getSetCC(dl, VT, N0, DAG.getConstant(0, N1.getValueType()), ISD::SETLT); @@ -2203,7 +2227,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getUNDEF(VT); } } - + // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the // constant if knowing that the operand is non-nan is enough. We prefer to // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to @@ -2278,14 +2302,14 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (DAG.isCommutativeBinOp(N0.getOpcode())) { // If X op Y == Y op X, try other combinations. if (N0.getOperand(0) == N1.getOperand(1)) - return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0), + return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0), Cond); if (N0.getOperand(1) == N1.getOperand(0)) - return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1), + return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1), Cond); } } - + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) { if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { // Turn (X+C1) == C2 --> X == C2-C1 @@ -2295,7 +2319,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, LHSR->getAPIntValue(), N0.getValueType()), Cond); } - + // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0. if (N0.getOpcode() == ISD::XOR) // If we know that all of the inverted bits are zero, don't bother @@ -2308,7 +2332,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, N0.getValueType()), Cond); } - + // Turn (C1-X) == C2 --> X == C1-C2 if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) { if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) { @@ -2319,7 +2343,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, N0.getValueType()), Cond); } - } + } } // Simplify (X+Z) == X --> Z == 0 @@ -2334,7 +2358,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!"); // (Z-X) == X --> Z == X<<1 SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), - N1, + N1, DAG.getConstant(1, getShiftAmountTy())); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(SH.getNode()); @@ -2356,7 +2380,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } else if (N1.getNode()->hasOneUse()) { assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!"); // X == (Z-X) --> X<<1 == Z - SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0, + SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0, DAG.getConstant(1, getShiftAmountTy())); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(SH.getNode()); @@ -2443,7 +2467,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, /// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the /// node is a GlobalAddress + offset. 
-bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue* &GA, +bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const { if (isa<GlobalAddressSDNode>(N)) { GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N); @@ -2469,6 +2493,7 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue* &GA, } } } + return false; } @@ -2497,7 +2522,10 @@ TargetLowering::getConstraintType(const std::string &Constraint) const { return C_Memory; case 'i': // Simple Integer or Relocatable Constant case 'n': // Simple Integer + case 'E': // Floating Point Constant + case 'F': // Floating Point Constant case 's': // Relocatable Constant + case 'p': // Address. case 'X': // Allow ANY value. case 'I': // Target registers. case 'J': @@ -2507,11 +2535,13 @@ TargetLowering::getConstraintType(const std::string &Constraint) const { case 'N': case 'O': case 'P': + case '<': + case '>': return C_Other; } } - - if (Constraint.size() > 1 && Constraint[0] == '{' && + + if (Constraint.size() > 1 && Constraint[0] == '{' && Constraint[Constraint.size()-1] == '}') return C_Register; return C_Unknown; @@ -2550,7 +2580,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, // is possible and fine if either GV or C are missing. ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op); - + // If we have "(add GV, C)", pull out GV/C if (Op.getOpcode() == ISD::ADD) { C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); @@ -2562,14 +2592,14 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, if (C == 0 || GA == 0) C = 0, GA = 0; } - + // If we find a valid operand, map to the TargetXXX version so that the // value itself doesn't get selected. if (GA) { // Either &GV or &GV+C if (ConstraintLetter != 'n') { int64_t Offs = GA->getOffset(); if (C) Offs += C->getZExtValue(); - Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), + Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), C ? C->getDebugLoc() : DebugLoc(), Op.getValueType(), Offs)); return; @@ -2613,8 +2643,8 @@ getRegForInlineAsmConstraint(const std::string &Constraint, for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(), E = RI->regclass_end(); RCI != E; ++RCI) { const TargetRegisterClass *RC = *RCI; - - // If none of the value types for this register class are valid, we + + // If none of the value types for this register class are valid, we // can't use it. For example, 64-bit reg classes on 32-bit targets. bool isLegal = false; for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); @@ -2624,16 +2654,16 @@ getRegForInlineAsmConstraint(const std::string &Constraint, break; } } - + if (!isLegal) continue; - - for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); + + for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E; ++I) { if (RegName.equals_lower(RI->getName(*I))) return std::make_pair(*I, RC); } } - + return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0)); } @@ -2655,6 +2685,186 @@ unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const { } +/// ParseConstraints - Split up the constraint string from the inline +/// assembly value into the specific constraints and their prefixes, +/// and also tie in the associated operand values. +/// If this returns an empty vector, and if the constraint string itself +/// isn't empty, there was an error parsing. 
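For orientation before the parser that follows, here is what getConstraintType now yields for the newly handled letters, as read off the switch above ('<' and '>' are GCC's autodecrement/autoincrement memory constraints). TLI is an assumed TargetLowering reference, shown only for illustration:

    TargetLowering::ConstraintType CT;
    CT = TLI.getConstraintType("E");     // C_Other: FP constant (newly handled)
    CT = TLI.getConstraintType("p");     // C_Other: address constraint (new)
    CT = TLI.getConstraintType("<");     // C_Other: autodecrement memory (new)
    CT = TLI.getConstraintType("{eax}"); // C_Register: explicit physical reg
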
+TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( + ImmutableCallSite CS) const { + /// ConstraintOperands - Information about all of the constraints. + AsmOperandInfoVector ConstraintOperands; + const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); + unsigned maCount = 0; // Largest number of multiple alternative constraints. + + // Do a prepass over the constraints, canonicalizing them, and building up the + // ConstraintOperands list. + InlineAsm::ConstraintInfoVector + ConstraintInfos = IA->ParseConstraints(); + + unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. + unsigned ResNo = 0; // ResNo - The result number of the next output. + + for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) { + ConstraintOperands.push_back(AsmOperandInfo(ConstraintInfos[i])); + AsmOperandInfo &OpInfo = ConstraintOperands.back(); + + // Update multiple alternative constraint count. + if (OpInfo.multipleAlternatives.size() > maCount) + maCount = OpInfo.multipleAlternatives.size(); + + OpInfo.ConstraintVT = MVT::Other; + + // Compute the value type for each operand. + switch (OpInfo.Type) { + case InlineAsm::isOutput: + // Indirect outputs just consume an argument. + if (OpInfo.isIndirect) { + OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); + break; + } + + // The return value of the call is this value. As such, there is no + // corresponding argument. + assert(!CS.getType()->isVoidTy() && + "Bad inline asm!"); + if (const StructType *STy = dyn_cast<StructType>(CS.getType())) { + OpInfo.ConstraintVT = getValueType(STy->getElementType(ResNo)); + } else { + assert(ResNo == 0 && "Asm only has one result!"); + OpInfo.ConstraintVT = getValueType(CS.getType()); + } + ++ResNo; + break; + case InlineAsm::isInput: + OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); + break; + case InlineAsm::isClobber: + // Nothing to do. + break; + } + + if (OpInfo.CallOperandVal) { + const llvm::Type *OpTy = OpInfo.CallOperandVal->getType(); + if (OpInfo.isIndirect) { + const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy); + if (!PtrTy) + report_fatal_error("Indirect operand for inline asm not a pointer!"); + OpTy = PtrTy->getElementType(); + } + // If OpTy is not a single value, it may be a struct/union that we + // can tile with integers. + if (!OpTy->isSingleValueType() && OpTy->isSized()) { + unsigned BitSize = TD->getTypeSizeInBits(OpTy); + switch (BitSize) { + default: break; + case 1: + case 8: + case 16: + case 32: + case 64: + case 128: + OpInfo.ConstraintVT = + EVT::getEVT(IntegerType::get(OpTy->getContext(), BitSize), true); + break; + } + } else if (dyn_cast<PointerType>(OpTy)) { + OpInfo.ConstraintVT = MVT::getIntegerVT(8*TD->getPointerSize()); + } else { + OpInfo.ConstraintVT = EVT::getEVT(OpTy, true); + } + } + } + + // If we have multiple alternative constraints, select the best alternative. + if (ConstraintInfos.size()) { + if (maCount) { + unsigned bestMAIndex = 0; + int bestWeight = -1; + // weight: -1 = invalid match, and 0 = so-so match to 5 = good match. + int weight = -1; + unsigned maIndex; + // Compute the sums of the weights for each alternative, keeping track + // of the best (highest weight) one so far. 
+ for (maIndex = 0; maIndex < maCount; ++maIndex) { + int weightSum = 0; + for (unsigned cIndex = 0, eIndex = ConstraintOperands.size(); + cIndex != eIndex; ++cIndex) { + AsmOperandInfo& OpInfo = ConstraintOperands[cIndex]; + if (OpInfo.Type == InlineAsm::isClobber) + continue; + + // If this is an output operand with a matching input operand, + // look up the matching input. If their types mismatch, e.g. one + // is an integer, the other is floating point, or their sizes are + // different, flag it as an maCantMatch. + if (OpInfo.hasMatchingInput()) { + AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; + if (OpInfo.ConstraintVT != Input.ConstraintVT) { + if ((OpInfo.ConstraintVT.isInteger() != + Input.ConstraintVT.isInteger()) || + (OpInfo.ConstraintVT.getSizeInBits() != + Input.ConstraintVT.getSizeInBits())) { + weightSum = -1; // Can't match. + break; + } + } + } + weight = getMultipleConstraintMatchWeight(OpInfo, maIndex); + if (weight == -1) { + weightSum = -1; + break; + } + weightSum += weight; + } + // Update best. + if (weightSum > bestWeight) { + bestWeight = weightSum; + bestMAIndex = maIndex; + } + } + + // Now select chosen alternative in each constraint. + for (unsigned cIndex = 0, eIndex = ConstraintOperands.size(); + cIndex != eIndex; ++cIndex) { + AsmOperandInfo& cInfo = ConstraintOperands[cIndex]; + if (cInfo.Type == InlineAsm::isClobber) + continue; + cInfo.selectAlternative(bestMAIndex); + } + } + } + + // Check and hook up tied operands, choose constraint code to use. + for (unsigned cIndex = 0, eIndex = ConstraintOperands.size(); + cIndex != eIndex; ++cIndex) { + AsmOperandInfo& OpInfo = ConstraintOperands[cIndex]; + + // If this is an output operand with a matching input operand, look up the + // matching input. If their types mismatch, e.g. one is an integer, the + // other is floating point, or their sizes are different, flag it as an + // error. + if (OpInfo.hasMatchingInput()) { + AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; + + if (OpInfo.ConstraintVT != Input.ConstraintVT) { + if ((OpInfo.ConstraintVT.isInteger() != + Input.ConstraintVT.isInteger()) || + (OpInfo.ConstraintVT.getSizeInBits() != + Input.ConstraintVT.getSizeInBits())) { + report_fatal_error("Unsupported asm: input constraint" + " with a matching output constraint of" + " incompatible type!"); + } + } + + } + } + + return ConstraintOperands; +} + + /// getConstraintGenerality - Return an integer indicating how general CT /// is. static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { @@ -2672,6 +2882,79 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { } } +/// Examine constraint type and operand type and determine a weight value. +/// This object must already have been set up with the operand type +/// and the current alternative constraint selected. +TargetLowering::ConstraintWeight + TargetLowering::getMultipleConstraintMatchWeight( + AsmOperandInfo &info, int maIndex) const { + InlineAsm::ConstraintCodeVector *rCodes; + if (maIndex >= (int)info.multipleAlternatives.size()) + rCodes = &info.Codes; + else + rCodes = &info.multipleAlternatives[maIndex].Codes; + ConstraintWeight BestWeight = CW_Invalid; + + // Loop over the options, keeping track of the most general one. 
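Condensing the alternative-selection policy implemented above: each alternative's score is the sum of its operands' weights, a single unmatchable operand (-1) poisons the whole alternative, and the highest sum wins. As a standalone sketch over a weight matrix W[alternative][operand]:

    #include <vector>
    // Returns the index of the best alternative; -1 anywhere disables a row.
    unsigned pickBestAlternative(const std::vector<std::vector<int> > &W) {
      int bestWeight = -1;
      unsigned bestIndex = 0;
      for (unsigned ma = 0; ma != W.size(); ++ma) {
        int sum = 0;
        for (unsigned op = 0; op != W[ma].size(); ++op) {
          if (W[ma][op] < 0) { sum = -1; break; }   // can't match: poison row
          sum += W[ma][op];
        }
        if (sum > bestWeight) { bestWeight = sum; bestIndex = ma; }
      }
      return bestIndex;
    }
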
+ for (unsigned i = 0, e = rCodes->size(); i != e; ++i) { + ConstraintWeight weight = + getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str()); + if (weight > BestWeight) + BestWeight = weight; + } + + return BestWeight; +} + +/// Examine constraint type and operand type and determine a weight value. +/// This object must already have been set up with the operand type +/// and the current alternative constraint selected. +TargetLowering::ConstraintWeight + TargetLowering::getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const { + ConstraintWeight weight = CW_Invalid; + Value *CallOperandVal = info.CallOperandVal; + // If we don't have a value, we can't do a match, + // but allow it at the lowest weight. + if (CallOperandVal == NULL) + return CW_Default; + // Look at the constraint type. + switch (*constraint) { + case 'i': // immediate integer. + case 'n': // immediate integer with a known value. + if (isa<ConstantInt>(CallOperandVal)) + weight = CW_Constant; + break; + case 's': // non-explicit intregal immediate. + if (isa<GlobalValue>(CallOperandVal)) + weight = CW_Constant; + break; + case 'E': // immediate float if host format. + case 'F': // immediate float. + if (isa<ConstantFP>(CallOperandVal)) + weight = CW_Constant; + break; + case '<': // memory operand with autodecrement. + case '>': // memory operand with autoincrement. + case 'm': // memory operand. + case 'o': // offsettable memory operand + case 'V': // non-offsettable memory operand + weight = CW_Memory; + break; + case 'r': // general register. + case 'g': // general register, memory operand or immediate integer. + // note: Clang converts "g" to "imr". + if (CallOperandVal->getType()->isIntegerTy()) + weight = CW_Register; + break; + case 'X': // any operand. + default: + weight = CW_Default; + break; + } + return weight; +} + /// ChooseConstraint - If there are multiple different constraints that we /// could pick for this operand (e.g. "imr") try to pick the 'best' one. /// This is somewhat tricky: constraints fall into four classes: @@ -2721,12 +3004,12 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, break; } } - + // Things with matching constraints can only be registers, per gcc // documentation. This mainly affects "g" constraints. if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput()) continue; - + // This constraint letter is more general than the previous one, use it. int Generality = getConstraintGenerality(CType); if (Generality > BestGenerality) { @@ -2735,7 +3018,7 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, BestGenerality = Generality; } } - + OpInfo.ConstraintCode = OpInfo.Codes[BestIdx]; OpInfo.ConstraintType = BestType; } @@ -2744,10 +3027,10 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, /// type to use for the specific AsmOperandInfo, setting /// OpInfo.ConstraintCode and OpInfo.ConstraintType. void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, - SDValue Op, + SDValue Op, SelectionDAG *DAG) const { assert(!OpInfo.Codes.empty() && "Must have at least one constraint"); - + // Single-letter constraints ('r') are very common. if (OpInfo.Codes.size() == 1) { OpInfo.ConstraintCode = OpInfo.Codes[0]; @@ -2755,7 +3038,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, } else { ChooseConstraint(OpInfo, *this, Op, DAG); } - + // 'X' matches anything. 
   if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
     // Labels and constants are handled elsewhere ('X' is the only thing
@@ -2766,7 +3049,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
       OpInfo.CallOperandVal = v;
       return;
     }
-  
+
     // Otherwise, try to resolve it to something we know about by looking at
     // the actual operand type.
     if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
@@ -2782,7 +3065,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
 
 /// isLegalAddressingMode - Return true if the addressing mode represented
 /// by AM is legal for this target, for a load/store of the specified type.
-bool TargetLowering::isLegalAddressingMode(const AddrMode &AM, 
+bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                            const Type *Ty) const {
   // The default implementation supports a conservative RISCy r+r and
   // r+i addressing mode.
@@ -2790,12 +3073,12 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
   // Allows a sign-extended 16-bit immediate field.
   if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
     return false;
-  
+
   // No global is ever allowed as a base.
   if (AM.BaseGV)
     return false;
-  
-  // Only support r+r, 
+
+  // Only support r+r,
   switch (AM.Scale) {
   case 0:  // "r+i" or just "i", depending on HasBaseReg.
     break;
@@ -2810,7 +3093,7 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
     // Allow 2*r as r+r.
     break;
   }
-  
+
   return true;
 }
 
@@ -2818,19 +3101,19 @@
 /// return a DAG expression to select that will generate the same value by
 /// multiplying by a magic number.  See:
 /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
-SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, 
+SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
                                   std::vector<SDNode*>* Created) const {
   EVT VT = N->getValueType(0);
   DebugLoc dl= N->getDebugLoc();
-  
+
   // Check to see if we can do this.
   // FIXME: We should be more aggressive here.
   if (!isTypeLegal(VT))
     return SDValue();
-  
+
   APInt d = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
   APInt::ms magics = d.magic();
-  
+
   // Multiply the numerator (operand 0) by the magic value
   // FIXME: We should support doing a MUL in a wider type
   SDValue Q;
@@ -2844,7 +3127,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
   else
     return SDValue();       // No mulhs or equivalent
   // If d > 0 and m < 0, add the numerator
-  if (d.isStrictlyPositive() && magics.m.isNegative()) { 
+  if (d.isStrictlyPositive() && magics.m.isNegative()) {
     Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
     if (Created)
       Created->push_back(Q.getNode());
@@ -2857,7 +3140,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
   }
   // Shift right algebraic if shift value is nonzero
   if (magics.s > 0) {
-    Q = DAG.getNode(ISD::SRA, dl, VT, Q, 
+    Q = DAG.getNode(ISD::SRA, dl, VT, Q,
                     DAG.getConstant(magics.s, getShiftAmountTy()));
     if (Created)
       Created->push_back(Q.getNode());
@@ -2908,20 +3191,20 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
   if (magics.a == 0) {
     assert(magics.s < N1C->getAPIntValue().getBitWidth() &&
            "We shouldn't generate an undefined shift!");
-    return DAG.getNode(ISD::SRL, dl, VT, Q, 
+    return DAG.getNode(ISD::SRL, dl, VT, Q,
                        DAG.getConstant(magics.s, getShiftAmountTy()));
   } else {
     SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
     if (Created)
       Created->push_back(NPQ.getNode());
-    NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, 
+    NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
                       DAG.getConstant(1, getShiftAmountTy()));
     if (Created)
       Created->push_back(NPQ.getNode());
    NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
    if (Created)
      Created->push_back(NPQ.getNode());
-    return DAG.getNode(ISD::SRL, dl, VT, NPQ, 
+    return DAG.getNode(ISD::SRL, dl, VT, NPQ,
                        DAG.getConstant(magics.s-1, getShiftAmountTy()));
   }
 }
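The multiple-alternative machinery added in the ParseConstraints hunks above follows GCC's comma syntax: a constraint such as "r,m" offers one code per alternative, every operand must use the same alternative index, and the chosen index is the one whose summed weights are highest. Below is a minimal standalone sketch of that scoring loop; all types and names are hypothetical stand-ins rather than LLVM API, and the weight ordering (register 1 < memory 2 < constant 3, with -1 meaning no match) mirrors the CW_* values used in the patch.

    #include <algorithm>
    #include <string>
    #include <vector>

    // Toy version of getSingleConstraintMatchWeight: score one code letter.
    static int codeWeight(const std::string &code) {
      if (code == "i" || code == "n") return 3;  // immediate: best
      if (code == "m" || code == "o") return 2;  // memory
      if (code == "r")                return 1;  // general register
      return 0;                                  // 'X' and anything else
    }

    // ops[i][ma] holds operand i's codes for alternative ma,
    // e.g. the constraint "r,m" becomes { {"r"}, {"m"} }.
    typedef std::vector<std::vector<std::string> > OperandAlts;

    // Toy version of the weightSum loop in ParseConstraints: an operand
    // scores as the best of its codes, and an operand with no usable code
    // (-1) vetoes the whole alternative.
    static unsigned bestAlternative(const std::vector<OperandAlts> &ops,
                                    unsigned maCount) {
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      for (unsigned ma = 0; ma != maCount; ++ma) {
        int weightSum = 0;
        for (size_t i = 0; i != ops.size(); ++i) {
          int best = -1;
          for (size_t c = 0; c != ops[i][ma].size(); ++c)
            best = std::max(best, codeWeight(ops[i][ma][c]));
          if (best == -1) { weightSum = -1; break; }   // veto
          weightSum += best;
        }
        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = ma;
        }
      }
      return bestMAIndex;
    }

For an asm statement like asm("add %1,%0" : "=r,m"(x) : "r,m"(y)), ops would hold two OperandAlts of { {"r"}, {"m"} } and maCount would be 2; the real weigher additionally consults the operand's IR value and target overrides before summing, which this sketch elides.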
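The default isLegalAddressingMode shown above accepts only a small envelope: an optional base register plus a sign-extended 16-bit offset, r+r, or 2*r. The Scale == 1 and Scale == 2 guards fall in the lines elided between the hunks, so the sketch below restates the whole predicate under the assumption that those guards reject r+r+i and any 2*r combination with another base or offset, as in the same LLVM revision; the struct is a simplified stand-in, not the real TargetLowering::AddrMode.

    #include <cstdint>

    // Simplified stand-in for TargetLowering::AddrMode.
    struct AddrMode {
      bool    HasBaseGV;   // stands in for 'BaseGV != 0'
      int64_t BaseOffs;
      bool    HasBaseReg;
      int64_t Scale;
    };

    static bool defaultIsLegalAddressingMode(const AddrMode &AM) {
      // Allows a sign-extended 16-bit immediate field.
      if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16) - 1)
        return false;
      // No global is ever allowed as a base.
      if (AM.HasBaseGV)
        return false;
      switch (AM.Scale) {
      case 0:  return true;                              // "r+i" or just "i"
      case 1:  return !(AM.HasBaseReg && AM.BaseOffs);   // r+r, but not r+r+i
      case 2:  return !(AM.HasBaseReg || AM.BaseOffs);   // 2*r alone, as r+r
      default: return false;                             // no n*r
      }
    }

Under this check an AddrMode of {false, 42, true, 0} (r+42) is legal, while {false, 0, false, 4} (4*r) and any global-based mode are not.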
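BuildSDIV and BuildUDIV above replace a divide by a constant with a high multiply (MULHS/MULHU) plus shift and add fixups; magics.m, magics.s, and magics.a are the magic multiplier, shift amount, and add indicator computed by d.magic() (and, for the unsigned path not shown in this hunk, d.magicu()). As a self-contained illustration, independent of the DAG code, here are the exact instruction shapes the two routines emit for dividing a 32-bit value by 7, using the standard published magic pairs (signed: M = 0x92492493, s = 2; unsigned: M = 0x24924925, a = 1, s = 3). The final sign-bit add of the signed sequence falls in a part of BuildSDIV elided from this hunk. The sketch assumes arithmetic right shift on signed integers, which holds on all mainstream compilers.

    #include <cassert>
    #include <cstdint>

    // Signed n/7, the BuildSDIV shape: MULHS, then ADD the numerator
    // (taken because d > 0 and magics.m < 0), SRA by magics.s, and
    // finally add the sign bit so the quotient truncates toward zero.
    int32_t sdiv7(int32_t n) {
      int32_t M = (int32_t)0x92492493;                 // magics.m
      int32_t q = (int32_t)(((int64_t)M * n) >> 32);   // MULHS
      q += n;                                          // d > 0 && m < 0
      q >>= 2;                                         // SRA by magics.s
      q += (int32_t)((uint32_t)q >> 31);               // add sign bit
      return q;
    }

    // Unsigned n/7, the magics.a != 0 branch of BuildUDIV (the NPQ
    // sequence): MULHU, SUB, SRL by 1, ADD, SRL by magics.s - 1.
    uint32_t udiv7(uint32_t n) {
      uint32_t q = (uint32_t)(((uint64_t)0x24924925u * n) >> 32); // MULHU
      uint32_t npq = (n - q) >> 1;
      return (npq + q) >> 2;                           // magics.s - 1 == 2
    }

    int main() {
      for (int32_t n = -1000; n <= 1000; ++n)
        assert(sdiv7(n) == n / 7);
      for (uint32_t n = 0; n != 1000; ++n)
        assert(udiv7(n) == n / 7);
      return 0;
    }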