diff options
Diffstat (limited to 'contrib/llvm/lib/CodeGen/SelectionDAG')
27 files changed, 5735 insertions, 2949 deletions
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 69cf8d9..2abcdd5 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "dagcombine" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -40,6 +39,8 @@ #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "dagcombine" + STATISTIC(NodesCombined , "Number of dag nodes combined"); STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); @@ -50,11 +51,22 @@ STATISTIC(SlicedLoads, "Number of load sliced"); namespace { static cl::opt<bool> CombinerAA("combiner-alias-analysis", cl::Hidden, - cl::desc("Turn on alias analysis during testing")); + cl::desc("Enable DAG combiner alias-analysis heuristics")); static cl::opt<bool> CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, - cl::desc("Include global information in alias analysis")); + cl::desc("Enable DAG combiner's use of IR alias analysis")); + + static cl::opt<bool> + UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), + cl::desc("Enable DAG combiner's use of TBAA")); + +#ifndef NDEBUG + static cl::opt<std::string> + CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, + cl::desc("Only use DAG-combiner alias analysis in this" + " function")); +#endif /// Hidden option to stress test load slicing, i.e., when this option /// is enabled, load slicing bypasses most of its profitability guards. @@ -92,20 +104,19 @@ namespace { // contain duplicate or removed nodes. When choosing a node to // visit, we pop off the order stack until we find an item that is // also in the contents set. All operations are O(log N). 
- SmallPtrSet<SDNode*, 64> WorkListContents; - SmallVector<SDNode*, 64> WorkListOrder; + SmallPtrSet<SDNode*, 64> WorklistContents; + SmallVector<SDNode*, 64> WorklistOrder; // AA - Used for DAG load/store alias analysis. AliasAnalysis &AA; - /// AddUsersToWorkList - When an instruction is simplified, add all users of + /// AddUsersToWorklist - When an instruction is simplified, add all users of /// the instruction to the work lists because they might get more simplified /// now. /// - void AddUsersToWorkList(SDNode *N) { - for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); - UI != UE; ++UI) - AddToWorkList(*UI); + void AddUsersToWorklist(SDNode *N) { + for (SDNode *Node : N->uses()) + AddToWorklist(Node); } /// visit - call the node-specific routine that knows how to fold each @@ -113,17 +124,22 @@ namespace { SDValue visit(SDNode *N); public: - /// AddToWorkList - Add to the work list making sure its instance is at the + /// AddToWorklist - Add to the work list making sure its instance is at the /// back (next to be processed.) - void AddToWorkList(SDNode *N) { - WorkListContents.insert(N); - WorkListOrder.push_back(N); + void AddToWorklist(SDNode *N) { + // Skip handle nodes as they can't usefully be combined and confuse the + // zero-use deletion strategy. + if (N->getOpcode() == ISD::HANDLENODE) + return; + + WorklistContents.insert(N); + WorklistOrder.push_back(N); } - /// removeFromWorkList - remove all instances of N from the worklist. + /// removeFromWorklist - remove all instances of N from the worklist. 
/// - void removeFromWorkList(SDNode *N) { - WorkListContents.erase(N); + void removeFromWorklist(SDNode *N) { + WorklistContents.erase(N); } SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, @@ -212,6 +228,7 @@ namespace { SDValue visitSHL(SDNode *N); SDValue visitSRA(SDNode *N); SDValue visitSRL(SDNode *N); + SDValue visitRotate(SDNode *N); SDValue visitCTLZ(SDNode *N); SDValue visitCTLZ_ZERO_UNDEF(SDNode *N); SDValue visitCTTZ(SDNode *N); @@ -257,11 +274,12 @@ namespace { SDValue visitCONCAT_VECTORS(SDNode *N); SDValue visitEXTRACT_SUBVECTOR(SDNode *N); SDValue visitVECTOR_SHUFFLE(SDNode *N); + SDValue visitINSERT_SUBVECTOR(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS); - SDValue visitShiftByConstant(SDNode *N, unsigned Amt); + SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt); bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); @@ -271,6 +289,11 @@ namespace { bool NotExtCompare = false); SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, SDLoc DL, bool foldBooleans = true); + + bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, + SDValue &CC) const; + bool isOneUseSetCC(SDValue N) const; + SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); @@ -280,6 +303,10 @@ namespace { SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); + SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, + SDValue InnerPos, SDValue InnerNeg, + unsigned PosOpcode, unsigned NegOpcode, + SDLoc DL); SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL); SDValue ReduceLoadWidth(SDNode *N); SDValue ReduceLoadOpStoreWidth(SDNode *N); @@ -296,26 +323,7 @@ namespace { /// isAlias - Return true if 
there is any possibility that the two addresses /// overlap. - bool isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1, - const Value *SrcValue1, int SrcValueOffset1, - unsigned SrcValueAlign1, - const MDNode *TBAAInfo1, - SDValue Ptr2, int64_t Size2, bool IsVolatile2, - const Value *SrcValue2, int SrcValueOffset2, - unsigned SrcValueAlign2, - const MDNode *TBAAInfo2) const; - - /// isAlias - Return true if there is any possibility that the two addresses - /// overlap. - bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1); - - /// FindAliasInfo - Extracts the relevant alias information from the memory - /// node. Returns true if the operand was a load. - bool FindAliasInfo(SDNode *N, - SDValue &Ptr, int64_t &Size, bool &IsVolatile, - const Value *&SrcValue, int &SrcValueOffset, - unsigned &SrcValueAlignment, - const MDNode *&TBAAInfo) const; + bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const; /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, /// looking for a better chain (aliasing node.) @@ -326,6 +334,14 @@ namespace { /// \return True if some memory operations were changed. bool MergeConsecutiveStores(StoreSDNode *N); + /// \brief Try to transform a truncation where C is a constant: + /// (trunc (and X, C)) -> (and (trunc X), (trunc C)) + /// + /// \p N needs to be a truncation and its first operand an AND. Other + /// requirements are checked by the function (e.g. that trunc is + /// single-use) and if missed an empty SDValue is returned. + SDValue distributeTruncateThroughAnd(SDNode *N); + public: DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), @@ -370,16 +386,16 @@ namespace { namespace { -/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted +/// WorklistRemover - This class is a DAGUpdateListener that removes any deleted /// nodes from the worklist. 
-class WorkListRemover : public SelectionDAG::DAGUpdateListener { +class WorklistRemover : public SelectionDAG::DAGUpdateListener { DAGCombiner &DC; public: - explicit WorkListRemover(DAGCombiner &dc) + explicit WorklistRemover(DAGCombiner &dc) : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {} - virtual void NodeDeleted(SDNode *N, SDNode *E) { - DC.removeFromWorkList(N); + void NodeDeleted(SDNode *N, SDNode *E) override { + DC.removeFromWorklist(N); } }; } @@ -389,11 +405,11 @@ public: //===----------------------------------------------------------------------===// void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { - ((DAGCombiner*)DC)->AddToWorkList(N); + ((DAGCombiner*)DC)->AddToWorklist(N); } void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) { - ((DAGCombiner*)DC)->removeFromWorkList(N); + ((DAGCombiner*)DC)->removeFromWorklist(N); } SDValue TargetLowering::DAGCombinerInfo:: @@ -566,79 +582,130 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, } } - // isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc -// that selects between the values 1 and 0, making it equivalent to a setcc. -// Also, set the incoming LHS, RHS, and CC references to the appropriate -// nodes based on the type of node we are checking. This simplifies life a -// bit for the callers. -static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, - SDValue &CC) { +// that selects between the target values used for true and false, making it +// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to +// the appropriate nodes based on the type of node we are checking. This +// simplifies life a bit for the callers. 
+bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, + SDValue &CC) const { if (N.getOpcode() == ISD::SETCC) { LHS = N.getOperand(0); RHS = N.getOperand(1); CC = N.getOperand(2); return true; } - if (N.getOpcode() == ISD::SELECT_CC && - N.getOperand(2).getOpcode() == ISD::Constant && - N.getOperand(3).getOpcode() == ISD::Constant && - cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 && - cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) { - LHS = N.getOperand(0); - RHS = N.getOperand(1); - CC = N.getOperand(4); - return true; - } - return false; + + if (N.getOpcode() != ISD::SELECT_CC || + !TLI.isConstTrueVal(N.getOperand(2).getNode()) || + !TLI.isConstFalseVal(N.getOperand(3).getNode())) + return false; + + LHS = N.getOperand(0); + RHS = N.getOperand(1); + CC = N.getOperand(4); + return true; } // isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only // one use. If this is true, it allows the users to invert the operation for // free when it is profitable to do so. -static bool isOneUseSetCC(SDValue N) { +bool DAGCombiner::isOneUseSetCC(SDValue N) const { SDValue N0, N1, N2; if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) return true; return false; } +/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose +/// elements are all the same constant or undefined. +static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { + BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N); + if (!C) + return false; + + APInt SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + EVT EltVT = N->getValueType(0).getVectorElementType(); + return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, + HasAnyUndefs) && + EltVT.getSizeInBits() >= SplatBitSize); +} + +// \brief Returns the SDNode if it is a constant BuildVector or constant. 
+static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) { + if (isa<ConstantSDNode>(N)) + return N.getNode(); + BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N); + if(BV && BV->isConstant()) + return BV; + return nullptr; +} + +// \brief Returns the SDNode if it is a constant splat BuildVector or constant +// int. +static ConstantSDNode *isConstOrConstSplat(SDValue N) { + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) + return CN; + + if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { + BitVector UndefElements; + ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements); + + // BuildVectors can truncate their operands. Ignore that case here. + // FIXME: We blindly ignore splats which include undef which is overly + // pessimistic. + if (CN && UndefElements.none() && + CN->getValueType(0) == N.getValueType().getScalarType()) + return CN; + } + + return nullptr; +} + SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, SDValue N0, SDValue N1) { EVT VT = N0.getValueType(); - if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) { - if (isa<ConstantSDNode>(N1)) { - // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) - SDValue OpNode = - DAG.FoldConstantArithmetic(Opc, VT, - cast<ConstantSDNode>(N0.getOperand(1)), - cast<ConstantSDNode>(N1)); - return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); - } - if (N0.hasOneUse()) { - // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use - SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, - N0.getOperand(0), N1); - AddToWorkList(OpNode.getNode()); - return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); - } - } - - if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) { - if (isa<ConstantSDNode>(N0)) { - // reassoc. 
(op c2, (op x, c1)) -> (op x, (op c1, c2)) - SDValue OpNode = - DAG.FoldConstantArithmetic(Opc, VT, - cast<ConstantSDNode>(N1.getOperand(1)), - cast<ConstantSDNode>(N0)); - return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); - } - if (N1.hasOneUse()) { - // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use - SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, - N1.getOperand(0), N0); - AddToWorkList(OpNode.getNode()); - return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); + if (N0.getOpcode() == Opc) { + if (SDNode *L = isConstantBuildVectorOrConstantInt(N0.getOperand(1))) { + if (SDNode *R = isConstantBuildVectorOrConstantInt(N1)) { + // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) + SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R); + if (!OpNode.getNode()) + return SDValue(); + return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); + } + if (N0.hasOneUse()) { + // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one + // use + SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1); + if (!OpNode.getNode()) + return SDValue(); + AddToWorklist(OpNode.getNode()); + return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); + } + } + } + + if (N1.getOpcode() == Opc) { + if (SDNode *R = isConstantBuildVectorOrConstantInt(N1.getOperand(1))) { + if (SDNode *L = isConstantBuildVectorOrConstantInt(N0)) { + // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) + SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L); + if (!OpNode.getNode()) + return SDValue(); + return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); + } + if (N1.hasOneUse()) { + // reassoc. 
(op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one + // use + SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0); + if (!OpNode.getNode()) + return SDValue(); + AddToWorklist(OpNode.getNode()); + return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); + } } } @@ -658,14 +725,14 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, assert((!To[i].getNode() || N->getValueType(i) == To[i].getValueType()) && "Cannot combine value to value of different type!")); - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesWith(N, To); if (AddTo) { // Push the new nodes and any users onto the worklist for (unsigned i = 0, e = NumTo; i != e; ++i) { if (To[i].getNode()) { - AddToWorkList(To[i].getNode()); - AddUsersToWorkList(To[i].getNode()); + AddToWorklist(To[i].getNode()); + AddUsersToWorklist(To[i].getNode()); } } } @@ -676,7 +743,7 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, if (N->use_empty()) { // Nodes can be reintroduced into the worklist. Make sure we do not // process a node that has been replaced. - removeFromWorkList(N); + removeFromWorklist(N); // Finally, since the node is now dead, remove it from the graph. DAG.DeleteNode(N); @@ -688,24 +755,24 @@ void DAGCombiner:: CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { // Replace all uses. If any nodes become isomorphic to other nodes and // are deleted, make sure to remove them from our worklist. - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New); // Push the new node and any (possibly new) users onto the worklist. - AddToWorkList(TLO.New.getNode()); - AddUsersToWorkList(TLO.New.getNode()); + AddToWorklist(TLO.New.getNode()); + AddUsersToWorklist(TLO.New.getNode()); // Finally, if the node is now dead, remove it from the graph. 
The node // may not be dead if the replacement process recursively simplified to // something else needing this node. if (TLO.Old.getNode()->use_empty()) { - removeFromWorkList(TLO.Old.getNode()); + removeFromWorklist(TLO.Old.getNode()); // If the operands of this node are only used by the node, they will now // be dead. Make sure to visit them first to delete dead nodes early. for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse()) - AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode()); + AddToWorklist(TLO.Old.getNode()->getOperand(i).getNode()); DAG.DeleteNode(TLO.Old.getNode()); } @@ -721,7 +788,7 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { return false; // Revisit the node. - AddToWorkList(Op.getNode()); + AddToWorklist(Op.getNode()); // Replace the old value with the new one. ++NodesCombined; @@ -745,12 +812,12 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { dbgs() << "\nWith: "; Trunc.getNode()->dump(&DAG); dbgs() << '\n'); - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc); DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1)); - removeFromWorkList(Load); + removeFromWorklist(Load); DAG.DeleteNode(Load); - AddToWorkList(Trunc.getNode()); + AddToWorklist(Trunc.getNode()); } SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { @@ -798,9 +865,9 @@ SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { SDLoc dl(Op); bool Replace = false; SDValue NewOp = PromoteOperand(Op, PVT, Replace); - if (NewOp.getNode() == 0) + if (!NewOp.getNode()) return SDValue(); - AddToWorkList(NewOp.getNode()); + AddToWorklist(NewOp.getNode()); if (Replace) ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); @@ -813,9 +880,9 @@ SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { SDLoc dl(Op); bool 
Replace = false; SDValue NewOp = PromoteOperand(Op, PVT, Replace); - if (NewOp.getNode() == 0) + if (!NewOp.getNode()) return SDValue(); - AddToWorkList(NewOp.getNode()); + AddToWorklist(NewOp.getNode()); if (Replace) ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); @@ -848,7 +915,7 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { bool Replace0 = false; SDValue N0 = Op.getOperand(0); SDValue NN0 = PromoteOperand(N0, PVT, Replace0); - if (NN0.getNode() == 0) + if (!NN0.getNode()) return SDValue(); bool Replace1 = false; @@ -858,13 +925,13 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { NN1 = NN0; else { NN1 = PromoteOperand(N1, PVT, Replace1); - if (NN1.getNode() == 0) + if (!NN1.getNode()) return SDValue(); } - AddToWorkList(NN0.getNode()); + AddToWorklist(NN0.getNode()); if (NN1.getNode()) - AddToWorkList(NN1.getNode()); + AddToWorklist(NN1.getNode()); if (Replace0) ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode()); @@ -911,10 +978,10 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { N0 = ZExtPromoteOperand(Op.getOperand(0), PVT); else N0 = PromoteOperand(N0, PVT, Replace); - if (N0.getNode() == 0) + if (!N0.getNode()) return SDValue(); - AddToWorkList(N0.getNode()); + AddToWorklist(N0.getNode()); if (Replace) ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode()); @@ -994,12 +1061,12 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { dbgs() << "\nTo: "; Result.getNode()->dump(&DAG); dbgs() << '\n'); - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1)); - removeFromWorkList(N); + removeFromWorklist(N); DAG.DeleteNode(N); - AddToWorkList(Result.getNode()); + AddToWorklist(Result.getNode()); return true; } return false; @@ -1019,7 +1086,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // Add all the dag nodes to the worklist. 
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), E = DAG.allnodes_end(); I != E; ++I) - AddToWorkList(I); + AddToWorklist(I); // Create a dummy node (which is not added to allnodes), that adds a reference // to the root node, preventing it from being deleted, and tracking any @@ -1032,23 +1099,23 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // while the worklist isn't empty, find a node and // try and combine it. - while (!WorkListContents.empty()) { + while (!WorklistContents.empty()) { SDNode *N; - // The WorkListOrder holds the SDNodes in order, but it may contain + // The WorklistOrder holds the SDNodes in order, but it may contain // duplicates. // In order to avoid a linear scan, we use a set (O(log N)) to hold what the // worklist *should* contain, and check the node we want to visit is should // actually be visited. do { - N = WorkListOrder.pop_back_val(); - } while (!WorkListContents.erase(N)); + N = WorklistOrder.pop_back_val(); + } while (!WorklistContents.erase(N)); // If N has no uses, it is dead. Make sure to revisit all N's operands once // N is deleted from the DAG, since they too may now be dead or may have a // reduced number of uses, allowing other xforms. - if (N->use_empty() && N != &Dummy) { + if (N->use_empty()) { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - AddToWorkList(N->getOperand(i).getNode()); + AddToWorklist(N->getOperand(i).getNode()); DAG.DeleteNode(N); continue; @@ -1056,7 +1123,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { SDValue RV = combine(N); - if (RV.getNode() == 0) + if (!RV.getNode()) continue; ++NodesCombined; @@ -1080,7 +1147,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // Transfer debug value. 
DAG.TransferDbgValues(SDValue(N, 0), RV); - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); if (N->getNumValues() == RV.getNode()->getNumValues()) DAG.ReplaceAllUsesWith(N, RV.getNode()); else { @@ -1091,14 +1158,14 @@ void DAGCombiner::Run(CombineLevel AtLevel) { } // Push the new node and any users onto the worklist - AddToWorkList(RV.getNode()); - AddUsersToWorkList(RV.getNode()); + AddToWorklist(RV.getNode()); + AddUsersToWorklist(RV.getNode()); // Add any uses of the old node to the worklist in case this node is the // last one that uses them. They may become dead after this node is // deleted. for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - AddToWorkList(N->getOperand(i).getNode()); + AddToWorklist(N->getOperand(i).getNode()); // Finally, if the node is now dead, remove it from the graph. The node // may not be dead if the replacement process recursively simplified to @@ -1106,7 +1173,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { if (N->use_empty()) { // Nodes can be reintroduced into the worklist. Make sure we do not // process a node that has been replaced. - removeFromWorkList(N); + removeFromWorklist(N); // Finally, since the node is now dead, remove it from the graph. 
DAG.DeleteNode(N); @@ -1148,6 +1215,8 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::SHL: return visitSHL(N); case ISD::SRA: return visitSRA(N); case ISD::SRL: return visitSRL(N); + case ISD::ROTR: + case ISD::ROTL: return visitRotate(N); case ISD::CTLZ: return visitCTLZ(N); case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N); case ISD::CTTZ: return visitCTTZ(N); @@ -1193,6 +1262,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); + case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N); } return SDValue(); } @@ -1201,7 +1271,7 @@ SDValue DAGCombiner::combine(SDNode *N) { SDValue RV = visit(N); // If nothing happened, try a target-specific DAG combine. - if (RV.getNode() == 0) { + if (!RV.getNode()) { assert(N->getOpcode() != ISD::DELETED_NODE && "Node was deleted but visit returned NULL!"); @@ -1217,7 +1287,7 @@ SDValue DAGCombiner::combine(SDNode *N) { } // If nothing happened still, try promoting the operation. - if (RV.getNode() == 0) { + if (!RV.getNode()) { switch (N->getOpcode()) { default: break; case ISD::ADD: @@ -1247,17 +1317,23 @@ SDValue DAGCombiner::combine(SDNode *N) { // If N is a commutative binary node, try commuting it to enable more // sdisel CSE. - if (RV.getNode() == 0 && - SelectionDAG::isCommutativeBinOp(N->getOpcode()) && + if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) && N->getNumValues() == 1) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); // Constant operands are canonicalized to RHS. 
if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) { - SDValue Ops[] = { N1, N0 }; - SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), - Ops, 2); + SDValue Ops[] = {N1, N0}; + SDNode *CSENode; + if (const BinaryWithFlagsSDNode *BinNode = + dyn_cast<BinaryWithFlagsSDNode>(N)) { + CSENode = DAG.getNodeIfExists( + N->getOpcode(), N->getVTList(), Ops, BinNode->hasNoUnsignedWrap(), + BinNode->hasNoSignedWrap(), BinNode->isExact()); + } else { + CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops); + } if (CSENode) return SDValue(CSENode, 0); } @@ -1321,7 +1397,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { // Queue up for processing. TFs.push_back(Op.getNode()); // Clean up in case the token factor is removed. - AddToWorkList(Op.getNode()); + AddToWorklist(Op.getNode()); Changed = true; break; } @@ -1347,8 +1423,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { Result = DAG.getEntryNode(); } else { // New and improved token factor. - Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), - MVT::Other, &Ops[0], Ops.size()); + Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops); } // Don't add users to work list. @@ -1360,18 +1435,18 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { /// MERGE_VALUES can always be eliminated. SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); // Replacing results may cause a different MERGE_VALUES to suddenly // be CSE'd with N, and carry its uses with it. Iterate until no // uses remain, to ensure that the node can be safely deleted. // First add the users of this node to the work list so that they // can be tried again once they have new operands. 
- AddUsersToWorkList(N); + AddUsersToWorklist(N); do { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i)); } while (!N->use_empty()); - removeFromWorkList(N); + removeFromWorklist(N); DAG.DeleteNode(N); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -1447,7 +1522,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { N0.getOperand(1)); // reassociate add SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1); - if (RADD.getNode() != 0) + if (RADD.getNode()) return RADD; // fold ((0-A) + B) -> B-A if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) && @@ -1500,15 +1575,17 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (VT.isInteger() && !VT.isVector()) { APInt LHSZero, LHSOne; APInt RHSZero, RHSOne; - DAG.ComputeMaskedBits(N0, LHSZero, LHSOne); + DAG.computeKnownBits(N0, LHSZero, LHSOne); if (LHSZero.getBoolValue()) { - DAG.ComputeMaskedBits(N1, RHSZero, RHSOne); + DAG.computeKnownBits(N1, RHSZero, RHSOne); // If all possibly-set bits on the LHS are clear on the RHS, return an OR. // If all possibly-set bits on the RHS are clear on the LHS, return an OR. - if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) - return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1); + if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero){ + if (!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) + return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1); + } } } @@ -1593,10 +1670,10 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. APInt LHSZero, LHSOne; APInt RHSZero, RHSOne; - DAG.ComputeMaskedBits(N0, LHSZero, LHSOne); + DAG.computeKnownBits(N0, LHSZero, LHSOne); if (LHSZero.getBoolValue()) { - DAG.ComputeMaskedBits(N1, RHSZero, RHSOne); + DAG.computeKnownBits(N1, RHSZero, RHSOne); // If all possibly-set bits on the LHS are clear on the RHS, return an OR. 
// If all possibly-set bits on the RHS are clear on the LHS, return an OR. @@ -1645,7 +1722,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); - ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? 0 : + ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr : dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode()); EVT VT = N0.getValueType(); @@ -1778,22 +1855,6 @@ SDValue DAGCombiner::visitSUBE(SDNode *N) { return SDValue(); } -/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose -/// elements are all the same constant or undefined. -static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { - BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N); - if (!C) - return false; - - APInt SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - EVT EltVT = N->getValueType(0).getVectorElementType(); - return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, - HasAnyUndefs) && - EltVT.getSizeInBits() >= SplatBitSize); -} - SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -1814,10 +1875,10 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0); N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1); } else { - N0IsConst = dyn_cast<ConstantSDNode>(N0) != 0; + N0IsConst = dyn_cast<ConstantSDNode>(N0) != nullptr; ConstValue0 = N0IsConst ? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue() : APInt(); - N1IsConst = dyn_cast<ConstantSDNode>(N1) != 0; + N1IsConst = dyn_cast<ConstantSDNode>(N1) != nullptr; ConstValue1 = N1IsConst ? 
(dyn_cast<ConstantSDNode>(N1))->getAPIntValue() : APInt(); } @@ -1867,7 +1928,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { isa<ConstantSDNode>(N0.getOperand(1)))) { SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1)); - AddToWorkList(C3.getNode()); + AddToWorklist(C3.getNode()); return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3); } @@ -1875,7 +1936,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one // use. { - SDValue Sh(0,0), Y(0,0); + SDValue Sh(nullptr,0), Y(nullptr,0); // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). if (N0.getOpcode() == ISD::SHL && (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || @@ -1908,7 +1969,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // reassociate mul SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1); - if (RMUL.getNode() != 0) + if (RMUL.getNode()) return RMUL; return SDValue(); @@ -1917,8 +1978,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold vector ops @@ -1944,10 +2005,10 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(), N0, N1); } + // fold (sdiv X, pow2) -> simple ops after legalize - if (N1C && !N1C->isNullValue() && - (N1C->getAPIntValue().isPowerOf2() || - (-N1C->getAPIntValue()).isPowerOf2())) { + if (N1C && !N1C->isNullValue() && (N1C->getAPIntValue().isPowerOf2() || + (-N1C->getAPIntValue()).isPowerOf2())) { // If dividing by powers of two is cheap, then don't perform the following // fold. 
if (TLI.isPow2DivCheap()) @@ -1956,18 +2017,20 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); // Splat the sign bit into the register - SDValue SGN = DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, - DAG.getConstant(VT.getSizeInBits()-1, - getShiftAmountTy(N0.getValueType()))); - AddToWorkList(SGN.getNode()); + SDValue SGN = + DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, + DAG.getConstant(VT.getScalarSizeInBits() - 1, + getShiftAmountTy(N0.getValueType()))); + AddToWorklist(SGN.getNode()); // Add (N0 < 0) ? abs2 - 1 : 0; - SDValue SRL = DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN, - DAG.getConstant(VT.getSizeInBits() - lg2, - getShiftAmountTy(SGN.getValueType()))); + SDValue SRL = + DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN, + DAG.getConstant(VT.getScalarSizeInBits() - lg2, + getShiftAmountTy(SGN.getValueType()))); SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL); - AddToWorkList(SRL.getNode()); - AddToWorkList(ADD.getNode()); // Divide by pow2 + AddToWorklist(SRL.getNode()); + AddToWorklist(ADD.getNode()); // Divide by pow2 SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD, DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType()))); @@ -1976,14 +2039,13 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { if (N1C->getAPIntValue().isNonNegative()) return SRA; - AddToWorkList(SRA.getNode()); - return DAG.getNode(ISD::SUB, SDLoc(N), VT, - DAG.getConstant(0, VT), SRA); + AddToWorklist(SRA.getNode()); + return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA); } // if integer divide is expensive and we satisfy the requirements, emit an // alternate sequence. 
- if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) { + if (N1C && !TLI.isIntDivCheap()) { SDValue Op = BuildSDIV(N); if (Op.getNode()) return Op; } @@ -2001,8 +2063,8 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue DAGCombiner::visitUDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold vector ops @@ -2029,13 +2091,13 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { DAG.getConstant(SHC->getAPIntValue() .logBase2(), ADDVT)); - AddToWorkList(Add.getNode()); + AddToWorklist(Add.getNode()); return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add); } } } // fold (udiv x, c) -> alternate - if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) { + if (N1C && !TLI.isIntDivCheap()) { SDValue Op = BuildUDIV(N); if (Op.getNode()) return Op; } @@ -2053,8 +2115,8 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { SDValue DAGCombiner::visitSREM(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold (srem c1, c2) -> c1%c2 @@ -2071,13 +2133,13 @@ SDValue DAGCombiner::visitSREM(SDNode *N) { // X%C to the equivalent of X-X/C*C. 
if (N1C && !N1C->isNullValue()) { SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1); - AddToWorkList(Div.getNode()); + AddToWorklist(Div.getNode()); SDValue OptimizedDiv = combine(Div.getNode()); if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, OptimizedDiv, N1); SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); - AddToWorkList(Mul.getNode()); + AddToWorklist(Mul.getNode()); return Sub; } } @@ -2095,8 +2157,8 @@ SDValue DAGCombiner::visitSREM(SDNode *N) { SDValue DAGCombiner::visitUREM(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold (urem c1, c2) -> c1%c2 @@ -2114,7 +2176,7 @@ SDValue DAGCombiner::visitUREM(SDNode *N) { DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT)); - AddToWorkList(Add.getNode()); + AddToWorklist(Add.getNode()); return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add); } } @@ -2124,13 +2186,13 @@ SDValue DAGCombiner::visitUREM(SDNode *N) { // X%C to the equivalent of X-X/C*C. 
if (N1C && !N1C->isNullValue()) { SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1); - AddToWorkList(Div.getNode()); + AddToWorklist(Div.getNode()); SDValue OptimizedDiv = combine(Div.getNode()); if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, OptimizedDiv, N1); SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); - AddToWorkList(Mul.getNode()); + AddToWorklist(Mul.getNode()); return Sub; } } @@ -2229,9 +2291,9 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, bool HiExists = N->hasAnyUseOfValue(1); if (!HiExists && (!LegalOperations || - TLI.isOperationLegal(LoOp, N->getValueType(0)))) { + TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) { SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), - N->op_begin(), N->getNumOperands()); + ArrayRef<SDUse>(N->op_begin(), N->op_end())); return CombineTo(N, Res, Res); } @@ -2241,7 +2303,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, (!LegalOperations || TLI.isOperationLegal(HiOp, N->getValueType(1)))) { SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), - N->op_begin(), N->getNumOperands()); + ArrayRef<SDUse>(N->op_begin(), N->op_end())); return CombineTo(N, Res, Res); } @@ -2252,8 +2314,8 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, // If the two computed results can be simplified separately, separate them. 
if (LoExists) { SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), - N->op_begin(), N->getNumOperands()); - AddToWorkList(Lo.getNode()); + ArrayRef<SDUse>(N->op_begin(), N->op_end())); + AddToWorklist(Lo.getNode()); SDValue LoOpt = combine(Lo.getNode()); if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() && (!LegalOperations || @@ -2263,8 +2325,8 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, if (HiExists) { SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), - N->op_begin(), N->getNumOperands()); - AddToWorkList(Hi.getNode()); + ArrayRef<SDUse>(N->op_begin(), N->op_end())); + AddToWorklist(Hi.getNode()); SDValue HiOpt = combine(Hi.getNode()); if (HiOpt.getNode() && HiOpt != Hi && (!LegalOperations || @@ -2403,7 +2465,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), N0.getOperand(0).getValueType(), N0.getOperand(0), N1.getOperand(0)); - AddToWorkList(ORNode.getNode()); + AddToWorklist(ORNode.getNode()); return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode); } @@ -2417,7 +2479,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), N0.getOperand(0).getValueType(), N0.getOperand(0), N1.getOperand(0)); - AddToWorkList(ORNode.getNode()); + AddToWorklist(ORNode.getNode()); return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode, N0.getOperand(1)); } @@ -2442,7 +2504,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) { SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1); SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op); - AddToWorkList(Op.getNode()); + AddToWorklist(Op.getNode()); return BC; } } @@ -2454,35 +2516,66 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // The type-legalizer generates this pattern when loading illegal // vector types from 
memory. In many cases this allows additional shuffle // optimizations. - if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && - N0.getOperand(1).getOpcode() == ISD::UNDEF && - N1.getOperand(1).getOpcode() == ISD::UNDEF) { + // There are other cases where moving the shuffle after the xor/and/or + // is profitable even if shuffles don't perform a swizzle. + // If both shuffles use the same mask, and both shuffles have the same first + // or second operand, then it might still be profitable to move the shuffle + // after the xor/and/or operation. + if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) { ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0); ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1); - assert(N0.getOperand(0).getValueType() == N1.getOperand(1).getValueType() && + assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() && "Inputs to shuffles are not the same type"); - unsigned NumElts = VT.getVectorNumElements(); - // Check that both shuffles use the same mask. The masks are known to be of // the same length because the result vector type is the same. - bool SameMask = true; - for (unsigned i = 0; i != NumElts; ++i) { - int Idx0 = SVN0->getMaskElt(i); - int Idx1 = SVN1->getMaskElt(i); - if (Idx0 != Idx1) { - SameMask = false; - break; + // Check also that shuffles have only one use to avoid introducing extra + // instructions. + if (SVN0->hasOneUse() && SVN1->hasOneUse() && + SVN0->getMask().equals(SVN1->getMask())) { + SDValue ShOp = N0->getOperand(1); + + // Don't try to fold this node if it requires introducing a + // build vector of all zeros that might be illegal at this stage. 
+ if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { + if (!LegalTypes) + ShOp = DAG.getConstant(0, VT); + else + ShOp = SDValue(); } - } - if (SameMask) { - SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N), VT, - N0.getOperand(0), N1.getOperand(0)); - AddToWorkList(Op.getNode()); - return DAG.getVectorShuffle(VT, SDLoc(N), Op, - DAG.getUNDEF(VT), &SVN0->getMask()[0]); + // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C) + // (OR (shuf (A, C), shuf (B, C)) -> shuf (OR (A, B), C) + // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0) + if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) { + SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, + N0->getOperand(0), N1->getOperand(0)); + AddToWorklist(NewNode.getNode()); + return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp, + &SVN0->getMask()[0]); + } + + // Don't try to fold this node if it requires introducing a + // build vector of all zeros that might be illegal at this stage. + ShOp = N0->getOperand(0); + if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { + if (!LegalTypes) + ShOp = DAG.getConstant(0, VT); + else + ShOp = SDValue(); + } + + // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B)) + // (OR (shuf (C, A), shuf (C, B)) -> shuf (C, OR (A, B)) + // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B)) + if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) { + SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, + N0->getOperand(1), N1->getOperand(1)); + AddToWorklist(NewNode.getNode()); + return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode, + &SVN0->getMask()[0]); + } } } @@ -2534,7 +2627,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return DAG.getConstant(0, VT); // reassociate and SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1); - if (RAND.getNode() != 0) + if (RAND.getNode()) return RAND; // fold (and (or x, C), D) -> D if (C & D) == D if (N1C && N0.getOpcode() == ISD::OR) @@ -2670,21 +2763,21 
@@ SDValue DAGCombiner::visitAND(SDNode *N) { if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) { SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), LR.getValueType(), LL, RL); - AddToWorkList(ORNode.getNode()); + AddToWorklist(ORNode.getNode()); return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); } // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), LR.getValueType(), LL, RL); - AddToWorkList(ANDNode.getNode()); + AddToWorklist(ANDNode.getNode()); return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); } // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), LR.getValueType(), LL, RL); - AddToWorkList(ORNode.getNode()); + AddToWorklist(ORNode.getNode()); return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); } } @@ -2697,7 +2790,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { cast<ConstantSDNode>(RR)->isNullValue()))) { SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(), LL, DAG.getConstant(1, LL.getValueType())); - AddToWorkList(ADDNode.getNode()); + AddToWorklist(ADDNode.getNode()); return DAG.getSetCC(SDLoc(N), VT, ADDNode, DAG.getConstant(2, LL.getValueType()), ISD::SETUGE); } @@ -2745,7 +2838,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, LN0->getMemOperand()); - AddToWorkList(N); + AddToWorklist(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} @@ -2765,7 +2858,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, LN0->getMemOperand()); - AddToWorkList(N); + AddToWorklist(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -2796,7 +2889,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), LN0->getBasePtr(), ExtVT, LN0->getMemOperand()); - AddToWorkList(N); + AddToWorklist(N); CombineTo(LN0, NewLoad, NewLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -2823,7 +2916,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { Alignment = MinAlign(Alignment, PtrOff); } - AddToWorkList(NewPtr.getNode()); + AddToWorklist(NewPtr.getNode()); EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; SDValue Load = @@ -2832,7 +2925,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { LN0->getPointerInfo(), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), Alignment, LN0->getTBAAInfo()); - AddToWorkList(N); + AddToWorklist(N); CombineTo(LN0, Load, Load.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -3067,7 +3160,7 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { if (!TLI.isOperationLegal(ISD::BSWAP, VT)) return SDValue(); - SmallVector<SDNode*,4> Parts(4, (SDNode*)0); + SmallVector<SDNode*,4> Parts(4, (SDNode*)nullptr); // Look for either // (or (or (and), (and)), (or (and), (and))) // (or (or (or (and), (and)), (and)), (and)) @@ -3151,6 +3244,62 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return N0; if (ISD::isBuildVectorAllOnes(N1.getNode())) return N1; + + // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1) + // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2) + // Do this only if the resulting shuffle is legal. 
+ if (isa<ShuffleVectorSDNode>(N0) && + isa<ShuffleVectorSDNode>(N1) && + // Avoid folding a node with illegal type. + TLI.isTypeLegal(VT) && + N0->getOperand(1) == N1->getOperand(1) && + ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) { + bool CanFold = true; + unsigned NumElts = VT.getVectorNumElements(); + const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0); + const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1); + // We construct two shuffle masks: + // - Mask1 is a shuffle mask for a shuffle with N0 as the first operand + // and N1 as the second operand. + // - Mask2 is a shuffle mask for a shuffle with N1 as the first operand + // and N0 as the second operand. + // We do this because OR is commutable and therefore there might be + // two ways to fold this node into a shuffle. + SmallVector<int,4> Mask1; + SmallVector<int,4> Mask2; + + for (unsigned i = 0; i != NumElts && CanFold; ++i) { + int M0 = SV0->getMaskElt(i); + int M1 = SV1->getMaskElt(i); + + // Both shuffle indexes are undef. Propagate Undef. + if (M0 < 0 && M1 < 0) { + Mask1.push_back(M0); + Mask2.push_back(M0); + continue; + } + + if (M0 < 0 || M1 < 0 || + (M0 < (int)NumElts && M1 < (int)NumElts) || + (M0 >= (int)NumElts && M1 >= (int)NumElts)) { + CanFold = false; + break; + } + + Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts); + Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts); + } + + if (CanFold) { + // Fold this sequence only if the resulting shuffle is 'legal'. 
+ if (TLI.isShuffleMaskLegal(Mask1, VT)) + return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), + N1->getOperand(0), &Mask1[0]); + if (TLI.isShuffleMaskLegal(Mask2, VT)) + return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0), + N0->getOperand(0), &Mask2[0]); + } + } } // fold (or x, undef) -> -1 @@ -3177,26 +3326,29 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) SDValue BSwap = MatchBSwapHWord(N, N0, N1); - if (BSwap.getNode() != 0) + if (BSwap.getNode()) return BSwap; BSwap = MatchBSwapHWordLow(N, N0, N1); - if (BSwap.getNode() != 0) + if (BSwap.getNode()) return BSwap; // reassociate or SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1); - if (ROR.getNode() != 0) + if (ROR.getNode()) return ROR; // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) // iff (c1 & c2) == 0. if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && isa<ConstantSDNode>(N0.getOperand(1))) { ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1)); - if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) + if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) { + SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1); + if (!COR.getNode()) + return SDValue(); return DAG.getNode(ISD::AND, SDLoc(N), VT, DAG.getNode(ISD::OR, SDLoc(N0), VT, - N0.getOperand(0), N1), - DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)); + N0.getOperand(0), N1), COR); + } } // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ @@ -3211,7 +3363,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), LR.getValueType(), LL, RL); - AddToWorkList(ORNode.getNode()); + AddToWorklist(ORNode.getNode()); return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); } // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) @@ -3220,7 
+3372,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), LR.getValueType(), LL, RL); - AddToWorkList(ANDNode.getNode()); + AddToWorklist(ANDNode.getNode()); return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); } } @@ -3302,35 +3454,163 @@ static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { return false; } +// Return true if we can prove that, whenever Neg and Pos are both in the +// range [0, OpSize), Neg == (Pos == 0 ? 0 : OpSize - Pos). This means that +// for two opposing shifts shift1 and shift2 and a value X with OpBits bits: +// +// (or (shift1 X, Neg), (shift2 X, Pos)) +// +// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate +// in direction shift1 by Neg. The range [0, OpSize) means that we only need +// to consider shift amounts with defined behavior. +static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) { + // If OpSize is a power of 2 then: + // + // (a) (Pos == 0 ? 0 : OpSize - Pos) == (OpSize - Pos) & (OpSize - 1) + // (b) Neg == Neg & (OpSize - 1) whenever Neg is in [0, OpSize). + // + // So if OpSize is a power of 2 and Neg is (and Neg', OpSize-1), we check + // for the stronger condition: + // + // Neg & (OpSize - 1) == (OpSize - Pos) & (OpSize - 1) [A] + // + // for all Neg and Pos. Since Neg & (OpSize - 1) == Neg' & (OpSize - 1) + // we can just replace Neg with Neg' for the rest of the function. + // + // In other cases we check for the even stronger condition: + // + // Neg == OpSize - Pos [B] + // + // for all Neg and Pos. Note that the (or ...) then invokes undefined + // behavior if Pos == 0 (and consequently Neg == OpSize). + // + // We could actually use [A] whenever OpSize is a power of 2, but the + // only extra cases that it would match are those uninteresting ones + // where Neg and Pos are never in range at the same time. E.g. 
for + // OpSize == 32, using [A] would allow a Neg of the form (sub 64, Pos) + // as well as (sub 32, Pos), but: + // + // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos)) + // + // always invokes undefined behavior for 32-bit X. + // + // Below, Mask == OpSize - 1 when using [A] and is all-ones otherwise. + unsigned MaskLoBits = 0; + if (Neg.getOpcode() == ISD::AND && + isPowerOf2_64(OpSize) && + Neg.getOperand(1).getOpcode() == ISD::Constant && + cast<ConstantSDNode>(Neg.getOperand(1))->getAPIntValue() == OpSize - 1) { + Neg = Neg.getOperand(0); + MaskLoBits = Log2_64(OpSize); + } + + // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1. + if (Neg.getOpcode() != ISD::SUB) + return 0; + ConstantSDNode *NegC = dyn_cast<ConstantSDNode>(Neg.getOperand(0)); + if (!NegC) + return 0; + SDValue NegOp1 = Neg.getOperand(1); + + // On the RHS of [A], if Pos is Pos' & (OpSize - 1), just replace Pos with + // Pos'. The truncation is redundant for the purpose of the equality. + if (MaskLoBits && + Pos.getOpcode() == ISD::AND && + Pos.getOperand(1).getOpcode() == ISD::Constant && + cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() == OpSize - 1) + Pos = Pos.getOperand(0); + + // The condition we need is now: + // + // (NegC - NegOp1) & Mask == (OpSize - Pos) & Mask + // + // If NegOp1 == Pos then we need: + // + // OpSize & Mask == NegC & Mask + // + // (because "x & Mask" is a truncation and distributes through subtraction). + APInt Width; + if (Pos == NegOp1) + Width = NegC->getAPIntValue(); + // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC. 
+ // Then the condition we want to prove becomes: + // + // (NegC - NegOp1) & Mask == (OpSize - (NegOp1 + PosC)) & Mask + // + // which, again because "x & Mask" is a truncation, becomes: + // + // NegC & Mask == (OpSize - PosC) & Mask + // OpSize & Mask == (NegC + PosC) & Mask + else if (Pos.getOpcode() == ISD::ADD && + Pos.getOperand(0) == NegOp1 && + Pos.getOperand(1).getOpcode() == ISD::Constant) + Width = (cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() + + NegC->getAPIntValue()); + else + return false; + + // Now we just need to check that OpSize & Mask == Width & Mask. + if (MaskLoBits) + // Opsize & Mask is 0 since Mask is Opsize - 1. + return Width.getLoBits(MaskLoBits) == 0; + return Width == OpSize; +} + +// A subroutine of MatchRotate used once we have found an OR of two opposite +// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces +// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the +// former being preferred if supported. InnerPos and InnerNeg are Pos and +// Neg with outer conversions stripped away. +SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, + SDValue Neg, SDValue InnerPos, + SDValue InnerNeg, unsigned PosOpcode, + unsigned NegOpcode, SDLoc DL) { + // fold (or (shl x, (*ext y)), + // (srl x, (*ext (sub 32, y)))) -> + // (rotl x, y) or (rotr x, (sub 32, y)) + // + // fold (or (shl x, (*ext (sub 32, y))), + // (srl x, (*ext y))) -> + // (rotr x, y) or (rotl x, (sub 32, y)) + EVT VT = Shifted.getValueType(); + if (matchRotateSub(InnerPos, InnerNeg, VT.getSizeInBits())) { + bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); + return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, + HasPos ? Pos : Neg).getNode(); + } + + return nullptr; +} + // MatchRotate - Handle an 'or' of two operands. If this is one of the many // idioms for rotate, and if the target supports rotation instructions, generate // a rot[lr]. 
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { // Must be a legal type. Expanded 'n promoted things won't work with rotates. EVT VT = LHS.getValueType(); - if (!TLI.isTypeLegal(VT)) return 0; + if (!TLI.isTypeLegal(VT)) return nullptr; // The target must have at least one rotate flavor. bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT); bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT); - if (!HasROTL && !HasROTR) return 0; + if (!HasROTL && !HasROTR) return nullptr; // Match "(X shl/srl V1) & V2" where V2 may not be present. SDValue LHSShift; // The shift. SDValue LHSMask; // AND value if any. if (!MatchRotateHalf(LHS, LHSShift, LHSMask)) - return 0; // Not part of a rotate. + return nullptr; // Not part of a rotate. SDValue RHSShift; // The shift. SDValue RHSMask; // AND value if any. if (!MatchRotateHalf(RHS, RHSShift, RHSMask)) - return 0; // Not part of a rotate. + return nullptr; // Not part of a rotate. if (LHSShift.getOperand(0) != RHSShift.getOperand(0)) - return 0; // Not shifting the same value. + return nullptr; // Not shifting the same value. if (LHSShift.getOpcode() == RHSShift.getOpcode()) - return 0; // Shifts must disagree. + return nullptr; // Shifts must disagree. // Canonicalize shl to left side in a shl/srl pair. 
if (RHSShift.getOpcode() == ISD::SHL) { @@ -3342,6 +3622,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { unsigned OpSizeInBits = VT.getSizeInBits(); SDValue LHSShiftArg = LHSShift.getOperand(0); SDValue LHSShiftAmt = LHSShift.getOperand(1); + SDValue RHSShiftArg = RHSShift.getOperand(0); SDValue RHSShiftAmt = RHSShift.getOperand(1); // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) @@ -3351,7 +3632,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue(); uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue(); if ((LShVal + RShVal) != OpSizeInBits) - return 0; + return nullptr; SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt); @@ -3378,7 +3659,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { // If there is a mask here, and we have a variable shift, we can't be sure // that we're masking out the right stuff. if (LHSMask.getNode() || RHSMask.getNode()) - return 0; + return nullptr; // If the shift amount is sign/zext/any-extended just peel it off. SDValue LExtOp0 = LHSShiftAmt; @@ -3395,30 +3676,17 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { RExtOp0 = RHSShiftAmt.getOperand(0); } - if (RExtOp0.getOpcode() == ISD::SUB && RExtOp0.getOperand(1) == LExtOp0) { - // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> - // (rotl x, y) - // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> - // (rotr x, (sub 32, y)) - if (ConstantSDNode *SUBC = - dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) - if (SUBC->getAPIntValue() == OpSizeInBits) - return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, - HasROTL ? 
LHSShiftAmt : RHSShiftAmt).getNode(); - } else if (LExtOp0.getOpcode() == ISD::SUB && - RExtOp0 == LExtOp0.getOperand(1)) { - // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> - // (rotr x, y) - // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> - // (rotl x, (sub 32, y)) - if (ConstantSDNode *SUBC = - dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) - if (SUBC->getAPIntValue() == OpSizeInBits) - return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg, - HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode(); - } - - return 0; + SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, + LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL); + if (TryL) + return TryL; + + SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, + RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL); + if (TryR) + return TryR; + + return nullptr; } SDValue DAGCombiner::visitXOR(SDNode *N) { @@ -3460,7 +3728,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { return N0; // reassociate xor SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1); - if (RXOR.getNode() != 0) + if (RXOR.getNode()) return RXOR; // fold !(x cc y) -> (x !cc y) @@ -3490,7 +3758,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue V = N0.getOperand(0); V = DAG.getNode(ISD::XOR, SDLoc(N0), V.getValueType(), V, DAG.getConstant(1, V.getValueType())); - AddToWorkList(V.getNode()); + AddToWorklist(V.getNode()); return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V); } @@ -3502,7 +3770,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { unsigned NewOpcode = N0.getOpcode() == ISD::AND ? 
ISD::OR : ISD::AND; LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS - AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode()); + AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS); } } @@ -3514,7 +3782,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND; LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS - AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode()); + AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS); } } @@ -3523,7 +3791,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { N0->getOperand(1) == N1) { SDValue X = N0->getOperand(0); SDValue NotX = DAG.getNOT(SDLoc(X), X, VT); - AddToWorkList(NotX.getNode()); + AddToWorklist(NotX.getNode()); return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1); } // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2)) @@ -3559,7 +3827,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { /// visitShiftByConstant - Handle transforms common to the three shifts, when /// the shift amount is a constant. -SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { +SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { + // We can't and shouldn't fold opaque constants. + if (Amt->isOpaque()) + return SDValue(); + SDNode *LHS = N->getOperand(0).getNode(); if (!LHS->hasOneUse()) return SDValue(); @@ -3585,9 +3857,9 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { break; } - // We require the RHS of the binop to be a constant as well. + // We require the RHS of the binop to be a constant and not opaque as well. 
ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1)); - if (!BinOpCst) return SDValue(); + if (!BinOpCst || BinOpCst->isOpaque()) return SDValue(); // FIXME: disable this unless the input to the binop is a shift by a constant. // If it is not a shift, it pessimizes some common cases like: @@ -3613,10 +3885,14 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { return SDValue(); } + if (!TLI.isDesirableToCommuteWithShift(LHS)) + return SDValue(); + // Fold the constants, shifting the binop RHS by the shift amount. SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)), N->getValueType(0), LHS->getOperand(1), N->getOperand(1)); + assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!"); // Create the new shift. SDValue NewShift = DAG.getNode(N->getOpcode(), @@ -3627,18 +3903,74 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS); } +SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { + assert(N->getOpcode() == ISD::TRUNCATE); + assert(N->getOperand(0).getOpcode() == ISD::AND); + + // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC) + if (N->hasOneUse() && N->getOperand(0).hasOneUse()) { + SDValue N01 = N->getOperand(0).getOperand(1); + + if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) { + EVT TruncVT = N->getValueType(0); + SDValue N00 = N->getOperand(0).getOperand(0); + APInt TruncC = N01C->getAPIntValue(); + TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits()); + + return DAG.getNode(ISD::AND, SDLoc(N), TruncVT, + DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, N00), + DAG.getConstant(TruncC, TruncVT)); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitRotate(SDNode *N) { + // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))). 
+ if (N->getOperand(1).getOpcode() == ISD::TRUNCATE && + N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) { + SDValue NewOp1 = distributeTruncateThroughAnd(N->getOperand(1).getNode()); + if (NewOp1.getNode()) + return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), + N->getOperand(0), NewOp1); + } + return SDValue(); +} + SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); - unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); + unsigned OpSizeInBits = VT.getScalarSizeInBits(); // fold vector ops if (VT.isVector()) { SDValue FoldedVOp = SimplifyVBinOp(N); if (FoldedVOp.getNode()) return FoldedVOp; + + BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1); + // If setcc produces all-one true value then: + // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV) + if (N1CV && N1CV->isConstant()) { + if (N0.getOpcode() == ISD::AND) { + SDValue N00 = N0->getOperand(0); + SDValue N01 = N0->getOperand(1); + BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01); + + if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC && + TLI.getBooleanContents(N00.getOperand(0).getValueType()) == + TargetLowering::ZeroOrNegativeOneBooleanContent) { + SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV); + if (C.getNode()) + return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C); + } + } else { + N1C = isConstOrConstSplat(N1); + } + } } // fold (shl c1, c2) -> c1<<c2 @@ -3662,35 +3994,25 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return DAG.getConstant(0, VT); // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). 
if (N1.getOpcode() == ISD::TRUNCATE && - N1.getOperand(0).getOpcode() == ISD::AND && - N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { - SDValue N101 = N1.getOperand(0).getOperand(1); - if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { - EVT TruncVT = N1.getValueType(); - SDValue N100 = N1.getOperand(0).getOperand(0); - APInt TruncC = N101C->getAPIntValue(); - TruncC = TruncC.trunc(TruncVT.getSizeInBits()); - return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, - DAG.getNode(ISD::AND, SDLoc(N), TruncVT, - DAG.getNode(ISD::TRUNCATE, - SDLoc(N), - TruncVT, N100), - DAG.getConstant(TruncC, TruncVT))); - } + N1.getOperand(0).getOpcode() == ISD::AND) { + SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); + if (NewOp1.getNode()) + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1); } if (N1C && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2)) - if (N1C && N0.getOpcode() == ISD::SHL && - N0.getOperand(1).getOpcode() == ISD::Constant) { - uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); - uint64_t c2 = N1C->getZExtValue(); - if (c1 + c2 >= OpSizeInBits) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c1 + c2, N1.getValueType())); + if (N1C && N0.getOpcode() == ISD::SHL) { + if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { + uint64_t c1 = N0C1->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + if (c1 + c2 >= OpSizeInBits) + return DAG.getConstant(0, VT); + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), + DAG.getConstant(c1 + c2, N1.getValueType())); + } } // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2))) @@ -3701,20 +4023,21 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND) && - N0.getOperand(0).getOpcode() == ISD::SHL && - 
isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) { - uint64_t c1 = - cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue(); - uint64_t c2 = N1C->getZExtValue(); - EVT InnerShiftVT = N0.getOperand(0).getValueType(); - uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits(); - if (c2 >= OpSizeInBits - InnerShiftSize) { - if (c1 + c2 >= OpSizeInBits) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SHL, SDLoc(N0), VT, - DAG.getNode(N0.getOpcode(), SDLoc(N0), VT, - N0.getOperand(0)->getOperand(0)), - DAG.getConstant(c1 + c2, N1.getValueType())); + N0.getOperand(0).getOpcode() == ISD::SHL) { + SDValue N0Op0 = N0.getOperand(0); + if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) { + uint64_t c1 = N0Op0C1->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + EVT InnerShiftVT = N0Op0.getValueType(); + uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); + if (c2 >= OpSizeInBits - InnerShiftSize) { + if (c1 + c2 >= OpSizeInBits) + return DAG.getConstant(0, VT); + return DAG.getNode(ISD::SHL, SDLoc(N0), VT, + DAG.getNode(N0.getOpcode(), SDLoc(N0), VT, + N0Op0->getOperand(0)), + DAG.getConstant(c1 + c2, N1.getValueType())); + } } } @@ -3722,19 +4045,20 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // Only fold this if the inner zext has no other uses to avoid increasing // the total number of instructions. 
if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() && - N0.getOperand(0).getOpcode() == ISD::SRL && - isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) { - uint64_t c1 = - cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue(); - if (c1 < VT.getSizeInBits()) { - uint64_t c2 = N1C->getZExtValue(); - if (c1 == c2) { - SDValue NewOp0 = N0.getOperand(0); - EVT CountVT = NewOp0.getOperand(1).getValueType(); - SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(), - NewOp0, DAG.getConstant(c2, CountVT)); - AddToWorkList(NewSHL.getNode()); - return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL); + N0.getOperand(0).getOpcode() == ISD::SRL) { + SDValue N0Op0 = N0.getOperand(0); + if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) { + uint64_t c1 = N0Op0C1->getZExtValue(); + if (c1 < VT.getScalarSizeInBits()) { + uint64_t c2 = N1C->getZExtValue(); + if (c1 == c2) { + SDValue NewOp0 = N0.getOperand(0); + EVT CountVT = NewOp0.getOperand(1).getValueType(); + SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(), + NewOp0, DAG.getConstant(c2, CountVT)); + AddToWorklist(NewSHL.getNode()); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL); + } } } } @@ -3743,40 +4067,39 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // (and (srl x, (sub c1, c2), MASK) // Only fold this if the inner shift has no other uses -- if it does, folding // this will increase the total number of instructions. 
- if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() && - N0.getOperand(1).getOpcode() == ISD::Constant) { - uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); - if (c1 < VT.getSizeInBits()) { - uint64_t c2 = N1C->getZExtValue(); - APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), - VT.getSizeInBits() - c1); - SDValue Shift; - if (c2 > c1) { - Mask = Mask.shl(c2-c1); - Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c2-c1, N1.getValueType())); - } else { - Mask = Mask.lshr(c1-c2); - Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c1-c2, N1.getValueType())); + if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { + if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { + uint64_t c1 = N0C1->getZExtValue(); + if (c1 < OpSizeInBits) { + uint64_t c2 = N1C->getZExtValue(); + APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1); + SDValue Shift; + if (c2 > c1) { + Mask = Mask.shl(c2 - c1); + Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), + DAG.getConstant(c2 - c1, N1.getValueType())); + } else { + Mask = Mask.lshr(c1 - c2); + Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), + DAG.getConstant(c1 - c2, N1.getValueType())); + } + return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift, + DAG.getConstant(Mask, VT)); } - return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift, - DAG.getConstant(Mask, VT)); } } // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) { + unsigned BitSize = VT.getScalarSizeInBits(); SDValue HiBitsMask = - DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(), - VT.getSizeInBits() - - N1C->getZExtValue()), - VT); + DAG.getConstant(APInt::getHighBitsSet(BitSize, + BitSize - N1C->getZExtValue()), VT); return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), HiBitsMask); } if (N1C) { - SDValue NewSHL = visitShiftByConstant(N, 
N1C->getZExtValue()); + SDValue NewSHL = visitShiftByConstant(N, N1C); if (NewSHL.getNode()) return NewSHL; } @@ -3796,6 +4119,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (VT.isVector()) { SDValue FoldedVOp = SimplifyVBinOp(N); if (FoldedVOp.getNode()) return FoldedVOp; + + N1C = isConstOrConstSplat(N1); } // fold (sra c1, c2) -> (sra c1, c2) @@ -3829,11 +4154,12 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2)) if (N1C && N0.getOpcode() == ISD::SRA) { - if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) { unsigned Sum = N1C->getZExtValue() + C1->getZExtValue(); - if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1; + if (Sum >= OpSizeInBits) + Sum = OpSizeInBits - 1; return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(Sum, N1C->getValueType(0))); + DAG.getConstant(Sum, N1.getValueType())); } } @@ -3842,14 +4168,17 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // result_size - n != m. // If truncate is free for the target sext(shl) is likely to result in better // code. - if (N0.getOpcode() == ISD::SHL) { + if (N0.getOpcode() == ISD::SHL && N1C) { // Get the two constanst of the shifts, CN0 = m, CN = n. - const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); - if (N01C && N1C) { + const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1)); + if (N01C) { + LLVMContext &Ctx = *DAG.getContext(); // Determine what the truncate's result bitsize and type would be. - EVT TruncVT = - EVT::getIntegerVT(*DAG.getContext(), - OpSizeInBits - N1C->getZExtValue()); + EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue()); + + if (VT.isVector()) + TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements()); + // Determine the residual right-shift amount. 
signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); @@ -3876,44 +4205,33 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))). if (N1.getOpcode() == ISD::TRUNCATE && - N1.getOperand(0).getOpcode() == ISD::AND && - N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { - SDValue N101 = N1.getOperand(0).getOperand(1); - if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { - EVT TruncVT = N1.getValueType(); - SDValue N100 = N1.getOperand(0).getOperand(0); - APInt TruncC = N101C->getAPIntValue(); - TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits()); - return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, - DAG.getNode(ISD::AND, SDLoc(N), - TruncVT, - DAG.getNode(ISD::TRUNCATE, - SDLoc(N), - TruncVT, N100), - DAG.getConstant(TruncC, TruncVT))); - } - } - - // fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2)) + N1.getOperand(0).getOpcode() == ISD::AND) { + SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); + if (NewOp1.getNode()) + return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1); + } + + // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2)) // if c1 is equal to the number of bits the trunc removes if (N0.getOpcode() == ISD::TRUNCATE && (N0.getOperand(0).getOpcode() == ISD::SRL || N0.getOperand(0).getOpcode() == ISD::SRA) && N0.getOperand(0).hasOneUse() && N0.getOperand(0).getOperand(1).hasOneUse() && - N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) { - EVT LargeVT = N0.getOperand(0).getValueType(); - ConstantSDNode *LargeShiftAmt = - cast<ConstantSDNode>(N0.getOperand(0).getOperand(1)); - - if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits == - LargeShiftAmt->getZExtValue()) { - SDValue Amt = - DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(), - getShiftAmountTy(N0.getOperand(0).getOperand(0).getValueType())); - SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT, - N0.getOperand(0).getOperand(0), Amt); - 
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA); + N1C) { + SDValue N0Op0 = N0.getOperand(0); + if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) { + unsigned LargeShiftVal = LargeShift->getZExtValue(); + EVT LargeVT = N0Op0.getValueType(); + + if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) { + SDValue Amt = + DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), + getShiftAmountTy(N0Op0.getOperand(0).getValueType())); + SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT, + N0Op0.getOperand(0), Amt); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA); + } } } @@ -3927,7 +4245,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); if (N1C) { - SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue()); + SDValue NewSRA = visitShiftByConstant(N, N1C); if (NewSRA.getNode()) return NewSRA; } @@ -3947,6 +4265,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (VT.isVector()) { SDValue FoldedVOp = SimplifyVBinOp(N); if (FoldedVOp.getNode()) return FoldedVOp; + + N1C = isConstOrConstSplat(N1); } // fold (srl c1, c2) -> c1 >>u c2 @@ -3967,14 +4287,15 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return DAG.getConstant(0, VT); // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) - if (N1C && N0.getOpcode() == ISD::SRL && - N0.getOperand(1).getOpcode() == ISD::Constant) { - uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); - uint64_t c2 = N1C->getZExtValue(); - if (c1 + c2 >= OpSizeInBits) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c1 + c2, N1.getValueType())); + if (N1C && N0.getOpcode() == ISD::SRL) { + if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) { + uint64_t c1 = N01C->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + if (c1 + c2 >= OpSizeInBits) + return DAG.getConstant(0, VT); + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), + 
DAG.getConstant(c1 + c2, N1.getValueType())); + } } // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2))) @@ -3999,18 +4320,21 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { } // fold (srl (shl x, c), c) -> (and x, cst2) - if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 && - N0.getValueSizeInBits() <= 64) { - uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits(); - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(~0ULL >> ShAmt, VT)); + if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) { + unsigned BitSize = N0.getScalarValueSizeInBits(); + if (BitSize <= 64) { + uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize; + return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), + DAG.getConstant(~0ULL >> ShAmt, VT)); + } } // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask) if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { // Shifting in all undef bits? EVT SmallVT = N0.getOperand(0).getValueType(); - if (N1C->getZExtValue() >= SmallVT.getSizeInBits()) + unsigned BitSize = SmallVT.getScalarSizeInBits(); + if (N1C->getZExtValue() >= BitSize) return DAG.getUNDEF(VT); if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { @@ -4018,8 +4342,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue SmallShift = DAG.getNode(ISD::SRL, SDLoc(N0), SmallVT, N0.getOperand(0), DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT))); - AddToWorkList(SmallShift.getNode()); - APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()).lshr(ShiftAmt); + AddToWorklist(SmallShift.getNode()); + APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt); return DAG.getNode(ISD::AND, SDLoc(N), VT, DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift), DAG.getConstant(Mask, VT)); @@ -4028,16 +4352,16 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign // bit, which is unmodified by sra. 
- if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) { + if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) { if (N0.getOpcode() == ISD::SRA) return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1); } // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit). if (N1C && N0.getOpcode() == ISD::CTLZ && - N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) { + N1C->getAPIntValue() == Log2_32(OpSizeInBits)) { APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne); + DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne); // If any of the input bits are KnownOne, then the input couldn't be all // zeros, thus the result of the srl will always be zero. @@ -4060,7 +4384,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (ShAmt) { Op = DAG.getNode(ISD::SRL, SDLoc(N0), VT, Op, DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType()))); - AddToWorkList(Op.getNode()); + AddToWorklist(Op.getNode()); } return DAG.getNode(ISD::XOR, SDLoc(N), VT, @@ -4070,22 +4394,10 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))). 
if (N1.getOpcode() == ISD::TRUNCATE && - N1.getOperand(0).getOpcode() == ISD::AND && - N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { - SDValue N101 = N1.getOperand(0).getOperand(1); - if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { - EVT TruncVT = N1.getValueType(); - SDValue N100 = N1.getOperand(0).getOperand(0); - APInt TruncC = N101C->getAPIntValue(); - TruncC = TruncC.trunc(TruncVT.getSizeInBits()); - return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, - DAG.getNode(ISD::AND, SDLoc(N), - TruncVT, - DAG.getNode(ISD::TRUNCATE, - SDLoc(N), - TruncVT, N100), - DAG.getConstant(TruncC, TruncVT))); - } + N1.getOperand(0).getOpcode() == ISD::AND) { + SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); + if (NewOp1.getNode()) + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1); } // fold operands of srl based on knowledge that the low bits are not @@ -4094,7 +4406,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return SDValue(N, 0); if (N1C) { - SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue()); + SDValue NewSRL = visitShiftByConstant(N, N1C); if (NewSRL.getNode()) return NewSRL; } @@ -4124,12 +4436,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N->hasOneUse()) { SDNode *Use = *N->use_begin(); if (Use->getOpcode() == ISD::BRCOND) - AddToWorkList(Use); + AddToWorklist(Use); else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) { // Also look pass the truncate. Use = *Use->use_begin(); if (Use->getOpcode() == ISD::BRCOND) - AddToWorkList(Use); + AddToWorklist(Use); } } @@ -4209,11 +4521,20 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1) return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); // fold (select C, 0, 1) -> (xor C, 1) + // We can't do this reliably if integer based booleans have different contents + // to floating point based booleans. 
This is because we can't tell whether we + // have an integer-based boolean or a floating-point-based boolean unless we + // can find the SETCC that produced it and inspect its operands. This is + // fairly easy if C is the SETCC node, but it can potentially be + // undiscoverable (or not reasonably discoverable). For example, it could be + // in another basic block or it could require searching a complicated + // expression. if (VT.isInteger() && - (VT0 == MVT::i1 || - (VT0.isInteger() && - TLI.getBooleanContents(false) == - TargetLowering::ZeroOrOneBooleanContent)) && + (VT0 == MVT::i1 || (VT0.isInteger() && + TLI.getBooleanContents(false, false) == + TLI.getBooleanContents(false, true) && + TLI.getBooleanContents(false, false) == + TargetLowering::ZeroOrOneBooleanContent)) && N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) { SDValue XORNode; if (VT == VT0) @@ -4221,7 +4542,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { N0, DAG.getConstant(1, VT0)); XORNode = DAG.getNode(ISD::XOR, SDLoc(N0), VT0, N0, DAG.getConstant(1, VT0)); - AddToWorkList(XORNode.getNode()); + AddToWorklist(XORNode.getNode()); if (VT.bitsGT(VT0)) return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode); return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode); @@ -4229,13 +4550,13 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // fold (select C, 0, X) -> (and (not C), X) if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) { SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); - AddToWorkList(NOTNode.getNode()); + AddToWorklist(NOTNode.getNode()); return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2); } // fold (select C, X, 1) -> (or (not C), X) if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) { SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); - AddToWorkList(NOTNode.getNode()); + AddToWorklist(NOTNode.getNode()); return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1); } // fold (select C, X, 0) -> (and C, X) @@ -4256,12 +4577,9 @@ SDValue 
DAGCombiner::visitSELECT(SDNode *N) { // fold selects based on a setcc into other things, such as min/max/abs if (N0.getOpcode() == ISD::SETCC) { - // FIXME: - // Check against MVT::Other for SELECT_CC, which is a workaround for targets - // having to say they don't support SELECT_CC on every type the DAG knows - // about, since there is no way to mark an opcode illegal at all value types - if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) && - TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) + if ((!LegalOperations && + TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) || + TLI.isOperationLegal(ISD::SELECT_CC, VT)) return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), N1, N2, N0.getOperand(2)); @@ -4275,12 +4593,12 @@ static std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) { SDLoc DL(N); EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); // Split the inputs. SDValue Lo, Hi, LL, LH, RL, RH; - llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); - llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); + std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); + std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); @@ -4288,6 +4606,56 @@ std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) { return std::make_pair(Lo, Hi); } +// This function assumes all the vselect's arguments are CONCAT_VECTOR +// nodes and that the condition is a BV of ConstantSDNodes (or undefs). 
+static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { + SDLoc dl(N); + SDValue Cond = N->getOperand(0); + SDValue LHS = N->getOperand(1); + SDValue RHS = N->getOperand(2); + MVT VT = N->getSimpleValueType(0); + int NumElems = VT.getVectorNumElements(); + assert(LHS.getOpcode() == ISD::CONCAT_VECTORS && + RHS.getOpcode() == ISD::CONCAT_VECTORS && + Cond.getOpcode() == ISD::BUILD_VECTOR); + + // We're sure we have an even number of elements due to the + // concat_vectors we have as arguments to vselect. + // Skip BV elements until we find one that's not an UNDEF + // After we find an UNDEF element, keep looping until we get to half the + // length of the BV and see if all the non-undef nodes are the same. + ConstantSDNode *BottomHalf = nullptr; + for (int i = 0; i < NumElems / 2; ++i) { + if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF) + continue; + + if (BottomHalf == nullptr) + BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i)); + else if (Cond->getOperand(i).getNode() != BottomHalf) + return SDValue(); + } + + // Do the same for the second half of the BuildVector + ConstantSDNode *TopHalf = nullptr; + for (int i = NumElems / 2; i < NumElems; ++i) { + if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF) + continue; + + if (TopHalf == nullptr) + TopHalf = cast<ConstantSDNode>(Cond.getOperand(i)); + else if (Cond->getOperand(i).getNode() != TopHalf) + return SDValue(); + } + + assert(TopHalf && BottomHalf && + "One half of the selector was all UNDEFs and the other was all the " + "same value. This should have been addressed before this function."); + return DAG.getNode( + ISD::CONCAT_VECTORS, dl, VT, + BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0), + TopHalf->isNullValue() ? 
RHS->getOperand(1) : LHS->getOperand(1)); +} + SDValue DAGCombiner::visitVSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -4319,8 +4687,8 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { ISD::SRA, DL, VT, LHS, DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT)); SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); - AddToWorkList(Shift.getNode()); - AddToWorkList(Add.getNode()); + AddToWorklist(Shift.getNode()); + AddToWorklist(Add.getNode()); return DAG.getNode(ISD::XOR, DL, VT, Add, Shift); } } @@ -4338,21 +4706,39 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { return SDValue(); SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH; - llvm::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG); - llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 1); - llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 2); + std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG); + std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1); + std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2); Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL); Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH); // Add the new VSELECT nodes to the work list in case they need to be split // again. - AddToWorkList(Lo.getNode()); - AddToWorkList(Hi.getNode()); + AddToWorklist(Lo.getNode()); + AddToWorklist(Hi.getNode()); return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); } + // Fold (vselect (build_vector all_ones), N1, N2) -> N1 + if (ISD::isBuildVectorAllOnes(N0.getNode())) + return N1; + // Fold (vselect (build_vector all_zeros), N1, N2) -> N2 + if (ISD::isBuildVectorAllZeros(N0.getNode())) + return N2; + + // The ConvertSelectToConcatVector function is assuming both the above + // checks for (vselect (build_vector all{ones,zeros) ...) have been made + // and addressed. 
+ if (N1.getOpcode() == ISD::CONCAT_VECTORS && + N2.getOpcode() == ISD::CONCAT_VECTORS && + ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { + SDValue CV = ConvertSelectToConcatVector(N, DAG); + if (CV.getNode()) + return CV; + } + return SDValue(); } @@ -4372,7 +4758,7 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1, CC, SDLoc(N), false); if (SCC.getNode()) { - AddToWorkList(SCC.getNode()); + AddToWorklist(SCC.getNode()); if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) { if (!SCCC->isNullValue()) @@ -4402,6 +4788,65 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) { SDLoc(N)); } +// tryToFoldExtendOfConstant - Try to fold a sext/zext/aext +// dag node into a ConstantSDNode or a build_vector of constants. +// This function is called by the DAGCombiner when visiting sext/zext/aext +// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). +// Vector extends are not folded if operations are legal; this is to +// avoid introducing illegal build_vector dag nodes. 
+static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, + SelectionDAG &DAG, bool LegalTypes, + bool LegalOperations) { + unsigned Opcode = N->getOpcode(); + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND || + Opcode == ISD::ANY_EXTEND) && "Expected EXTEND dag node in input!"); + + // fold (sext c1) -> c1 + // fold (zext c1) -> c1 + // fold (aext c1) -> c1 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode(); + + // fold (sext (build_vector AllConstants) -> (build_vector AllConstants) + // fold (zext (build_vector AllConstants) -> (build_vector AllConstants) + // fold (aext (build_vector AllConstants) -> (build_vector AllConstants) + EVT SVT = VT.getScalarType(); + if (!(VT.isVector() && + (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) && + ISD::isBuildVectorOfConstantSDNodes(N0.getNode()))) + return nullptr; + + // We can fold this node into a build_vector. 
+ unsigned VTBits = SVT.getSizeInBits(); + unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits(); + unsigned ShAmt = VTBits - EVTBits; + SmallVector<SDValue, 8> Elts; + unsigned NumElts = N0->getNumOperands(); + SDLoc DL(N); + + for (unsigned i=0; i != NumElts; ++i) { + SDValue Op = N0->getOperand(i); + if (Op->getOpcode() == ISD::UNDEF) { + Elts.push_back(DAG.getUNDEF(SVT)); + continue; + } + + ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op); + const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue()); + if (Opcode == ISD::SIGN_EXTEND) + Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(), + SVT)); + else + Elts.push_back(DAG.getConstant(C.shl(ShAmt).lshr(ShAmt).getZExtValue(), + SVT)); + } + + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode(); +} + // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))" // transformation. 
Returns true if extension are possible and the above @@ -4483,8 +4928,7 @@ void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, } Ops.push_back(SetCC->getOperand(2)); - CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), - &Ops[0], Ops.size())); + CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops)); } } @@ -4492,9 +4936,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - // fold (sext c1) -> c1 - if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N0); + if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, + LegalOperations)) + return SDValue(Res, 0); // fold (sext (sext x)) -> (sext x) // fold (sext (aext x)) -> (sext x) @@ -4511,7 +4955,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. - AddToWorkList(oye); + AddToWorklist(oye); } return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -4558,6 +5002,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // on vectors in one instruction. We only perform this transformation on // scalars. 
if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && + ISD::isUNINDEXEDLoad(N0.getNode()) && ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) { bool DoXform = true; @@ -4610,7 +5055,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) && (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); - if (LN0->getExtensionType() != ISD::ZEXTLOAD) { + if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) { bool DoXform = true; SmallVector<SDNode*, 4> SetCCs; if (!N0.hasOneUse()) @@ -4638,12 +5083,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } if (N0.getOpcode() == ISD::SETCC) { + EVT N0VT = N0.getOperand(0).getValueType(); // sext(setcc) -> sext_in_reg(vsetcc) for vectors. // Only do this before legalize for now. if (VT.isVector() && !LegalOperations && - TLI.getBooleanContents(true) == - TargetLowering::ZeroOrNegativeOneBooleanContent) { - EVT N0VT = N0.getOperand(0).getValueType(); + TLI.getBooleanContents(N0VT) == + TargetLowering::ZeroOrNegativeOneBooleanContent) { // On some architectures (such as SSE/NEON/etc) the SETCC result type is // of the same size as the compared operands. Only optimize sext(setcc()) // if this is the case. 
@@ -4671,7 +5116,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } } - // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc) + // sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0) unsigned ElementWidth = VT.getScalarType().getSizeInBits(); SDValue NegOne = DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT); @@ -4680,15 +5125,21 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { NegOne, DAG.getConstant(0, VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; - if (!VT.isVector() && - (!LegalOperations || - TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(VT)))) { - return DAG.getSelect(SDLoc(N), VT, - DAG.getSetCC(SDLoc(N), - getSetCCResultType(VT), - N0.getOperand(0), N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()), - NegOne, DAG.getConstant(0, VT)); + + if (!VT.isVector()) { + EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType()); + if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, SetCCVT)) { + SDLoc DL(N); + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + SDValue SetCC = DAG.getSetCC(DL, + SetCCVT, + N0.getOperand(0), N0.getOperand(1), CC); + EVT SelectVT = getSetCCResultType(VT); + return DAG.getSelect(DL, VT, + DAG.getSExtOrTrunc(SetCC, DL, SelectVT), + NegOne, DAG.getConstant(0, VT)); + + } } } @@ -4703,13 +5154,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // isTruncateOf - If N is a truncate of some other value, return true, record // the value being truncated in Op and which of Op's bits are zero in KnownZero. // This function computes KnownZero to avoid a duplicated call to -// ComputeMaskedBits in the caller. +// computeKnownBits in the caller. 
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, APInt &KnownZero) { APInt KnownOne; if (N->getOpcode() == ISD::TRUNCATE) { Op = N->getOperand(0); - DAG.ComputeMaskedBits(Op, KnownZero, KnownOne); + DAG.computeKnownBits(Op, KnownZero, KnownOne); return true; } @@ -4730,7 +5181,7 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, else return false; - DAG.ComputeMaskedBits(Op, KnownZero, KnownOne); + DAG.computeKnownBits(Op, KnownZero, KnownOne); if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue()) return false; @@ -4742,9 +5193,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - // fold (zext c1) -> c1 - if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0); + if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, + LegalOperations)) + return SDValue(Res, 0); + // fold (zext (zext x)) -> (zext x) // fold (zext (aext x)) -> (zext x) if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) @@ -4784,7 +5236,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. - AddToWorkList(oye); + AddToWorklist(oye); } return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -4802,7 +5254,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. - AddToWorkList(oye); + AddToWorklist(oye); } return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} @@ -4810,10 +5262,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue Op = N0.getOperand(0); if (Op.getValueType().bitsLT(VT)) { Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op); - AddToWorkList(Op.getNode()); + AddToWorklist(Op.getNode()); } else if (Op.getValueType().bitsGT(VT)) { Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); - AddToWorkList(Op.getNode()); + AddToWorklist(Op.getNode()); } return DAG.getZeroExtendInReg(Op, SDLoc(N), N0.getValueType().getScalarType()); @@ -4844,6 +5296,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // on vectors in one instruction. We only perform this transformation on // scalars. if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && + ISD::isUNINDEXEDLoad(N0.getNode()) && ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) { bool DoXform = true; @@ -4876,7 +5329,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) && (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); - if (LN0->getExtensionType() != ISD::SEXTLOAD) { + if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) { bool DoXform = true; SmallVector<SDNode*, 4> SetCCs; if (!N0.hasOneUse()) @@ -4925,10 +5378,14 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } if (N0.getOpcode() == ISD::SETCC) { - if (!LegalOperations && VT.isVector()) { + if (!LegalOperations && VT.isVector() && + N0.getValueType().getVectorElementType() == MVT::i1) { + EVT N0VT = N0.getOperand(0).getValueType(); + if (getSetCCResultType(N0VT) == N0.getValueType()) + return SDValue(); + // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors. // Only do this before legalize for now. 
- EVT N0VT = N0.getOperand(0).getValueType(); EVT EltVT = VT.getVectorElementType(); SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(), DAG.getConstant(1, EltVT)); @@ -4943,7 +5400,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()), DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, - &OneOps[0], OneOps.size())); + OneOps)); // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then @@ -4960,8 +5417,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { cast<CondCodeSDNode>(N0.getOperand(2))->get()); return DAG.getNode(ISD::AND, SDLoc(N), VT, DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT), - DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, - &OneOps[0], OneOps.size())); + DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, OneOps)); } // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc @@ -5007,9 +5463,10 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - // fold (aext c1) -> c1 - if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, N0); + if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, + LegalOperations)) + return SDValue(Res, 0); + // fold (aext (aext x)) -> (aext x) // fold (aext (zext x)) -> (zext x) // fold (aext (sext x)) -> (sext x) @@ -5027,7 +5484,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. - AddToWorkList(oye); + AddToWorklist(oye); } return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -5067,8 +5524,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // on vectors in one instruction. We only perform this transformation on // scalars. 
if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && - ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || - TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { + ISD::isUNINDEXEDLoad(N0.getNode()) && + TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType())) { bool DoXform = true; SmallVector<SDNode*, 4> SetCCs; if (!N0.hasOneUse()) @@ -5096,20 +5553,26 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); + ISD::LoadExtType ExtType = LN0->getExtensionType(); EVT MemVT = LN0->getMemoryVT(); - SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(N), - VT, LN0->getChain(), LN0->getBasePtr(), - MemVT, LN0->getMemOperand()); - CombineTo(N, ExtLoad); - CombineTo(N0.getNode(), - DAG.getNode(ISD::TRUNCATE, SDLoc(N0), - N0.getValueType(), ExtLoad), - ExtLoad.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! + if (!LegalOperations || TLI.isLoadExtLegal(ExtType, MemVT)) { + SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N), + VT, LN0->getChain(), LN0->getBasePtr(), + MemVT, LN0->getMemOperand()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), + DAG.getNode(ISD::TRUNCATE, SDLoc(N0), + N0.getValueType(), ExtLoad), + ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } } if (N0.getOpcode() == ISD::SETCC) { - // aext(setcc) -> sext_in_reg(vsetcc) for vectors. + // For vectors: + // aext(setcc) -> vsetcc + // aext(setcc) -> truncate(vsetcc) + // aext(setcc) -> aext(vsetcc) // Only do this before legalize for now. 
if (VT.isVector() && !LegalOperations) { EVT N0VT = N0.getOperand(0).getValueType(); @@ -5124,19 +5587,14 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { cast<CondCodeSDNode>(N0.getOperand(2))->get()); // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then - // truncate/sign extend + // truncate/any extend else { - EVT MatchingElementType = - EVT::getIntegerVT(*DAG.getContext(), - N0VT.getScalarType().getSizeInBits()); - EVT MatchingVectorType = - EVT::getVectorVT(*DAG.getContext(), MatchingElementType, - N0VT.getVectorNumElements()); + EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger(); SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT); + return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT); } } @@ -5160,7 +5618,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { default: break; case ISD::Constant: { const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode()); - assert(CV != 0 && "Const value should be ConstSDNode."); + assert(CV && "Const value should be ConstSDNode."); const APInt &CVal = CV->getAPIntValue(); APInt NewVal = CVal & Mask; if (NewVal != CVal) @@ -5324,7 +5782,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), PtrType, LN0->getBasePtr(), DAG.getConstant(PtrOff, PtrType)); - AddToWorkList(NewPtr.getNode()); + AddToWorklist(NewPtr.getNode()); SDValue Load; if (ExtType == ISD::NON_EXTLOAD) @@ -5339,7 +5797,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { NewAlign, LN0->getTBAAInfo()); // Replace the old load's chain with the new load's chain. - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); // Shift the result left, if we've swallowed a left shift. 
@@ -5438,7 +5896,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); - AddToWorkList(ExtLoad.getNode()); + AddToWorklist(ExtLoad.getNode()); return SDValue(N, 0); // Return N so it doesn't get rechecked! } // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use @@ -5461,11 +5919,34 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) { SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), N0.getOperand(1), false); - if (BSwap.getNode() != 0) + if (BSwap.getNode()) return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1); } + // Fold a sext_inreg of a build_vector of ConstantSDNodes or undefs + // into a build_vector. + if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { + SmallVector<SDValue, 8> Elts; + unsigned NumElts = N0->getNumOperands(); + unsigned ShAmt = VTBits - EVTBits; + + for (unsigned i = 0; i != NumElts; ++i) { + SDValue Op = N0->getOperand(i); + if (Op->getOpcode() == ISD::UNDEF) { + Elts.push_back(Op); + continue; + } + + ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op); + const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue()); + Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(), + Op.getValueType())); + } + + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts); + } + return SDValue(); } @@ -5510,7 +5991,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // creates this pattern) and before operation legalization after which // we need to be more careful about the vector instructions that we generate. 
if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && - LegalTypes && !LegalOperations && N0->hasOneUse()) { + LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) { EVT VecTy = N0.getOperand(0).getValueType(); EVT ExTy = N0.getValueType(); @@ -5537,6 +6018,19 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { } } + // trunc (select c, a, b) -> select c, (trunc a), (trunc b) + if (N0.getOpcode() == ISD::SELECT) { + EVT SrcVT = N0.getValueType(); + if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) && + TLI.isTruncateFree(SrcVT, VT)) { + SDLoc SL(N0); + SDValue Cond = N0.getOperand(0); + SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1)); + SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2)); + return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1); + } + } + // Fold a series of buildvector, bitcast, and truncate if possible. // For example fold // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to @@ -5564,8 +6058,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset) Opnds.push_back(BuildVect.getOperand(i)); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Opnds[0], - Opnds.size()); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); } } @@ -5587,6 +6080,20 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue Reduced = ReduceLoadWidth(N); if (Reduced.getNode()) return Reduced; + // Handle the case where the load remains an extending load even + // after truncation. 
+ if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + if (!LN0->isVolatile() && + LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) { + SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0), + VT, LN0->getChain(), LN0->getBasePtr(), + LN0->getMemoryVT(), + LN0->getMemOperand()); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1)); + return NewLoad; + } + } } // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)), // where ... are all 'undef'. @@ -5623,11 +6130,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { continue; } SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V); - AddToWorkList(NV.getNode()); + AddToWorklist(NV.getNode()); Opnds.push_back(NV); } - return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, - &Opnds[0], Opnds.size()); + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds); } } @@ -5654,8 +6160,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() || - LD1->getPointerInfo().getAddrSpace() != - LD2->getPointerInfo().getAddrSpace()) + LD1->getAddressSpace() != LD2->getAddressSpace()) return SDValue(); EVT LD1VT = LD1->getValueType(0); @@ -5691,14 +6196,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { if (!LegalTypes && N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() && VT.isVector()) { - bool isSimple = true; - for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) - if (N0.getOperand(i).getOpcode() != ISD::UNDEF && - N0.getOperand(i).getOpcode() != ISD::Constant && - N0.getOperand(i).getOpcode() != ISD::ConstantFP) { - isSimple = false; - break; - } + bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant(); EVT DestEltVT = N->getValueType(0).getVectorElementType(); 
assert(!DestEltVT.isVector() && @@ -5734,6 +6232,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && // Do not change the width of a volatile load. !cast<LoadSDNode>(N0)->isVolatile() && + // Do not remove the cast if the types differ in endian layout. + TLI.hasBigEndianPartOrdering(N0.getValueType()) == + TLI.hasBigEndianPartOrdering(VT) && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); @@ -5747,7 +6248,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { LN0->isVolatile(), LN0->isNonTemporal(), LN0->isInvariant(), OrigAlign, LN0->getTBAAInfo()); - AddToWorkList(N); + AddToWorklist(N); CombineTo(N0.getNode(), DAG.getNode(ISD::BITCAST, SDLoc(N0), N0.getValueType(), Load), @@ -5765,7 +6266,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { !VT.isVector() && !N0.getValueType().isVector()) { SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT, N0.getOperand(0)); - AddToWorkList(NewConv.getNode()); + AddToWorklist(NewConv.getNode()); APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); if (N0.getOpcode() == ISD::FNEG) @@ -5788,34 +6289,34 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { if (isTypeLegal(IntXVT)) { SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0), IntXVT, N0.getOperand(1)); - AddToWorkList(X.getNode()); + AddToWorklist(X.getNode()); // If X has a different width than the result/lhs, sext it or truncate it. unsigned VTWidth = VT.getSizeInBits(); if (OrigXWidth < VTWidth) { X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X); - AddToWorkList(X.getNode()); + AddToWorklist(X.getNode()); } else if (OrigXWidth > VTWidth) { // To get the sign bit in the right place, we have to shift it right // before truncating. 
X = DAG.getNode(ISD::SRL, SDLoc(X), X.getValueType(), X, DAG.getConstant(OrigXWidth-VTWidth, X.getValueType())); - AddToWorkList(X.getNode()); + AddToWorklist(X.getNode()); X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); - AddToWorkList(X.getNode()); + AddToWorklist(X.getNode()); } APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); X = DAG.getNode(ISD::AND, SDLoc(X), VT, X, DAG.getConstant(SignBit, VT)); - AddToWorkList(X.getNode()); + AddToWorklist(X.getNode()); SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT, N0.getOperand(0)); Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT, Cst, DAG.getConstant(~SignBit, VT)); - AddToWorkList(Cst.getNode()); + AddToWorklist(Cst.getNode()); return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst); } @@ -5871,10 +6372,9 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op); Ops.push_back(DAG.getNode(ISD::BITCAST, SDLoc(BV), DstEltVT, Op)); - AddToWorkList(Ops.back().getNode()); + AddToWorklist(Ops.back().getNode()); } - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, - &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops); } // Otherwise, we're growing or shrinking the elements. 
To avoid having to @@ -5930,8 +6430,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { } EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, - &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops); } // Finally, this must be the case where we are shrinking elements: each input @@ -5967,8 +6466,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); } - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, - &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops); } SDValue DAGCombiner::visitFADD(SDNode *N) { @@ -6389,7 +6887,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { if (N1CFP->isExactlyValue(-1.0) && (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) { SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0); - AddToWorkList(RHSNeg.getNode()); + AddToWorklist(RHSNeg.getNode()); return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg); } } @@ -6551,12 +7049,8 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); } - // The next optimizations are desireable only if SELECT_CC can be lowered. - // Check against MVT::Other for SELECT_CC, which is a workaround for targets - // having to say they don't support SELECT_CC on every type the DAG knows - // about, since there is no way to mark an opcode illegal at all value types - // (See also visitSELECT) - if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) { + // The next optimizations are desirable only if SELECT_CC can be lowered. 
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) { // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 && !VT.isVector() && @@ -6566,7 +7060,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { { N0.getOperand(0), N0.getOperand(1), DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT), N0.getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops); } // fold (sint_to_fp (zext (setcc x, y, cc))) -> @@ -6579,7 +7073,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1), DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT), N0.getOperand(0).getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops); } } @@ -6608,12 +7102,8 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); } - // The next optimizations are desireable only if SELECT_CC can be lowered. - // Check against MVT::Other for SELECT_CC, which is a workaround for targets - // having to say they don't support SELECT_CC on every type the DAG knows - // about, since there is no way to mark an opcode illegal at all value types - // (See also visitSELECT) - if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) { + // The next optimizations are desirable only if SELECT_CC can be lowered. 
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) { // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) if (N0.getOpcode() == ISD::SETCC && !VT.isVector() && @@ -6623,7 +7113,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { { N0.getOperand(0), N0.getOperand(1), DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT), N0.getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops); } } @@ -6681,7 +7171,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) { SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT, N0.getOperand(0), N1); - AddToWorkList(Tmp.getNode()); + AddToWorklist(Tmp.getNode()); return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, Tmp, N0.getOperand(1)); } @@ -6732,8 +7222,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && - ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || - TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { + TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType())) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(), @@ -6765,6 +7254,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading // constant pool values. + // TODO: We can also optimize for vectors here, but we need to make sure + // that the sign mask is created properly for each vector element. 
if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST && !VT.isVector() && N0.getNode()->hasOneUse() && @@ -6774,7 +7265,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { if (IntVT.isInteger() && !IntVT.isVector()) { Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int, DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); - AddToWorkList(Int.getNode()); + AddToWorklist(Int.getNode()); return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int); } @@ -6783,11 +7274,16 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { // (fneg (fmul c, x)) -> (fmul -c, x) if (N0.getOpcode() == ISD::FMUL) { ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); - if (CFP1) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0.getOperand(0), - DAG.getNode(ISD::FNEG, SDLoc(N), VT, - N0.getOperand(1))); + if (CFP1) { + APFloat CVal = CFP1->getValueAPF(); + CVal.changeSign(); + if (Level >= AfterLegalizeDAG && + (TLI.isFPImmLegal(CVal, N->getValueType(0)) || + TLI.isOperationLegal(ISD::ConstantFP, N->getValueType(0)))) + return DAG.getNode( + ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), + DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1))); + } } return SDValue(); @@ -6852,16 +7348,18 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading // constant pool values. + // TODO: We can also optimize for vectors here, but we need to make sure + // that the sign mask is created properly for each vector element. 
if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && N0.getOperand(0).getValueType().isInteger() && - !N0.getOperand(0).getValueType().isVector()) { + !VT.isVector()) { SDValue Int = N0.getOperand(0); EVT IntVT = Int.getValueType(); if (IntVT.isInteger() && !IntVT.isVector()) { Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int, DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); - AddToWorkList(Int.getNode()); + AddToWorklist(Int.getNode()); return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int); } @@ -6895,7 +7393,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) && (N1.getOperand(0).hasOneUse() && N1.getOperand(0).getOpcode() == ISD::SRL))) { - SDNode *Trunc = 0; + SDNode *Trunc = nullptr; if (N1.getOpcode() == ISD::TRUNCATE) { // Look pass the truncate. Trunc = N1.getNode(); @@ -6944,13 +7442,13 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { CombineTo(N, NewBRCond, false); // Truncate is dead. if (Trunc) { - removeFromWorkList(Trunc); + removeFromWorklist(Trunc); DAG.DeleteNode(Trunc); } // Replace the uses of SRL with SETCC - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(N1, SetCC); - removeFromWorkList(N1.getNode()); + removeFromWorklist(N1.getNode()); DAG.DeleteNode(N1.getNode()); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -6978,9 +7476,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { dbgs() << "\nWith: "; Tmp.getNode()->dump(&DAG); dbgs() << '\n'); - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(N1, Tmp); - removeFromWorkList(TheXor); + removeFromWorklist(TheXor); DAG.DeleteNode(TheXor); return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, Tmp, N2); @@ -7009,9 +7507,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { Op0, Op1, Equal ? 
ISD::SETEQ : ISD::SETNE); // Replace the uses of XOR with SETCC - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(N1, SetCC); - removeFromWorkList(N1.getNode()); + removeFromWorklist(N1.getNode()); DAG.DeleteNode(N1.getNode()); return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, SetCC, N2); @@ -7037,7 +7535,7 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) { SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()), CondLHS, CondRHS, CC->get(), SDLoc(N), false); - if (Simp.getNode()) AddToWorkList(Simp.getNode()); + if (Simp.getNode()) AddToWorklist(Simp.getNode()); // fold to a simpler setcc if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) @@ -7176,9 +7674,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // a copy of the original base pointer. SmallVector<SDNode *, 16> OtherUses; if (isa<ConstantSDNode>(Offset)) - for (SDNode::use_iterator I = BasePtr.getNode()->use_begin(), - E = BasePtr.getNode()->use_end(); I != E; ++I) { - SDNode *Use = *I; + for (SDNode *Use : BasePtr.getNode()->uses()) { if (Use == Ptr.getNode()) continue; @@ -7220,9 +7716,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { SmallPtrSet<const SDNode *, 32> Visited; SmallVector<const SDNode *, 16> Worklist; - for (SDNode::use_iterator I = Ptr.getNode()->use_begin(), - E = Ptr.getNode()->use_end(); I != E; ++I) { - SDNode *Use = *I; + for (SDNode *Use : Ptr.getNode()->uses()) { if (Use == N) continue; if (N->hasPredecessorHelper(Use, Visited, Worklist)) @@ -7251,7 +7745,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { dbgs() << "\nWith: "; Result.getNode()->dump(&DAG); dbgs() << '\n'); - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); if (isLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); @@ -7310,13 +7804,13 @@ bool 
DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { SDLoc(OtherUses[i]), OtherUses[i]->getValueType(0), NewOp1, NewOp2); DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse); - removeFromWorkList(OtherUses[i]); + removeFromWorklist(OtherUses[i]); DAG.DeleteNode(OtherUses[i]); } // Replace the uses of Ptr with uses of the updated base value. DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0)); - removeFromWorkList(Ptr.getNode()); + removeFromWorklist(Ptr.getNode()); DAG.DeleteNode(Ptr.getNode()); return true; @@ -7358,9 +7852,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { if (Ptr.getNode()->hasOneUse()) return false; - for (SDNode::use_iterator I = Ptr.getNode()->use_begin(), - E = Ptr.getNode()->use_end(); I != E; ++I) { - SDNode *Op = *I; + for (SDNode *Op : Ptr.getNode()->uses()) { if (Op == N || (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)) continue; @@ -7386,9 +7878,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { // Check for #1. bool TryNext = false; - for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(), - EE = BasePtr.getNode()->use_end(); II != EE; ++II) { - SDNode *Use = *II; + for (SDNode *Use : BasePtr.getNode()->uses()) { if (Use == Ptr.getNode()) continue; @@ -7396,9 +7886,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { // transformation. 
if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){ bool RealUse = false; - for (SDNode::use_iterator III = Use->use_begin(), - EEE = Use->use_end(); III != EEE; ++III) { - SDNode *UseUse = *III; + for (SDNode *UseUse : Use->uses()) { if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) RealUse = true; } @@ -7427,7 +7915,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { dbgs() << "\nWith: "; Result.getNode()->dump(&DAG); dbgs() << '\n'); - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); if (isLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); @@ -7441,7 +7929,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { // Replace the uses of Use with uses of the updated base value. DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0), Result.getValue(isLoad ? 1 : 0)); - removeFromWorkList(Op); + removeFromWorklist(Op); DAG.DeleteNode(Op); return true; } @@ -7474,11 +7962,11 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG); dbgs() << "\n"); - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); if (N->use_empty()) { - removeFromWorkList(N); + removeFromWorklist(N); DAG.DeleteNode(N); } @@ -7494,12 +7982,12 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG); dbgs() << " and 2 other values\n"); - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), DAG.getUNDEF(N->getValueType(1))); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain); - removeFromWorkList(N); + removeFromWorklist(N); DAG.DeleteNode(N); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} @@ -7537,7 +8025,12 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); - if (UseAA) { +#ifndef NDEBUG + if (CombinerAAOnlyFunc.getNumOccurrences() && + CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) + UseAA = false; +#endif + if (UseAA && LD->isUnindexed()) { // Walk up chain skipping non-aliasing memory nodes. SDValue BetterChain = FindBetterChain(N, Chain); @@ -7561,7 +8054,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { MVT::Other, Chain, ReplLoad.getValue(1)); // Make sure the new and old chains are cleaned up. - AddToWorkList(Token.getNode()); + AddToWorklist(Token.getNode()); // Replace uses with load result and token factor. Don't add users // to work list. @@ -7686,8 +8179,8 @@ struct LoadedSlice { // This is used to get some contextual information about legal types, etc. SelectionDAG *DAG; - LoadedSlice(SDNode *Inst = NULL, LoadSDNode *Origin = NULL, - unsigned Shift = 0, SelectionDAG *DAG = NULL) + LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr, + unsigned Shift = 0, SelectionDAG *DAG = nullptr) : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {} LoadedSlice(const LoadedSlice &LS) @@ -7783,7 +8276,7 @@ struct LoadedSlice { /// \brief Get the offset in bytes of this slice in the original chunk of /// bits. - /// \pre DAG != NULL. + /// \pre DAG != nullptr. uint64_t getOffsetFromBase() const { assert(DAG && "Missing context."); bool IsBigEndian = @@ -7888,14 +8381,6 @@ struct LoadedSlice { }; } -/// \brief Sorts LoadedSlice according to their offset. 
-struct LoadedSliceSorter { - bool operator()(const LoadedSlice &LHS, const LoadedSlice &RHS) { - assert(LHS.Origin == RHS.Origin && "Different bases not implemented."); - return LHS.getOffsetFromBase() < RHS.getOffsetFromBase(); - } -}; - /// \brief Check that all bits set in \p UsedBits form a dense region, i.e., /// \p UsedBits looks like 0..0 1..1 0..0. static bool areUsedBitsDense(const APInt &UsedBits) { @@ -7939,12 +8424,16 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices, // Sort the slices so that elements that are likely to be next to each // other in memory are next to each other in the list. - std::sort(LoadedSlices.begin(), LoadedSlices.end(), LoadedSliceSorter()); + std::sort(LoadedSlices.begin(), LoadedSlices.end(), + [](const LoadedSlice &LHS, const LoadedSlice &RHS) { + assert(LHS.Origin == RHS.Origin && "Different bases not implemented."); + return LHS.getOffsetFromBase() < RHS.getOffsetFromBase(); + }); const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo(); // First (resp. Second) is the first (resp. Second) potentially candidate // to be placed in a paired load. - const LoadedSlice *First = NULL; - const LoadedSlice *Second = NULL; + const LoadedSlice *First = nullptr; + const LoadedSlice *Second = nullptr; for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice, // Set the beginning of the pair. First = Second) { @@ -7966,7 +8455,7 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices, unsigned RequiredAlignment = 0; if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) { // move to the next pair, this type is hopeless. - Second = NULL; + Second = nullptr; continue; } // Check if we meet the alignment requirement. @@ -7980,7 +8469,7 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices, assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!"); --GlobalLSCost.Loads; // Move to the next pair. 
- Second = NULL; + Second = nullptr; } } @@ -8075,8 +8564,8 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) { // The width of the type must be a power of 2 and greater than 8-bits. // Otherwise the load cannot be represented in LLVM IR. - // Moreover, if we shifted with a non 8-bits multiple, the slice - // will be accross several bytes. We do not support that. + // Moreover, if we shifted with a non-8-bits multiple, the slice + // will be across several bytes. We do not support that. unsigned Width = User->getValueSizeInBits(0); if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7)) return 0; @@ -8124,7 +8613,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) { } SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, - &ArgChains[0], ArgChains.size()); + ArgChains); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); return true; } @@ -8219,14 +8708,14 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, // that uses this. If not, this is not a replacement. APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(), ByteShift*8, (ByteShift+NumBytes)*8); - if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0; + if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr; // Check that it is legal on the target to do this. It is legal if the new // VT we're shrinking to (i8/i16/i32) is legal or we're still before type // legalization. MVT VT = MVT::getIntegerVT(NumBytes*8); if (!DC->isTypeLegal(VT)) - return 0; + return nullptr; // Okay, we can do this! Replace the 'St' store with a store of IVal that is // shifted by ByteShift and truncated down to NumBytes. 
@@ -8372,10 +8861,10 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { ST->getPointerInfo().getWithOffset(PtrOff), false, false, NewAlign); - AddToWorkList(NewPtr.getNode()); - AddToWorkList(NewLD.getNode()); - AddToWorkList(NewVal.getNode()); - WorkListRemover DeadNodes(*this); + AddToWorklist(NewPtr.getNode()); + AddToWorklist(NewLD.getNode()); + AddToWorklist(NewVal.getNode()); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1)); ++OpsNarrowed; return NewST; @@ -8430,9 +8919,9 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { ST->getPointerInfo(), false, false, STAlign); - AddToWorkList(NewLD.getNode()); - AddToWorkList(NewST.getNode()); - WorkListRemover DeadNodes(*this); + AddToWorklist(NewLD.getNode()); + AddToWorklist(NewST.getNode()); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1)); ++LdStFP2Int; return NewST; @@ -8543,17 +9032,6 @@ struct MemOpLink { unsigned SequenceNum; }; -/// Sorts store nodes in a link according to their offset from a shared -// base ptr. 
-struct ConsecutiveMemoryChainSorter { - bool operator()(MemOpLink LHS, MemOpLink RHS) { - return - LHS.OffsetFromBase < RHS.OffsetFromBase || - (LHS.OffsetFromBase == RHS.OffsetFromBase && - LHS.SequenceNum > RHS.SequenceNum); - } -}; - bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { EVT MemVT = St->getMemoryVT(); int64_t ElementSizeBytes = MemVT.getSizeInBits()/8; @@ -8651,7 +9129,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { break; } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) { if (Ldn->isVolatile()) { - Index = NULL; + Index = nullptr; break; } @@ -8660,7 +9138,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { NextInChain = Ldn->getChain().getNode(); continue; } else { - Index = NULL; + Index = nullptr; break; } } @@ -8672,7 +9150,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // Sort the memory operands according to their distance from the base pointer. std::sort(StoreNodes.begin(), StoreNodes.end(), - ConsecutiveMemoryChainSorter()); + [](MemOpLink LHS, MemOpLink RHS) { + return LHS.OffsetFromBase < RHS.OffsetFromBase || + (LHS.OffsetFromBase == RHS.OffsetFromBase && + LHS.SequenceNum > RHS.SequenceNum); + }); // Scan the memory operations on the chain and find the first non-consecutive // store memory address. @@ -8720,7 +9202,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) { NonZero |= !C->getConstantFPValue()->isNullValue(); } else { - // Non constant. + // Non-constant. break; } @@ -8831,7 +9313,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // Since we know that St is redundant, just iterate. 
while (!St->use_empty()) DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); - removeFromWorkList(St); + removeFromWorklist(St); DAG.DeleteNode(St); } @@ -9006,7 +9488,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { continue; StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain()); - removeFromWorkList(St); + removeFromWorklist(St); DAG.DeleteNode(St); } @@ -9128,7 +9610,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); - if (UseAA) { +#ifndef NDEBUG + if (CombinerAAOnlyFunc.getNumOccurrences() && + CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) + UseAA = false; +#endif + if (UseAA && ST->isUnindexed()) { // Walk up chain skipping non-aliasing memory nodes. SDValue BetterChain = FindBetterChain(N, Chain); @@ -9150,7 +9637,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { MVT::Other, Chain, ReplStore); // Make sure the new and old chains are cleaned up. - AddToWorkList(Token.getNode()); + AddToWorklist(Token.getNode()); // Don't add users to work list. return CombineTo(N, Token, false); @@ -9172,7 +9659,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { APInt::getLowBitsSet( Value.getValueType().getScalarType().getSizeInBits(), ST->getMemoryVT().getScalarType().getSizeInBits())); - AddToWorkList(Value.getNode()); + AddToWorklist(Value.getNode()); if (Shorter.getNode()) return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(), ST->getMemOperand()); @@ -9251,6 +9738,27 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { return SDValue(); unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + // Canonicalize insert_vector_elt dag nodes. 
+ // Example: + // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1) + // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0) + // + // Do this only if the child insert_vector node has one use; also + // do this only if indices are both constants and Idx1 < Idx0. + if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse() + && isa<ConstantSDNode>(InVec.getOperand(2))) { + unsigned OtherElt = + cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue(); + if (Elt < OtherElt) { + // Swap nodes. + SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT, + InVec.getOperand(0), InVal, EltNo); + AddToWorklist(NewOp.getNode()); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()), + VT, NewOp, InVec.getOperand(1), InVec.getOperand(2)); + } + } + // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially // be converted to a BUILD_VECTOR). Fill in the Ops vector with the // vector elements. @@ -9280,8 +9788,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { } // Return the new vector - return DAG.getNode(ISD::BUILD_VECTOR, dl, - VT, &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { @@ -9309,9 +9816,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // We only perform this optimization before the op legalization phase because // we may introduce new vector instructions which are not backed by TD // patterns. For example on AVX, extracting elements from a wide vector - // without using extract_subvector. + // without using extract_subvector. However, if we can find an underlying + // scalar value, then we can always use that. 
if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE - && ConstEltNo && !LegalOperations) { + && ConstEltNo) { int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); int NumElem = VT.getVectorNumElements(); ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec); @@ -9323,16 +9831,32 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { return DAG.getUNDEF(NVT); // Select the right vector half to extract from. + SDValue SVInVec; if (OrigElt < NumElem) { - InVec = InVec->getOperand(0); + SVInVec = InVec->getOperand(0); } else { - InVec = InVec->getOperand(1); + SVInVec = InVec->getOperand(1); OrigElt -= NumElem; } - EVT IndexTy = TLI.getVectorIdxTy(); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, - InVec, DAG.getConstant(OrigElt, IndexTy)); + if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) { + SDValue InOp = SVInVec.getOperand(OrigElt); + if (InOp.getValueType() != NVT) { + assert(InOp.getValueType().isInteger() && NVT.isInteger()); + InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT); + } + + return InOp; + } + + // FIXME: We should handle recursing on other vector shuffles and + // scalar_to_vector here as well. 
+ + if (!LegalOperations) { + EVT IndexTy = TLI.getVectorIdxTy(); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, + SVInVec, DAG.getConstant(OrigElt, IndexTy)); + } } // Perform only after legalization to ensure build_vector / vector_shuffle @@ -9370,8 +9894,8 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { NewLoad = true; } - LoadSDNode *LN0 = NULL; - const ShuffleVectorSDNode *SVN = NULL; + LoadSDNode *LN0 = nullptr; + const ShuffleVectorSDNode *SVN = nullptr; if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast<LoadSDNode>(InVec); } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && @@ -9478,16 +10002,16 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { else Load = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, Load); } - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) }; SDValue To[] = { Load, Chain }; DAG.ReplaceAllUsesOfValuesWith(From, To, 2); // Since we're explcitly calling ReplaceAllUses, add the new node to the // worklist explicitly as well. - AddToWorkList(Load.getNode()); - AddUsersToWorkList(Load.getNode()); // Add users too + AddToWorklist(Load.getNode()); + AddUsersToWorklist(Load.getNode()); // Add users too // Make sure to revisit this node to clean it up; it will usually be dead. - AddToWorkList(N); + AddToWorklist(N); return SDValue(N, 0); } @@ -9596,10 +10120,10 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { if (!isTypeLegal(VecVT)) return SDValue(); // Make the new BUILD_VECTOR. - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], Ops.size()); + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops); // The new BUILD_VECTOR node has the potential to be further optimized. - AddToWorkList(BV.getNode()); + AddToWorklist(BV.getNode()); // Bitcast to the desired type. 
return DAG.getNode(ISD::BITCAST, dl, VT, BV); } @@ -9664,9 +10188,8 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { else Opnds.push_back(In.getOperand(0)); } - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, - &Opnds[0], Opnds.size()); - AddToWorkList(BV.getNode()); + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Opnds); + AddToWorklist(BV.getNode()); return DAG.getNode(Opcode, dl, VT, BV); } @@ -9706,7 +10229,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // constant index, bail out. if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT || !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) { - VecIn1 = VecIn2 = SDValue(0, 0); + VecIn1 = VecIn2 = SDValue(nullptr, 0); break; } @@ -9715,18 +10238,18 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2) continue; - if (VecIn1.getNode() == 0) { + if (!VecIn1.getNode()) { VecIn1 = ExtractedFromVec; - } else if (VecIn2.getNode() == 0) { + } else if (!VecIn2.getNode()) { VecIn2 = ExtractedFromVec; } else { // Too many inputs. - VecIn1 = VecIn2 = SDValue(0, 0); + VecIn1 = VecIn2 = SDValue(nullptr, 0); break; } } - // If everything is good, we can make a shuffle operation. + // If everything is good, we can make a shuffle operation. if (VecIn1.getNode()) { SmallVector<int, 8> Mask; for (unsigned i = 0; i != NumInScalars; ++i) { @@ -9756,7 +10279,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // Attempt to transform a single input vector to the correct type. if ((VT != VecIn1.getValueType())) { // We don't support shuffeling between TWO values of different types. 
- if (VecIn2.getNode() != 0) + if (VecIn2.getNode()) return SDValue(); // We only support widening of vectors which are half the size of the @@ -9839,6 +10362,39 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { } } + // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...)) + // -> (BUILD_VECTOR A, B, ..., C, D, ...) + if (N->getNumOperands() == 2 && + N->getOperand(0).getOpcode() == ISD::BUILD_VECTOR && + N->getOperand(1).getOpcode() == ISD::BUILD_VECTOR) { + EVT VT = N->getValueType(0); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SmallVector<SDValue, 8> Opnds; + unsigned BuildVecNumElts = N0.getNumOperands(); + + EVT SclTy0 = N0.getOperand(0)->getValueType(0); + EVT SclTy1 = N1.getOperand(0)->getValueType(0); + if (SclTy0.isFloatingPoint()) { + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(N0.getOperand(i)); + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(N1.getOperand(i)); + } else { + // If BUILD_VECTOR are from built from integer, they may have different + // operand types. Get the smaller type and truncate all operands to it. + EVT MinTy = SclTy0.bitsLE(SclTy1) ? SclTy0 : SclTy1; + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy, + N0.getOperand(i))); + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy, + N1.getOperand(i))); + } + + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); + } + // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR // nodes often generate nop CONCAT_VECTOR nodes. 
// Scan the CONCAT_VECTOR operands and look for a CONCAT operations that @@ -9993,8 +10549,7 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { } } - return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops.data(), - Ops.size()); + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops); } SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { @@ -10110,22 +10665,19 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } // If this shuffle node is simply a swizzle of another shuffle node, - // and it reverses the swizzle of the previous shuffle then we can - // optimize shuffle(shuffle(x, undef), undef) -> x. + // then try to simplify it. if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && N1.getOpcode() == ISD::UNDEF) { ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); - // Shuffle nodes can only reverse shuffles with a single non-undef value. - if (N0.getOperand(1).getOpcode() != ISD::UNDEF) - return SDValue(); - // The incoming shuffle must be of the same type as the result of the // current shuffle. assert(OtherSV->getOperand(0).getValueType() == VT && "Shuffle types don't match"); + SmallVector<int, 4> Mask; + // Compute the combined shuffle mask. for (unsigned i = 0; i != NumElts; ++i) { int Idx = SVN->getMaskElt(i); assert(Idx < (int)NumElts && "Index references undef operand"); @@ -10133,13 +10685,174 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // shuffle. Adopt the incoming index. if (Idx >= 0) Idx = OtherSV->getMaskElt(Idx); + Mask.push_back(Idx); + } + + bool CommuteOperands = false; + if (N0.getOperand(1).getOpcode() != ISD::UNDEF) { + // To be valid, the combine shuffle mask should only reference elements + // from one of the two vectors in input to the inner shufflevector. + bool IsValidMask = true; + for (unsigned i = 0; i != NumElts && IsValidMask; ++i) + // See if the combined mask only reference undefs or elements coming + // from the first shufflevector operand. 
+ IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] < NumElts; + + if (!IsValidMask) { + IsValidMask = true; + for (unsigned i = 0; i != NumElts && IsValidMask; ++i) + // Check that all the elements come from the second shuffle operand. + IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] >= NumElts; + CommuteOperands = IsValidMask; + } - // The combined shuffle must map each index to itself. - if (Idx >= 0 && (unsigned)Idx != i) + // Early exit if the combined shuffle mask is not valid. + if (!IsValidMask) return SDValue(); } - return OtherSV->getOperand(0); + // See if this pair of shuffles can be safely folded according to either + // of the following rules: + // shuffle(shuffle(x, y), undef) -> x + // shuffle(shuffle(x, undef), undef) -> x + // shuffle(shuffle(x, y), undef) -> y + bool IsIdentityMask = true; + unsigned BaseMaskIndex = CommuteOperands ? NumElts : 0; + for (unsigned i = 0; i != NumElts && IsIdentityMask; ++i) { + // Skip Undefs. + if (Mask[i] < 0) + continue; + + // The combined shuffle must map each index to itself. + IsIdentityMask = (unsigned)Mask[i] == i + BaseMaskIndex; + } + + if (IsIdentityMask) { + if (CommuteOperands) + // optimize shuffle(shuffle(x, y), undef) -> y. + return OtherSV->getOperand(1); + + // optimize shuffle(shuffle(x, undef), undef) -> x + // optimize shuffle(shuffle(x, y), undef) -> x + return OtherSV->getOperand(0); + } + + // It may still be beneficial to combine the two shuffles if the + // resulting shuffle is legal. + if (TLI.isTypeLegal(VT) && TLI.isShuffleMaskLegal(Mask, VT)) { + if (!CommuteOperands) + // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3). 
+ // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3) + return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1, + &Mask[0]); + + // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(undef, y, M3) + return DAG.getVectorShuffle(VT, SDLoc(N), N1, N0->getOperand(1), + &Mask[0]); + } + } + + // Canonicalize shuffles according to rules: + // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A) + // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B) + // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B) + if (N1.getOpcode() == ISD::VECTOR_SHUFFLE && N0.getOpcode() != ISD::UNDEF && + N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && + TLI.isTypeLegal(VT)) { + // The incoming shuffle must be of the same type as the result of the + // current shuffle. + assert(N1->getOperand(0).getValueType() == VT && + "Shuffle types don't match"); + + SDValue SV0 = N1->getOperand(0); + SDValue SV1 = N1->getOperand(1); + bool HasSameOp0 = N0 == SV0; + bool IsSV1Undef = SV1.getOpcode() == ISD::UNDEF; + if (HasSameOp0 || IsSV1Undef || N0 == SV1) + // Commute the operands of this shuffle so that next rule + // will trigger. + return DAG.getCommutedVectorShuffle(*SVN); + } + + // Try to fold according to rules: + // shuffle(shuffle(A, B, M0), B, M1) -> shuffle(A, B, M2) + // shuffle(shuffle(A, B, M0), A, M1) -> shuffle(A, B, M2) + // shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(A, B, M2) + // shuffle(shuffle(A, Undef, M0), A, M1) -> shuffle(A, Undef, M2) + // Don't try to fold shuffles with illegal type. + if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && + N1.getOpcode() != ISD::UNDEF && TLI.isTypeLegal(VT)) { + ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); + + // The incoming shuffle must be of the same type as the result of the + // current shuffle. 
+ assert(OtherSV->getOperand(0).getValueType() == VT && + "Shuffle types don't match"); + + SDValue SV0 = OtherSV->getOperand(0); + SDValue SV1 = OtherSV->getOperand(1); + bool HasSameOp0 = N1 == SV0; + bool IsSV1Undef = SV1.getOpcode() == ISD::UNDEF; + if (!HasSameOp0 && !IsSV1Undef && N1 != SV1) + // Early exit. + return SDValue(); + + SmallVector<int, 4> Mask; + // Compute the combined shuffle mask for a shuffle with SV0 as the first + // operand, and SV1 as the second operand. + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if (Idx < 0) { + // Propagate Undef. + Mask.push_back(Idx); + continue; + } + + if (Idx < (int)NumElts) { + Idx = OtherSV->getMaskElt(Idx); + if (IsSV1Undef && Idx >= (int) NumElts) + Idx = -1; // Propagate Undef. + } else + Idx = HasSameOp0 ? Idx - NumElts : Idx; + + Mask.push_back(Idx); + } + + // Avoid introducing shuffles with illegal mask. + if (TLI.isShuffleMaskLegal(Mask, VT)) { + if (IsSV1Undef) + // shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(A, B, M2) + // shuffle(shuffle(A, Undef, M0), A, M1) -> shuffle(A, Undef, M2) + return DAG.getVectorShuffle(VT, SDLoc(N), SV0, N1, &Mask[0]); + return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N2 = N->getOperand(2); + + // If the input vector is a concatenation, and the insert replaces + // one of the halves, we can optimize into a single concat_vectors. 
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS && + N0->getNumOperands() == 2 && N2.getOpcode() == ISD::Constant) { + APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue(); + EVT VT = N->getValueType(0); + + // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) -> + // (concat_vectors Z, Y) + if (InsIdx == 0) + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, + N->getOperand(1), N0.getOperand(1)); + + // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) -> + // (concat_vectors X, Z) + if (InsIdx == VT.getVectorNumElements()/2) + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, + N0.getOperand(0), N->getOperand(1)); } return SDValue(); @@ -10182,8 +10895,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { EVT EltVT = RVT.getVectorElementType(); SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(), DAG.getConstant(0, EltVT)); - SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), - RVT, &ZeroOps[0], ZeroOps.size()); + SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), RVT, ZeroOps); LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS); SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); return DAG.getNode(ISD::BITCAST, dl, VT, Shuf); @@ -10207,18 +10919,15 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { // this operation. if (LHS.getOpcode() == ISD::BUILD_VECTOR && RHS.getOpcode() == ISD::BUILD_VECTOR) { + // Check if both vectors are constants. If not bail out. + if (!(cast<BuildVectorSDNode>(LHS)->isConstant() && + cast<BuildVectorSDNode>(RHS)->isConstant())) + return SDValue(); + SmallVector<SDValue, 8> Ops; for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) { SDValue LHSOp = LHS.getOperand(i); SDValue RHSOp = RHS.getOperand(i); - // If these two elements can't be folded, bail out. 
- if ((LHSOp.getOpcode() != ISD::UNDEF && - LHSOp.getOpcode() != ISD::Constant && - LHSOp.getOpcode() != ISD::ConstantFP) || - (RHSOp.getOpcode() != ISD::UNDEF && - RHSOp.getOpcode() != ISD::Constant && - RHSOp.getOpcode() != ISD::ConstantFP)) - break; // Can't fold divide by zero. if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV || @@ -10251,12 +10960,32 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { FoldOp.getOpcode() != ISD::ConstantFP) break; Ops.push_back(FoldOp); - AddToWorkList(FoldOp.getNode()); + AddToWorklist(FoldOp.getNode()); } if (Ops.size() == LHS.getNumOperands()) - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), - LHS.getValueType(), &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), Ops); + } + + // Type legalization might introduce new shuffles in the DAG. + // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask))) + // -> (shuffle (VBinOp (A, B)), Undef, Mask). + if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) && + isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() && + LHS.getOperand(1).getOpcode() == ISD::UNDEF && + RHS.getOperand(1).getOpcode() == ISD::UNDEF) { + ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS); + ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS); + + if (SVN0->getMask().equals(SVN1->getMask())) { + EVT VT = N->getValueType(0); + SDValue UndefVector = LHS.getOperand(1); + SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT, + LHS.getOperand(0), RHS.getOperand(0)); + AddUsersToWorklist(N); + return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector, + &SVN0->getMask()[0]); + } } return SDValue(); @@ -10285,14 +11014,13 @@ SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) { FoldOp.getOpcode() != ISD::ConstantFP) break; Ops.push_back(FoldOp); - AddToWorkList(FoldOp.getNode()); + AddToWorklist(FoldOp.getNode()); } if (Ops.size() != N0.getNumOperands()) return SDValue(); - return DAG.getNode(ISD::BUILD_VECTOR, 
SDLoc(N), - N0.getValueType(), &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N0.getValueType(), Ops); } SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, @@ -10313,7 +11041,7 @@ SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, N0.getValueType(), SCC.getOperand(0), SCC.getOperand(1), SCC.getOperand(4)); - AddToWorkList(SETCC.getNode()); + AddToWorklist(SETCC.getNode()); return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SCC.getOperand(2), SCC.getOperand(3), SETCC); } @@ -10454,7 +11182,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, // Determine if the condition we're dealing with is constant SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1, CC, DL, false); - if (SCC.getNode()) AddToWorkList(SCC.getNode()); + if (SCC.getNode()) AddToWorklist(SCC.getNode()); ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode()); // fold select_cc true, x, y -> x @@ -10494,7 +11222,9 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) { if (TLI.isTypeLegal(N2.getValueType()) && (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) != - TargetLowering::Legal) && + TargetLowering::Legal && + !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) && + !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) && // If both constants have multiple uses, then we won't need to do an // extra load, they are likely around in registers for other users. 
(TV->hasOneUse() || FV->hasOneUse())) { @@ -10520,13 +11250,13 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue Cond = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC); - AddToWorkList(Cond.getNode()); + AddToWorklist(Cond.getNode()); SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero); - AddToWorkList(CstOffset.getNode()); + AddToWorklist(CstOffset.getNode()); CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset); - AddToWorkList(CPIdx.getNode()); + AddToWorklist(CPIdx.getNode()); return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), false, false, false, Alignment); @@ -10551,11 +11281,11 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, getShiftAmountTy(N0.getValueType())); SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), XType, N0, ShCt); - AddToWorkList(Shift.getNode()); + AddToWorklist(Shift.getNode()); if (XType.bitsGT(AType)) { Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); - AddToWorkList(Shift.getNode()); + AddToWorklist(Shift.getNode()); } return DAG.getNode(ISD::AND, DL, AType, Shift, N2); @@ -10565,11 +11295,11 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, XType, N0, DAG.getConstant(XType.getSizeInBits()-1, getShiftAmountTy(N0.getValueType()))); - AddToWorkList(Shift.getNode()); + AddToWorklist(Shift.getNode()); if (XType.bitsGT(AType)) { Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); - AddToWorkList(Shift.getNode()); + AddToWorklist(Shift.getNode()); } return DAG.getNode(ISD::AND, DL, AType, Shift, N2); @@ -10609,8 +11339,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, // fold select C, 16, 0 -> shl C, 4 if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() && - TLI.getBooleanContents(N0.getValueType().isVector()) == - TargetLowering::ZeroOrOneBooleanContent) { + 
TLI.getBooleanContents(N0.getValueType()) == + TargetLowering::ZeroOrOneBooleanContent) { // If the caller doesn't want us to simplify this into a zext of a compare, // don't do it. @@ -10639,8 +11369,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, N2.getValueType(), SCC); } - AddToWorkList(SCC.getNode()); - AddToWorkList(Temp.getNode()); + AddToWorklist(SCC.getNode()); + AddToWorklist(Temp.getNode()); if (N2C->getAPIntValue() == 1) return Temp; @@ -10701,7 +11431,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, // select_cc setlt X, 1, -X, X -> // Y = sra (X, size(X)-1); xor (add (X, Y), Y) if (N1C) { - ConstantSDNode *SubC = NULL; + ConstantSDNode *SubC = nullptr; if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) || (N1C->isAllOnesValue() && CC == ISD::SETGT)) && N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) @@ -10719,8 +11449,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, getShiftAmountTy(N0.getValueType()))); SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), XType, N0, Shift); - AddToWorkList(Shift.getNode()); - AddToWorkList(Add.getNode()); + AddToWorklist(Shift.getNode()); + AddToWorklist(Add.getNode()); return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); } } @@ -10742,26 +11472,42 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, /// multiplying by a magic number. See: /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> SDValue DAGCombiner::BuildSDIV(SDNode *N) { + ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); + if (!C) + return SDValue(); + + // Avoid division by zero. 
+ if (!C->getAPIntValue()) + return SDValue(); + std::vector<SDNode*> Built; - SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, &Built); + SDValue S = + TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built); - for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end(); - ii != ee; ++ii) - AddToWorkList(*ii); + for (SDNode *N : Built) + AddToWorklist(N); return S; } -/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant, +/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. See: /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> SDValue DAGCombiner::BuildUDIV(SDNode *N) { + ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); + if (!C) + return SDValue(); + + // Avoid division by zero. + if (!C->getAPIntValue()) + return SDValue(); + std::vector<SDNode*> Built; - SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, &Built); + SDValue S = + TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built); - for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end(); - ii != ee; ++ii) - AddToWorkList(*ii); + for (SDNode *N : Built) + AddToWorklist(N); return S; } @@ -10771,7 +11517,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, const GlobalValue *&GV, const void *&CV) { // Assume it is a primitive operation. - Base = Ptr; Offset = 0; GV = 0; CV = 0; + Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr; // If it's an adding a simple constant then integrate the offset. if (Base.getOpcode() == ISD::ADD) { @@ -10805,31 +11551,27 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, /// isAlias - Return true if there is any possibility that the two addresses /// overlap. 
-bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1, - const Value *SrcValue1, int SrcValueOffset1, - unsigned SrcValueAlign1, - const MDNode *TBAAInfo1, - SDValue Ptr2, int64_t Size2, bool IsVolatile2, - const Value *SrcValue2, int SrcValueOffset2, - unsigned SrcValueAlign2, - const MDNode *TBAAInfo2) const { +bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { // If they are the same then they must be aliases. - if (Ptr1 == Ptr2) return true; + if (Op0->getBasePtr() == Op1->getBasePtr()) return true; // If they are both volatile then they cannot be reordered. - if (IsVolatile1 && IsVolatile2) return true; + if (Op0->isVolatile() && Op1->isVolatile()) return true; // Gather base node and offset information. SDValue Base1, Base2; int64_t Offset1, Offset2; const GlobalValue *GV1, *GV2; const void *CV1, *CV2; - bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1); - bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2); + bool isFrameIndex1 = FindBaseOffset(Op0->getBasePtr(), + Base1, Offset1, GV1, CV1); + bool isFrameIndex2 = FindBaseOffset(Op1->getBasePtr(), + Base2, Offset2, GV2, CV2); // If they have a same base address then check to see if they overlap. if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2))) - return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1); + return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 || + (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1); // It is possible for different frame indices to alias each other, mostly // when tail call optimization reuses return address slots for arguments. 
@@ -10839,7 +11581,8 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1, MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex()); Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex()); - return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1); + return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 || + (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1); } // Otherwise, if we know what the bases are, and they aren't identical, then @@ -10851,28 +11594,44 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1, // compared to the size and offset of the access, we may be able to prove they // do not alias. This check is conservative for now to catch cases created by // splitting vector types. - if ((SrcValueAlign1 == SrcValueAlign2) && - (SrcValueOffset1 != SrcValueOffset2) && - (Size1 == Size2) && (SrcValueAlign1 > Size1)) { - int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1; - int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1; + if ((Op0->getOriginalAlignment() == Op1->getOriginalAlignment()) && + (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) && + (Op0->getMemoryVT().getSizeInBits() >> 3 == + Op1->getMemoryVT().getSizeInBits() >> 3) && + (Op0->getOriginalAlignment() > (Op0->getMemoryVT().getSizeInBits() >> 3))) { + int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment(); + int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment(); // There is no overlap between these relatively aligned accesses of similar // size, return no alias. 
- if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1) + if ((OffAlign1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign2 || + (OffAlign2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign1) return false; } bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 ? CombinerGlobalAA : TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); - if (UseAA && SrcValue1 && SrcValue2) { +#ifndef NDEBUG + if (CombinerAAOnlyFunc.getNumOccurrences() && + CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) + UseAA = false; +#endif + if (UseAA && + Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) { // Use alias analysis information. - int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2); - int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset; - int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset; + int64_t MinOffset = std::min(Op0->getSrcValueOffset(), + Op1->getSrcValueOffset()); + int64_t Overlap1 = (Op0->getMemoryVT().getSizeInBits() >> 3) + + Op0->getSrcValueOffset() - MinOffset; + int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) + + Op1->getSrcValueOffset() - MinOffset; AliasAnalysis::AliasResult AAResult = - AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, TBAAInfo1), - AliasAnalysis::Location(SrcValue2, Overlap2, TBAAInfo2)); + AA.alias(AliasAnalysis::Location(Op0->getMemOperand()->getValue(), + Overlap1, + UseTBAA ? Op0->getTBAAInfo() : nullptr), + AliasAnalysis::Location(Op1->getMemOperand()->getValue(), + Overlap2, + UseTBAA ? 
Op1->getTBAAInfo() : nullptr)); if (AAResult == AliasAnalysis::NoAlias) return false; } @@ -10881,44 +11640,6 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1, return true; } -bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) { - SDValue Ptr0, Ptr1; - int64_t Size0, Size1; - bool IsVolatile0, IsVolatile1; - const Value *SrcValue0, *SrcValue1; - int SrcValueOffset0, SrcValueOffset1; - unsigned SrcValueAlign0, SrcValueAlign1; - const MDNode *SrcTBAAInfo0, *SrcTBAAInfo1; - FindAliasInfo(Op0, Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0, - SrcValueAlign0, SrcTBAAInfo0); - FindAliasInfo(Op1, Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1, - SrcValueAlign1, SrcTBAAInfo1); - return isAlias(Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0, - SrcValueAlign0, SrcTBAAInfo0, - Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1, - SrcValueAlign1, SrcTBAAInfo1); -} - -/// FindAliasInfo - Extracts the relevant alias information from the memory -/// node. Returns true if the operand was a nonvolatile load. -bool DAGCombiner::FindAliasInfo(SDNode *N, - SDValue &Ptr, int64_t &Size, bool &IsVolatile, - const Value *&SrcValue, - int &SrcValueOffset, - unsigned &SrcValueAlign, - const MDNode *&TBAAInfo) const { - LSBaseSDNode *LS = cast<LSBaseSDNode>(N); - - Ptr = LS->getBasePtr(); - Size = LS->getMemoryVT().getSizeInBits() >> 3; - IsVolatile = LS->isVolatile(); - SrcValue = LS->getSrcValue(); - SrcValueOffset = LS->getSrcValueOffset(); - SrcValueAlign = LS->getOriginalAlignment(); - TBAAInfo = LS->getTBAAInfo(); - return isa<LoadSDNode>(LS) && !IsVolatile; -} - /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, /// looking for aliasing nodes and adding them to the Aliases vector. void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, @@ -10927,15 +11648,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, SmallPtrSet<SDNode *, 16> Visited; // Visited node set. 
// Get alias information for node. - SDValue Ptr; - int64_t Size; - bool IsVolatile; - const Value *SrcValue; - int SrcValueOffset; - unsigned SrcValueAlign; - const MDNode *SrcTBAAInfo; - bool IsLoad = FindAliasInfo(N, Ptr, Size, IsVolatile, SrcValue, - SrcValueOffset, SrcValueAlign, SrcTBAAInfo); + bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile(); // Starting off. Chains.push_back(OriginalChain); @@ -10959,7 +11672,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, if (Depth > 6 || Aliases.size() == 2) { Aliases.clear(); Aliases.push_back(OriginalChain); - break; + return; } // Don't bother if we've been before. @@ -10974,24 +11687,12 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, case ISD::LOAD: case ISD::STORE: { // Get alias information for Chain. - SDValue OpPtr; - int64_t OpSize; - bool OpIsVolatile; - const Value *OpSrcValue; - int OpSrcValueOffset; - unsigned OpSrcValueAlign; - const MDNode *OpSrcTBAAInfo; - bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize, - OpIsVolatile, OpSrcValue, OpSrcValueOffset, - OpSrcValueAlign, - OpSrcTBAAInfo); + bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) && + !cast<LSBaseSDNode>(Chain.getNode())->isVolatile(); // If chain is alias then stop here. if (!(IsLoad && IsOpLoad) && - isAlias(Ptr, Size, IsVolatile, SrcValue, SrcValueOffset, - SrcValueAlign, SrcTBAAInfo, - OpPtr, OpSize, OpIsVolatile, OpSrcValue, OpSrcValueOffset, - OpSrcValueAlign, OpSrcTBAAInfo)) { + isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) { Aliases.push_back(Chain); } else { // Look further up the chain. @@ -11021,6 +11722,63 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, break; } } + + // We need to be careful here to also search for aliases through the + // value operand of a store, etc. Consider the following situation: + // Token1 = ... 
+ // L1 = load Token1, %52 + // S1 = store Token1, L1, %51 + // L2 = load Token1, %52+8 + // S2 = store Token1, L2, %51+8 + // Token2 = Token(S1, S2) + // L3 = load Token2, %53 + // S3 = store Token2, L3, %52 + // L4 = load Token2, %53+8 + // S4 = store Token2, L4, %52+8 + // If we search for aliases of S3 (which stores to address %52), and we look + // only through the chain, then we'll miss the trivial dependence on L1 + // (which loads from %52). We then might change all loads and + // stores to use Token1 as their chain operand, which could result in + // copying %53 into %52 before copying %52 into %51 (which should + // happen first). + // + // The problem is, however, that searching for such data dependencies + // can become expensive, and the cost is not directly related to the + // chain depth. Instead, we'll rule out such configurations here by + // insisting that we've visited all chain users (except for users + // of the original chain, which is not necessary). When doing this, + // we need to look through nodes we don't care about (otherwise, things + // like register copies will interfere with trivial cases). + + SmallVector<const SDNode *, 16> Worklist; + for (SmallPtrSet<SDNode *, 16>::iterator I = Visited.begin(), + IE = Visited.end(); I != IE; ++I) + if (*I != OriginalChain.getNode()) + Worklist.push_back(*I); + + while (!Worklist.empty()) { + const SDNode *M = Worklist.pop_back_val(); + + // We have already visited M, and want to make sure we've visited any uses + // of M that we care about. For uses that we've not visited, and don't + // care about, queue them to the worklist. + + for (SDNode::use_iterator UI = M->use_begin(), + UIE = M->use_end(); UI != UIE; ++UI) + if (UI.getUse().getValueType() == MVT::Other && Visited.insert(*UI)) { + if (isa<MemIntrinsicSDNode>(*UI) || isa<MemSDNode>(*UI)) { + // We've not visited this use, and we care about it (it could have an + // ordering dependency with the original node). 
+ Aliases.clear(); + Aliases.push_back(OriginalChain); + return; + } + + // We've not visited this use, but we don't care about it. Mark it as + // visited and enqueue it to the worklist. + Worklist.push_back(*UI); + } + } } /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking @@ -11040,8 +11798,7 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { return Aliases[0]; // Construct a custom tailored token factor. - return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, - &Aliases[0], Aliases.size()); + return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases); } // SelectionDAG::Combine - This is the entry point for the file. diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index a6f7461..ad75e91 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -39,18 +39,21 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "isel" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/Loads.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/DebugInfo.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" @@ -64,12 +67,29 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "isel" + STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by " "target-independent 
selector"); STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by " "target-specific selector"); STATISTIC(NumFastIselDead, "Number of dead insts removed on failure"); +/// \brief Set CallLoweringInfo attribute flags based on a call instruction +/// and called function attributes. +void FastISel::ArgListEntry::setAttributes(ImmutableCallSite *CS, + unsigned AttrIdx) { + isSExt = CS->paramHasAttr(AttrIdx, Attribute::SExt); + isZExt = CS->paramHasAttr(AttrIdx, Attribute::ZExt); + isInReg = CS->paramHasAttr(AttrIdx, Attribute::InReg); + isSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet); + isNest = CS->paramHasAttr(AttrIdx, Attribute::Nest); + isByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal); + isInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca); + isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned); + Alignment = CS->getParamAlignment(AttrIdx); +} + /// startNewBlock - Set the current block to which generated machine /// instructions will be appended, and clear the local CSE map. /// @@ -79,7 +99,7 @@ void FastISel::startNewBlock() { // Instructions are appended to FuncInfo.MBB. If the basic block already // contains labels or copies, use the last instruction as the last local // value. - EmitStartPt = 0; + EmitStartPt = nullptr; if (!FuncInfo.MBB->empty()) EmitStartPt = &FuncInfo.MBB->back(); LastLocalValue = EmitStartPt; @@ -118,7 +138,7 @@ bool FastISel::hasTrivialKill(const Value *V) const { // No-op casts are trivially coalesced by fast-isel. 
if (const CastInst *Cast = dyn_cast<CastInst>(I)) - if (Cast->isNoopCast(TD.getIntPtrType(Cast->getContext())) && + if (Cast->isNoopCast(DL.getIntPtrType(Cast->getContext())) && !hasTrivialKill(Cast->getOperand(0))) return false; @@ -133,7 +153,7 @@ bool FastISel::hasTrivialKill(const Value *V) const { !(I->getOpcode() == Instruction::BitCast || I->getOpcode() == Instruction::PtrToInt || I->getOpcode() == Instruction::IntToPtr) && - cast<Instruction>(*I->use_begin())->getParent() == I->getParent(); + cast<Instruction>(*I->user_begin())->getParent() == I->getParent(); } unsigned FastISel::getRegForValue(const Value *V) { @@ -192,7 +212,7 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { // Translate this as an integer zero so that it can be // local-CSE'd with actual integer zeros. Reg = - getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext()))); + getRegForValue(Constant::getNullValue(DL.getIntPtrType(V->getContext()))); } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { if (CF->isNullValue()) { Reg = TargetMaterializeFloatZero(CF); @@ -229,7 +249,7 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { Reg = lookUpRegForValue(Op); } else if (isa<UndefValue>(V)) { Reg = createResultReg(TLI.getRegClassFor(VT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::IMPLICIT_DEF), Reg); } @@ -335,20 +355,20 @@ void FastISel::removeDeadCode(MachineBasicBlock::iterator I, FastISel::SavePoint FastISel::enterLocalValueArea() { MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt; - DebugLoc OldDL = DL; + DebugLoc OldDL = DbgLoc; recomputeInsertPt(); - DL = DebugLoc(); + DbgLoc = DebugLoc(); SavePoint SP = { OldInsertPt, OldDL }; return SP; } void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) { if (FuncInfo.InsertPt != FuncInfo.MBB->begin()) - LastLocalValue = llvm::prior(FuncInfo.InsertPt); + LastLocalValue = 
std::prev(FuncInfo.InsertPt); // Restore the previous insert position. FuncInfo.InsertPt = OldInsertPt.InsertPt; - DL = OldInsertPt.DL; + DbgLoc = OldInsertPt.DL; } /// SelectBinaryOp - Select and emit code for a binary operator instruction, @@ -484,7 +504,7 @@ bool FastISel::SelectGetElementPtr(const User *I) { unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); if (Field) { // N = N + Offset - TotalOffs += TD.getStructLayout(StTy)->getElementOffset(Field); + TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field); if (TotalOffs >= MaxOffs) { N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); if (N == 0) @@ -503,7 +523,7 @@ bool FastISel::SelectGetElementPtr(const User *I) { if (CI->isZero()) continue; // N = N + Offset TotalOffs += - TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); + DL.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); if (TotalOffs >= MaxOffs) { N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); if (N == 0) @@ -524,7 +544,7 @@ bool FastISel::SelectGetElementPtr(const User *I) { } // N = N + Idx * ElementSize; - uint64_t ElementSize = TD.getTypeAllocSize(Ty); + uint64_t ElementSize = DL.getTypeAllocSize(Ty); std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx); unsigned IdxN = Pair.first; bool IdxNIsKill = Pair.second; @@ -557,11 +577,472 @@ bool FastISel::SelectGetElementPtr(const User *I) { return true; } +/// \brief Add a stackmap or patchpoint intrinsic call's live variable operands +/// to a stackmap or patchpoint machine instruction. +bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops, + const CallInst *CI, unsigned StartIdx) { + for (unsigned i = StartIdx, e = CI->getNumArgOperands(); i != e; ++i) { + Value *Val = CI->getArgOperand(i); + // Check for constants and encode them with a StackMaps::ConstantOp prefix. 
+ if (auto *C = dyn_cast<ConstantInt>(Val)) { + Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp)); + Ops.push_back(MachineOperand::CreateImm(C->getSExtValue())); + } else if (isa<ConstantPointerNull>(Val)) { + Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp)); + Ops.push_back(MachineOperand::CreateImm(0)); + } else if (auto *AI = dyn_cast<AllocaInst>(Val)) { + // Values coming from a stack location also require a special encoding, + // but that is added later on by the target specific frame index + // elimination implementation. + auto SI = FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) + Ops.push_back(MachineOperand::CreateFI(SI->second)); + else + return false; + } else { + unsigned Reg = getRegForValue(Val); + if (Reg == 0) + return false; + Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false)); + } + } + + return true; +} + +bool FastISel::SelectStackmap(const CallInst *I) { + // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>, + // [live variables...]) + assert(I->getCalledFunction()->getReturnType()->isVoidTy() && + "Stackmap cannot return a value."); + + // The stackmap intrinsic only records the live variables (the arguments + // passed to it) and emits NOPS (if requested). Unlike the patchpoint + // intrinsic, this won't be lowered to a function call. This means we don't + // have to worry about calling conventions and target-specific lowering code. + // Instead we perform the call lowering right here. + // + // CALLSEQ_START(0) + // STACKMAP(id, nbytes, ...) + // CALLSEQ_END(0, 0) + // + SmallVector<MachineOperand, 32> Ops; + + // Add the <id> and <numBytes> constants. 
+ assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)) && + "Expected a constant integer."); + const auto *ID = cast<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)); + Ops.push_back(MachineOperand::CreateImm(ID->getZExtValue())); + + assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)) && + "Expected a constant integer."); + const auto *NumBytes = + cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)); + Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue())); + + // Push live variables for the stack map (skipping the first two arguments + // <id> and <numBytes>). + if (!addStackMapLiveVars(Ops, I, 2)) + return false; + + // We are not adding any register mask info here, because the stackmap doesn't + // clobber anything. + + // Add scratch registers as implicit def and early clobber. + CallingConv::ID CC = I->getCallingConv(); + const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC); + for (unsigned i = 0; ScratchRegs[i]; ++i) + Ops.push_back(MachineOperand::CreateReg( + ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false, + /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true)); + + // Issue CALLSEQ_START + unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) + .addImm(0); + + // Issue STACKMAP. + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::STACKMAP)); + for (auto const &MO : Ops) + MIB.addOperand(MO); + + // Issue CALLSEQ_END + unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) + .addImm(0).addImm(0); + + // Inform the Frame Information that we have a stackmap in this function. + FuncInfo.MF->getFrameInfo()->setHasStackMap(); + + return true; +} + +/// \brief Lower an argument list according to the target calling convention. 
+/// +/// This is a helper for lowering intrinsics that follow a target calling +/// convention or require stack pointer adjustment. Only a subset of the +/// intrinsic's operands need to participate in the calling convention. +bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx, + unsigned NumArgs, const Value *Callee, + bool ForceRetVoidTy, CallLoweringInfo &CLI) { + ArgListTy Args; + Args.reserve(NumArgs); + + // Populate the argument list. + // Attributes for args start at offset 1, after the return attribute. + ImmutableCallSite CS(CI); + for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1; + ArgI != ArgE; ++ArgI) { + Value *V = CI->getOperand(ArgI); + + assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic."); + + ArgListEntry Entry; + Entry.Val = V; + Entry.Ty = V->getType(); + Entry.setAttributes(&CS, AttrI); + Args.push_back(Entry); + } + + Type *RetTy = ForceRetVoidTy ? Type::getVoidTy(CI->getType()->getContext()) + : CI->getType(); + CLI.setCallee(CI->getCallingConv(), RetTy, Callee, std::move(Args), NumArgs); + + return LowerCallTo(CLI); +} + +bool FastISel::SelectPatchpoint(const CallInst *I) { + // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>, + // i32 <numBytes>, + // i8* <target>, + // i32 <numArgs>, + // [Args...], + // [live variables...]) + CallingConv::ID CC = I->getCallingConv(); + bool IsAnyRegCC = CC == CallingConv::AnyReg; + bool HasDef = !I->getType()->isVoidTy(); + Value *Callee = I->getOperand(PatchPointOpers::TargetPos); + + // Get the real number of arguments participating in the call <numArgs> + assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos)) && + "Expected a constant integer."); + const auto *NumArgsVal = + cast<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos)); + unsigned NumArgs = NumArgsVal->getZExtValue(); + + // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs> + // This includes all meta-operands up to but not including CC. 
+ unsigned NumMetaOpers = PatchPointOpers::CCPos; + assert(I->getNumArgOperands() >= NumMetaOpers + NumArgs && + "Not enough arguments provided to the patchpoint intrinsic"); + + // For AnyRegCC the arguments are lowered later on manually. + unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; + CallLoweringInfo CLI; + if (!lowerCallOperands(I, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC, CLI)) + return false; + + assert(CLI.Call && "No call instruction specified."); + + SmallVector<MachineOperand, 32> Ops; + + // Add an explicit result reg if we use the anyreg calling convention. + if (IsAnyRegCC && HasDef) { + assert(CLI.NumResultRegs == 0 && "Unexpected result register."); + CLI.ResultReg = createResultReg(TLI.getRegClassFor(MVT::i64)); + CLI.NumResultRegs = 1; + Ops.push_back(MachineOperand::CreateReg(CLI.ResultReg, /*IsDef=*/true)); + } + + // Add the <id> and <numBytes> constants. + assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)) && + "Expected a constant integer."); + const auto *ID = cast<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)); + Ops.push_back(MachineOperand::CreateImm(ID->getZExtValue())); + + assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)) && + "Expected a constant integer."); + const auto *NumBytes = + cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)); + Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue())); + + // Assume that the callee is a constant address or null pointer. + // FIXME: handle function symbols in the future. 
+ unsigned CalleeAddr; + if (const auto *C = dyn_cast<IntToPtrInst>(Callee)) + CalleeAddr = cast<ConstantInt>(C->getOperand(0))->getZExtValue(); + else if (const auto *C = dyn_cast<ConstantExpr>(Callee)) { + if (C->getOpcode() == Instruction::IntToPtr) + CalleeAddr = cast<ConstantInt>(C->getOperand(0))->getZExtValue(); + else + llvm_unreachable("Unsupported ConstantExpr."); + } else if (isa<ConstantPointerNull>(Callee)) + CalleeAddr = 0; + else + llvm_unreachable("Unsupported callee address."); + + Ops.push_back(MachineOperand::CreateImm(CalleeAddr)); + + // Adjust <numArgs> to account for any arguments that have been passed on + // the stack instead. + unsigned NumCallRegArgs = IsAnyRegCC ? NumArgs : CLI.OutRegs.size(); + Ops.push_back(MachineOperand::CreateImm(NumCallRegArgs)); + + // Add the calling convention + Ops.push_back(MachineOperand::CreateImm((unsigned)CC)); + + // Add the arguments we omitted previously. The register allocator should + // place these in any free register. + if (IsAnyRegCC) { + for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) { + unsigned Reg = getRegForValue(I->getArgOperand(i)); + if (!Reg) + return false; + Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false)); + } + } + + // Push the arguments from the call instruction. + for (auto Reg : CLI.OutRegs) + Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false)); + + // Push live variables for the stack map. + if (!addStackMapLiveVars(Ops, I, NumMetaOpers + NumArgs)) + return false; + + // Push the register mask info. + Ops.push_back(MachineOperand::CreateRegMask(TRI.getCallPreservedMask(CC))); + + // Add scratch registers as implicit def and early clobber. 
+ const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC); + for (unsigned i = 0; ScratchRegs[i]; ++i) + Ops.push_back(MachineOperand::CreateReg( + ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false, + /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true)); + + // Add implicit defs (return values). + for (auto Reg : CLI.InRegs) + Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/true, + /*IsImpl=*/true)); + + // Insert the patchpoint instruction before the call generated by the target. + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, CLI.Call, DbgLoc, + TII.get(TargetOpcode::PATCHPOINT)); + + for (auto &MO : Ops) + MIB.addOperand(MO); + + MIB->setPhysRegsDeadExcept(CLI.InRegs, TRI); + + // Delete the original call instruction. + CLI.Call->eraseFromParent(); + + // Inform the Frame Information that we have a patchpoint in this function. + FuncInfo.MF->getFrameInfo()->setHasPatchPoint(); + + if (CLI.NumResultRegs) + UpdateValueMap(I, CLI.ResultReg, CLI.NumResultRegs); + return true; +} + +/// Returns an AttributeSet representing the attributes applied to the return +/// value of the given call. +static AttributeSet getReturnAttrs(FastISel::CallLoweringInfo &CLI) { + SmallVector<Attribute::AttrKind, 2> Attrs; + if (CLI.RetSExt) + Attrs.push_back(Attribute::SExt); + if (CLI.RetZExt) + Attrs.push_back(Attribute::ZExt); + if (CLI.IsInReg) + Attrs.push_back(Attribute::InReg); + + return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex, + Attrs); +} + +bool FastISel::LowerCallTo(const CallInst *CI, const char *SymName, + unsigned NumArgs) { + ImmutableCallSite CS(CI); + + PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); + FunctionType *FTy = cast<FunctionType>(PT->getElementType()); + Type *RetTy = FTy->getReturnType(); + + ArgListTy Args; + Args.reserve(NumArgs); + + // Populate the argument list. + // Attributes for args start at offset 1, after the return attribute. 
+ for (unsigned ArgI = 0; ArgI != NumArgs; ++ArgI) { + Value *V = CI->getOperand(ArgI); + + assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic."); + + ArgListEntry Entry; + Entry.Val = V; + Entry.Ty = V->getType(); + Entry.setAttributes(&CS, ArgI + 1); + Args.push_back(Entry); + } + + CallLoweringInfo CLI; + CLI.setCallee(RetTy, FTy, SymName, std::move(Args), CS, NumArgs); + + return LowerCallTo(CLI); +} + +bool FastISel::LowerCallTo(CallLoweringInfo &CLI) { + // Handle the incoming return values from the call. + CLI.clearIns(); + SmallVector<EVT, 4> RetTys; + ComputeValueVTs(TLI, CLI.RetTy, RetTys); + + SmallVector<ISD::OutputArg, 4> Outs; + GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, TLI); + + bool CanLowerReturn = TLI.CanLowerReturn(CLI.CallConv, *FuncInfo.MF, + CLI.IsVarArg, Outs, + CLI.RetTy->getContext()); + + // FIXME: sret demotion isn't supported yet - bail out. + if (!CanLowerReturn) + return false; + + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + MVT RegisterVT = TLI.getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumRegs = TLI.getNumRegisters(CLI.RetTy->getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) { + ISD::InputArg MyFlags; + MyFlags.VT = RegisterVT; + MyFlags.ArgVT = VT; + MyFlags.Used = CLI.IsReturnValueUsed; + if (CLI.RetSExt) + MyFlags.Flags.setSExt(); + if (CLI.RetZExt) + MyFlags.Flags.setZExt(); + if (CLI.IsInReg) + MyFlags.Flags.setInReg(); + CLI.Ins.push_back(MyFlags); + } + } + + // Handle all of the outgoing arguments. 
+ CLI.clearOuts(); + for (auto &Arg : CLI.getArgs()) { + Type *FinalType = Arg.Ty; + if (Arg.isByVal) + FinalType = cast<PointerType>(Arg.Ty)->getElementType(); + bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters( + FinalType, CLI.CallConv, CLI.IsVarArg); + + ISD::ArgFlagsTy Flags; + if (Arg.isZExt) + Flags.setZExt(); + if (Arg.isSExt) + Flags.setSExt(); + if (Arg.isInReg) + Flags.setInReg(); + if (Arg.isSRet) + Flags.setSRet(); + if (Arg.isByVal) + Flags.setByVal(); + if (Arg.isInAlloca) { + Flags.setInAlloca(); + // Set the byval flag for CCAssignFn callbacks that don't know about + // inalloca. This way we can know how many bytes we should've allocated + // and how many bytes a callee cleanup function will pop. If we port + // inalloca to more targets, we'll have to add custom inalloca handling in + // the various CC lowering callbacks. + Flags.setByVal(); + } + if (Arg.isByVal || Arg.isInAlloca) { + PointerType *Ty = cast<PointerType>(Arg.Ty); + Type *ElementTy = Ty->getElementType(); + unsigned FrameSize = DL.getTypeAllocSize(ElementTy); + // For ByVal, alignment should come from FE. BE will guess if this info is + // not there, but there are cases it cannot get right. + unsigned FrameAlign = Arg.Alignment; + if (!FrameAlign) + FrameAlign = TLI.getByValTypeAlignment(ElementTy); + Flags.setByValSize(FrameSize); + Flags.setByValAlign(FrameAlign); + } + if (Arg.isNest) + Flags.setNest(); + if (NeedsRegBlock) + Flags.setInConsecutiveRegs(); + unsigned OriginalAlignment = DL.getABITypeAlignment(Arg.Ty); + Flags.setOrigAlign(OriginalAlignment); + + CLI.OutVals.push_back(Arg.Val); + CLI.OutFlags.push_back(Flags); + } + + if (!FastLowerCall(CLI)) + return false; + + // Set all unused physreg defs as dead. 
+ assert(CLI.Call && "No call instruction specified."); + CLI.Call->setPhysRegsDeadExcept(CLI.InRegs, TRI); + + if (CLI.NumResultRegs && CLI.CS) + UpdateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs); + + return true; +} + +bool FastISel::LowerCall(const CallInst *CI) { + ImmutableCallSite CS(CI); + + PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); + FunctionType *FuncTy = cast<FunctionType>(PT->getElementType()); + Type *RetTy = FuncTy->getReturnType(); + + ArgListTy Args; + ArgListEntry Entry; + Args.reserve(CS.arg_size()); + + for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); + i != e; ++i) { + Value *V = *i; + + // Skip empty types + if (V->getType()->isEmptyTy()) + continue; + + Entry.Val = V; + Entry.Ty = V->getType(); + + // Skip the first return-type Attribute to get to params. + Entry.setAttributes(&CS, i - CS.arg_begin() + 1); + Args.push_back(Entry); + } + + // Check if target-independent constraints permit a tail call here. + // Target-dependent constraints are checked within FastLowerCall. + bool IsTailCall = CI->isTailCall(); + if (IsTailCall && !isInTailCallPosition(CS, TM)) + IsTailCall = false; + + CallLoweringInfo CLI; + CLI.setCallee(RetTy, FuncTy, CI->getCalledValue(), std::move(Args), CS) + .setTailCall(IsTailCall); + + return LowerCallTo(CLI); +} + bool FastISel::SelectCall(const User *I) { const CallInst *Call = cast<CallInst>(I); // Handle simple inline asms. if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getCalledValue())) { + // If the inline asm has side effects, then make sure that no local value + // lives across by flushing the local value map. + if (IA->hasSideEffects()) + flushLocalValueMap(); + // Don't attempt to handle constraints. 
if (!IA->getConstraintString().empty()) return false; @@ -572,7 +1053,7 @@ bool FastISel::SelectCall(const User *I) { if (IA->isAlignStack()) ExtraInfo |= InlineAsm::Extra_IsAlignStack; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::INLINEASM)) .addExternalSymbol(IA->getAsmString().c_str()) .addImm(ExtraInfo); @@ -582,26 +1063,37 @@ bool FastISel::SelectCall(const User *I) { MachineModuleInfo &MMI = FuncInfo.MF->getMMI(); ComputeUsesVAFloatArgument(*Call, &MMI); - const Function *F = Call->getCalledFunction(); - if (!F) return false; + // Handle intrinsic function calls. + if (const auto *II = dyn_cast<IntrinsicInst>(Call)) + return SelectIntrinsicCall(II); - // Handle selected intrinsic function calls. - switch (F->getIntrinsicID()) { + // Usually, it does not make sense to initialize a value, + // make an unrelated function call and use the value, because + // it tends to be spilled on the stack. So, we move the pointer + // to the last local value to the beginning of the block, so that + // all the values which have already been materialized, + // appear after the call. It also makes sense to skip intrinsics + // since they tend to be inlined. + flushLocalValueMap(); + + return LowerCall(Call); +} + +bool FastISel::SelectIntrinsicCall(const IntrinsicInst *II) { + switch (II->getIntrinsicID()) { default: break; - // At -O0 we don't care about the lifetime intrinsics. + // At -O0 we don't care about the lifetime intrinsics. case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: - // The donothing intrinsic does, well, nothing. + // The donothing intrinsic does, well, nothing. 
case Intrinsic::donothing: return true; - case Intrinsic::dbg_declare: { - const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call); + const DbgDeclareInst *DI = cast<DbgDeclareInst>(II); DIVariable DIVar(DI->getVariable()); assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgDeclareInst should be either null or a DIVariable."); - if (!DIVar || - !FuncInfo.MF->getMMI().hasDebugInfo()) { + "Variable in DbgDeclareInst should be either null or a DIVariable."); + if (!DIVar || !FuncInfo.MF->getMMI().hasDebugInfo()) { DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); return true; } @@ -618,7 +1110,7 @@ bool FastISel::SelectCall(const User *I) { // Some arguments' frame index is recorded during argument lowering. Offset = FuncInfo.getArgumentFrameIndex(Arg); if (Offset) - Op = MachineOperand::CreateFI(Offset); + Op = MachineOperand::CreateFI(Offset); if (!Op) if (unsigned Reg = lookUpRegForValue(Address)) Op = MachineOperand::CreateReg(Reg, false); @@ -643,15 +1135,15 @@ bool FastISel::SelectCall(const User *I) { if (Op) { if (Op->isReg()) { Op->setIsDebug(true); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::DBG_VALUE), false, Op->getReg(), 0, DI->getVariable()); } else - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::DBG_VALUE)) - .addOperand(*Op) - .addImm(0) - .addMetadata(DI->getVariable()); + .addOperand(*Op) + .addImm(0) + .addMetadata(DI->getVariable()); } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. @@ -661,32 +1153,32 @@ bool FastISel::SelectCall(const User *I) { } case Intrinsic::dbg_value: { // This form of DBG_VALUE is target-independent. 
- const DbgValueInst *DI = cast<DbgValueInst>(Call); + const DbgValueInst *DI = cast<DbgValueInst>(II); const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); const Value *V = DI->getValue(); if (!V) { // Currently the optimizer can produce this; insert an undef to // help debugging. Probably the optimizer should not do this. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(0U).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { if (CI->getBitWidth() > 64) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addCImm(CI).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); else - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addImm(CI->getZExtValue()).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addFPImm(CF).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); } else if (unsigned Reg = lookUpRegForValue(V)) { // FIXME: This does not handle register-indirect values at offset 0. bool IsIndirect = DI->getOffset() != 0; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, IsIndirect, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg, DI->getOffset(), DI->getVariable()); } else { // We can't yet handle anything else here because it would require @@ -696,36 +1188,30 @@ bool FastISel::SelectCall(const User *I) { return true; } case Intrinsic::objectsize: { - ConstantInt *CI = cast<ConstantInt>(Call->getArgOperand(1)); + ConstantInt *CI = cast<ConstantInt>(II->getArgOperand(1)); unsigned long long Res = CI->isZero() ? 
-1ULL : 0; - Constant *ResCI = ConstantInt::get(Call->getType(), Res); + Constant *ResCI = ConstantInt::get(II->getType(), Res); unsigned ResultReg = getRegForValue(ResCI); if (ResultReg == 0) return false; - UpdateValueMap(Call, ResultReg); + UpdateValueMap(II, ResultReg); return true; } case Intrinsic::expect: { - unsigned ResultReg = getRegForValue(Call->getArgOperand(0)); + unsigned ResultReg = getRegForValue(II->getArgOperand(0)); if (ResultReg == 0) return false; - UpdateValueMap(Call, ResultReg); + UpdateValueMap(II, ResultReg); return true; } + case Intrinsic::experimental_stackmap: + return SelectStackmap(II); + case Intrinsic::experimental_patchpoint_void: + case Intrinsic::experimental_patchpoint_i64: + return SelectPatchpoint(II); } - // Usually, it does not make sense to initialize a value, - // make an unrelated function call and use the value, because - // it tends to be spilled on the stack. So, we move the pointer - // to the last local value to the beginning of the block, so that - // all the values which have already been materialized, - // appear after the call. It also makes sense to skip intrinsics - // since they tend to be inlined. - if (!isa<IntrinsicInst>(Call)) - flushLocalValueMap(); - - // An arbitrary call. Bail. - return false; + return FastLowerIntrinsicCall(II); } bool FastISel::SelectCast(const User *I, unsigned Opcode) { @@ -798,8 +1284,8 @@ bool FastISel::SelectBitCast(const User *I) { // Don't attempt a cross-class copy. It will likely fail. 
if (SrcClass == DstClass) { ResultReg = createResultReg(DstClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(Op0); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(Op0); } } @@ -822,25 +1308,31 @@ FastISel::SelectInstruction(const Instruction *I) { if (!HandlePHINodesInSuccessorBlocks(I->getParent())) return false; - DL = I->getDebugLoc(); + DbgLoc = I->getDebugLoc(); MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt; - // As a special case, don't handle calls to builtin library functions that - // may be translated directly to target instructions. if (const CallInst *Call = dyn_cast<CallInst>(I)) { const Function *F = Call->getCalledFunction(); LibFunc::Func Func; + + // As a special case, don't handle calls to builtin library functions that + // may be translated directly to target instructions. if (F && !F->hasLocalLinkage() && F->hasName() && LibInfo->getLibFunc(F->getName(), Func) && LibInfo->hasOptimizedCodeGen(Func)) return false; + + // Don't handle Intrinsic::trap if a trap funciton is specified. + if (F && F->getIntrinsicID() == Intrinsic::trap && + !TM.Options.getTrapFunctionName().empty()) + return false; } // First, try doing target-independent selection. if (SelectOperator(I, I->getOpcode())) { ++NumFastIselSuccessIndependent; - DL = DebugLoc(); + DbgLoc = DebugLoc(); return true; } // Remove dead code. However, ignore call instructions since we've flushed @@ -855,7 +1347,7 @@ FastISel::SelectInstruction(const Instruction *I) { SavedInsertPt = FuncInfo.InsertPt; if (TargetSelectInstruction(I)) { ++NumFastIselSuccessTarget; - DL = DebugLoc(); + DbgLoc = DebugLoc(); return true; } // Check for dead code and remove as necessary. 
@@ -863,7 +1355,7 @@ FastISel::SelectInstruction(const Instruction *I) { if (SavedInsertPt != FuncInfo.InsertPt) removeDeadCode(FuncInfo.InsertPt, SavedInsertPt); - DL = DebugLoc(); + DbgLoc = DebugLoc(); return false; } @@ -871,8 +1363,7 @@ FastISel::SelectInstruction(const Instruction *I) { /// unless it is the immediate (fall-through) successor, and update /// the CFG. void -FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) { - +FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) { if (FuncInfo.MBB->getBasicBlock()->size() > 1 && FuncInfo.MBB->isLayoutSuccessor(MSucc)) { // For more accurate line information if this is the only instruction @@ -880,10 +1371,14 @@ FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) { // fall-through case, which needs no instructions. } else { // The unconditional branch case. - TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL, - SmallVector<MachineOperand, 0>(), DL); + TII.InsertBranch(*FuncInfo.MBB, MSucc, nullptr, + SmallVector<MachineOperand, 0>(), DbgLoc); } - FuncInfo.MBB->addSuccessor(MSucc); + uint32_t BranchWeight = 0; + if (FuncInfo.BPI) + BranchWeight = FuncInfo.BPI->getEdgeWeight(FuncInfo.MBB->getBasicBlock(), + MSucc->getBasicBlock()); + FuncInfo.MBB->addSuccessor(MSucc, BranchWeight); } /// SelectFNeg - Emit an FNeg operation. @@ -1035,8 +1530,10 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { } case Instruction::Unreachable: - // Nothing to emit. - return true; + if (TM.Options.TrapUnreachable) + return FastEmit_(MVT::Other, MVT::Other, ISD::TRAP) != 0; + else + return true; case Instruction::Alloca: // FunctionLowering has the static-sized case covered. 
@@ -1092,11 +1589,12 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { FastISel::FastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) : FuncInfo(funcInfo), + MF(funcInfo.MF), MRI(FuncInfo.MF->getRegInfo()), MFI(*FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()), TM(FuncInfo.MF->getTarget()), - TD(*TM.getDataLayout()), + DL(*TM.getDataLayout()), TII(*TM.getInstrInfo()), TLI(*TM.getTargetLowering()), TRI(*TM.getRegisterInfo()), @@ -1109,6 +1607,14 @@ bool FastISel::FastLowerArguments() { return false; } +bool FastISel::FastLowerCall(CallLoweringInfo &/*CLI*/) { + return false; +} + +bool FastISel::FastLowerIntrinsicCall(const IntrinsicInst * /*II*/) { + return false; +} + unsigned FastISel::FastEmit_(MVT, MVT, unsigned) { return 0; @@ -1204,29 +1710,48 @@ unsigned FastISel::createResultReg(const TargetRegisterClass* RC) { return MRI.createVirtualRegister(RC); } +unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, + unsigned Op, unsigned OpNum) { + if (TargetRegisterInfo::isVirtualRegister(Op)) { + const TargetRegisterClass *RegClass = + TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF); + if (!MRI.constrainRegClass(Op, RegClass)) { + // If it's not legal to COPY between the register classes, something + // has gone very wrong before we got here. 
+ unsigned NewOp = createResultReg(RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), NewOp).addReg(Op); + return NewOp; + } + } + return Op; +} + unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode, const TargetRegisterClass* RC) { unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg); return ResultReg; } unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill) { - unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; @@ -1236,19 +1761,22 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill) { - unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); + if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, 
ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1258,21 +1786,25 @@ unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, unsigned Op2, bool Op2IsKill) { - unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); + Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2); + if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addReg(Op2, Op2IsKill * RegState::Kill); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addReg(Op2, Op2IsKill * RegState::Kill); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1281,19 +1813,22 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, 
const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm) { - unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + unsigned ResultReg = createResultReg(RC); + RC = TII.getRegClass(II, II.getNumDefs(), &TRI, *FuncInfo.MF); + MRI.constrainRegClass(Op0, RC); + if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addImm(Imm); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addImm(Imm); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1302,21 +1837,23 @@ unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm1, uint64_t Imm2) { - unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addImm(Imm1) .addImm(Imm2); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addImm(Imm1) .addImm(Imm2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } 
return ResultReg; } @@ -1325,19 +1862,21 @@ unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, const ConstantFP *FPImm) { - unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addFPImm(FPImm); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addFPImm(FPImm); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1347,21 +1886,24 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, uint64_t Imm) { - unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); + if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addImm(Imm); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addImm(Imm); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 
TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1371,21 +1913,24 @@ unsigned FastISel::FastEmitInst_rrii(unsigned MachineInstOpcode, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, uint64_t Imm1, uint64_t Imm2) { - unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); + if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addImm(Imm1).addImm(Imm2); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addImm(Imm1).addImm(Imm2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1397,11 +1942,11 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg).addImm(Imm); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm); + 
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1413,12 +1958,12 @@ unsigned FastISel::FastEmitInst_ii(unsigned MachineInstOpcode, const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addImm(Imm1).addImm(Imm2); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm1).addImm(Imm2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm1).addImm(Imm2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1432,7 +1977,7 @@ unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, const TargetRegisterClass *RC = MRI.getRegClass(Op0); MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, - DL, TII.get(TargetOpcode::COPY), ResultReg) + DbgLoc, TII.get(TargetOpcode::COPY), ResultReg) .addReg(Op0, getKillRegState(Op0IsKill), Idx); return ResultReg; } @@ -1498,9 +2043,9 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // Set the DebugLoc for the copy. Prefer the location of the operand // if there is one; use the location of the PHI otherwise. 
- DL = PN->getDebugLoc(); + DbgLoc = PN->getDebugLoc(); if (const Instruction *Inst = dyn_cast<Instruction>(PHIOp)) - DL = Inst->getDebugLoc(); + DbgLoc = Inst->getDebugLoc(); unsigned Reg = getRegForValue(PHIOp); if (Reg == 0) { @@ -1508,7 +2053,7 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { return false; } FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); - DL = DebugLoc(); + DbgLoc = DebugLoc(); } } @@ -1523,7 +2068,7 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { // this by scanning the single-use users of the load until we get to FoldInst. unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs. - const Instruction *TheUser = LI->use_back(); + const Instruction *TheUser = LI->user_back(); while (TheUser != FoldInst && // Scan up until we find FoldInst. // Stay in the right block. TheUser->getParent() == FoldInst->getParent() && @@ -1532,7 +2077,7 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { if (!TheUser->hasOneUse()) return false; - TheUser = TheUser->use_back(); + TheUser = TheUser->user_back(); } // If we didn't find the fold instruction, then we failed to collapse the @@ -1559,7 +2104,7 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { return false; MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LoadReg); - MachineInstr *User = &*RI; + MachineInstr *User = RI->getParent(); // Set the insertion point properly. Folding the load can cause generation of // other random instructions (like sign extends) for addressing modes; make @@ -1576,8 +2121,8 @@ bool FastISel::canFoldAddIntoGEP(const User *GEP, const Value *Add) { if (!isa<AddOperator>(Add)) return false; // Type size needs to match. 
- if (TD.getTypeSizeInBits(GEP->getType()) != - TD.getTypeSizeInBits(Add->getType())) + if (DL.getTypeSizeInBits(GEP->getType()) != + DL.getTypeSizeInBits(Add->getType())) return false; // Must be in the same basic block. if (isa<Instruction>(Add) && @@ -1587,3 +2132,47 @@ bool FastISel::canFoldAddIntoGEP(const User *GEP, const Value *Add) { return isa<ConstantInt>(cast<AddOperator>(Add)->getOperand(1)); } +MachineMemOperand * +FastISel::createMachineMemOperandFor(const Instruction *I) const { + const Value *Ptr; + Type *ValTy; + unsigned Alignment; + unsigned Flags; + bool IsVolatile; + + if (const auto *LI = dyn_cast<LoadInst>(I)) { + Alignment = LI->getAlignment(); + IsVolatile = LI->isVolatile(); + Flags = MachineMemOperand::MOLoad; + Ptr = LI->getPointerOperand(); + ValTy = LI->getType(); + } else if (const auto *SI = dyn_cast<StoreInst>(I)) { + Alignment = SI->getAlignment(); + IsVolatile = SI->isVolatile(); + Flags = MachineMemOperand::MOStore; + Ptr = SI->getPointerOperand(); + ValTy = SI->getValueOperand()->getType(); + } else { + return nullptr; + } + + bool IsNonTemporal = I->getMetadata("nontemporal") != nullptr; + bool IsInvariant = I->getMetadata("invariant.load") != nullptr; + const MDNode *TBAAInfo = I->getMetadata(LLVMContext::MD_tbaa); + const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range); + + if (Alignment == 0) // Ensure that codegen never sees alignment 0. 
+ Alignment = DL.getABITypeAlignment(ValTy); + + unsigned Size = TM.getDataLayout()->getTypeStoreSize(ValTy); + + if (IsVolatile) + Flags |= MachineMemOperand::MOVolatile; + if (IsNonTemporal) + Flags |= MachineMemOperand::MONonTemporal; + if (IsInvariant) + Flags |= MachineMemOperand::MOInvariant; + + return FuncInfo.MF->getMachineMemOperand(MachinePointerInfo(Ptr), Flags, Size, + Alignment, TBAAInfo, Ranges); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index e9d2324..ae124e8 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "function-lowering-info" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/CodeGen/Analysis.h" @@ -21,8 +20,8 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" @@ -32,14 +31,16 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "function-lowering-info" + /// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by /// PHI nodes or outside of the basic block that defines it, or used by a /// switch or atomic instruction, 
which may expand to multiple basic blocks. @@ -47,12 +48,10 @@ static bool isUsedOutsideOfDefiningBlock(const Instruction *I) { if (I->use_empty()) return false; if (isa<PHINode>(I)) return true; const BasicBlock *BB = I->getParent(); - for (Value::const_use_iterator UI = I->use_begin(), E = I->use_end(); - UI != E; ++UI) { - const User *U = *UI; + for (const User *U : I->users()) if (cast<Instruction>(U)->getParent() != BB || isa<PHINode>(U)) return true; - } + return false; } @@ -76,7 +75,12 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // them. Function::const_iterator BB = Fn->begin(), EB = Fn->end(); for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) + if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) { + // Don't fold inalloca allocas or other dynamic allocas into the initial + // stack frame allocation, even if they are in the entry block. + if (!AI->isStaticAlloca()) + continue; + if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) { Type *Ty = AI->getAllocatedType(); uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); @@ -87,17 +91,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, TySize *= CUI->getZExtValue(); // Get total allocated size. if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. - // The object may need to be placed onto the stack near the stack - // protector if one exists. Determine here if this object is a suitable - // candidate. I.e., it would trigger the creation of a stack protector. 
- bool MayNeedSP = - (AI->isArrayAllocation() || - (TySize >= 8 && isa<ArrayType>(Ty) && - cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8))); StaticAllocaMap[AI] = - MF->getFrameInfo()->CreateStackObject(TySize, Align, false, - MayNeedSP, AI); + MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI); } + } for (; BB != EB; ++BB) for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); @@ -120,7 +117,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Look for inline asm that clobbers the SP register. if (isa<CallInst>(I) || isa<InvokeInst>(I)) { ImmutableCallSite CS(I); - if (const InlineAsm *IA = dyn_cast<InlineAsm>(CS.getCalledValue())) { + if (isa<InlineAsm>(CS.getCalledValue())) { unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); std::vector<TargetLowering::AsmOperandInfo> Ops = TLI->ParseConstraints(CS); @@ -287,11 +284,11 @@ unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) { const FunctionLoweringInfo::LiveOutInfo * FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) { if (!LiveOutRegInfo.inBounds(Reg)) - return NULL; + return nullptr; LiveOutInfo *LOI = &LiveOutRegInfo[Reg]; if (!LOI->IsValid) - return NULL; + return nullptr; if (BitWidth > LOI->KnownZero.getBitWidth()) { LOI->NumSignBits = 1; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 856ef34..7c124b8 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "instr-emitter" #include "InstrEmitter.h" #include "SDNodeDbgValue.h" #include "llvm/ADT/Statistic.h" @@ -31,6 +30,8 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "instr-emitter" + /// MinRCSize - Smallest register class we allow when constraining virtual /// 
registers. If satisfying all register class constraints would require /// using a smaller register class, emit a COPY to a new virtual register @@ -99,7 +100,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, // If the node is only used by a CopyToReg and the dest reg is a vreg, use // the CopyToReg'd destination register instead of creating a new vreg. bool MatchReg = true; - const TargetRegisterClass *UseRC = NULL; + const TargetRegisterClass *UseRC = nullptr; MVT VT = Node->getSimpleValueType(ResNo); // Stick to the preferred register classes for legal types. @@ -107,9 +108,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, UseRC = TLI->getRegClassFor(VT); if (!IsClone && !IsCloned) - for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); - UI != E; ++UI) { - SDNode *User = *UI; + for (SDNode *User : Node->uses()) { bool Match = true; if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node && @@ -131,7 +130,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, Match = false; if (User->isMachineOpcode()) { const MCInstrDesc &II = TII->get(User->getMachineOpcode()); - const TargetRegisterClass *RC = 0; + const TargetRegisterClass *RC = nullptr; if (i+II.getNumDefs() < II.getNumOperands()) { RC = TRI->getAllocatableClass( TII->getRegClass(II, i+II.getNumDefs(), TRI, *MF)); @@ -154,7 +153,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, break; } - const TargetRegisterClass *SrcRC = 0, *DstRC = 0; + const TargetRegisterClass *SrcRC = nullptr, *DstRC = nullptr; SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT); // Figure out the register class to create for the destreg. 
@@ -242,9 +241,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, } if (!VRBase && !IsClone && !IsCloned) - for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); - UI != E; ++UI) { - SDNode *User = *UI; + for (SDNode *User : Node->uses()) { if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node && User->getOperand(2).getResNo() == i) { @@ -329,7 +326,7 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, // shrink VReg's register class within reason. For example, if VReg == GR32 // and II requires a GR32_NOSP, just constrain VReg to GR32_NOSP. if (II) { - const TargetRegisterClass *DstRC = 0; + const TargetRegisterClass *DstRC = nullptr; if (IIOpNum < II->getNumOperands()) DstRC = TRI->getAllocatableClass(TII->getRegClass(*II,IIOpNum,TRI,*MF)); if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) { @@ -470,9 +467,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, // If the node is only used by a CopyToReg and the dest reg is a vreg, use // the CopyToReg'd destination register instead of creating a new vreg. 
- for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); - UI != E; ++UI) { - SDNode *User = *UI; + for (SDNode *User : Node->uses()) { if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node) { unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); @@ -561,10 +556,10 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, const ConstantSDNode *SD = cast<ConstantSDNode>(N0); MIB.addImm(SD->getZExtValue()); } else - AddOperand(MIB, N0, 0, 0, VRBaseMap, /*IsDebug=*/false, + AddOperand(MIB, N0, 0, nullptr, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); // Add the subregster being inserted - AddOperand(MIB, N1, 0, 0, VRBaseMap, /*IsDebug=*/false, + AddOperand(MIB, N1, 0, nullptr, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); MIB.addImm(SubIdx); MBB->insert(InsertPos, MIB); @@ -693,10 +688,13 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, MIB.addReg(0U); } - if (Offset != 0) // Indirect addressing. + // Indirect addressing is indicated by an Imm as the second parameter. + if (SD->isIndirect()) MIB.addImm(Offset); - else + else { + assert(Offset == 0 && "direct value cannot have an offset"); MIB.addReg(0U, RegState::Debug); + } MIB.addMetadata(MDPtr); @@ -738,19 +736,25 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, const MCInstrDesc &II = TII->get(Opc); unsigned NumResults = CountResults(Node); unsigned NumDefs = II.getNumDefs(); - const uint16_t *ScratchRegs = NULL; - - // Handle PATCHPOINT specially and then use the generic code. - if (Opc == TargetOpcode::PATCHPOINT) { - unsigned CC = Node->getConstantOperandVal(PatchPointOpers::CCPos); - NumDefs = NumResults; + const MCPhysReg *ScratchRegs = nullptr; + + // Handle STACKMAP and PATCHPOINT specially and then use the generic code. + if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) { + // Stackmaps do not have arguments and do not preserve their calling + // convention. 
However, to simplify runtime support, they clobber the same + // scratch registers as AnyRegCC. + unsigned CC = CallingConv::AnyReg; + if (Opc == TargetOpcode::PATCHPOINT) { + CC = Node->getConstantOperandVal(PatchPointOpers::CCPos); + NumDefs = NumResults; + } ScratchRegs = TLI->getScratchRegisters((CallingConv::ID) CC); } unsigned NumImpUses = 0; unsigned NodeOperands = countOperands(Node, II.getNumOperands() - NumDefs, NumImpUses); - bool HasPhysRegOuts = NumResults > NumDefs && II.getImplicitDefs()!=0; + bool HasPhysRegOuts = NumResults > NumDefs && II.getImplicitDefs()!=nullptr; #ifndef NDEBUG unsigned NumMIOperands = NodeOperands + NumResults; if (II.isVariadic()) @@ -976,7 +980,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, // The addressing mode has been selected, just add all of the // operands to the machine instruction. for (unsigned j = 0; j != NumVals; ++j, ++i) - AddOperand(MIB, Node->getOperand(i), 0, 0, VRBaseMap, + AddOperand(MIB, Node->getOperand(i), 0, nullptr, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); // Manually set isTied bits. 
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 9061ae9..16c5b4b 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -13,15 +13,16 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" @@ -152,10 +153,10 @@ private: public: // DAGUpdateListener implementation. - virtual void NodeDeleted(SDNode *N, SDNode *E) { + void NodeDeleted(SDNode *N, SDNode *E) override { ForgetNode(N); } - virtual void NodeUpdated(SDNode *N) {} + void NodeUpdated(SDNode *N) override {} // Node replacement helpers void ReplacedNode(SDNode *N) { @@ -269,7 +270,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { EVT OrigVT = VT; EVT SVT = VT; - while (SVT != MVT::f32) { + while (SVT != MVT::f32 && SVT != MVT::f16) { SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1); if (ConstantFPSDNode::isValueValidForType(SVT, CFP->getValueAPF()) && // Only do this if the target has a native EXTLOAD instruction from @@ -386,9 +387,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, MinAlign(ST->getAlignment(), Offset), ST->getTBAAInfo())); // The order of the stores doesn't matter - say it with a TokenFactor. 
- SDValue Result = - DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], - Stores.size()); + SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); return; } @@ -505,8 +504,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, false, false, 0)); // The order of the stores doesn't matter - say it with a TokenFactor. - SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], - Stores.size()); + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); // Finally, perform the original load only redirected to the stack slot. Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, @@ -704,7 +702,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { } } } - return SDValue(0, 0); + return SDValue(nullptr, 0); } void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { @@ -729,10 +727,11 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { MVT VT = Value.getSimpleValueType(); switch (TLI.getOperationAction(ISD::STORE, VT)) { default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Legal: + case TargetLowering::Legal: { // If this is an unaligned store and the target doesn't support it, // expand it. 
- if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { + unsigned AS = ST->getAddressSpace(); + if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT(), AS)) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) @@ -740,6 +739,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { DAG, TLI, this); } break; + } case TargetLowering::Custom: { SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); if (Res.getNode()) @@ -807,7 +807,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, DAG.getConstant(RoundWidth, - TLI.getShiftAmountTy(Value.getValueType()))); + TLI.getShiftAmountTy(Value.getValueType()))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, @@ -818,7 +818,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // Store the top RoundWidth bits. Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, DAG.getConstant(ExtraWidth, - TLI.getShiftAmountTy(Value.getValueType()))); + TLI.getShiftAmountTy(Value.getValueType()))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), RoundVT, isVolatile, isNonTemporal, Alignment, TBAAInfo); @@ -826,7 +826,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // Store the remaining ExtraWidth bits. 
IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, @@ -840,16 +840,18 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { switch (TLI.getTruncStoreAction(ST->getValue().getSimpleValueType(), StVT.getSimpleVT())) { default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Legal: + case TargetLowering::Legal: { + unsigned AS = ST->getAddressSpace(); // If this is an unaligned store and the target doesn't support it, // expand it. - if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { + if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT(), AS)) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); } break; + } case TargetLowering::Custom: { SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); if (Res.getNode()) @@ -889,10 +891,11 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { switch (TLI.getOperationAction(Node->getOpcode(), VT)) { default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Legal: + case TargetLowering::Legal: { + unsigned AS = LD->getAddressSpace(); // If this is an unaligned load and the target doesn't support it, // expand it. 
- if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { + if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT(), AS)) { Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = TLI.getDataLayout()->getABITypeAlignment(Ty); @@ -901,6 +904,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { } } break; + } case TargetLowering::Custom: { SDValue Res = TLI.LowerOperation(RVal, DAG); if (Res.getNode()) { @@ -1017,7 +1021,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Move the top bits to the right place. Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, DAG.getConstant(RoundWidth, - TLI.getShiftAmountTy(Hi.getValueType()))); + TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); @@ -1047,7 +1051,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Move the top bits to the right place. Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, DAG.getConstant(ExtraWidth, - TLI.getShiftAmountTy(Hi.getValueType()))); + TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); @@ -1059,77 +1063,82 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { switch (TLI.getLoadExtAction(ExtType, SrcVT.getSimpleVT())) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Custom: - isCustom = true; - // FALLTHROUGH + isCustom = true; + // FALLTHROUGH case TargetLowering::Legal: { - Value = SDValue(Node, 0); - Chain = SDValue(Node, 1); - - if (isCustom) { - SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Res.getNode()) { - Value = Res; - Chain = Res.getValue(1); - } - } else { - // If this is an unaligned load and the target doesn't support it, - // expand it. 
- if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { - Type *Ty = - LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = - TLI.getDataLayout()->getABITypeAlignment(Ty); - if (LD->getAlignment() < ABIAlignment){ - ExpandUnalignedLoad(cast<LoadSDNode>(Node), - DAG, TLI, Value, Chain); - } - } - } - break; + Value = SDValue(Node, 0); + Chain = SDValue(Node, 1); + + if (isCustom) { + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res.getNode()) { + Value = Res; + Chain = Res.getValue(1); + } + } else { + // If this is an unaligned load and the target doesn't support + // it, expand it. + EVT MemVT = LD->getMemoryVT(); + unsigned AS = LD->getAddressSpace(); + if (!TLI.allowsUnalignedMemoryAccesses(MemVT, AS)) { + Type *Ty = + LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = + TLI.getDataLayout()->getABITypeAlignment(Ty); + if (LD->getAlignment() < ABIAlignment){ + ExpandUnalignedLoad(cast<LoadSDNode>(Node), + DAG, TLI, Value, Chain); + } + } + } + break; } case TargetLowering::Expand: - if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) { - SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr, - LD->getMemOperand()); - unsigned ExtendOp; - switch (ExtType) { - case ISD::EXTLOAD: - ExtendOp = (SrcVT.isFloatingPoint() ? - ISD::FP_EXTEND : ISD::ANY_EXTEND); - break; - case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break; - case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; - default: llvm_unreachable("Unexpected extend load type!"); - } - Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); - Chain = Load.getValue(1); - break; - } - - assert(!SrcVT.isVector() && - "Vector Loads are handled in LegalizeVectorOps"); - - // FIXME: This does not work for vectors on most targets. Sign- and - // zero-extend operations are currently folded into extending loads, - // whether they are legal or not, and then we end up here without any - // support for legalizing them. 
- assert(ExtType != ISD::EXTLOAD && - "EXTLOAD should always be supported!"); - // Turn the unsupported load into an EXTLOAD followed by an explicit - // zero/sign extend inreg. - SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), - Chain, Ptr, SrcVT, - LD->getMemOperand()); - SDValue ValRes; - if (ExtType == ISD::SEXTLOAD) - ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, - Result.getValueType(), - Result, DAG.getValueType(SrcVT)); - else - ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); - Value = ValRes; - Chain = Result.getValue(1); - break; + if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && + TLI.isTypeLegal(SrcVT)) { + SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr, + LD->getMemOperand()); + unsigned ExtendOp; + switch (ExtType) { + case ISD::EXTLOAD: + ExtendOp = (SrcVT.isFloatingPoint() ? + ISD::FP_EXTEND : ISD::ANY_EXTEND); + break; + case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break; + case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; + default: llvm_unreachable("Unexpected extend load type!"); + } + Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); + Chain = Load.getValue(1); + break; + } + + assert(!SrcVT.isVector() && + "Vector Loads are handled in LegalizeVectorOps"); + + // FIXME: This does not work for vectors on most targets. Sign- + // and zero-extend operations are currently folded into extending + // loads, whether they are legal or not, and then we end up here + // without any support for legalizing them. + assert(ExtType != ISD::EXTLOAD && + "EXTLOAD should always be supported!"); + // Turn the unsupported load into an EXTLOAD followed by an + // explicit zero/sign extend inreg. 
+ SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, + Node->getValueType(0), + Chain, Ptr, SrcVT, + LD->getMemOperand()); + SDValue ValRes; + if (ExtType == ISD::SEXTLOAD) + ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, + Result.getValueType(), + Result, DAG.getValueType(SrcVT)); + else + ValRes = DAG.getZeroExtendInReg(Result, dl, + SrcVT.getScalarType()); + Value = ValRes; + Chain = Result.getValue(1); + break; } } @@ -1177,6 +1186,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { if (Action != TargetLowering::Promote) Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); break; + case ISD::FP_TO_FP16: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::EXTRACT_VECTOR_ELT: @@ -1256,6 +1266,13 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { if (Action == TargetLowering::Legal) Action = TargetLowering::Custom; break; + case ISD::READ_REGISTER: + case ISD::WRITE_REGISTER: + // Named register is legal in the DAG, but blocked by register name + // selection if not implemented by target (to chose the correct register) + // They'll be converted to Copy(To/From)Reg. + Action = TargetLowering::Legal; + break; case ISD::DEBUGTRAP: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); if (Action == TargetLowering::Expand) { @@ -1383,10 +1400,39 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { SDValue Vec = Op.getOperand(0); SDValue Idx = Op.getOperand(1); SDLoc dl(Op); - // Store the value to a temporary stack slot, then LOAD the returned part. - SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); - SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, - MachinePointerInfo(), false, false, 0); + + // Before we generate a new store to a temporary stack slot, see if there is + // already one that we can use. 
There often is because when we scalarize + // vector operations (using SelectionDAG::UnrollVectorOp for example) a whole + // series of EXTRACT_VECTOR_ELT nodes are generated, one for each element in + // the vector. If all are expanded here, we don't want one store per vector + // element. + SDValue StackPtr, Ch; + for (SDNode::use_iterator UI = Vec.getNode()->use_begin(), + UE = Vec.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + if (StoreSDNode *ST = dyn_cast<StoreSDNode>(User)) { + if (ST->isIndexed() || ST->isTruncatingStore() || + ST->getValue() != Vec) + continue; + + // Make sure that nothing else could have stored into the destination of + // this store. + if (!ST->getChain().reachesChainWithoutSideEffects(DAG.getEntryNode())) + continue; + + StackPtr = ST->getBasePtr(); + Ch = SDValue(ST, 0); + break; + } + } + + if (!Ch.getNode()) { + // Store the value to a temporary stack slot, then LOAD the returned part. + StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); + Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, + MachinePointerInfo(), false, false, 0); + } // Add the offset to the index. unsigned EltSize = @@ -1487,8 +1533,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { SDValue StoreChain; if (!Stores.empty()) // Not all undef elements? - StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &Stores[0], Stores.size()); + StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); else StoreChain = DAG.getEntryNode(); @@ -1530,9 +1575,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { // the pointer so that the loaded integer will contain the sign bit. 
unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits(); unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8; - LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), - LoadPtr, - DAG.getConstant(ByteOffset, LoadPtr.getValueType())); + LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), LoadPtr, + DAG.getConstant(ByteOffset, LoadPtr.getValueType())); // Load a legal integer containing the sign bit. SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(), false, false, false, 0); @@ -1555,8 +1599,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { // Select between the nabs and abs value based on the sign bit of // the input. return DAG.getSelect(dl, AbsVal.getValueType(), SignBit, - DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal), - AbsVal); + DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal), + AbsVal); } void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, @@ -1609,8 +1653,8 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, /// If the SETCC has been legalized using the inverse condcode, then LHS and /// RHS will be unchanged, CC will set to the inverted condcode, and NeedInvert /// will be set to true. The caller must invert the result of the SETCC with -/// SelectionDAG::getNOT() or take equivalent action to swap the effect of a -/// true/false result. +/// SelectionDAG::getLogicalNOT() or take equivalent action to swap the effect +/// of a true/false result. /// /// \returns true if the SetCC has been legalized, false if it hasn't. 
bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, @@ -1776,6 +1820,98 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { false, false, false, 0); } +static bool +ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG, + const TargetLowering &TLI, SDValue &Res) { + unsigned NumElems = Node->getNumOperands(); + SDLoc dl(Node); + EVT VT = Node->getValueType(0); + + // Try to group the scalars into pairs, shuffle the pairs together, then + // shuffle the pairs of pairs together, etc. until the vector has + // been built. This will work only if all of the necessary shuffle masks + // are legal. + + // We do this in two phases; first to check the legality of the shuffles, + // and next, assuming that all shuffles are legal, to create the new nodes. + for (int Phase = 0; Phase < 2; ++Phase) { + SmallVector<std::pair<SDValue, SmallVector<int, 16> >, 16> IntermedVals, + NewIntermedVals; + for (unsigned i = 0; i < NumElems; ++i) { + SDValue V = Node->getOperand(i); + if (V.getOpcode() == ISD::UNDEF) + continue; + + SDValue Vec; + if (Phase) + Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, V); + IntermedVals.push_back(std::make_pair(Vec, SmallVector<int, 16>(1, i))); + } + + while (IntermedVals.size() > 2) { + NewIntermedVals.clear(); + for (unsigned i = 0, e = (IntermedVals.size() & ~1u); i < e; i += 2) { + // This vector and the next vector are shuffled together (simply to + // append the one to the other). 
+ SmallVector<int, 16> ShuffleVec(NumElems, -1); + + SmallVector<int, 16> FinalIndices; + FinalIndices.reserve(IntermedVals[i].second.size() + + IntermedVals[i+1].second.size()); + + int k = 0; + for (unsigned j = 0, f = IntermedVals[i].second.size(); j != f; + ++j, ++k) { + ShuffleVec[k] = j; + FinalIndices.push_back(IntermedVals[i].second[j]); + } + for (unsigned j = 0, f = IntermedVals[i+1].second.size(); j != f; + ++j, ++k) { + ShuffleVec[k] = NumElems + j; + FinalIndices.push_back(IntermedVals[i+1].second[j]); + } + + SDValue Shuffle; + if (Phase) + Shuffle = DAG.getVectorShuffle(VT, dl, IntermedVals[i].first, + IntermedVals[i+1].first, + ShuffleVec.data()); + else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT)) + return false; + NewIntermedVals.push_back(std::make_pair(Shuffle, FinalIndices)); + } + + // If we had an odd number of defined values, then append the last + // element to the array of new vectors. + if ((IntermedVals.size() & 1) != 0) + NewIntermedVals.push_back(IntermedVals.back()); + + IntermedVals.swap(NewIntermedVals); + } + + assert(IntermedVals.size() <= 2 && IntermedVals.size() > 0 && + "Invalid number of intermediate vectors"); + SDValue Vec1 = IntermedVals[0].first; + SDValue Vec2; + if (IntermedVals.size() > 1) + Vec2 = IntermedVals[1].first; + else if (Phase) + Vec2 = DAG.getUNDEF(VT); + + SmallVector<int, 16> ShuffleVec(NumElems, -1); + for (unsigned i = 0, e = IntermedVals[0].second.size(); i != e; ++i) + ShuffleVec[IntermedVals[0].second[i]] = i; + for (unsigned i = 0, e = IntermedVals[1].second.size(); i != e; ++i) + ShuffleVec[IntermedVals[1].second[i]] = NumElems + i; + + if (Phase) + Res = DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data()); + else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT)) + return false; + } + + return true; +} /// ExpandBUILD_VECTOR - Expand a BUILD_VECTOR node on targets that don't /// support the operation, but do support the resultant vector type. 
@@ -1850,25 +1986,38 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { false, false, false, Alignment); } - if (!MoreThanTwoValues) { - SmallVector<int, 8> ShuffleVec(NumElems, -1); - for (unsigned i = 0; i < NumElems; ++i) { - SDValue V = Node->getOperand(i); - if (V.getOpcode() == ISD::UNDEF) - continue; - ShuffleVec[i] = V == Value1 ? 0 : NumElems; - } - if (TLI.isShuffleMaskLegal(ShuffleVec, Node->getValueType(0))) { - // Get the splatted value into the low element of a vector register. - SDValue Vec1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value1); - SDValue Vec2; - if (Value2.getNode()) - Vec2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value2); - else - Vec2 = DAG.getUNDEF(VT); + SmallSet<SDValue, 16> DefinedValues; + for (unsigned i = 0; i < NumElems; ++i) { + if (Node->getOperand(i).getOpcode() == ISD::UNDEF) + continue; + DefinedValues.insert(Node->getOperand(i)); + } - // Return shuffle(LowValVec, undef, <0,0,0,0>) - return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data()); + if (TLI.shouldExpandBuildVectorWithShuffles(VT, DefinedValues.size())) { + if (!MoreThanTwoValues) { + SmallVector<int, 8> ShuffleVec(NumElems, -1); + for (unsigned i = 0; i < NumElems; ++i) { + SDValue V = Node->getOperand(i); + if (V.getOpcode() == ISD::UNDEF) + continue; + ShuffleVec[i] = V == Value1 ? 0 : NumElems; + } + if (TLI.isShuffleMaskLegal(ShuffleVec, Node->getValueType(0))) { + // Get the splatted value into the low element of a vector register. 
+ SDValue Vec1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value1); + SDValue Vec2; + if (Value2.getNode()) + Vec2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value2); + else + Vec2 = DAG.getUNDEF(VT); + + // Return shuffle(LowValVec, undef, <0,0,0,0>) + return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data()); + } + } else { + SDValue Res; + if (ExpandBVWithShuffles(Node, DAG, TLI, Res)) + return Res; } } @@ -1910,13 +2059,12 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, if (isTailCall) InChain = TCChain; - TargetLowering:: - CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, - 0, TLI.getLibcallCallingConv(LC), isTailCall, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, SDLoc(Node)); - std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(SDLoc(Node)).setChain(InChain) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setTailCall(isTailCall).setSExtResult(isSigned).setZExtResult(!isSigned); + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); if (!CallInfo.second.getNode()) // It's a tailcall, return the chain (which is the DAG root). 
@@ -1945,12 +2093,12 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, TLI.getPointerTy()); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); - TargetLowering:: - CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, - false, 0, TLI.getLibcallCallingConv(LC), - /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, dl); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setSExtResult(isSigned).setZExtResult(!isSigned); + std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI); return CallInfo.first; @@ -1979,11 +2127,12 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, TLI.getPointerTy()); Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); - TargetLowering:: - CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, - 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, SDLoc(Node)); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(SDLoc(Node)).setChain(InChain) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setSExtResult(isSigned).setZExtResult(!isSigned); + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); return CallInfo; @@ -2038,7 +2187,7 @@ static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, case MVT::i128: LC= isSigned ? 
RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; } - return TLI.getLibcallName(LC) != 0; + return TLI.getLibcallName(LC) != nullptr; } /// useDivRem - Only issue divrem libcall if both quotient and remainder are @@ -2116,11 +2265,11 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, TLI.getPointerTy()); SDLoc dl(Node); - TargetLowering:: - CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, - 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, dl); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(InChain) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setSExtResult(isSigned).setZExtResult(!isSigned); + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); // Remainder is loaded back from the stack frame. @@ -2141,7 +2290,7 @@ static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) { case MVT::f128: LC = RTLIB::SINCOS_F128; break; case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break; } - return TLI.getLibcallName(LC) != 0; + return TLI.getLibcallName(LC) != nullptr; } /// canCombineSinCosLibcall - Return true if sincos libcall is available and @@ -2230,12 +2379,11 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, TLI.getPointerTy()); SDLoc dl(Node); - TargetLowering:: - CallLoweringInfo CLI(InChain, Type::getVoidTy(*DAG.getContext()), - false, false, false, false, - 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, dl); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(InChain) + .setCallee(TLI.getLibcallCallingConv(LC), + Type::getVoidTy(*DAG.getContext()), Callee, std::move(Args), 0); + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); Results.push_back(DAG.getLoad(RetVT, dl, CallInfo.second, SinPtr, @@ -2503,12 +2651,15 @@ SDValue 
SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1); assert(NewOutTy.isInteger() && "Ran out of possibilities!"); + // A larger signed type can hold all unsigned values of the requested type, + // so using FP_TO_SINT is valid if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) { OpToUse = ISD::FP_TO_SINT; break; } - if (TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) { + // However, if the value may be < 0.0, we *must* use some FP_TO_SINT. + if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) { OpToUse = ISD::FP_TO_UINT; break; } @@ -2845,15 +2996,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // If the target didn't lower this, lower it to '__sync_synchronize()' call // FIXME: handle "fence singlethread" more efficiently. TargetLowering::ArgListTy Args; - TargetLowering:: - CallLoweringInfo CLI(Node->getOperand(0), - Type::getVoidTy(*DAG.getContext()), - false, false, false, false, 0, CallingConv::C, - /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - DAG.getExternalSymbol("__sync_synchronize", - TLI.getPointerTy()), - Args, DAG, dl); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(Node->getOperand(0)) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("__sync_synchronize", + TLI.getPointerTy()), std::move(Args), 0); + std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); Results.push_back(CallResult.second); @@ -2862,13 +3011,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::ATOMIC_LOAD: { // There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP. 
SDValue Zero = DAG.getConstant(0, Node->getValueType(0)); - SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, - cast<AtomicSDNode>(Node)->getMemoryVT(), - Node->getOperand(0), - Node->getOperand(1), Zero, Zero, - cast<AtomicSDNode>(Node)->getMemOperand(), - cast<AtomicSDNode>(Node)->getOrdering(), - cast<AtomicSDNode>(Node)->getSynchScope()); + SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other); + SDValue Swap = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs, + Node->getOperand(0), Node->getOperand(1), Zero, Zero, + cast<AtomicSDNode>(Node)->getMemOperand(), + cast<AtomicSDNode>(Node)->getOrdering(), + cast<AtomicSDNode>(Node)->getOrdering(), + cast<AtomicSDNode>(Node)->getSynchScope()); Results.push_back(Swap.getValue(0)); Results.push_back(Swap.getValue(1)); break; @@ -2905,6 +3055,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp.second); break; } + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { + // Expanding an ATOMIC_CMP_SWAP_WITH_SUCCESS produces an ATOMIC_CMP_SWAP and + // splits out the success value as a comparison. Expanding the resulting + // ATOMIC_CMP_SWAP will produce a libcall. 
+ SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other); + SDValue Res = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs, + Node->getOperand(0), Node->getOperand(1), Node->getOperand(2), + Node->getOperand(3), cast<MemSDNode>(Node)->getMemOperand(), + cast<AtomicSDNode>(Node)->getSuccessOrdering(), + cast<AtomicSDNode>(Node)->getFailureOrdering(), + cast<AtomicSDNode>(Node)->getSynchScope()); + + SDValue Success = DAG.getSetCC(SDLoc(Node), Node->getValueType(1), + Res, Node->getOperand(2), ISD::SETEQ); + + Results.push_back(Res.getValue(0)); + Results.push_back(Success); + Results.push_back(Res.getValue(1)); + break; + } case ISD::DYNAMIC_STACKALLOC: ExpandDYNAMIC_STACKALLOC(Node, Results); break; @@ -2925,14 +3096,11 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::TRAP: { // If this operation is not supported, lower it to 'abort()' call TargetLowering::ArgListTy Args; - TargetLowering:: - CallLoweringInfo CLI(Node->getOperand(0), - Type::getVoidTy(*DAG.getContext()), - false, false, false, false, 0, CallingConv::C, - /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - DAG.getExternalSymbol("abort", TLI.getPointerTy()), - Args, DAG, dl); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(Node->getOperand(0)) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("abort", TLI.getPointerTy()), + std::move(Args), 0); std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); Results.push_back(CallResult.second); @@ -2986,6 +3154,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Node->getOperand(0), Node->getValueType(0), dl); Results.push_back(Tmp1); break; + case ISD::FP_TO_SINT: + if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) + Results.push_back(Tmp1); + break; case ISD::FP_TO_UINT: { SDValue True, False; EVT VT = Node->getOperand(0).getValueType(); @@ -3099,7 +3271,8 @@ void 
SelectionDAGLegalize::ExpandNode(SDNode *Node) { EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT); // BUILD_VECTOR operands are allowed to be wider than the element type. - // But if NewEltVT is smaller that EltVT the BUILD_VECTOR does not accept it + // But if NewEltVT is smaller that EltVT the BUILD_VECTOR does not accept + // it. if (NewEltVT.bitsLT(EltVT)) { // Convert shuffle node. @@ -3107,8 +3280,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // cast operands to v8i32 and re-build the mask. // Calculate new VT, the size of the new VT should be equal to original. - EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, - VT.getSizeInBits()/NewEltVT.getSizeInBits()); + EVT NewVT = + EVT::getVectorVT(*DAG.getContext(), NewEltVT, + VT.getSizeInBits() / NewEltVT.getSizeInBits()); assert(NewVT.bitsEq(VT)); // cast operands to new VT @@ -3116,7 +3290,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Op1 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op1); // Convert the shuffle mask - unsigned int factor = NewVT.getVectorNumElements()/VT.getVectorNumElements(); + unsigned int factor = + NewVT.getVectorNumElements()/VT.getVectorNumElements(); // EltVT gets smaller assert(factor > 0); @@ -3155,7 +3330,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { TLI.getVectorIdxTy()))); } - Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); + Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); // We may have changed the BUILD_VECTOR type. Cast it back to the Node type. 
Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Tmp1); Results.push_back(Tmp1); @@ -3339,12 +3514,28 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128)); break; - case ISD::FP16_TO_FP32: - Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); + case ISD::FP16_TO_FP: { + if (Node->getValueType(0) == MVT::f32) { + Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); + break; + } + + // We can extend to types bigger than f32 in two steps without changing the + // result. Since "f16 -> f32" is much more commonly available, give CodeGen + // the option of emitting that before resorting to a libcall. + SDValue Res = + DAG.getNode(ISD::FP16_TO_FP, dl, MVT::f32, Node->getOperand(0)); + Results.push_back( + DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res)); break; - case ISD::FP32_TO_FP16: - Results.push_back(ExpandLibCall(RTLIB::FPROUND_F32_F16, Node, false)); + } + case ISD::FP_TO_FP16: { + RTLIB::Libcall LC = + RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16"); + Results.push_back(ExpandLibCall(LC, Node, false)); break; + } case ISD::ConstantFP: { ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node); // Check to see if this FP immediate is already legal. 
@@ -3476,6 +3667,23 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Node->getOperand(1))); break; } + + SDValue Lo, Hi; + EVT HalfType = VT.getHalfSizedIntegerVT(*DAG.getContext()); + if (TLI.isOperationLegalOrCustom(ISD::ZERO_EXTEND, VT) && + TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND, VT) && + TLI.isOperationLegalOrCustom(ISD::SHL, VT) && + TLI.isOperationLegalOrCustom(ISD::OR, VT) && + TLI.expandMUL(Node, Lo, Hi, HalfType, DAG)) { + Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo); + Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Hi); + SDValue Shift = DAG.getConstant(HalfType.getSizeInBits(), + TLI.getShiftAmountTy(HalfType)); + Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift); + Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi)); + break; + } + Tmp1 = ExpandIntLibCall(Node, false, RTLIB::MUL_I8, RTLIB::MUL_I16, RTLIB::MUL_I32, @@ -3491,7 +3699,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { ISD::ADD : ISD::SUB, dl, LHS.getValueType(), LHS, RHS); Results.push_back(Sum); - EVT OType = Node->getValueType(1); + EVT ResultType = Node->getValueType(1); + EVT OType = getSetCCResultType(Node->getValueType(0)); SDValue Zero = DAG.getConstant(0, LHS.getValueType()); @@ -3514,7 +3723,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); - Results.push_back(Cmp); + Results.push_back(DAG.getBoolExtOrTrunc(Cmp, dl, ResultType, ResultType)); break; } case ISD::UADDO: @@ -3525,9 +3734,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { ISD::ADD : ISD::SUB, dl, LHS.getValueType(), LHS, RHS); Results.push_back(Sum); - Results.push_back(DAG.getSetCC(dl, Node->getValueType(1), Sum, LHS, - Node->getOpcode () == ISD::UADDO ? 
- ISD::SETULT : ISD::SETUGT)); + + EVT ResultType = Node->getValueType(1); + EVT SetCCType = getSetCCResultType(Node->getValueType(0)); + ISD::CondCode CC + = Node->getOpcode() == ISD::UADDO ? ISD::SETULT : ISD::SETUGT; + SDValue SetCC = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC); + + Results.push_back(DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType)); break; } case ISD::UMULO: @@ -3549,8 +3763,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS, RHS); TopHalf = BottomHalf.getValue(1); - } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), - VT.getSizeInBits() * 2))) { + } else if (TLI.isTypeLegal(WideVT)) { LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS); @@ -3708,7 +3921,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // If we expanded the SETCC by inverting the condition code, then wrap // the existing SETCC in a NOT to restore the intended condition. if (NeedInvert) - Tmp1 = DAG.getNOT(dl, Tmp1, Tmp1->getValueType(0)); + Tmp1 = DAG.getLogicalNOT(dl, Tmp1, Tmp1->getValueType(0)); Results.push_back(Tmp1); break; @@ -3718,7 +3931,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // illegal; expand it into a SELECT_CC. 
EVT VT = Node->getValueType(0); int TrueValue; - switch (TLI.getBooleanContents(VT.isVector())) { + switch (TLI.getBooleanContents(Tmp1->getValueType(0))) { case TargetLowering::ZeroOrOneBooleanContent: case TargetLowering::UndefinedBooleanContent: TrueValue = 1; @@ -3738,13 +3951,29 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp2 = Node->getOperand(1); // RHS Tmp3 = Node->getOperand(2); // True Tmp4 = Node->getOperand(3); // False + EVT VT = Node->getValueType(0); SDValue CC = Node->getOperand(4); + ISD::CondCode CCOp = cast<CondCodeSDNode>(CC)->get(); + + if (TLI.isCondCodeLegal(CCOp, Tmp1.getSimpleValueType())) { + // If the condition code is legal, then we need to expand this + // node using SETCC and SELECT. + EVT CmpVT = Tmp1.getValueType(); + assert(!TLI.isOperationExpand(ISD::SELECT, VT) && + "Cannot expand ISD::SELECT_CC when ISD::SELECT also needs to be " + "expanded."); + EVT CCVT = TLI.getSetCCResultType(*DAG.getContext(), CmpVT); + SDValue Cond = DAG.getNode(ISD::SETCC, dl, CCVT, Tmp1, Tmp2, CC); + Results.push_back(DAG.getSelect(dl, VT, Cond, Tmp3, Tmp4)); + break; + } + // SELECT_CC is legal, so the condition code must not be. bool Legalized = false; // Try to legalize by inverting the condition. This is for targets that // might support an ordered version of a condition, but not the unordered // version (or vice versa). 
- ISD::CondCode InvCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), + ISD::CondCode InvCC = ISD::getSetCCInverse(CCOp, Tmp1.getValueType().isInteger()); if (TLI.isCondCodeLegal(InvCC, Tmp1.getSimpleValueType())) { // Use the new condition code and swap true and false @@ -3782,8 +4011,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } else { Tmp2 = DAG.getConstant(0, Tmp1.getValueType()); CC = DAG.getCondCode(ISD::SETNE); - Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, - Tmp3, Tmp4, CC); + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, + Tmp2, Tmp3, Tmp4, CC); } } Results.push_back(Tmp1); @@ -3813,8 +4042,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } else { Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); Tmp4 = DAG.getCondCode(ISD::SETNE); - Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2, - Tmp3, Node->getOperand(4)); + Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, + Tmp2, Tmp3, Node->getOperand(4)); } Results.push_back(Tmp1); break; @@ -3845,8 +4074,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { VT.getScalarType(), Ex, Sh)); } SDValue Result = - DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), - &Scalars[0], Scalars.size()); + DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Scalars); ReplaceNode(SDValue(Node, 0), Result); break; } @@ -3976,7 +4204,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { } case ISD::SELECT: { unsigned ExtOp, TruncOp; - if (Node->getValueType(0).isVector()) { + if (Node->getValueType(0).isVector() || + Node->getValueType(0).getSizeInBits() == NVT.getSizeInBits()) { ExtOp = ISD::BITCAST; TruncOp = ISD::BITCAST; } else if (Node->getValueType(0).isInteger()) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index ecf4c5d..649dd7a 100644 --- 
a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -24,6 +24,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "legalize-types" + /// GetFPLibCall - Return the right libcall for the given floating point type. static RTLIB::Libcall GetFPLibCall(EVT VT, RTLIB::Libcall Call_F32, @@ -83,7 +85,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break; case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break; - case ISD::FP16_TO_FP32:R = SoftenFloatRes_FP16_TO_FP32(N); break; + case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break; case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break; case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break; case ISD::FREM: R = SoftenFloatRes_FREM(N); break; @@ -371,6 +373,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = N->getOperand(0); + + // There's only a libcall for f16 -> f32, so proceed in two stages. Also, it's + // entirely possible for both f16 and f32 to be legal, so use the fully + // hard-float FP_EXTEND rather than FP16_TO_FP. + if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) + Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op); + RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first; @@ -378,16 +387,29 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { // FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special // nodes? 
-SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); +SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) { + EVT MidVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32); SDValue Op = N->getOperand(0); - return TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false, - SDLoc(N)).first; + SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, &Op, 1, + false, SDLoc(N)).first; + if (N->getValueType(0) == MVT::f32) + return Res32; + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, N->getValueType(0)); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); + return TLI.makeLibCall(DAG, LC, NVT, &Res32, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = N->getOperand(0); + if (N->getValueType(0) == MVT::f16) { + // Semi-soften first, to FP_TO_FP16, so that targets which support f16 as a + // storage-only type get a chance to select things. 
+ return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, Op); + } + RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first; @@ -496,6 +518,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + if (N->getValueType(0) == MVT::f16) + return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, N->getOperand(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::TRUNC_F32, @@ -623,10 +648,11 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break; case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; + case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break; + case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break; case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_UINT(N); break; - case ISD::FP32_TO_FP16:Res = SoftenFloatOp_FP32_TO_FP16(N); break; case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break; @@ -652,11 +678,32 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { GetSoftenedFloat(N->getOperand(0))); } +SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) { + // If we get here, the result must be legal but the source illegal. 
+ EVT SVT = N->getOperand(0).getValueType(); + EVT RVT = N->getValueType(0); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + + if (SVT == MVT::f16) + return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), RVT, Op); + + RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall"); + + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; +} + + SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { + // We actually deal with the partially-softened FP_TO_FP16 node too, which + // returns an i16 so doesn't meet the constraints necessary for FP_ROUND. + assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16); + EVT SVT = N->getOperand(0).getValueType(); EVT RVT = N->getValueType(0); + EVT FloatRVT = N->getOpcode() == ISD::FP_TO_FP16 ? MVT::f16 : RVT; - RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, RVT); + RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); @@ -674,7 +721,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. 
- if (NewRHS.getNode() == 0) { + if (!NewRHS.getNode()) { NewRHS = DAG.getConstant(0, NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -702,13 +749,6 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) { return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatOp_FP32_TO_FP16(SDNode *N) { - EVT RVT = N->getValueType(0); - RTLIB::Libcall LC = RTLIB::FPROUND_F32_F16; - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; -} - SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get(); @@ -720,7 +760,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. - if (NewRHS.getNode() == 0) { + if (!NewRHS.getNode()) { NewRHS = DAG.getConstant(0, NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -742,7 +782,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N)); // If softenSetCCOperands returned a scalar, use it. - if (NewRHS.getNode() == 0) { + if (!NewRHS.getNode()) { assert(NewLHS.getValueType() == N->getValueType(0) && "Unexpected setcc expansion!"); return NewLHS; @@ -1340,7 +1380,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) { // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. 
- if (NewRHS.getNode() == 0) { + if (!NewRHS.getNode()) { NewRHS = DAG.getConstant(0, NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -1433,7 +1473,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. - if (NewRHS.getNode() == 0) { + if (!NewRHS.getNode()) { NewRHS = DAG.getConstant(0, NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -1450,7 +1490,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) { FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); // If ExpandSetCCOperands returned a scalar, use it. - if (NewRHS.getNode() == 0) { + if (!NewRHS.getNode()) { assert(NewLHS.getValueType() == N->getValueType(0) && "Unexpected setcc expansion!"); return NewLHS; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 4255948..44d9e38 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -24,6 +24,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "legalize-types" + //===----------------------------------------------------------------------===// // Integer Result Promotion //===----------------------------------------------------------------------===// @@ -97,7 +99,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break; - case ISD::FP32_TO_FP16:Res = PromoteIntRes_FP32_TO_FP16(N); break; + case ISD::FP_TO_FP16: Res = PromoteIntRes_FP_TO_FP16(N); break; case ISD::AND: case ISD::OR: @@ -136,7 +138,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { Res = PromoteIntRes_Atomic1(cast<AtomicSDNode>(N)); break; case ISD::ATOMIC_CMP_SWAP: - Res = 
PromoteIntRes_Atomic2(cast<AtomicSDNode>(N)); break; + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: + Res = PromoteIntRes_AtomicCmpSwap(cast<AtomicSDNode>(N), ResNo); + break; } // If the result is null then the sub-method took care of registering it. @@ -190,16 +194,40 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) { return Res; } -SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, + unsigned ResNo) { + if (ResNo == 1) { + assert(N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS); + EVT SVT = getSetCCResultType(N->getOperand(2).getValueType()); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1)); + + // Only use the result of getSetCCResultType if it is legal, + // otherwise just use the promoted result type (NVT). + if (!TLI.isTypeLegal(SVT)) + SVT = NVT; + + SDVTList VTs = DAG.getVTList(N->getValueType(0), SVT, MVT::Other); + SDValue Res = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, SDLoc(N), N->getMemoryVT(), VTs, + N->getChain(), N->getBasePtr(), N->getOperand(2), N->getOperand(3), + N->getMemOperand(), N->getSuccessOrdering(), N->getFailureOrdering(), + N->getSynchScope()); + ReplaceValueWith(SDValue(N, 0), Res.getValue(0)); + ReplaceValueWith(SDValue(N, 2), Res.getValue(2)); + return Res.getValue(1); + } + SDValue Op2 = GetPromotedInteger(N->getOperand(2)); SDValue Op3 = GetPromotedInteger(N->getOperand(3)); - SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), - N->getMemoryVT(), N->getChain(), N->getBasePtr(), - Op2, Op3, N->getMemOperand(), N->getOrdering(), - N->getSynchScope()); - // Legalized the chain result - switch anything that used the old chain to - // use the new one. 
- ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + SDVTList VTs = + DAG.getVTList(Op2.getValueType(), N->getValueType(1), MVT::Other); + SDValue Res = DAG.getAtomicCmpSwap( + N->getOpcode(), SDLoc(N), N->getMemoryVT(), VTs, N->getChain(), + N->getBasePtr(), Op2, Op3, N->getMemOperand(), N->getSuccessOrdering(), + N->getFailureOrdering(), N->getSynchScope()); + // Update the use to N with the newly created Res. + for (unsigned i = 1, NumResults = N->getNumValues(); i < NumResults; ++i) + ReplaceValueWith(SDValue(N, i), Res.getValue(i)); return Res; } @@ -266,9 +294,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { EVT NVT = Op.getValueType(); SDLoc dl(N); - unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); + unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), - DAG.getConstant(DiffBits, TLI.getPointerTy())); + DAG.getConstant(DiffBits, TLI.getShiftAmountTy(NVT))); } SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { @@ -372,7 +400,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { DAG.getValueType(N->getValueType(0).getScalarType())); } -SDValue DAGTypeLegalizer::PromoteIntRes_FP32_TO_FP16(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); @@ -432,7 +460,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { EVT ValueVTs[] = { N->getValueType(0), NVT }; SDValue Ops[] = { N->getOperand(0), N->getOperand(1) }; SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), - DAG.getVTList(ValueVTs, 2), Ops, 2); + DAG.getVTList(ValueVTs), Ops); // Modified the sum result - switch anything that used the old sum to use // the new one. 
@@ -490,7 +518,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) { EVT OpTy = N->getOperand(1).getValueType(); // Promote all the way up to the canonical SetCC type. - Mask = PromoteTargetBoolean(Mask, getSetCCResultType(OpTy)); + Mask = PromoteTargetBoolean(Mask, OpTy); SDValue LHS = GetPromotedInteger(N->getOperand(1)); SDValue RHS = GetPromotedInteger(N->getOperand(2)); return DAG.getNode(ISD::VSELECT, SDLoc(N), @@ -797,7 +825,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N), OpNo); break; case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break; - case ISD::FP16_TO_FP32: + case ISD::FP16_TO_FP: case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break; case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break; @@ -890,8 +918,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) { assert(OpNo == 1 && "only know how to promote condition"); // Promote all the way up to the canonical SetCC type. - EVT SVT = getSetCCResultType(MVT::Other); - SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT); + SDValue Cond = PromoteTargetBoolean(N->getOperand(1), MVT::Other); // The chain (Op#0) and basic block destination (Op#2) are always legal types. return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Cond, @@ -931,7 +958,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { for (unsigned i = 0; i < NumElts; ++i) NewOps.push_back(GetPromotedInteger(N->getOperand(i))); - return SDValue(DAG.UpdateNodeOperands(N, &NewOps[0], NumElts), 0); + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) { @@ -984,9 +1011,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { EVT OpTy = N->getOperand(1).getValueType(); // Promote all the way up to the canonical SetCC type. - EVT SVT = getSetCCResultType(N->getOpcode() == ISD::SELECT ? 
- OpTy.getScalarType() : OpTy); - Cond = PromoteTargetBoolean(Cond, SVT); + EVT OpVT = N->getOpcode() == ISD::SELECT ? OpTy.getScalarType() : OpTy; + Cond = PromoteTargetBoolean(Cond, OpVT); return SDValue(DAG.UpdateNodeOperands(N, Cond, N->getOperand(1), N->getOperand(2)), 0); @@ -1141,6 +1167,26 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { ReplaceValueWith(SDValue(N, 1), Tmp.second); break; } + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { + AtomicSDNode *AN = cast<AtomicSDNode>(N); + SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::Other); + SDValue Tmp = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP, SDLoc(N), AN->getMemoryVT(), VTs, + N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), + AN->getMemOperand(), AN->getSuccessOrdering(), AN->getFailureOrdering(), + AN->getSynchScope()); + + // Expanding to the strong ATOMIC_CMP_SWAP node means we can determine + // success simply by comparing the loaded value against the ingoing + // comparison. + SDValue Success = DAG.getSetCC(SDLoc(N), N->getValueType(1), Tmp, + N->getOperand(2), ISD::SETEQ); + + SplitInteger(Tmp, Lo, Hi); + ReplaceValueWith(SDValue(N, 1), Success); + ReplaceValueWith(SDValue(N, 2), Tmp.getValue(1)); + break; + } case ISD::AND: case ISD::OR: @@ -1270,6 +1316,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { /// and the shift amount is a constant 'Amt'. Expand the operation. void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue &Lo, SDValue &Hi) { + assert(Amt && "Expected zero shifts to be already optimized away."); SDLoc DL(N); // Expand the incoming operand to be shifted, so that we have its parts SDValue InL, InH; @@ -1296,9 +1343,9 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, // Emit this X << 1 as X+X. 
SDVTList VTList = DAG.getVTList(NVT, MVT::Glue); SDValue LoOps[2] = { InL, InL }; - Lo = DAG.getNode(ISD::ADDC, DL, VTList, LoOps, 2); + Lo = DAG.getNode(ISD::ADDC, DL, VTList, LoOps); SDValue HiOps[3] = { InH, InH, Lo.getValue(1) }; - Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps, 3); + Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps); } else { Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, ShTy)); Hi = DAG.getNode(ISD::OR, DL, NVT, @@ -1372,7 +1419,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits)); APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(N->getOperand(1), KnownZero, KnownOne); + DAG.computeKnownBits(N->getOperand(1), KnownZero, KnownOne); // If we don't know anything about the high bits, exit. if (((KnownZero|KnownOne) & HighBitMask) == 0) @@ -1547,20 +1594,20 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, if (hasCarry) { SDVTList VTList = DAG.getVTList(NVT, MVT::Glue); if (N->getOpcode() == ISD::ADD) { - Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); + Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps); HiOps[2] = Lo.getValue(1); - Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3); + Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps); } else { - Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2); + Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps); HiOps[2] = Lo.getValue(1); - Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3); + Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps); } return; } if (N->getOpcode() == ISD::ADD) { - Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2); - Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2); + Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps); + Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2)); SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0], ISD::SETULT); SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1, @@ -1572,8 +1619,8 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, 
DAG.getConstant(1, NVT), Carry1); Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2); } else { - Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2); - Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2); + Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps); + Hi = DAG.getNode(ISD::SUB, dl, NVT, makeArrayRef(HiOps, 2)); SDValue Cmp = DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()), LoOps[0], LoOps[1], ISD::SETULT); @@ -1596,13 +1643,13 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N, SDValue HiOps[3] = { LHSH, RHSH }; if (N->getOpcode() == ISD::ADDC) { - Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); + Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps); HiOps[2] = Lo.getValue(1); - Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3); + Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps); } else { - Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2); + Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps); HiOps[2] = Lo.getValue(1); - Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3); + Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps); } // Legalized the flag result - switch anything that used the old flag to @@ -1621,9 +1668,9 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N, SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) }; SDValue HiOps[3] = { LHSH, RHSH }; - Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps, 3); + Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps); HiOps[2] = Lo.getValue(1); - Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps, 3); + Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps); // Legalized the flag result - switch anything that used the old flag to // use the new one. 
@@ -1712,9 +1759,13 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned NBitWidth = NVT.getSizeInBits(); - const APInt &Cst = cast<ConstantSDNode>(N)->getAPIntValue(); - Lo = DAG.getConstant(Cst.trunc(NBitWidth), NVT); - Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT); + auto Constant = cast<ConstantSDNode>(N); + const APInt &Cst = Constant->getAPIntValue(); + bool IsTarget = Constant->isTargetOpcode(); + bool IsOpaque = Constant->isOpaque(); + Lo = DAG.getConstant(Cst.trunc(NBitWidth), NVT, IsTarget, IsOpaque); + Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT, IsTarget, + IsOpaque); } void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N, @@ -1923,73 +1974,12 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDLoc dl(N); - bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, NVT); - bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, NVT); - bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, NVT); - bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, NVT); - if (HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI) { - SDValue LL, LH, RL, RH; - GetExpandedInteger(N->getOperand(0), LL, LH); - GetExpandedInteger(N->getOperand(1), RL, RH); - unsigned OuterBitSize = VT.getSizeInBits(); - unsigned InnerBitSize = NVT.getSizeInBits(); - unsigned LHSSB = DAG.ComputeNumSignBits(N->getOperand(0)); - unsigned RHSSB = DAG.ComputeNumSignBits(N->getOperand(1)); - - APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize); - if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) && - DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) { - // The inputs are both zero-extended. - if (HasUMUL_LOHI) { - // We can emit a umul_lohi. 
- Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL); - Hi = SDValue(Lo.getNode(), 1); - return; - } - if (HasMULHU) { - // We can emit a mulhu+mul. - Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL); - Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL); - return; - } - } - if (LHSSB > InnerBitSize && RHSSB > InnerBitSize) { - // The input values are both sign-extended. - if (HasSMUL_LOHI) { - // We can emit a smul_lohi. - Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL); - Hi = SDValue(Lo.getNode(), 1); - return; - } - if (HasMULHS) { - // We can emit a mulhs+mul. - Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL); - Hi = DAG.getNode(ISD::MULHS, dl, NVT, LL, RL); - return; - } - } - if (HasUMUL_LOHI) { - // Lo,Hi = umul LHS, RHS. - SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl, - DAG.getVTList(NVT, NVT), LL, RL); - Lo = UMulLOHI; - Hi = UMulLOHI.getValue(1); - RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH); - LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL); - Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH); - Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH); - return; - } - if (HasMULHU) { - Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL); - Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL); - RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH); - LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL); - Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH); - Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH); - return; - } - } + SDValue LL, LH, RL, RH; + GetExpandedInteger(N->getOperand(0), LL, LH); + GetExpandedInteger(N->getOperand(1), RL, RH); + + if (TLI.expandMUL(N, Lo, Hi, NVT, DAG, LL, LH, RL, RH)) + return; // If nothing else, we can make a libcall. 
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; @@ -2120,7 +2110,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy); SDValue Ops[] = { LHSL, LHSH, ShiftOp }; - Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops, 3); + Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops); Hi = Lo.getValue(1); return; } @@ -2352,12 +2342,12 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, Args.push_back(Entry); SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT); - TargetLowering:: - CallLoweringInfo CLI(Chain, RetTy, true, false, false, false, - 0, TLI.getLibcallCallingConv(LC), - /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Func, Args, DAG, dl); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(Chain) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, std::move(Args), 0) + .setSExtResult(); + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); SplitInteger(CallInfo.first, Lo, Hi); @@ -2442,15 +2432,18 @@ void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); EVT VT = cast<AtomicSDNode>(N)->getMemoryVT(); + SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other); SDValue Zero = DAG.getConstant(0, VT); - SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, VT, - N->getOperand(0), - N->getOperand(1), Zero, Zero, - cast<AtomicSDNode>(N)->getMemOperand(), - cast<AtomicSDNode>(N)->getOrdering(), - cast<AtomicSDNode>(N)->getSynchScope()); + SDValue Swap = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, + cast<AtomicSDNode>(N)->getMemoryVT(), VTs, N->getOperand(0), + N->getOperand(1), Zero, Zero, cast<AtomicSDNode>(N)->getMemOperand(), + cast<AtomicSDNode>(N)->getOrdering(), + cast<AtomicSDNode>(N)->getOrdering(), + cast<AtomicSDNode>(N)->getSynchScope()); + ReplaceValueWith(SDValue(N, 0), Swap.getValue(0)); - ReplaceValueWith(SDValue(N, 1), Swap.getValue(1)); + 
ReplaceValueWith(SDValue(N, 1), Swap.getValue(2)); } //===----------------------------------------------------------------------===// @@ -2575,15 +2568,20 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, // NOTE: on targets without efficient SELECT of bools, we can always use // this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3) - TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes, true, NULL); + TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes, true, + nullptr); SDValue Tmp1, Tmp2; - Tmp1 = TLI.SimplifySetCC(getSetCCResultType(LHSLo.getValueType()), - LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl); + if (TLI.isTypeLegal(LHSLo.getValueType()) && + TLI.isTypeLegal(RHSLo.getValueType())) + Tmp1 = TLI.SimplifySetCC(getSetCCResultType(LHSLo.getValueType()), + LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl); if (!Tmp1.getNode()) Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), LHSLo, RHSLo, LowCC); - Tmp2 = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), - LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl); + if (TLI.isTypeLegal(LHSHi.getValueType()) && + TLI.isTypeLegal(RHSHi.getValueType())) + Tmp2 = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl); if (!Tmp2.getNode()) Tmp2 = DAG.getNode(ISD::SETCC, dl, getSetCCResultType(LHSHi.getValueType()), @@ -2624,7 +2622,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) { // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. - if (NewRHS.getNode() == 0) { + if (!NewRHS.getNode()) { NewRHS = DAG.getConstant(0, NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -2642,7 +2640,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) { // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. 
- if (NewRHS.getNode() == 0) { + if (!NewRHS.getNode()) { NewRHS = DAG.getConstant(0, NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -2659,7 +2657,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) { IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); // If ExpandSetCCOperands returned a scalar, use it. - if (NewRHS.getNode() == 0) { + if (!NewRHS.getNode()) { assert(NewLHS.getValueType() == N->getValueType(0) && "Unexpected setcc expansion!"); return NewLHS; @@ -2907,7 +2905,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { Ops.push_back(Op); } - return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops); } @@ -2954,7 +2952,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { Ops.push_back(Op); } - return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops); } SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { @@ -3002,7 +3000,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { } } - return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops); } SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) { @@ -3058,6 +3056,5 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { } } - return DAG.getNode(ISD::BUILD_VECTOR, dl, N->getValueType(0), - &NewOps[0], NewOps.size()); - } + return DAG.getNode(ISD::BUILD_VECTOR, dl, N->getValueType(0), NewOps); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index eb13230..bd7dacf 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -22,6 +22,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE 
"legalize-types" + static cl::opt<bool> EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden); @@ -159,7 +161,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { if (Mapped & 128) dbgs() << " WidenedVectors"; dbgs() << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } } } @@ -433,7 +435,7 @@ NodeDone: if (Failed) { I->dump(&DAG); dbgs() << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } } #endif @@ -488,7 +490,7 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { // Some operands changed - update the node. if (!NewOps.empty()) { - SDNode *M = DAG.UpdateNodeOperands(N, &NewOps[0], NewOps.size()); + SDNode *M = DAG.UpdateNodeOperands(N, NewOps); if (M != N) { // The node morphed into a different node. Normally for this to happen // the original node would have to be marked NewNode. However this can @@ -634,7 +636,7 @@ namespace { : SelectionDAG::DAGUpdateListener(dtl.getDAG()), DTL(dtl), NodesToAnalyze(nta) {} - virtual void NodeDeleted(SDNode *N, SDNode *E) { + void NodeDeleted(SDNode *N, SDNode *E) override { assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess && N->getNodeId() != DAGTypeLegalizer::Processed && "Invalid node ID for RAUW deletion!"); @@ -655,7 +657,7 @@ namespace { NodesToAnalyze.insert(E); } - virtual void NodeUpdated(SDNode *N) { + void NodeUpdated(SDNode *N) override { // Node updates can mean pretty much anything. It is possible that an // operand was set to something already processed (f.e.) in which case // this node could become ready. Recompute its flags. 
@@ -736,7 +738,7 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { AnalyzeNewValue(Result); SDValue &OpEntry = PromotedIntegers[Op]; - assert(OpEntry.getNode() == 0 && "Node is already promoted!"); + assert(!OpEntry.getNode() && "Node is already promoted!"); OpEntry = Result; } @@ -747,7 +749,7 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { AnalyzeNewValue(Result); SDValue &OpEntry = SoftenedFloats[Op]; - assert(OpEntry.getNode() == 0 && "Node is already converted to integer!"); + assert(!OpEntry.getNode() && "Node is already converted to integer!"); OpEntry = Result; } @@ -761,7 +763,7 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { AnalyzeNewValue(Result); SDValue &OpEntry = ScalarizedVectors[Op]; - assert(OpEntry.getNode() == 0 && "Node is already scalarized!"); + assert(!OpEntry.getNode() && "Node is already scalarized!"); OpEntry = Result; } @@ -787,7 +789,7 @@ void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo, // Remember that this is the result of the node. std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op]; - assert(Entry.first.getNode() == 0 && "Node already expanded"); + assert(!Entry.first.getNode() && "Node already expanded"); Entry.first = Lo; Entry.second = Hi; } @@ -814,7 +816,7 @@ void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo, // Remember that this is the result of the node. std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op]; - assert(Entry.first.getNode() == 0 && "Node already expanded"); + assert(!Entry.first.getNode() && "Node already expanded"); Entry.first = Lo; Entry.second = Hi; } @@ -843,7 +845,7 @@ void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo, // Remember that this is the result of the node. 
std::pair<SDValue, SDValue> &Entry = SplitVectors[Op]; - assert(Entry.first.getNode() == 0 && "Node already split"); + assert(!Entry.first.getNode() && "Node already split"); Entry.first = Lo; Entry.second = Hi; } @@ -855,7 +857,7 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { AnalyzeNewValue(Result); SDValue &OpEntry = WidenedVectors[Op]; - assert(OpEntry.getNode() == 0 && "Node already widened!"); + assert(!OpEntry.getNode() && "Node already widened!"); OpEntry = Result; } @@ -1007,7 +1009,7 @@ SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, unsigned NumOps = N->getNumOperands(); SDLoc dl(N); if (NumOps == 0) { - return TLI.makeLibCall(DAG, LC, N->getValueType(0), 0, 0, isSigned, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), nullptr, 0, isSigned, dl).first; } else if (NumOps == 1) { SDValue Op = N->getOperand(0); @@ -1049,11 +1051,12 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, TLI.getPointerTy()); Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); - TargetLowering:: - CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, - 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, SDLoc(Node)); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(SDLoc(Node)).setChain(InChain) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setSExtResult(isSigned).setZExtResult(!isSigned); + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); return CallInfo; @@ -1062,11 +1065,14 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, /// PromoteTargetBoolean - Promote the given target boolean to a target boolean /// of the given type. A target boolean is an integer value, not necessarily of /// type i1, the bits of which conform to getBooleanContents. 
-SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT VT) { +/// +/// ValVT is the type of values that produced the boolean. +SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) { SDLoc dl(Bool); + EVT BoolVT = getSetCCResultType(ValVT); ISD::NodeType ExtendCode = - TargetLowering::getExtendForContent(TLI.getBooleanContents(VT.isVector())); - return DAG.getNode(ExtendCode, dl, VT, Bool); + TargetLowering::getExtendForContent(TLI.getBooleanContents(ValVT)); + return DAG.getNode(ExtendCode, dl, BoolVT, Bool); } /// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index ce2ba01..117ff31 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -16,7 +16,6 @@ #ifndef SELECTIONDAG_LEGALIZETYPES_H #define SELECTIONDAG_LEGALIZETYPES_H -#define DEBUG_TYPE "legalize-types" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -168,7 +167,7 @@ private: SDNode *Node, bool isSigned); std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); - SDValue PromoteTargetBoolean(SDValue Bool, EVT VT); + SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT); void ReplaceValueWith(SDValue From, SDValue To); void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi); void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, @@ -221,7 +220,7 @@ private: SDValue PromoteIntRes_AssertZext(SDNode *N); SDValue PromoteIntRes_Atomic0(AtomicSDNode *N); SDValue PromoteIntRes_Atomic1(AtomicSDNode *N); - SDValue PromoteIntRes_Atomic2(AtomicSDNode *N); + SDValue PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, unsigned ResNo); SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N); SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N); SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N); @@ -238,7 +237,7 @@ private: SDValue 
PromoteIntRes_CTTZ(SDNode *N); SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N); SDValue PromoteIntRes_FP_TO_XINT(SDNode *N); - SDValue PromoteIntRes_FP32_TO_FP16(SDNode *N); + SDValue PromoteIntRes_FP_TO_FP16(SDNode *N); SDValue PromoteIntRes_INT_EXTEND(SDNode *N); SDValue PromoteIntRes_LOAD(LoadSDNode *N); SDValue PromoteIntRes_Overflow(SDNode *N); @@ -404,7 +403,7 @@ private: SDValue SoftenFloatRes_FNEARBYINT(SDNode *N); SDValue SoftenFloatRes_FNEG(SDNode *N); SDValue SoftenFloatRes_FP_EXTEND(SDNode *N); - SDValue SoftenFloatRes_FP16_TO_FP32(SDNode *N); + SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N); SDValue SoftenFloatRes_FP_ROUND(SDNode *N); SDValue SoftenFloatRes_FPOW(SDNode *N); SDValue SoftenFloatRes_FPOWI(SDNode *N); @@ -426,10 +425,10 @@ private: bool SoftenFloatOperand(SDNode *N, unsigned OpNo); SDValue SoftenFloatOp_BITCAST(SDNode *N); SDValue SoftenFloatOp_BR_CC(SDNode *N); + SDValue SoftenFloatOp_FP_EXTEND(SDNode *N); SDValue SoftenFloatOp_FP_ROUND(SDNode *N); SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N); SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N); - SDValue SoftenFloatOp_FP32_TO_FP16(SDNode *N); SDValue SoftenFloatOp_SELECT_CC(SDNode *N); SDValue SoftenFloatOp_SETCC(SDNode *N); SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); @@ -540,7 +539,9 @@ private: SDValue ScalarizeVecOp_UnaryOp(SDNode *N); SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue ScalarizeVecOp_VSELECT(SDNode *N); SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo); //===--------------------------------------------------------------------===// // Vector Splitting Support: LegalizeVectorTypes.cpp @@ -644,6 +645,7 @@ private: bool WidenVectorOperand(SDNode *N, unsigned OpNo); SDValue WidenVecOp_BITCAST(SDNode *N); SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N); + SDValue WidenVecOp_EXTEND(SDNode *N); SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N); 
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); @@ -671,13 +673,13 @@ private: LoadSDNode *LD, ISD::LoadExtType ExtType); /// Helper genWidenVectorStores - Helper function to generate a set of - /// stores to store a widen vector into non widen memory + /// stores to store a widen vector into non-widen memory /// StChain: list of chains for the stores we have generated /// ST: store of a widen value void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST); /// Helper genWidenVectorTruncStores - Helper function to generate a set of - /// stores to store a truncate widen vector into non widen memory + /// stores to store a truncate widen vector into non-widen memory /// StChain: list of chains for the stores we have generated /// ST: store of a widen value void GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index c749fde..7e2f7b6 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -23,6 +23,8 @@ #include "llvm/IR/DataLayout.h" using namespace llvm; +#define DEBUG_TYPE "legalize-types" + //===----------------------------------------------------------------------===// // Generic Result Expansion. //===----------------------------------------------------------------------===// @@ -58,12 +60,15 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { case TargetLowering::TypeExpandFloat: // Convert the expanded pieces of the input. 
GetExpandedOp(InOp, Lo, Hi); + if (TLI.hasBigEndianPartOrdering(InVT) != + TLI.hasBigEndianPartOrdering(OutVT)) + std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; case TargetLowering::TypeSplitVector: GetSplitVector(InOp, Lo, Hi); - if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(OutVT)) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); @@ -78,9 +83,9 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST"); InOp = GetWidenedVector(InOp); EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT); - llvm::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT); - if (TLI.isBigEndian()) + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT); + std::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT); + if (TLI.hasBigEndianPartOrdering(OutVT)) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); @@ -174,7 +179,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { false, false, MinAlign(Alignment, IncrementSize)); // Handle endianness of the load. - if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(OutVT)) std::swap(Lo, Hi); } @@ -243,7 +248,8 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, SDLoc dl(N); LoadSDNode *LD = cast<LoadSDNode>(N); - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); + EVT ValueVT = LD->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); unsigned Alignment = LD->getAlignment(); @@ -273,7 +279,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, Hi.getValue(1)); // Handle endianness of the load. 
- if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(ValueVT)) std::swap(Lo, Hi); // Modified the chain - switch anything that used the old chain to use @@ -293,7 +299,7 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0); // Handle endianness of the load. - if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(OVT)) std::swap(Lo, Hi); // Modified the chain - switch anything that used the old chain to use @@ -352,7 +358,8 @@ SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) { SmallVector<SDValue, 8> Ops; IntegerToVector(N->getOperand(0), NumElts, Ops, NVT.getVectorElementType()); - SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], NumElts); + SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, + makeArrayRef(Ops.data(), NumElts)); return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec); } @@ -388,7 +395,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::getVectorVT(*DAG.getContext(), NewVT, NewElts.size()), - &NewElts[0], NewElts.size()); + NewElts); // Convert the new vector to the old vector type. 
return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec); @@ -447,7 +454,7 @@ SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) { SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType()); for (unsigned i = 1; i < NumElts; ++i) Ops[i] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { @@ -456,8 +463,8 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { SDLoc dl(N); StoreSDNode *St = cast<StoreSDNode>(N); - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), - St->getValue().getValueType()); + EVT ValueVT = St->getValue().getValueType(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT); SDValue Chain = St->getChain(); SDValue Ptr = St->getBasePtr(); unsigned Alignment = St->getAlignment(); @@ -471,7 +478,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { SDValue Lo, Hi; GetExpandedOp(St->getValue(), Lo, Hi); - if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(ValueVT)) std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), @@ -518,7 +525,7 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, if (getTypeAction(Cond.getValueType()) == TargetLowering::TypeSplitVector) GetSplitVector(Cond, CL, CH); else - llvm::tie(CL, CH) = DAG.SplitVector(Cond, dl); + std::tie(CL, CH) = DAG.SplitVector(Cond, dl); } Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL); @@ -540,7 +547,7 @@ void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = DAG.getUNDEF(LoVT); Hi = DAG.getUNDEF(HiVT); } diff --git 
a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 3fb2d9b..507e7ff 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -37,12 +37,12 @@ class VectorLegalizer { const TargetLowering &TLI; bool Changed; // Keep track of whether anything changed - /// LegalizedNodes - For nodes that are of legal width, and that have more - /// than one use, this map indicates what regularized operand to use. This - /// allows us to avoid legalizing the same thing more than once. + /// For nodes that are of legal width, and that have more than one use, this + /// map indicates what regularized operand to use. This allows us to avoid + /// legalizing the same thing more than once. SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes; - // Adds a node to the translation cache + /// \brief Adds a node to the translation cache. void AddLegalizedOperand(SDValue From, SDValue To) { LegalizedNodes.insert(std::make_pair(From, To)); // If someone requests legalization of the new node, return itself. @@ -50,35 +50,81 @@ class VectorLegalizer { LegalizedNodes.insert(std::make_pair(To, To)); } - // Legalizes the given node + /// \brief Legalizes the given node. SDValue LegalizeOp(SDValue Op); - // Assuming the node is legal, "legalize" the results + + /// \brief Assuming the node is legal, "legalize" the results. SDValue TranslateLegalizeResults(SDValue Op, SDValue Result); - // Implements unrolling a VSETCC. + + /// \brief Implements unrolling a VSETCC. SDValue UnrollVSETCC(SDValue Op); - // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB - // isn't legal. - // Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if - // SINT_TO_FLOAT and SHR on vectors isn't legal. + + /// \brief Implement expand-based legalization of vector operations. 
+ /// + /// This is just a high-level routine to dispatch to specific code paths for + /// operations to legalize them. + SDValue Expand(SDValue Op); + + /// \brief Implements expansion for FNEG; falls back to UnrollVectorOp if + /// FSUB isn't legal. + /// + /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if + /// SINT_TO_FLOAT and SHR on vectors isn't legal. SDValue ExpandUINT_TO_FLOAT(SDValue Op); - // Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. + + /// \brief Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. SDValue ExpandSEXTINREG(SDValue Op); - // Implement vselect in terms of XOR, AND, OR when blend is not supported - // by the target. + + /// \brief Implement expansion for ANY_EXTEND_VECTOR_INREG. + /// + /// Shuffles the low lanes of the operand into place and bitcasts to the proper + /// type. The contents of the bits in the extended part of each element are + /// undef. + SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op); + + /// \brief Implement expansion for SIGN_EXTEND_VECTOR_INREG. + /// + /// Shuffles the low lanes of the operand into place, bitcasts to the proper + /// type, then shifts left and arithmetic shifts right to introduce a sign + /// extension. + SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op); + + /// \brief Implement expansion for ZERO_EXTEND_VECTOR_INREG. + /// + /// Shuffles the low lanes of the operand into place and blends zeros into + /// the remaining lanes, finally bitcasting to the proper type. + SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op); + + /// \brief Expand bswap of vectors into a shuffle if legal. + SDValue ExpandBSWAP(SDValue Op); + + /// \brief Implement vselect in terms of XOR, AND, OR when blend is not + /// supported by the target. 
SDValue ExpandVSELECT(SDValue Op); SDValue ExpandSELECT(SDValue Op); SDValue ExpandLoad(SDValue Op); SDValue ExpandStore(SDValue Op); SDValue ExpandFNEG(SDValue Op); - // Implements vector promotion; this is essentially just bitcasting the - // operands to a different type and bitcasting the result back to the - // original type. - SDValue PromoteVectorOp(SDValue Op); - // Implements [SU]INT_TO_FP vector promotion; this is a [zs]ext of the input - // operand to the next size up. - SDValue PromoteVectorOpINT_TO_FP(SDValue Op); - - public: + + /// \brief Implements vector promotion. + /// + /// This is essentially just bitcasting the operands to a different type and + /// bitcasting the result back to the original type. + SDValue Promote(SDValue Op); + + /// \brief Implements [SU]INT_TO_FP vector promotion. + /// + /// This is a [zs]ext of the input operand to the next size up. + SDValue PromoteINT_TO_FP(SDValue Op); + + /// \brief Implements FP_TO_[SU]INT vector promotion of the result type. + /// + /// It is promoted to the next size up integer type. The result is then + /// truncated back to the original type. + SDValue PromoteFP_TO_INT(SDValue Op, bool isSigned); + +public: + /// \brief Begin legalizing the vector operations in the DAG. bool Run(); VectorLegalizer(SelectionDAG& dag) : DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {} @@ -88,7 +134,7 @@ bool VectorLegalizer::Run() { // Before we start legalizing vector nodes, check if there are any vectors. bool HasVectors = false; for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) { + E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) { // Check if the values of the nodes contain vectors. We don't need to check // the operands because we are going to check their values at some point. 
for (SDNode::value_iterator J = I->value_begin(), E = I->value_end(); @@ -112,7 +158,7 @@ bool VectorLegalizer::Run() { // node is only legalized after all of its operands are legalized. DAG.AssignTopologicalOrder(); for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) + E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) LegalizeOp(SDValue(I, 0)); // Finally, it's possible the root changed. Get the new root. @@ -148,8 +194,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) Ops.push_back(LegalizeOp(Node->getOperand(i))); - SDValue Result = - SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops.data(), Ops.size()), 0); + SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), 0); if (Op.getOpcode() == ISD::LOAD) { LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); @@ -249,6 +294,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FP_EXTEND: case ISD::FMA: case ISD::SIGN_EXTEND_INREG: + case ISD::ANY_EXTEND_VECTOR_INREG: + case ISD::SIGN_EXTEND_VECTOR_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: QueryType = Node->getValueType(0); break; case ISD::FP_ROUND_INREG: @@ -262,21 +310,11 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) { case TargetLowering::Promote: - switch (Op.getOpcode()) { - default: - // "Promote" the operation by bitcasting - Result = PromoteVectorOp(Op); - Changed = true; - break; - case ISD::SINT_TO_FP: - case ISD::UINT_TO_FP: - // "Promote" the operation by extending the operand. 
- Result = PromoteVectorOpINT_TO_FP(Op); - Changed = true; - break; - } + Result = Promote(Op); + Changed = true; + break; + case TargetLowering::Legal: break; - case TargetLowering::Legal: break; case TargetLowering::Custom: { SDValue Tmp1 = TLI.LowerOperation(Op, DAG); if (Tmp1.getNode()) { @@ -286,21 +324,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { // FALL THROUGH } case TargetLowering::Expand: - if (Node->getOpcode() == ISD::SIGN_EXTEND_INREG) - Result = ExpandSEXTINREG(Op); - else if (Node->getOpcode() == ISD::VSELECT) - Result = ExpandVSELECT(Op); - else if (Node->getOpcode() == ISD::SELECT) - Result = ExpandSELECT(Op); - else if (Node->getOpcode() == ISD::UINT_TO_FP) - Result = ExpandUINT_TO_FLOAT(Op); - else if (Node->getOpcode() == ISD::FNEG) - Result = ExpandFNEG(Op); - else if (Node->getOpcode() == ISD::SETCC) - Result = UnrollVSETCC(Op); - else - Result = DAG.UnrollVectorOp(Op.getNode()); - break; + Result = Expand(Op); } // Make sure that the generated code is itself legal. @@ -315,10 +339,23 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { return Result; } -SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { - // Vector "promotion" is basically just bitcasting and doing the operation - // in a different type. For example, x86 promotes ISD::AND on v2i32 to - // v1i64. +SDValue VectorLegalizer::Promote(SDValue Op) { + // For a few operations there is a specific concept for promotion based on + // the operand's type. + switch (Op.getOpcode()) { + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + // "Promote" the operation by extending the operand. + return PromoteINT_TO_FP(Op); + case ISD::FP_TO_UINT: + case ISD::FP_TO_SINT: + // Promote the operation by extending the operand. + return PromoteFP_TO_INT(Op, Op->getOpcode() == ISD::FP_TO_SINT); + } + + // The rest of the time, vector "promotion" is basically just bitcasting and + // doing the operation in a different type. For example, x86 promotes + // ISD::AND on v2i32 to v1i64. 
MVT VT = Op.getSimpleValueType(); assert(Op.getNode()->getNumValues() == 1 && "Can't promote a vector with multiple results!"); @@ -333,12 +370,12 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { Operands[j] = Op.getOperand(j); } - Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size()); + Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands); return DAG.getNode(ISD::BITCAST, dl, VT, Op); } -SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) { +SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) { // INT_TO_FP operations may require the input operand be promoted even // when the type is otherwise legal. EVT VT = Op.getOperand(0).getValueType(); @@ -352,14 +389,9 @@ SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) { // // Increase the bitwidth of the element to the next pow-of-two // (which is greater than 8 bits). - unsigned NumElts = VT.getVectorNumElements(); - EVT EltVT = VT.getVectorElementType(); - EltVT = EVT::getIntegerVT(*DAG.getContext(), 2 * EltVT.getSizeInBits()); - assert(EltVT.isSimple() && "Promoting to a non-simple vector type!"); - - // Build a new vector type and check if it is legal. - MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts); + EVT NVT = VT.widenIntegerVectorElementType(*DAG.getContext()); + assert(NVT.isSimple() && "Promoting to a non-simple vector type!"); SDLoc dl(Op); SmallVector<SDValue, 4> Operands(Op.getNumOperands()); @@ -372,8 +404,36 @@ SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) { Operands[j] = Op.getOperand(j); } - return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), &Operands[0], - Operands.size()); + return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Operands); +} + +// For FP_TO_INT we promote the result type to a vector type with wider +// elements and then truncate the result. This is different from the default +// PromoteVector which uses bitcast to promote thus assuming that the +// promoted vector type has the same overall size. 
+SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) { + assert(Op.getNode()->getNumValues() == 1 && + "Can't promote a vector with multiple results!"); + EVT VT = Op.getValueType(); + + EVT NewVT = VT; // Widened cumulatively, one step per loop iteration. + unsigned NewOpc; + while (1) { + NewVT = NewVT.widenIntegerVectorElementType(*DAG.getContext()); + assert(NewVT.isSimple() && "Promoting to a non-simple vector type!"); + if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewVT)) { + NewOpc = ISD::FP_TO_SINT; + break; + } + if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewVT)) { + NewOpc = ISD::FP_TO_UINT; + break; + } + } + + SDLoc loc(Op); + SDValue promoted = DAG.getNode(NewOpc, loc, NewVT, Op.getOperand(0)); + return DAG.getNode(ISD::TRUNCATE, loc, VT, promoted); } @@ -512,10 +572,9 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { } } - SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &LoadChains[0], LoadChains.size()); + SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, - Op.getNode()->getValueType(0), &Vals[0], Vals.size()); + Op.getNode()->getValueType(0), Vals); AddLegalizedOperand(Op.getValue(0), Value); AddLegalizedOperand(Op.getValue(1), NewChain); @@ -569,12 +628,38 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { Stores.push_back(Store); } - SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &Stores[0], Stores.size()); + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); AddLegalizedOperand(Op, TF); return TF; } +SDValue VectorLegalizer::Expand(SDValue Op) { + switch (Op->getOpcode()) { + case ISD::SIGN_EXTEND_INREG: + return ExpandSEXTINREG(Op); + case ISD::ANY_EXTEND_VECTOR_INREG: + return ExpandANY_EXTEND_VECTOR_INREG(Op); + case ISD::SIGN_EXTEND_VECTOR_INREG: + return ExpandSIGN_EXTEND_VECTOR_INREG(Op); + case ISD::ZERO_EXTEND_VECTOR_INREG: + return ExpandZERO_EXTEND_VECTOR_INREG(Op); + case ISD::BSWAP: + return ExpandBSWAP(Op); + 
case ISD::VSELECT: + return ExpandVSELECT(Op); + case ISD::SELECT: + return ExpandSELECT(Op); + case ISD::UINT_TO_FP: + return ExpandUINT_TO_FLOAT(Op); + case ISD::FNEG: + return ExpandFNEG(Op); + case ISD::SETCC: + return UnrollVSETCC(Op); + default: + return DAG.UnrollVectorOp(Op.getNode()); + } +} + SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { // Lower a select instruction where the condition is a scalar and the // operands are vectors. Lower this select to VSELECT and implement it @@ -614,7 +699,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { // Broadcast the mask so that the entire vector is all-one or all zero. SmallVector<SDValue, 8> Ops(NumElem, Mask); - Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskTy, &Ops[0], Ops.size()); + Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskTy, Ops); // Bitcast the operands to be the same type as the mask. // This is needed when we select between FP types because @@ -652,6 +737,108 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz); } +// Generically expand a vector anyext in register to a shuffle of the relevant +// lanes into the appropriate locations, with other lanes left undef. +SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + int NumElements = VT.getVectorNumElements(); + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + int NumSrcElements = SrcVT.getVectorNumElements(); + + // Build a base mask of undef shuffles. + SmallVector<int, 16> ShuffleMask; + ShuffleMask.resize(NumSrcElements, -1); + + // Place the extended lanes into the correct locations. + int ExtLaneScale = NumSrcElements / NumElements; + int EndianOffset = TLI.isBigEndian() ? 
ExtLaneScale - 1 : 0; + for (int i = 0; i < NumElements; ++i) + ShuffleMask[i * ExtLaneScale + EndianOffset] = i; + + return DAG.getNode( + ISD::BITCAST, DL, VT, + DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask)); +} + +SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + + // First build an any-extend node which can be legalized above when we + // recurse through it. + Op = DAG.getAnyExtendVectorInReg(Src, DL, VT); + + // Now we need sign extend. Do this by shifting the elements. Even if these + // aren't legal operations, they have a better chance of being legalized + // without full scalarization than the sign extension does. + unsigned EltWidth = VT.getVectorElementType().getSizeInBits(); + unsigned SrcEltWidth = SrcVT.getVectorElementType().getSizeInBits(); + SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, VT); + return DAG.getNode(ISD::SRA, DL, VT, + DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount), + ShiftAmount); +} + +// Generically expand a vector zext in register to a shuffle of the relevant +// lanes into the appropriate locations, a blend of zero into the high bits, +// and a bitcast to the wider element type. +SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + int NumElements = VT.getVectorNumElements(); + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + int NumSrcElements = SrcVT.getVectorNumElements(); + + // Build up a zero vector to blend into this one. 
+ EVT SrcScalarVT = SrcVT.getScalarType(); + SDValue ScalarZero = DAG.getTargetConstant(0, SrcScalarVT); + SmallVector<SDValue, 4> BuildVectorOperands(NumSrcElements, ScalarZero); + SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, DL, SrcVT, BuildVectorOperands); + + // Shuffle the incoming lanes into the correct position, and pull all other + // lanes from the zero vector. + SmallVector<int, 16> ShuffleMask; + ShuffleMask.reserve(NumSrcElements); + for (int i = 0; i < NumSrcElements; ++i) + ShuffleMask.push_back(i); + + int ExtLaneScale = NumSrcElements / NumElements; + int EndianOffset = TLI.isBigEndian() ? ExtLaneScale - 1 : 0; + for (int i = 0; i < NumElements; ++i) + ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i; + + return DAG.getNode(ISD::BITCAST, DL, VT, + DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask)); +} + +SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) { + EVT VT = Op.getValueType(); + + // Generate a byte wise shuffle mask for the BSWAP. + SmallVector<int, 16> ShuffleMask; + int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8; + for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I) + for (int J = ScalarSizeInBytes - 1; J >= 0; --J) + ShuffleMask.push_back((I * ScalarSizeInBytes) + J); + + EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size()); + + // Only emit a shuffle if the mask is legal. + if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) + return DAG.UnrollVectorOp(Op.getNode()); + + SDLoc DL(Op); + Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0)); + Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), + ShuffleMask.data()); + return DAG.getNode(ISD::BITCAST, DL, VT, Op); +} + SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // Implement VSELECT in terms of XOR, AND, OR // on platforms which do not support blend natively. @@ -672,9 +859,9 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // FIXME: Sign extend 1 to all ones if thats legal on the target. 
if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || - TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || - TLI.getBooleanContents(true) != - TargetLowering::ZeroOrNegativeOneBooleanContent) + TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || + TLI.getBooleanContents(Op1.getValueType()) != + TargetLowering::ZeroOrNegativeOneBooleanContent) return DAG.UnrollVectorOp(Op.getNode()); // If the mask and the type are different sizes, unroll the vector op. This @@ -769,7 +956,7 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { (EltVT.getSizeInBits()), EltVT), DAG.getConstant(0, EltVT)); } - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index fb8c602..f77c592 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -26,6 +26,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "legalize-types" + //===----------------------------------------------------------------------===// // Result Vector Scalarization: <1 x ty> -> ty. 
//===----------------------------------------------------------------------===// @@ -65,6 +67,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break; case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break; case ISD::ANY_EXTEND: + case ISD::BSWAP: case ISD::CTLZ: case ISD::CTPOP: case ISD::CTTZ: @@ -254,8 +257,26 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { SDValue Cond = GetScalarizedVector(N->getOperand(0)); SDValue LHS = GetScalarizedVector(N->getOperand(1)); - TargetLowering::BooleanContent ScalarBool = TLI.getBooleanContents(false); - TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true); + TargetLowering::BooleanContent ScalarBool = + TLI.getBooleanContents(false, false); + TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true, false); + + // If integer and float booleans have different contents then we can't + // reliably optimize in all cases. There is a full explanation for this in + // DAGCombiner::visitSELECT() where the same issue affects folding + // (select C, 0, 1) to (xor C, 1). + if (TLI.getBooleanContents(false, false) != + TLI.getBooleanContents(false, true)) { + // At least try the common case where the boolean is generated by a + // comparison. 
+ if (Cond->getOpcode() == ISD::SETCC) { + EVT OpVT = Cond->getOperand(0)->getValueType(0); + ScalarBool = TLI.getBooleanContents(OpVT.getScalarType()); + VecBool = TLI.getBooleanContents(OpVT); + } else + ScalarBool = TargetLowering::UndefinedBooleanContent; + } + if (ScalarBool != VecBool) { EVT CondVT = Cond.getValueType(); switch (ScalarBool) { @@ -330,19 +351,31 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { assert(N->getValueType(0).isVector() && N->getOperand(0).getValueType().isVector() && "Operand types must be vectors"); - - SDValue LHS = GetScalarizedVector(N->getOperand(0)); - SDValue RHS = GetScalarizedVector(N->getOperand(1)); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + EVT OpVT = LHS.getValueType(); EVT NVT = N->getValueType(0).getVectorElementType(); SDLoc DL(N); + // The result needs scalarizing, but it's not a given that the source does. + if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { + LHS = GetScalarizedVector(LHS); + RHS = GetScalarizedVector(RHS); + } else { + EVT VT = OpVT.getVectorElementType(); + LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS, + DAG.getConstant(0, TLI.getVectorIdxTy())); + RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS, + DAG.getConstant(0, TLI.getVectorIdxTy())); + } + // Turn it into a scalar SETCC. SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2)); // Vectors may have a different boolean contents to scalars. Promote the // value appropriately. 
ISD::NodeType ExtendCode = - TargetLowering::getExtendForContent(TLI.getBooleanContents(true)); + TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); return DAG.getNode(ExtendCode, DL, NVT, Res); } @@ -357,7 +390,7 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { dbgs() << "\n"); SDValue Res = SDValue(); - if (Res.getNode() == 0) { + if (!Res.getNode()) { switch (N->getOpcode()) { default: #ifndef NDEBUG @@ -381,9 +414,15 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_VECTOR_ELT: Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N); break; + case ISD::VSELECT: + Res = ScalarizeVecOp_VSELECT(N); + break; case ISD::STORE: Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo); break; + case ISD::FP_ROUND: + Res = ScalarizeVecOp_FP_ROUND(N, OpNo); + break; } } @@ -416,13 +455,11 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) { assert(N->getValueType(0).getVectorNumElements() == 1 && "Unexected vector type!"); SDValue Elt = GetScalarizedVector(N->getOperand(0)); - SmallVector<SDValue, 1> Ops(1); - Ops[0] = DAG.getNode(N->getOpcode(), SDLoc(N), - N->getValueType(0).getScalarType(), Elt); + SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N), + N->getValueType(0).getScalarType(), Elt); // Revectorize the result so the types line up with what the uses of this // expression expect. 
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), - &Ops[0], 1); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Op); } /// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one - @@ -431,8 +468,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { SmallVector<SDValue, 8> Ops(N->getNumOperands()); for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) Ops[i] = GetScalarizedVector(N->getOperand(i)); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), - &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Ops); } /// ScalarizeVecOp_EXTRACT_VECTOR_ELT - If the input is a vector that needs to @@ -446,6 +482,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { return Res; } + +/// ScalarizeVecOp_VSELECT - If the input condition is a vector that needs to be +/// scalarized, it must be <1 x i1>, so just convert to a normal ISD::SELECT +/// (still with vector output type since that was acceptable if we got here). +SDValue DAGTypeLegalizer::ScalarizeVecOp_VSELECT(SDNode *N) { + SDValue ScalarCond = GetScalarizedVector(N->getOperand(0)); + EVT VT = N->getValueType(0); + + return DAG.getNode(ISD::SELECT, SDLoc(N), VT, ScalarCond, N->getOperand(1), + N->getOperand(2)); +} + /// ScalarizeVecOp_STORE - If the value to store is a vector that needs to be /// scalarized, it must be <1 x ty>. Just store the element. SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ @@ -467,6 +515,15 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ N->getOriginalAlignment(), N->getTBAAInfo()); } +/// ScalarizeVecOp_FP_ROUND - If the value to round is a vector that needs +/// to be scalarized, it must be <1 x ty>. Convert the element instead. 
+SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) { + SDValue Elt = GetScalarizedVector(N->getOperand(0)); + SDValue Res = DAG.getNode(ISD::FP_ROUND, SDLoc(N), + N->getValueType(0).getVectorElementType(), Elt, + N->getOperand(1)); + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); +} //===----------------------------------------------------------------------===// // Result Vector Splitting @@ -522,6 +579,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); break; + case ISD::BSWAP: case ISD::CONVERT_RNDSAT: case ISD::CTLZ: case ISD::CTTZ: @@ -625,7 +683,7 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, // We know the result is a vector. The input may be either a vector or a // scalar value. EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); SDLoc dl(N); SDValue InOp = N->getOperand(0); @@ -680,13 +738,13 @@ void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; SDLoc dl(N); - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); unsigned LoNumElts = LoVT.getVectorNumElements(); SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts); - Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, &LoOps[0], LoOps.size()); + Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, LoOps); SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end()); - Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, &HiOps[0], HiOps.size()); + Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, HiOps); } void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, @@ -701,13 +759,13 @@ void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, } EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = 
DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors); - Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, &LoOps[0], LoOps.size()); + Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, LoOps); SmallVector<SDValue, 8> HiOps(N->op_begin()+NumSubvectors, N->op_end()); - Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, &HiOps[0], HiOps.size()); + Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, HiOps); } void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, @@ -717,7 +775,7 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); @@ -778,7 +836,7 @@ void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT()); Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, @@ -842,7 +900,7 @@ void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; SDLoc dl(N); - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0)); Hi = DAG.getUNDEF(HiVT); } @@ -852,7 +910,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!"); EVT LoVT, HiVT; SDLoc dl(LD); - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0)); ISD::LoadExtType 
ExtType = LD->getExtensionType(); SDValue Ch = LD->getChain(); @@ -866,7 +924,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, const MDNode *TBAAInfo = LD->getTBAAInfo(); EVT LoMemVT, HiMemVT; - llvm::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal, @@ -897,12 +955,12 @@ void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; SDLoc DL(N); - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); // Split the input. SDValue LL, LH, RL, RH; - llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); - llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); + std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); + std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); @@ -913,7 +971,7 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, // Get the dest types - they may not match the input types, e.g. int_to_fp. EVT LoVT, HiVT; SDLoc dl(N); - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); // If the input also splits, handle it directly for a compile time speedup. // Otherwise split it by hand. 
@@ -921,7 +979,7 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) GetSplitVector(N->getOperand(0), Lo, Hi); else - llvm::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); + std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); if (N->getOpcode() == ISD::FP_ROUND) { Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1)); @@ -950,7 +1008,7 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, EVT SrcVT = N->getOperand(0).getValueType(); EVT DestVT = N->getValueType(0); EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT); // We can do better than a generic split operation if the extend is doing // more than just doubling the width of the elements and the following are @@ -976,7 +1034,7 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, EVT SplitSrcVT = EVT::getVectorVT(Ctx, SrcVT.getVectorElementType(), NumElements / 2); EVT SplitLoVT, SplitHiVT; - llvm::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT); + std::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT); if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) && TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) { DEBUG(dbgs() << "Split vector extend via incremental extend:"; @@ -985,7 +1043,7 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue NewSrc = DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0)); // Get the low and high halves of the new, extended one step, vector. - llvm::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl); + std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl); // Extend those vector halves the rest of the way. Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); @@ -1088,7 +1146,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, } // Construct the Lo/Hi output using a BUILD_VECTOR. 
- Output = DAG.getNode(ISD::BUILD_VECTOR,dl,NewVT, &SVOps[0], SVOps.size()); + Output = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, SVOps); } else if (InputUsed[0] == -1U) { // No input vectors were used! The result is undefined. Output = DAG.getUNDEF(NewVT); @@ -1124,7 +1182,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) return false; - if (Res.getNode() == 0) { + if (!Res.getNode()) { switch (N->getOpcode()) { default: #ifndef NDEBUG @@ -1198,13 +1256,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) { "Lo and Hi have differing types"); EVT LoOpVT, HiOpVT; - llvm::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT); + std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT); assert(LoOpVT == HiOpVT && "Asymmetric vector split?"); SDValue LoOp0, HiOp0, LoOp1, HiOp1, LoMask, HiMask; - llvm::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL); - llvm::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL); - llvm::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL); + std::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL); + std::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL); + std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL); SDValue LoSelect = DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1); @@ -1319,7 +1377,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { GetSplitVector(N->getOperand(1), Lo, Hi); EVT LoMemVT, HiMemVT; - llvm::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; @@ -1366,8 +1424,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { } } - return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), - &Elts[0], Elts.size()); + return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), Elts); } SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { @@ -1408,7 +1465,7 @@ 
SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { // Extract the halves of the input via extract_subvector. SDValue InLoVec, InHiVec; - llvm::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL); + std::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL); // Truncate them to 1/2 the element size. EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, @@ -1510,7 +1567,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::ADD: case ISD::AND: - case ISD::BSWAP: case ISD::MUL: case ISD::MULHS: case ISD::MULHU: @@ -1557,6 +1613,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_Convert(N); break; + case ISD::BSWAP: case ISD::CTLZ: case ISD::CTPOP: case ISD::CTTZ: @@ -1724,8 +1781,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { while (SubConcatEnd < OpsToConcat) SubConcatOps[SubConcatEnd++] = undefVec; ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl, - NextVT, &SubConcatOps[0], - OpsToConcat); + NextVT, SubConcatOps); ConcatEnd = SubConcatIdx + 1; } } @@ -1744,7 +1800,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { for (unsigned j = ConcatEnd; j < NumOps; ++j) ConcatOps[j] = UndefVal; } - return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], NumOps); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, + makeArrayRef(ConcatOps.data(), NumOps)); } SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { @@ -1786,8 +1843,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { SDValue UndefVal = DAG.getUNDEF(InVT); for (unsigned i = 1; i != NumConcat; ++i) Ops[i] = UndefVal; - SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, - &Ops[0], NumConcat); + SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops); if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InVec); return 
DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1)); @@ -1822,7 +1878,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { for (; i < WidenNumElts; ++i) Ops[i] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, &Ops[0], WidenNumElts); + return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops); } SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) { @@ -1946,11 +2002,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { SDValue NewVec; if (InVT.isVector()) - NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, - NewInVT, &Ops[0], NewNumElts); + NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops); else - NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, - NewInVT, &Ops[0], NewNumElts); + NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops); return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec); } } @@ -1975,7 +2029,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) { assert(WidenNumElts >= NumElts && "Shrinking vector instead of widening!"); NewOps.append(WidenNumElts - NumElts, DAG.getUNDEF(EltVT)); - return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &NewOps[0], NewOps.size()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, NewOps); } SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { @@ -1998,7 +2052,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { Ops[i] = N->getOperand(i); for (unsigned i = NumOperands; i != NumConcat; ++i) Ops[i] = UndefVal; - return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &Ops[0], NumConcat); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Ops); } } else { InputWidened = true; @@ -2044,7 +2098,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { SDValue UndefVal = DAG.getUNDEF(EltVT); for (; Idx < WidenNumElts; ++Idx) Ops[Idx] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); } SDValue 
DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { @@ -2089,7 +2143,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { for (unsigned i = 1; i != NumConcat; ++i) Ops[i] = UndefVal; - InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, &Ops[0],NumConcat); + InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, Ops); return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, SatOp, CvtCode); } @@ -2122,7 +2176,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { for (; i < WidenNumElts; ++i) Ops[i] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); } SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { @@ -2161,7 +2215,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { SDValue UndefVal = DAG.getUNDEF(EltVT); for (; i < WidenNumElts; ++i) Ops[i] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); } SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { @@ -2189,8 +2243,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { if (LdChain.size() == 1) NewChain = LdChain[0]; else - NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, - &LdChain[0], LdChain.size()); + NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain); // Modified the chain - switch anything that used the old chain to use // the new one. 
@@ -2300,7 +2353,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { SDValue InOp1 = N->getOperand(0); EVT InVT = InOp1.getValueType(); - assert(InVT.isVector() && "can not widen non vector type"); + assert(InVT.isVector() && "can not widen non-vector type"); EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), WidenNumElts); InOp1 = GetWidenedVector(InOp1); @@ -2346,15 +2399,18 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::STORE: Res = WidenVecOp_STORE(N); break; case ISD::SETCC: Res = WidenVecOp_SETCC(N); break; + case ISD::ANY_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + Res = WidenVecOp_EXTEND(N); + break; + case ISD::FP_EXTEND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::TRUNCATE: - case ISD::SIGN_EXTEND: - case ISD::ZERO_EXTEND: - case ISD::ANY_EXTEND: Res = WidenVecOp_Convert(N); break; } @@ -2375,6 +2431,68 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { return false; } +SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) { + SDLoc DL(N); + EVT VT = N->getValueType(0); + + SDValue InOp = N->getOperand(0); + // If some legalization strategy other than widening is used on the operand, + // we can't safely assume that just extending the low lanes is the correct + // transformation. + if (getTypeAction(InOp.getValueType()) != TargetLowering::TypeWidenVector) + return WidenVecOp_Convert(N); + InOp = GetWidenedVector(InOp); + assert(VT.getVectorNumElements() < + InOp.getValueType().getVectorNumElements() && + "Input wasn't widened!"); + + // We may need to further widen the operand until it has the same total + // vector size as the result. 
+ EVT InVT = InOp.getValueType(); + if (InVT.getSizeInBits() != VT.getSizeInBits()) { + EVT InEltVT = InVT.getVectorElementType(); + for (int i = MVT::FIRST_VECTOR_VALUETYPE, e = MVT::LAST_VECTOR_VALUETYPE; i < e; ++i) { + EVT FixedVT = (MVT::SimpleValueType)i; + EVT FixedEltVT = FixedVT.getVectorElementType(); + if (TLI.isTypeLegal(FixedVT) && + FixedVT.getSizeInBits() == VT.getSizeInBits() && + FixedEltVT == InEltVT) { + assert(FixedVT.getVectorNumElements() >= VT.getVectorNumElements() && + "Not enough elements in the fixed type for the operand!"); + assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() && + "We can't have the same type as we started with!"); + if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements()) + InOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FixedVT, + DAG.getUNDEF(FixedVT), InOp, + DAG.getConstant(0, TLI.getVectorIdxTy())); + else + InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp, + DAG.getConstant(0, TLI.getVectorIdxTy())); + break; + } + } + InVT = InOp.getValueType(); + if (InVT.getSizeInBits() != VT.getSizeInBits()) + // We couldn't find a legal vector type that was a widening of the input + // and could be extended in-register to the result type, so we have to + // scalarize. + return WidenVecOp_Convert(N); + } + + // Use special DAG nodes to represent the operation of extending the + // low lanes. 
+ switch (N->getOpcode()) { + default: + llvm_unreachable("Extend legalization on on extend operation!"); + case ISD::ANY_EXTEND: + return DAG.getAnyExtendVectorInReg(InOp, DL, VT); + case ISD::SIGN_EXTEND: + return DAG.getSignExtendVectorInReg(InOp, DL, VT); + case ISD::ZERO_EXTEND: + return DAG.getZeroExtendVectorInReg(InOp, DL, VT); + } +} + SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { // Since the result is legal and the input is illegal, it is unlikely // that we can fix the input to a legal type so unroll the convert @@ -2396,7 +2514,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, DAG.getConstant(i, TLI.getVectorIdxTy()))); - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { @@ -2445,7 +2563,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, DAG.getConstant(j, TLI.getVectorIdxTy())); } - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) { @@ -2474,8 +2592,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { if (StChain.size() == 1) return StChain[0]; else - return DAG.getNode(ISD::TokenFactor, SDLoc(ST), - MVT::Other,&StChain[0],StChain.size()); + return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain); } SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { @@ -2650,8 +2767,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, ConcatOps[0] = LdOp; for (unsigned i = 1; i != NumConcat; ++i) ConcatOps[i] = UndefVal; - return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], - NumConcat); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps); } // Load 
vector by using multiple loads from largest vector to scalar @@ -2685,8 +2801,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, Loads.push_back(DAG.getUNDEF(L->getValueType(0))); size += L->getValueSizeInBits(0); } - L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0), - &Loads[0], Loads.size()); + L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0), Loads); } } else { L = DAG.getLoad(NewVT, dl, Chain, BasePtr, @@ -2730,7 +2845,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, if (NewLdTy != LdTy) { // Create a larger vector ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy, - &ConcatOps[Idx], End - Idx); + makeArrayRef(&ConcatOps[Idx], End - Idx)); Idx = End - 1; LdTy = NewLdTy; } @@ -2739,7 +2854,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, if (WidenWidth == LdTy.getSizeInBits()*(End - Idx)) return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, - &ConcatOps[Idx], End - Idx); + makeArrayRef(&ConcatOps[Idx], End - Idx)); // We need to fill the rest with undefs to build the vector unsigned NumOps = WidenWidth / LdTy.getSizeInBits(); @@ -2752,7 +2867,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, for (; i != NumOps; ++i) WidenOps[i] = UndefVal; } - return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0],NumOps); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, WidenOps); } SDValue @@ -2803,7 +2918,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, for (; i != WidenNumElts; ++i) Ops[i] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); } @@ -2949,7 +3064,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { for (unsigned i = 1; i != NumConcat; ++i) Ops[i] = UndefVal; - return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, &Ops[0], 
NumConcat); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops); } if (WidenNumElts < InNumElts && InNumElts % WidenNumElts) @@ -2968,5 +3083,5 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { SDValue UndefVal = DAG.getUNDEF(EltVT); for ( ; Idx < WidenNumElts; ++Idx) Ops[Idx] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], WidenNumElts); + return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Ops); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 1dd2128..624003f 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -19,7 +19,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "scheduler" #include "llvm/CodeGen/ResourcePriorityQueue.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -31,6 +30,8 @@ using namespace llvm; +#define DEBUG_TYPE "scheduler" + static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable use of DFA during scheduling")); @@ -49,7 +50,7 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) : TLI = IS->getTargetLowering(); const TargetMachine &tm = (*IS->MF).getTarget(); - ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,NULL); + ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,nullptr); // This hard requirement could be relaxed, but for now // do not let it procede. assert (ResourcesModel && "Unimplemented CreateTargetScheduleState."); @@ -214,7 +215,7 @@ bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor /// of SU, return it, otherwise return null. 
SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) { - SUnit *OnlyAvailablePred = 0; + SUnit *OnlyAvailablePred = nullptr; for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { SUnit &Pred = *I->getSUnit(); @@ -222,7 +223,7 @@ SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) { // We found an available, but not scheduled, predecessor. If it's the // only one we have found, keep track of it... otherwise give up. if (OnlyAvailablePred && OnlyAvailablePred != &Pred) - return 0; + return nullptr; OnlyAvailablePred = &Pred; } } @@ -441,7 +442,7 @@ signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) { ResCount -= (regPressureDelta(SU) * ScaleTwo); } - // These are platform specific things. + // These are platform-specific things. // Will need to go into the back end // and accessed from here via a hook. for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) { @@ -581,7 +582,7 @@ void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) { if (SU->isAvailable) return; // All preds scheduled. SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU); - if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) + if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable) return; // Okay, we found a single predecessor that is available, but not scheduled. @@ -598,12 +599,12 @@ void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) { /// to be placed in scheduling sequence. 
SUnit *ResourcePriorityQueue::pop() { if (empty()) - return 0; + return nullptr; std::vector<SUnit *>::iterator Best = Queue.begin(); if (!DisableDFASched) { signed BestCost = SUSchedulingCost(*Best); - for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()), + for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) { if (SUSchedulingCost(*I) > BestCost) { @@ -614,14 +615,14 @@ SUnit *ResourcePriorityQueue::pop() { } // Use default TD scheduling mechanism. else { - for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()), + for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) if (Picker(*Best, *I)) Best = I; } SUnit *V = *Best; - if (Best != prior(Queue.end())) + if (Best != std::prev(Queue.end())) std::swap(*Best, Queue.back()); Queue.pop_back(); @@ -633,7 +634,7 @@ SUnit *ResourcePriorityQueue::pop() { void ResourcePriorityQueue::remove(SUnit *SU) { assert(!Queue.empty() && "Queue is empty!"); std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU); - if (I != prior(Queue.end())) + if (I != std::prev(Queue.end())) std::swap(*I, Queue.back()); Queue.pop_back(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index 4af7172..ee54292 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -15,8 +15,8 @@ #define LLVM_CODEGEN_SDNODEDBGVALUE_H #include "llvm/ADT/SmallVector.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/Support/DataTypes.h" -#include "llvm/Support/DebugLoc.h" namespace llvm { @@ -45,14 +45,17 @@ private: unsigned FrameIx; // valid for stack objects } u; MDNode *mdPtr; + bool IsIndirect; uint64_t Offset; DebugLoc DL; unsigned Order; bool Invalid; public: // Constructor for non-constants. 
- SDDbgValue(MDNode *mdP, SDNode *N, unsigned R, uint64_t off, DebugLoc dl, - unsigned O) : mdPtr(mdP), Offset(off), DL(dl), Order(O), + SDDbgValue(MDNode *mdP, SDNode *N, unsigned R, + bool indir, uint64_t off, DebugLoc dl, + unsigned O) : mdPtr(mdP), IsIndirect(indir), + Offset(off), DL(dl), Order(O), Invalid(false) { kind = SDNODE; u.s.Node = N; @@ -62,14 +65,16 @@ public: // Constructor for constants. SDDbgValue(MDNode *mdP, const Value *C, uint64_t off, DebugLoc dl, unsigned O) : - mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) { + mdPtr(mdP), IsIndirect(false), Offset(off), DL(dl), Order(O), + Invalid(false) { kind = CONST; u.Const = C; } // Constructor for frame indices. SDDbgValue(MDNode *mdP, unsigned FI, uint64_t off, DebugLoc dl, unsigned O) : - mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) { + mdPtr(mdP), IsIndirect(false), Offset(off), DL(dl), Order(O), + Invalid(false) { kind = FRAMEIX; u.FrameIx = FI; } @@ -92,6 +97,9 @@ public: // Returns the FrameIx for a stack object unsigned getFrameIx() { assert (kind==FRAMEIX); return u.FrameIx; } + // Returns whether this is an indirect value. + bool isIndirect() { return IsIndirect; } + // Returns the offset. 
uint64_t getOffset() { return Offset; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 6c5e0ab..4d8c2c7 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "pre-RA-sched" #include "llvm/CodeGen/SchedulerRegistry.h" #include "InstrEmitter.h" #include "ScheduleDAGSDNodes.h" @@ -28,6 +27,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "pre-RA-sched" + STATISTIC(NumUnfolds, "Number of nodes unfolded"); STATISTIC(NumDups, "Number of duplicated nodes"); STATISTIC(NumPRCopies, "Number of physical copies"); @@ -54,7 +55,7 @@ namespace { } SUnit *pop() { - if (empty()) return NULL; + if (empty()) return nullptr; SUnit *V = Queue.back(); Queue.pop_back(); return V; @@ -80,7 +81,7 @@ public: ScheduleDAGFast(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {} - void Schedule(); + void Schedule() override; /// AddPred - adds a predecessor edge to SUnit SU. /// This returns true if this is a new predecessor. @@ -107,7 +108,7 @@ private: void ListScheduleBottomUp(); /// forceUnitLatencies - The fast scheduler doesn't care about real latencies. - bool forceUnitLatencies() const { return true; } + bool forceUnitLatencies() const override { return true; } }; } // end anonymous namespace @@ -117,11 +118,11 @@ void ScheduleDAGFast::Schedule() { DEBUG(dbgs() << "********** List Scheduling **********\n"); NumLiveRegs = 0; - LiveRegDefs.resize(TRI->getNumRegs(), NULL); + LiveRegDefs.resize(TRI->getNumRegs(), nullptr); LiveRegCycles.resize(TRI->getNumRegs(), 0); // Build the scheduling graph. 
- BuildSchedGraph(NULL); + BuildSchedGraph(nullptr); DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su].dumpAll(this)); @@ -144,7 +145,7 @@ void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) { dbgs() << "*** Scheduling failed! ***\n"; PredSU->dump(this); dbgs() << " has been released too many times!\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } #endif --PredSU->NumSuccsLeft; @@ -198,7 +199,7 @@ void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { assert(LiveRegDefs[I->getReg()] == SU && "Physical register dependency violated?"); --NumLiveRegs; - LiveRegDefs[I->getReg()] = NULL; + LiveRegDefs[I->getReg()] = nullptr; LiveRegCycles[I->getReg()] = 0; } } @@ -211,18 +212,18 @@ void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { /// successors to the newly created node. SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { if (SU->getNode()->getGluedNode()) - return NULL; + return nullptr; SDNode *N = SU->getNode(); if (!N) - return NULL; + return nullptr; SUnit *NewSU; bool TryUnfold = false; for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { EVT VT = N->getValueType(i); if (VT == MVT::Glue) - return NULL; + return nullptr; else if (VT == MVT::Other) TryUnfold = true; } @@ -230,13 +231,13 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { const SDValue &Op = N->getOperand(i); EVT VT = Op.getNode()->getValueType(Op.getResNo()); if (VT == MVT::Glue) - return NULL; + return nullptr; } if (TryUnfold) { SmallVector<SDNode*, 2> NewNodes; if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) - return NULL; + return nullptr; DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n"); assert(NewNodes.size() == 2 && "Expected a load folding node!"); @@ -388,11 +389,11 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, const TargetRegisterClass *DestRC, const TargetRegisterClass *SrcRC, SmallVectorImpl<SUnit*> &Copies) { - SUnit *CopyFromSU = 
newSUnit(static_cast<SDNode *>(NULL)); + SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(nullptr)); CopyFromSU->CopySrcRC = SrcRC; CopyFromSU->CopyDstRC = DestRC; - SUnit *CopyToSU = newSUnit(static_cast<SDNode *>(NULL)); + SUnit *CopyToSU = newSUnit(static_cast<SDNode *>(nullptr)); CopyToSU->CopySrcRC = DestRC; CopyToSU->CopyDstRC = SrcRC; @@ -583,7 +584,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() { // and it is expensive. // If cross copy register class is null, then it's not possible to copy // the value at all. - SUnit *NewDef = 0; + SUnit *NewDef = nullptr; if (DestRC != RC) { NewDef = CopyAndMoveSuccessors(LRDef); if (!DestRC && !NewDef) @@ -646,9 +647,10 @@ class ScheduleDAGLinearize : public ScheduleDAGSDNodes { public: ScheduleDAGLinearize(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {} - void Schedule(); + void Schedule() override; - MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos); + MachineBasicBlock * + EmitSchedule(MachineBasicBlock::iterator &InsertPos) override; private: std::vector<SDNode*> Sequence; @@ -660,7 +662,7 @@ private: void ScheduleDAGLinearize::ScheduleNode(SDNode *N) { if (N->getNodeId() != 0) - llvm_unreachable(0); + llvm_unreachable(nullptr); if (!N->isMachineOpcode() && (N->getOpcode() == ISD::EntryToken || isPassiveNode(N))) @@ -673,7 +675,7 @@ void ScheduleDAGLinearize::ScheduleNode(SDNode *N) { unsigned NumOps = N->getNumOperands(); if (unsigned NumLeft = NumOps) { - SDNode *GluedOpN = 0; + SDNode *GluedOpN = nullptr; do { const SDValue &Op = N->getOperand(NumLeft-1); SDNode *OpN = Op.getNode(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 1a562d7..dedca41 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -15,7 +15,6 @@ // //===----------------------------------------------------------------------===// -#define 
DEBUG_TYPE "pre-RA-sched" #include "llvm/CodeGen/SchedulerRegistry.h" #include "ScheduleDAGSDNodes.h" #include "llvm/ADT/STLExtras.h" @@ -36,6 +35,8 @@ #include <climits> using namespace llvm; +#define DEBUG_TYPE "pre-RA-sched" + STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); STATISTIC(NumUnfolds, "Number of nodes unfolded"); STATISTIC(NumDups, "Number of duplicated nodes"); @@ -163,13 +164,14 @@ public: CodeGenOpt::Level OptLevel) : ScheduleDAGSDNodes(mf), NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0), - Topo(SUnits, NULL) { + Topo(SUnits, nullptr) { const TargetMachine &tm = mf.getTarget(); if (DisableSchedCycles || !NeedLatency) HazardRec = new ScheduleHazardRecognizer(); else - HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); + HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer( + tm.getSubtargetImpl(), this); } ~ScheduleDAGRRList() { @@ -177,7 +179,7 @@ public: delete AvailableQueue; } - void Schedule(); + void Schedule() override; ScheduleHazardRecognizer *getHazardRec() { return HazardRec; } @@ -261,7 +263,7 @@ private: /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't /// need actual latency information but the hybrid scheduler does. - bool forceUnitLatencies() const { + bool forceUnitLatencies() const override { return !NeedLatency; } }; @@ -327,13 +329,13 @@ void ScheduleDAGRRList::Schedule() { NumLiveRegs = 0; // Allocate slots for each physical register, plus one for a special register // to track the virtual resource of a calling sequence. - LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL); - LiveRegGens.resize(TRI->getNumRegs() + 1, NULL); + LiveRegDefs.resize(TRI->getNumRegs() + 1, nullptr); + LiveRegGens.resize(TRI->getNumRegs() + 1, nullptr); CallSeqEndForStart.clear(); assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences"); // Build the scheduling graph. 
- BuildSchedGraph(NULL); + BuildSchedGraph(nullptr); DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su].dumpAll(this)); @@ -369,7 +371,7 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { dbgs() << "*** Scheduling failed! ***\n"; PredSU->dump(this); dbgs() << " has been released too many times!\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } #endif --PredSU->NumSuccsLeft; @@ -461,7 +463,7 @@ FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, // to get to the CALLSEQ_BEGIN, but we need to find the path with the // most nesting in order to ensure that we find the corresponding match. if (N->getOpcode() == ISD::TokenFactor) { - SDNode *Best = 0; + SDNode *Best = nullptr; unsigned BestMaxNest = MaxNest; for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { unsigned MyNestLevel = NestLevel; @@ -497,10 +499,10 @@ FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, N = N->getOperand(i).getNode(); goto found_chain_operand; } - return 0; + return nullptr; found_chain_operand:; if (N->getOpcode() == ISD::EntryToken) - return 0; + return nullptr; } } @@ -742,8 +744,8 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) { assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); --NumLiveRegs; - LiveRegDefs[I->getReg()] = NULL; - LiveRegGens[I->getReg()] = NULL; + LiveRegDefs[I->getReg()] = nullptr; + LiveRegGens[I->getReg()] = nullptr; releaseInterferences(I->getReg()); } } @@ -757,8 +759,8 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) { assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); --NumLiveRegs; - LiveRegDefs[CallResource] = NULL; - LiveRegGens[CallResource] = NULL; + LiveRegDefs[CallResource] = nullptr; + LiveRegGens[CallResource] = nullptr; releaseInterferences(CallResource); } } @@ -813,8 +815,8 @@ void 
ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { assert(LiveRegDefs[I->getReg()] == I->getSUnit() && "Physical register dependency violated?"); --NumLiveRegs; - LiveRegDefs[I->getReg()] = NULL; - LiveRegGens[I->getReg()] = NULL; + LiveRegDefs[I->getReg()] = nullptr; + LiveRegGens[I->getReg()] = nullptr; releaseInterferences(I->getReg()); } } @@ -841,8 +843,8 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); --NumLiveRegs; - LiveRegDefs[CallResource] = NULL; - LiveRegGens[CallResource] = NULL; + LiveRegDefs[CallResource] = nullptr; + LiveRegGens[CallResource] = nullptr; releaseInterferences(CallResource); } } @@ -855,7 +857,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { // This becomes the nearest def. Note that an earlier def may still be // pending if this is a two-address node. LiveRegDefs[I->getReg()] = SU; - if (LiveRegGens[I->getReg()] == NULL || + if (LiveRegGens[I->getReg()] == nullptr || I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight()) LiveRegGens[I->getReg()] = I->getSUnit(); } @@ -936,17 +938,17 @@ static bool isOperandOf(const SUnit *SU, SDNode *N) { SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { SDNode *N = SU->getNode(); if (!N) - return NULL; + return nullptr; if (SU->getNode()->getGluedNode()) - return NULL; + return nullptr; SUnit *NewSU; bool TryUnfold = false; for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { EVT VT = N->getValueType(i); if (VT == MVT::Glue) - return NULL; + return nullptr; else if (VT == MVT::Other) TryUnfold = true; } @@ -954,18 +956,18 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { const SDValue &Op = N->getOperand(i); EVT VT = Op.getNode()->getValueType(Op.getResNo()); if (VT == MVT::Glue) - return NULL; + return nullptr; } if (TryUnfold) { SmallVector<SDNode*, 2> NewNodes; if 
(!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) - return NULL; + return nullptr; // unfolding an x86 DEC64m operation results in store, dec, load which // can't be handled here so quit if (NewNodes.size() == 3) - return NULL; + return nullptr; DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); assert(NewNodes.size() == 2 && "Expected a load folding node!"); @@ -1136,11 +1138,11 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, const TargetRegisterClass *DestRC, const TargetRegisterClass *SrcRC, SmallVectorImpl<SUnit*> &Copies) { - SUnit *CopyFromSU = CreateNewSUnit(NULL); + SUnit *CopyFromSU = CreateNewSUnit(nullptr); CopyFromSU->CopySrcRC = SrcRC; CopyFromSU->CopyDstRC = DestRC; - SUnit *CopyToSU = CreateNewSUnit(NULL); + SUnit *CopyToSU = CreateNewSUnit(nullptr); CopyToSU->CopySrcRC = DestRC; CopyToSU->CopyDstRC = SrcRC; @@ -1244,7 +1246,7 @@ static const uint32_t *getNodeRegMask(const SDNode *N) { if (const RegisterMaskSDNode *Op = dyn_cast<RegisterMaskSDNode>(N->getOperand(i).getNode())) return Op->getRegMask(); - return NULL; + return nullptr; } /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay @@ -1355,7 +1357,7 @@ void ScheduleDAGRRList::releaseInterferences(unsigned Reg) { /// (2) No Hazards: resources are available /// (3) No Interferences: may unschedule to break register interferences. SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { - SUnit *CurSU = AvailableQueue->empty() ? 0 : AvailableQueue->pop(); + SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop(); while (CurSU) { SmallVector<unsigned, 4> LRegs; if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) @@ -1371,7 +1373,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { Interferences.push_back(CurSU); } else { - assert(CurSU->isPending && "Intereferences are pending"); + assert(CurSU->isPending && "Interferences are pending"); // Update the interference with current live regs. 
LRegsPair.first->second = LRegs; } @@ -1389,7 +1391,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { // Try unscheduling up to the point where it's safe to schedule // this node. - SUnit *BtSU = NULL; + SUnit *BtSU = nullptr; unsigned LiveCycle = UINT_MAX; for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { unsigned Reg = LRegs[j]; @@ -1449,7 +1451,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { // expensive. // If cross copy register class is null, then it's not possible to copy // the value at all. - SUnit *NewDef = 0; + SUnit *NewDef = nullptr; if (DestRC != RC) { NewDef = CopyAndMoveSuccessors(LRDef); if (!DestRC && !NewDef) @@ -1539,7 +1541,6 @@ template<class SF> struct reverse_sort : public queue_sort { SF &SortFunc; reverse_sort(SF &sf) : SortFunc(sf) {} - reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {} bool operator()(SUnit* left, SUnit* right) const { // reverse left/right rather than simply !SortFunc(left, right) @@ -1559,7 +1560,6 @@ struct bu_ls_rr_sort : public queue_sort { RegReductionPQBase *SPQ; bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} - bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} bool operator()(SUnit* left, SUnit* right) const; }; @@ -1574,8 +1574,6 @@ struct src_ls_rr_sort : public queue_sort { RegReductionPQBase *SPQ; src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} - src_ls_rr_sort(const src_ls_rr_sort &RHS) - : SPQ(RHS.SPQ) {} bool operator()(SUnit* left, SUnit* right) const; }; @@ -1590,8 +1588,6 @@ struct hybrid_ls_rr_sort : public queue_sort { RegReductionPQBase *SPQ; hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} - hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS) - : SPQ(RHS.SPQ) {} bool isReady(SUnit *SU, unsigned CurCycle) const; @@ -1609,8 +1605,6 @@ struct ilp_ls_rr_sort : public queue_sort { RegReductionPQBase *SPQ; ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} - ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS) - : SPQ(RHS.SPQ) {} bool 
isReady(SUnit *SU, unsigned CurCycle) const; @@ -1654,7 +1648,7 @@ public: const TargetLowering *tli) : SchedulingPriorityQueue(hasReadyFilter), CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder), - MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) { + MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(nullptr) { if (TracksRegPressure) { unsigned NumRC = TRI->getNumRegClasses(); RegLimit.resize(NumRC); @@ -1675,14 +1669,14 @@ public: return scheduleDAG->getHazardRec(); } - void initNodes(std::vector<SUnit> &sunits); + void initNodes(std::vector<SUnit> &sunits) override; - void addNode(const SUnit *SU); + void addNode(const SUnit *SU) override; - void updateNode(const SUnit *SU); + void updateNode(const SUnit *SU) override; - void releaseState() { - SUnits = 0; + void releaseState() override { + SUnits = nullptr; SethiUllmanNumbers.clear(); std::fill(RegPressure.begin(), RegPressure.end(), 0); } @@ -1695,26 +1689,26 @@ public: return SU->getNode()->getIROrder(); } - bool empty() const { return Queue.empty(); } + bool empty() const override { return Queue.empty(); } - void push(SUnit *U) { + void push(SUnit *U) override { assert(!U->NodeQueueId && "Node in the queue already"); U->NodeQueueId = ++CurQueueId; Queue.push_back(U); } - void remove(SUnit *SU) { + void remove(SUnit *SU) override { assert(!Queue.empty() && "Queue is empty!"); assert(SU->NodeQueueId != 0 && "Not in queue!"); std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU); - if (I != prior(Queue.end())) + if (I != std::prev(Queue.end())) std::swap(*I, Queue.back()); Queue.pop_back(); SU->NodeQueueId = 0; } - bool tracksRegPressure() const { return TracksRegPressure; } + bool tracksRegPressure() const override { return TracksRegPressure; } void dumpRegPressure() const; @@ -1724,9 +1718,9 @@ public: int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const; - void scheduledNode(SUnit *SU); + void scheduledNode(SUnit *SU) override; - void unscheduledNode(SUnit *SU); + 
void unscheduledNode(SUnit *SU) override; protected: bool canClobber(const SUnit *SU, const SUnit *Op); @@ -1738,12 +1732,12 @@ protected: template<class SF> static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) { std::vector<SUnit *>::iterator Best = Q.begin(); - for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()), + for (std::vector<SUnit *>::iterator I = std::next(Q.begin()), E = Q.end(); I != E; ++I) if (Picker(*Best, *I)) Best = I; SUnit *V = *Best; - if (Best != prior(Q.end())) + if (Best != std::prev(Q.end())) std::swap(*Best, Q.back()); Q.pop_back(); return V; @@ -1776,14 +1770,14 @@ public: tii, tri, tli), Picker(this) {} - bool isBottomUp() const { return SF::IsBottomUp; } + bool isBottomUp() const override { return SF::IsBottomUp; } - bool isReady(SUnit *U) const { + bool isReady(SUnit *U) const override { return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle()); } - SUnit *pop() { - if (Queue.empty()) return NULL; + SUnit *pop() override { + if (Queue.empty()) return nullptr; SUnit *V = popFromQueue(Queue, Picker, scheduleDAG); V->NodeQueueId = 0; @@ -1791,7 +1785,7 @@ public: } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - void dump(ScheduleDAG *DAG) const { + void dump(ScheduleDAG *DAG) const override { // Emulate pop() without clobbering NodeQueueIds. std::vector<SUnit*> DumpQueue = Queue; SF DumpPicker = Picker; @@ -2832,7 +2826,7 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { continue; // Locate the single data predecessor. 
- SUnit *PredSU = 0; + SUnit *PredSU = nullptr; for (SUnit::const_pred_iterator II = SU->Preds.begin(), EE = SU->Preds.end(); II != EE; ++II) if (!II->isCtrl()) { @@ -2988,7 +2982,7 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, const TargetRegisterInfo *TRI = TM.getRegisterInfo(); BURegReductionPriorityQueue *PQ = - new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, 0); + new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr); ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; @@ -3002,7 +2996,7 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, const TargetRegisterInfo *TRI = TM.getRegisterInfo(); SrcRegReductionPriorityQueue *PQ = - new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, 0); + new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr); ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index c1893c9..de910b7 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "pre-RA-sched" #include "ScheduleDAGSDNodes.h" #include "InstrEmitter.h" #include "SDNodeDbgValue.h" @@ -35,6 +34,8 @@ #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; +#define DEBUG_TYPE "pre-RA-sched" + STATISTIC(LoadsClustered, "Number of loads clustered together"); // This allows latency based scheduler to notice high latency instructions @@ -46,7 +47,7 @@ static cl::opt<int> HighLatencyCycles( "instructions take for targets with no itinerary")); ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) - : ScheduleDAG(mf), BB(0), 
DAG(0), + : ScheduleDAG(mf), BB(nullptr), DAG(nullptr), InstrItins(mf.getTarget().getInstrItineraryData()) {} /// Run - perform scheduling. @@ -67,12 +68,12 @@ void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) { /// SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) { #ifndef NDEBUG - const SUnit *Addr = 0; + const SUnit *Addr = nullptr; if (!SUnits.empty()) Addr = &SUnits[0]; #endif SUnits.push_back(SUnit(N, (unsigned)SUnits.size())); - assert((Addr == 0 || Addr == &SUnits[0]) && + assert((Addr == nullptr || Addr == &SUnits[0]) && "SUnits std::vector reallocated on the fly!"); SUnits.back().OrigNode = &SUnits.back(); SUnit *SU = &SUnits.back(); @@ -142,8 +143,8 @@ static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, if (ExtraOper.getNode()) Ops.push_back(ExtraOper); - SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size()); - MachineSDNode::mmo_iterator Begin = 0, End = 0; + SDVTList VTList = DAG->getVTList(VTs); + MachineSDNode::mmo_iterator Begin = nullptr, End = nullptr; MachineSDNode *MN = dyn_cast<MachineSDNode>(N); // Store memory references. @@ -152,7 +153,7 @@ static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, End = MN->memoperands_end(); } - DAG->MorphNodeTo(N, N->getOpcode(), VTList, &Ops[0], Ops.size()); + DAG->MorphNodeTo(N, N->getOpcode(), VTList, Ops); // Reset the memory references if (MN) @@ -205,7 +206,7 @@ static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) { /// outputs to ensure they are scheduled together and in order. This /// optimization may benefit some targets by improving cache locality. void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { - SDNode *Chain = 0; + SDNode *Chain = nullptr; unsigned NumOps = Node->getNumOperands(); if (Node->getOperand(NumOps-1).getValueType() == MVT::Other) Chain = Node->getOperand(NumOps-1).getNode(); @@ -271,7 +272,7 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { // Cluster loads by adding MVT::Glue outputs and inputs. 
This also // ensure they are scheduled in order of increasing addresses. SDNode *Lead = Loads[0]; - SDValue InGlue = SDValue(0, 0); + SDValue InGlue = SDValue(nullptr, 0); if (AddGlue(Lead, InGlue, true, DAG)) InGlue = SDValue(Lead, Lead->getNumValues() - 1); for (unsigned I = 1, E = Loads.size(); I != E; ++I) { @@ -572,7 +573,7 @@ void ScheduleDAGSDNodes::RegDefIter::Advance() { return; // Found a normal regdef. } Node = Node->getGluedNode(); - if (Node == NULL) { + if (!Node) { return; // No values left to visit. } InitNodeNumDefs(); @@ -743,13 +744,13 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI() || // Fast-isel may have inserted some instructions, in which case the // BB->back().isPHI() test will not fire when we want it to. - prior(Emitter.getInsertPos())->isPHI()) { + std::prev(Emitter.getInsertPos())->isPHI()) { // Did not insert any instruction. - Orders.push_back(std::make_pair(Order, (MachineInstr*)0)); + Orders.push_back(std::make_pair(Order, (MachineInstr*)nullptr)); return; } - Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos()))); + Orders.push_back(std::make_pair(Order, std::prev(Emitter.getInsertPos()))); ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 2ff37e0..39ebadf 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -117,13 +117,13 @@ namespace llvm { virtual MachineBasicBlock* EmitSchedule(MachineBasicBlock::iterator &InsertPos); - virtual void dumpNode(const SUnit *SU) const; + void dumpNode(const SUnit *SU) const override; void dumpSchedule() const; - virtual std::string getGraphNodeLabel(const SUnit *SU) const; + std::string getGraphNodeLabel(const SUnit *SU) const override; - virtual 
std::string getDAGName() const; + std::string getDAGName() const override; virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const; @@ -139,7 +139,7 @@ namespace llvm { public: RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD); - bool IsValid() const { return Node != NULL; } + bool IsValid() const { return Node != nullptr; } MVT GetValue() const { assert(IsValid() && "bad iterator"); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 58aa1fe..4589b0c 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -18,7 +18,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "pre-RA-sched" #include "llvm/CodeGen/SchedulerRegistry.h" #include "ScheduleDAGSDNodes.h" #include "llvm/ADT/Statistic.h" @@ -35,6 +34,8 @@ #include <climits> using namespace llvm; +#define DEBUG_TYPE "pre-RA-sched" + STATISTIC(NumNoops , "Number of noops inserted"); STATISTIC(NumStalls, "Number of pipeline stalls"); @@ -72,7 +73,8 @@ public: : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) { const TargetMachine &tm = mf.getTarget(); - HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); + HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer( + tm.getSubtargetImpl(), this); } ~ScheduleDAGVLIW() { @@ -80,7 +82,7 @@ public: delete AvailableQueue; } - void Schedule(); + void Schedule() override; private: void releaseSucc(SUnit *SU, const SDep &D); @@ -120,7 +122,7 @@ void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) { dbgs() << "*** Scheduling failed! 
***\n"; SuccSU->dump(this); dbgs() << " has been released too many times!\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } #endif assert(!D.isWeak() && "unexpected artificial DAG edge"); @@ -204,12 +206,12 @@ void ScheduleDAGVLIW::listScheduleTopDown() { // don't advance the hazard recognizer. if (AvailableQueue->empty()) { // Reset DFA state. - AvailableQueue->scheduledNode(0); + AvailableQueue->scheduledNode(nullptr); ++CurCycle; continue; } - SUnit *FoundSUnit = 0; + SUnit *FoundSUnit = nullptr; bool HasNoopHazards = false; while (!AvailableQueue->empty()) { @@ -256,7 +258,7 @@ void ScheduleDAGVLIW::listScheduleTopDown() { // processors without pipeline interlocks and other cases. DEBUG(dbgs() << "*** Emitting noop\n"); HazardRec->EmitNoop(); - Sequence.push_back(0); // NULL here means noop + Sequence.push_back(nullptr); // NULL here means noop ++NumNoops; ++CurCycle; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 00ffe00..16f7349 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -18,17 +18,15 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" @@ -50,6 +48,7 @@ #include "llvm/Target/TargetSelectionDAGInfo.h" #include <algorithm> #include <cmath> + using namespace llvm; /// makeVTList - Return 
an instance of the SDVTList struct initialized with the @@ -149,33 +148,50 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) { if (N->getOpcode() != ISD::BUILD_VECTOR) return false; - unsigned i = 0, e = N->getNumOperands(); - - // Skip over all of the undef values. - while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF) - ++i; + bool IsAllUndef = true; + for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) { + if (N->getOperand(i).getOpcode() == ISD::UNDEF) + continue; + IsAllUndef = false; + // Do not accept build_vectors that aren't all constants or which have non-0 + // elements. We have to be a bit careful here, as the type of the constant + // may not be the same as the type of the vector elements due to type + // legalization (the elements are promoted to a legal type for the target + // and a vector of a type may be legal when the base element type is not). + // We only want to check enough bits to cover the vector elements, because + // we care if the resultant vector is all zeros, not whether the individual + // constants are. + SDValue Zero = N->getOperand(i); + unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Zero)) { + if (CN->getAPIntValue().countTrailingZeros() < EltSize) + return false; + } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Zero)) { + if (CFPN->getValueAPF().bitcastToAPInt().countTrailingZeros() < EltSize) + return false; + } else + return false; + } // Do not accept an all-undef vector. - if (i == e) return false; + if (IsAllUndef) + return false; + return true; +} - // Do not accept build_vectors that aren't all constants or which have non-0 - // elements. 
- SDValue Zero = N->getOperand(i); - if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Zero)) { - if (!CN->isNullValue()) - return false; - } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Zero)) { - if (!CFPN->getValueAPF().isPosZero()) - return false; - } else +/// \brief Return true if the specified node is a BUILD_VECTOR node of +/// all ConstantSDNode or undef. +bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) { + if (N->getOpcode() != ISD::BUILD_VECTOR) return false; - // Okay, we have at least one 0 value, check to see if the rest match or are - // undefs. - for (++i; i != e; ++i) - if (N->getOperand(i) != Zero && - N->getOperand(i).getOpcode() != ISD::UNDEF) + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDValue Op = N->getOperand(i); + if (Op.getOpcode() == ISD::UNDEF) + continue; + if (!isa<ConstantSDNode>(Op)) return false; + } return true; } @@ -217,6 +233,21 @@ bool ISD::allOperandsUndef(const SDNode *N) { return true; } +ISD::NodeType ISD::getExtForLoadExtType(ISD::LoadExtType ExtType) { + switch (ExtType) { + case ISD::EXTLOAD: + return ISD::ANY_EXTEND; + case ISD::SEXTLOAD: + return ISD::SIGN_EXTEND; + case ISD::ZEXTLOAD: + return ISD::ZERO_EXTEND; + default: + break; + } + + llvm_unreachable("Invalid LoadExtType"); +} + /// getSetCCSwappedOperands - Return the operation corresponding to (Y op X) /// when given the operation for (X op Y). ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) { @@ -335,29 +366,42 @@ static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) { /// AddNodeIDOperands - Various routines for adding operands to the NodeID data. 
/// static void AddNodeIDOperands(FoldingSetNodeID &ID, - const SDValue *Ops, unsigned NumOps) { - for (; NumOps; --NumOps, ++Ops) { - ID.AddPointer(Ops->getNode()); - ID.AddInteger(Ops->getResNo()); + ArrayRef<SDValue> Ops) { + for (auto& Op : Ops) { + ID.AddPointer(Op.getNode()); + ID.AddInteger(Op.getResNo()); } } /// AddNodeIDOperands - Various routines for adding operands to the NodeID data. /// static void AddNodeIDOperands(FoldingSetNodeID &ID, - const SDUse *Ops, unsigned NumOps) { - for (; NumOps; --NumOps, ++Ops) { - ID.AddPointer(Ops->getNode()); - ID.AddInteger(Ops->getResNo()); + ArrayRef<SDUse> Ops) { + for (auto& Op : Ops) { + ID.AddPointer(Op.getNode()); + ID.AddInteger(Op.getResNo()); } } -static void AddNodeIDNode(FoldingSetNodeID &ID, - unsigned short OpC, SDVTList VTList, - const SDValue *OpList, unsigned N) { +static void AddBinaryNodeIDCustom(FoldingSetNodeID &ID, bool nuw, bool nsw, + bool exact) { + ID.AddBoolean(nuw); + ID.AddBoolean(nsw); + ID.AddBoolean(exact); +} + +/// AddBinaryNodeIDCustom - Add BinarySDNodes special infos +static void AddBinaryNodeIDCustom(FoldingSetNodeID &ID, unsigned Opcode, + bool nuw, bool nsw, bool exact) { + if (isBinOpWithFlags(Opcode)) + AddBinaryNodeIDCustom(ID, nuw, nsw, exact); +} + +static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC, + SDVTList VTList, ArrayRef<SDValue> OpList) { AddNodeIDOpcode(ID, OpC); AddNodeIDValueTypes(ID, VTList); - AddNodeIDOperands(ID, OpList, N); + AddNodeIDOperands(ID, OpList); } /// AddNodeIDCustom - If this is an SDNode with special info, add this info to @@ -369,9 +413,12 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { llvm_unreachable("Should only be used on nodes with operands"); default: break; // Normal nodes don't need extra info. 
case ISD::TargetConstant: - case ISD::Constant: - ID.AddPointer(cast<ConstantSDNode>(N)->getConstantIntValue()); + case ISD::Constant: { + const ConstantSDNode *C = cast<ConstantSDNode>(N); + ID.AddPointer(C->getConstantIntValue()); + ID.AddBoolean(C->isOpaque()); break; + } case ISD::TargetConstantFP: case ISD::ConstantFP: { ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue()); @@ -442,7 +489,21 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(ST->getPointerInfo().getAddrSpace()); break; } + case ISD::SDIV: + case ISD::UDIV: + case ISD::SRA: + case ISD::SRL: + case ISD::MUL: + case ISD::ADD: + case ISD::SUB: + case ISD::SHL: { + const BinaryWithFlagsSDNode *BinNode = cast<BinaryWithFlagsSDNode>(N); + AddBinaryNodeIDCustom(ID, N->getOpcode(), BinNode->hasNoUnsignedWrap(), + BinNode->hasNoSignedWrap(), BinNode->isExact()); + break; + } case ISD::ATOMIC_CMP_SWAP: + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: case ISD::ATOMIC_SWAP: case ISD::ATOMIC_LOAD_ADD: case ISD::ATOMIC_LOAD_SUB: @@ -496,7 +557,7 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) { // Add the return value info. AddNodeIDValueTypes(ID, N->getVTList()); // Add the operand info. - AddNodeIDOperands(ID, N->op_begin(), N->getNumOperands()); + AddNodeIDOperands(ID, N->ops()); // Handle SDNode leafs with special info. AddNodeIDCustom(ID, N); @@ -574,7 +635,7 @@ void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes) { SDNode *N = DeadNodes.pop_back_val(); for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next) - DUL->NodeDeleted(N, 0); + DUL->NodeDeleted(N, nullptr); // Take the node out of the appropriate CSE map. 
RemoveNodeFromCSEMaps(N); @@ -660,8 +721,8 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { case ISD::CONDCODE: assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] && "Cond code doesn't exist!"); - Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != 0; - CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = 0; + Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != nullptr; + CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = nullptr; break; case ISD::ExternalSymbol: Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol()); @@ -678,8 +739,8 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { if (VT.isExtended()) { Erased = ExtendedValueTypeNodes.erase(VT); } else { - Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != 0; - ValueTypeNodes[VT.getSimpleVT().SimpleTy] = 0; + Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != nullptr; + ValueTypeNodes[VT.getSimpleVT().SimpleTy] = nullptr; } break; } @@ -741,11 +802,11 @@ SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) { SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op, void *&InsertPos) { if (doNotCSE(N)) - return 0; + return nullptr; SDValue Ops[] = { Op }; FoldingSetNodeID ID; - AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1); + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); AddNodeIDCustom(ID, N); SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); return Node; @@ -759,11 +820,11 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op1, SDValue Op2, void *&InsertPos) { if (doNotCSE(N)) - return 0; + return nullptr; SDValue Ops[] = { Op1, Op2 }; FoldingSetNodeID ID; - AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2); + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); AddNodeIDCustom(ID, N); SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); return Node; @@ -774,14 +835,13 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, /// were replaced with those specified. 
If this node is never memoized, /// return null, otherwise return a pointer to the slot it would take. If a /// node already exists with these operands, the slot will be non-null. -SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, - const SDValue *Ops,unsigned NumOps, +SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops, void *&InsertPos) { if (doNotCSE(N)) - return 0; + return nullptr; FoldingSetNodeID ID; - AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps); + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); AddNodeIDCustom(ID, N); SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); return Node; @@ -877,18 +937,16 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { // EntryNode could meaningfully have debug info if we can find it... SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) - : TM(tm), TSI(*tm.getSelectionDAGInfo()), TTI(0), TLI(0), OptLevel(OL), + : TM(tm), TSI(*tm.getSelectionDAGInfo()), TLI(nullptr), OptLevel(OL), EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)), Root(getEntryNode()), NewNodesMustHaveLegalTypes(false), - UpdateListeners(0) { + UpdateListeners(nullptr) { AllNodes.push_back(&EntryNode); DbgInfo = new SDDbgInfo(); } -void SelectionDAG::init(MachineFunction &mf, const TargetTransformInfo *tti, - const TargetLowering *tli) { +void SelectionDAG::init(MachineFunction &mf, const TargetLowering *tli) { MF = &mf; - TTI = tti; TLI = tli; Context = &mf.getFunction()->getContext(); } @@ -906,6 +964,25 @@ void SelectionDAG::allnodes_clear() { DeallocateNode(AllNodes.begin()); } +BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL, + SDVTList VTs, SDValue N1, + SDValue N2, bool nuw, bool nsw, + bool exact) { + if (isBinOpWithFlags(Opcode)) { + BinaryWithFlagsSDNode *FN = new (NodeAllocator) BinaryWithFlagsSDNode( + Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2); + FN->setHasNoUnsignedWrap(nuw); + FN->setHasNoSignedWrap(nsw); + 
FN->setIsExact(exact); + + return FN; + } + + BinarySDNode *N = new (NodeAllocator) + BinarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2); + return N; +} + void SelectionDAG::clear() { allnodes_clear(); OperandAllocator.Reset(); @@ -915,11 +992,11 @@ void SelectionDAG::clear() { ExternalSymbols.clear(); TargetExternalSymbols.clear(); std::fill(CondCodeNodes.begin(), CondCodeNodes.end(), - static_cast<CondCodeSDNode*>(0)); + static_cast<CondCodeSDNode*>(nullptr)); std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(), - static_cast<SDNode*>(0)); + static_cast<SDNode*>(nullptr)); - EntryNode.UseList = 0; + EntryNode.UseList = nullptr; AllNodes.push_back(&EntryNode); Root = getEntryNode(); DbgInfo->clear(); @@ -943,6 +1020,15 @@ SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, SDLoc DL, EVT VT) { getNode(ISD::TRUNCATE, DL, VT, Op); } +SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, SDLoc SL, EVT VT, + EVT OpVT) { + if (VT.bitsLE(Op.getValueType())) + return getNode(ISD::TRUNCATE, SL, VT, Op); + + TargetLowering::BooleanContent BType = TLI->getBooleanContents(OpVT); + return getNode(TLI->getExtendForContent(BType), SL, VT, Op); +} + SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) { assert(!VT.isVector() && "getZeroExtendInReg should use the vector element type instead of " @@ -955,6 +1041,36 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) { getConstant(Imm, Op.getValueType())); } +SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { + assert(VT.isVector() && "This DAG node is restricted to vector types."); + assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && + "The sizes of the input and result must match in order to perform the " + "extend in-register."); + assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() && + "The destination vector type must have fewer lanes than the input."); + return getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, 
Op); +} + +SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { + assert(VT.isVector() && "This DAG node is restricted to vector types."); + assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && + "The sizes of the input and result must match in order to perform the " + "extend in-register."); + assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() && + "The destination vector type must have fewer lanes than the input."); + return getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, Op); +} + +SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { + assert(VT.isVector() && "This DAG node is restricted to vector types."); + assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && + "The sizes of the input and result must match in order to perform the " + "extend in-register."); + assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() && + "The destination vector type must have fewer lanes than the input."); + return getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, Op); +} + /// getNOT - Create a bitwise NOT operation as (XOR Val, -1). 
/// SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) { @@ -964,19 +1080,37 @@ SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) { return getNode(ISD::XOR, DL, VT, Val, NegOne); } -SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT) { +SDValue SelectionDAG::getLogicalNOT(SDLoc DL, SDValue Val, EVT VT) { + EVT EltVT = VT.getScalarType(); + SDValue TrueValue; + switch (TLI->getBooleanContents(VT)) { + case TargetLowering::ZeroOrOneBooleanContent: + case TargetLowering::UndefinedBooleanContent: + TrueValue = getConstant(1, VT); + break; + case TargetLowering::ZeroOrNegativeOneBooleanContent: + TrueValue = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), + VT); + break; + } + return getNode(ISD::XOR, DL, VT, Val, TrueValue); +} + +SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT, bool isO) { EVT EltVT = VT.getScalarType(); assert((EltVT.getSizeInBits() >= 64 || (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) && "getConstant with a uint64_t value that doesn't fit in the type!"); - return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT); + return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT, isO); } -SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT) { - return getConstant(*ConstantInt::get(*Context, Val), VT, isT); +SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT, bool isO) +{ + return getConstant(*ConstantInt::get(*Context, Val), VT, isT, isO); } -SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { +SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT, + bool isO) { assert(VT.isInteger() && "Cannot create FP integer constant!"); EVT EltVT = VT.getScalarType(); @@ -1018,7 +1152,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) { EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits) 
.trunc(ViaEltSizeInBits), - ViaEltVT, isT)); + ViaEltVT, isT, isO)); } // EltParts is currently in little endian order. If we actually want @@ -1039,7 +1173,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { SDValue Result = getNode(ISD::BITCAST, SDLoc(), VT, getNode(ISD::BUILD_VECTOR, SDLoc(), ViaVecVT, - &Ops[0], Ops.size())); + Ops)); return Result; } @@ -1047,16 +1181,17 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { "APInt size does not match type size!"); unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0); + AddNodeIDNode(ID, Opc, getVTList(EltVT), None); ID.AddPointer(Elt); - void *IP = 0; - SDNode *N = NULL; + ID.AddBoolean(isO); + void *IP = nullptr; + SDNode *N = nullptr; if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) if (!VT.isVector()) return SDValue(N, 0); if (!N) { - N = new (NodeAllocator) ConstantSDNode(isT, Elt, EltVT); + N = new (NodeAllocator) ConstantSDNode(isT, isO, Elt, EltVT); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); } @@ -1065,7 +1200,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { if (VT.isVector()) { SmallVector<SDValue, 8> Ops; Ops.assign(VT.getVectorNumElements(), Result); - Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, &Ops[0], Ops.size()); + Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Ops); } return Result; } @@ -1089,10 +1224,10 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ // we don't have issues with SNANs. unsigned Opc = isTarget ? 
ISD::TargetConstantFP : ISD::ConstantFP; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0); + AddNodeIDNode(ID, Opc, getVTList(EltVT), None); ID.AddPointer(&V); - void *IP = 0; - SDNode *N = NULL; + void *IP = nullptr; + SDNode *N = nullptr; if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) if (!VT.isVector()) return SDValue(N, 0); @@ -1108,7 +1243,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ SmallVector<SDValue, 8> Ops; Ops.assign(VT.getVectorNumElements(), Result); // FIXME SDLoc info might be appropriate here - Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, &Ops[0], Ops.size()); + Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Ops); } return Result; } @@ -1143,26 +1278,19 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, if (BitWidth < 64) Offset = SignExtend64(Offset, BitWidth); - const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); - if (!GVar) { - // If GV is an alias then use the aliasee for determining thread-localness. - if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) - GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false)); - } - unsigned Opc; - if (GVar && GVar->isThreadLocal()) + if (GV->isThreadLocal()) Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress; else Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + AddNodeIDNode(ID, Opc, getVTList(VT), None); ID.AddPointer(GV); ID.AddInteger(Offset); ID.AddInteger(TargetFlags); ID.AddInteger(GV->getType()->getAddressSpace()); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1177,9 +1305,9 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { unsigned Opc = isTarget ? 
ISD::TargetFrameIndex : ISD::FrameIndex; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + AddNodeIDNode(ID, Opc, getVTList(VT), None); ID.AddInteger(FI); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1195,10 +1323,10 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, "Cannot set target flags on target-independent jump tables"); unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + AddNodeIDNode(ID, Opc, getVTList(VT), None); ID.AddInteger(JTI); ID.AddInteger(TargetFlags); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1220,12 +1348,12 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, TM.getTargetLowering()->getDataLayout()->getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + AddNodeIDNode(ID, Opc, getVTList(VT), None); ID.AddInteger(Alignment); ID.AddInteger(Offset); ID.AddPointer(C); ID.AddInteger(TargetFlags); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1248,12 +1376,12 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, TM.getTargetLowering()->getDataLayout()->getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? 
ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + AddNodeIDNode(ID, Opc, getVTList(VT), None); ID.AddInteger(Alignment); ID.AddInteger(Offset); C->addSelectionDAGCSEId(ID); ID.AddInteger(TargetFlags); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1267,11 +1395,11 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, unsigned char TargetFlags) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), 0, 0); + AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), None); ID.AddInteger(Index); ID.AddInteger(Offset); ID.AddInteger(TargetFlags); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1284,9 +1412,9 @@ SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0); + AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), None); ID.AddPointer(MBB); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1333,7 +1461,7 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) { if ((unsigned)Cond >= CondCodeNodes.size()) CondCodeNodes.resize(Cond+1); - if (CondCodeNodes[Cond] == 0) { + if (!CondCodeNodes[Cond]) { CondCodeSDNode *N = new (NodeAllocator) CondCodeSDNode(Cond); CondCodeNodes[Cond] = N; AllNodes.push_back(N); @@ -1407,6 +1535,11 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, N1 = getUNDEF(VT); commuteShuffle(N1, N2, MaskVec); } + // Reset our undef status after accounting for the mask. + N2Undef = N2.getOpcode() == ISD::UNDEF; + // Re-check whether both sides ended up undef. 
+ if (N1.getOpcode() == ISD::UNDEF && N2Undef) + return getUNDEF(VT); // If Identity shuffle return that node. bool Identity = true; @@ -1416,13 +1549,45 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, if (Identity && NElts) return N1; + // Shuffling a constant splat doesn't change the result. + if (N2Undef) { + SDValue V = N1; + + // Look through any bitcasts. We check that these don't change the number + // (and size) of elements and just changes their types. + while (V.getOpcode() == ISD::BITCAST) + V = V->getOperand(0); + + // A splat should always show up as a build vector node. + if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) { + BitVector UndefElements; + SDValue Splat = BV->getSplatValue(&UndefElements); + // If this is a splat of an undef, shuffling it is also undef. + if (Splat && Splat.getOpcode() == ISD::UNDEF) + return getUNDEF(VT); + + // We only have a splat which can skip shuffles if there is a splatted + // value and no undef lanes rearranged by the shuffle. + if (Splat && UndefElements.none()) { + // Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the + // number of elements match or the value splatted is a zero constant. 
+ if (V.getValueType().getVectorNumElements() == + VT.getVectorNumElements()) + return N1; + if (auto *C = dyn_cast<ConstantSDNode>(Splat)) + if (C->isNullValue()) + return N1; + } + } + } + FoldingSetNodeID ID; SDValue Ops[2] = { N1, N2 }; - AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops, 2); + AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops); for (unsigned i = 0; i != NElts; ++i) ID.AddInteger(MaskVec[i]); - void* IP = 0; + void* IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1441,6 +1606,27 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, return SDValue(N, 0); } +SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) { + MVT VT = SV.getSimpleValueType(0); + unsigned NumElems = VT.getVectorNumElements(); + SmallVector<int, 8> MaskVec; + + for (unsigned i = 0; i != NumElems; ++i) { + int Idx = SV.getMaskElt(i); + if (Idx >= 0) { + if (Idx < (int)NumElems) + Idx += NumElems; + else + Idx -= NumElems; + } + MaskVec.push_back(Idx); + } + + SDValue Op0 = SV.getOperand(0); + SDValue Op1 = SV.getOperand(1); + return getVectorShuffle(VT, SDLoc(&SV), Op1, Op0, &MaskVec[0]); +} + SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl, SDValue Val, SDValue DTy, SDValue STy, SDValue Rnd, SDValue Sat, @@ -1453,14 +1639,14 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl, FoldingSetNodeID ID; SDValue Ops[] = { Val, DTy, STy, Rnd, Sat }; - AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), &Ops[0], 5); - void* IP = 0; + AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), Ops); + void* IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl.getIROrder(), dl.getDebugLoc(), - Ops, 5, Code); + Ops, Code); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1468,9 +1654,9 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl, SDValue 
SelectionDAG::getRegister(unsigned RegNo, EVT VT) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0); + AddNodeIDNode(ID, ISD::Register, getVTList(VT), None); ID.AddInteger(RegNo); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1482,9 +1668,9 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), 0, 0); + AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), None); ID.AddPointer(RegMask); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1497,9 +1683,9 @@ SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { SDValue SelectionDAG::getEHLabel(SDLoc dl, SDValue Root, MCSymbol *Label) { FoldingSetNodeID ID; SDValue Ops[] = { Root }; - AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), &Ops[0], 1); + AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), Ops); ID.AddPointer(Label); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1518,11 +1704,11 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, unsigned Opc = isTarget ? 
ISD::TargetBlockAddress : ISD::BlockAddress; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + AddNodeIDNode(ID, Opc, getVTList(VT), None); ID.AddPointer(BA); ID.AddInteger(Offset); ID.AddInteger(TargetFlags); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1538,10 +1724,10 @@ SDValue SelectionDAG::getSrcValue(const Value *V) { "SrcValue is not a pointer?"); FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), 0, 0); + AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), None); ID.AddPointer(V); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1554,10 +1740,10 @@ SDValue SelectionDAG::getSrcValue(const Value *V) { /// getMDNode - Return an MDNodeSDNode which holds an MDNode. SDValue SelectionDAG::getMDNode(const MDNode *MD) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), 0, 0); + AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), None); ID.AddPointer(MD); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1572,11 +1758,11 @@ SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr, unsigned SrcAS, unsigned DestAS) { SDValue Ops[] = {Ptr}; FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::ADDRSPACECAST, getVTList(VT), &Ops[0], 1); + AddNodeIDNode(ID, ISD::ADDRSPACECAST, getVTList(VT), Ops); ID.AddInteger(SrcAS); ID.AddInteger(DestAS); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1640,7 +1826,8 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, case ISD::SETTRUE: case ISD::SETTRUE2: { const TargetLowering *TLI = TM.getTargetLowering(); - TargetLowering::BooleanContent Cnt = TLI->getBooleanContents(VT.isVector()); + TargetLowering::BooleanContent Cnt = + 
TLI->getBooleanContents(N1->getValueType(0)); return getConstant( Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT); } @@ -1755,17 +1942,14 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth) const { APInt KnownZero, KnownOne; - ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(Op, KnownZero, KnownOne, Depth); return (KnownZero & Mask) == Mask; } -/// ComputeMaskedBits - Determine which of the bits specified in Mask are -/// known to be either zero or one and return them in the KnownZero/KnownOne -/// bitsets. This code only analyzes bits in Mask, in order to short-circuit -/// processing. -void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, - APInt &KnownOne, unsigned Depth) const { +/// Determine which bits of Op are known to be either zero or one and return +/// them in the KnownZero/KnownOne bitsets. +void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, + APInt &KnownOne, unsigned Depth) const { const TargetLowering *TLI = TM.getTargetLowering(); unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); @@ -1780,48 +1964,40 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, // We know all of the bits for a constant! KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue(); KnownZero = ~KnownOne; - return; + break; case ISD::AND: // If either the LHS or the RHS are Zero, the result is zero. 
- ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); // Output known-1 bits are only known if set in both the LHS & RHS. KnownOne &= KnownOne2; // Output known-0 are known to be clear if zero in either the LHS | RHS. KnownZero |= KnownZero2; - return; + break; case ISD::OR: - ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); // Output known-0 bits are only known if clear in both the LHS & RHS. KnownZero &= KnownZero2; // Output known-1 are known to be set if set in either the LHS | RHS. KnownOne |= KnownOne2; - return; + break; case ISD::XOR: { - ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); // Output known-0 bits are known if clear or set in both the LHS & RHS. APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); // Output known-1 are known to be set if set in only one of the LHS, RHS. 
KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); KnownZero = KnownZeroOut; - return; + break; } case ISD::MUL: { - ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); // If low bits are zero in either operand, output low known-0 bits. // Also compute a conserative estimate for high known-0 bits. @@ -1838,46 +2014,42 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, LeadZ = std::min(LeadZ, BitWidth); KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) | APInt::getHighBitsSet(BitWidth, LeadZ); - return; + break; } case ISD::UDIV: { // For the purposes of computing leading zeros we can conservatively // treat a udiv as a logical right shift by the power of 2 known to // be less than the denominator. 
- ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); unsigned LeadZ = KnownZero2.countLeadingOnes(); KnownOne2.clearAllBits(); KnownZero2.clearAllBits(); - ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); if (RHSUnknownLeadingOnes != BitWidth) LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ); - return; + break; } case ISD::SELECT: - ComputeMaskedBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); // Only known if known in both the LHS and RHS. KnownOne &= KnownOne2; KnownZero &= KnownZero2; - return; + break; case ISD::SELECT_CC: - ComputeMaskedBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1); // Only known if known in both the LHS and RHS. 
KnownOne &= KnownOne2; KnownZero &= KnownZero2; - return; + break; case ISD::SADDO: case ISD::UADDO: case ISD::SSUBO: @@ -1885,14 +2057,23 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, case ISD::SMULO: case ISD::UMULO: if (Op.getResNo() != 1) - return; - // The boolean result conforms to getBooleanContents. Fall through. + break; + // The boolean result conforms to getBooleanContents. + // If we know the result of a setcc has the top bits zero, use this info. + // We know that we have an integer-based boolean since these operations + // are only available for integer. + if (TLI->getBooleanContents(Op.getValueType().isVector(), false) == + TargetLowering::ZeroOrOneBooleanContent && + BitWidth > 1) + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); + break; case ISD::SETCC: // If we know the result of a setcc has the top bits zero, use this info. - if (TLI->getBooleanContents(Op.getValueType().isVector()) == - TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) + if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == + TargetLowering::ZeroOrOneBooleanContent && + BitWidth > 1) KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); - return; + break; case ISD::SHL: // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { @@ -1900,16 +2081,15 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, // If the shift count is an invalid immediate, don't do anything. if (ShAmt >= BitWidth) - return; + break; - ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); KnownZero <<= ShAmt; KnownOne <<= ShAmt; // low bits known zero. 
KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt); } - return; + break; case ISD::SRL: // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { @@ -1917,31 +2097,29 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, // If the shift count is an invalid immediate, don't do anything. if (ShAmt >= BitWidth) - return; + break; - ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); KnownZero = KnownZero.lshr(ShAmt); KnownOne = KnownOne.lshr(ShAmt); APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); KnownZero |= HighBits; // High bits known zero. } - return; + break; case ISD::SRA: if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { unsigned ShAmt = SA->getZExtValue(); // If the shift count is an invalid immediate, don't do anything. if (ShAmt >= BitWidth) - return; + break; // If any of the demanded bits are produced by the sign extension, we also // demand the input sign bit. APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); - ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); KnownZero = KnownZero.lshr(ShAmt); KnownOne = KnownOne.lshr(ShAmt); @@ -1955,7 +2133,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, KnownOne |= HighBits; // New bits are known one. 
} } - return; + break; case ISD::SIGN_EXTEND_INREG: { EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); unsigned EBits = EVT.getScalarType().getSizeInBits(); @@ -1973,10 +2151,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, if (NewBits.getBoolValue()) InputDemandedBits |= InSignBit; - ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); KnownOne &= InputDemandedBits; KnownZero &= InputDemandedBits; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); // If the sign bit of the input is known set or clear, then we know the // top bits of the result. @@ -1990,7 +2167,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, KnownZero &= ~NewBits; KnownOne &= ~NewBits; } - return; + break; } case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: @@ -2000,7 +2177,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, unsigned LowBits = Log2_32(BitWidth)+1; KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); KnownOne.clearAllBits(); - return; + break; } case ISD::LOAD: { LoadSDNode *LD = cast<LoadSDNode>(Op); @@ -2010,9 +2187,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, unsigned MemBits = VT.getScalarType().getSizeInBits(); KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); } else if (const MDNode *Ranges = LD->getRanges()) { - computeMaskedBitsLoad(*Ranges, KnownZero); + computeKnownBitsFromRangeMetadata(*Ranges, KnownZero); } - return; + break; } case ISD::ZERO_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); @@ -2020,11 +2197,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits); KnownZero = KnownZero.trunc(InBits); KnownOne = KnownOne.trunc(InBits); - ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, 
Depth+1); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); KnownZero |= NewBits; - return; + break; } case ISD::SIGN_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); @@ -2033,13 +2210,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, KnownZero = KnownZero.trunc(InBits); KnownOne = KnownOne.trunc(InBits); - ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); // Note if the sign bit is known to be zero or one. bool SignBitKnownZero = KnownZero.isNegative(); bool SignBitKnownOne = KnownOne.isNegative(); - assert(!(SignBitKnownZero && SignBitKnownOne) && - "Sign bit can't be known to be both zero and one!"); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); @@ -2049,25 +2224,24 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, KnownZero |= NewBits; else if (SignBitKnownOne) KnownOne |= NewBits; - return; + break; } case ISD::ANY_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); KnownZero = KnownZero.trunc(InBits); KnownOne = KnownOne.trunc(InBits); - ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); - return; + break; } case ISD::TRUNCATE: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); KnownZero = KnownZero.zext(InBits); KnownOne = KnownOne.zext(InBits); - ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); KnownZero = KnownZero.trunc(BitWidth); KnownOne = KnownOne.trunc(BitWidth); break; @@ -2075,15 +2249,15 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt 
&KnownZero, case ISD::AssertZext: { EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); - ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); KnownZero |= (~InMask); KnownOne &= (~KnownZero); - return; + break; } case ISD::FGETSIGN: // All bits are zero except the low bit. KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 1); - return; + break; case ISD::SUB: { if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) { @@ -2094,7 +2268,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros(); // NLZ can't be BitWidth with no sign bit APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); // If all of the MaskV bits are known to be zero, then we know the // output top bits are zero, because we now know that the output is @@ -2113,18 +2287,16 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, // Output known-0 bits are known if clear or set in both the low clear bits // common to both LHS & RHS. For example, 8+(X<<3) is known to have the // low 3 bits clear. 
- ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); unsigned KnownZeroOut = KnownZero2.countTrailingOnes(); - ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); KnownZeroOut = std::min(KnownZeroOut, KnownZero2.countTrailingOnes()); if (Op.getOpcode() == ISD::ADD) { KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut); - return; + break; } // With ADDE, a carry bit may be added in, so we can only use this @@ -2133,14 +2305,14 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, // are known zero. if (KnownZeroOut >= 2) // ADDE KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroOut); - return; + break; } case ISD::SREM: if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { const APInt &RA = Rem->getAPIntValue().abs(); if (RA.isPowerOf2()) { APInt LowBits = RA - 1; - ComputeMaskedBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1); // The low bits of the first operand are unchanged by the srem. 
KnownZero = KnownZero2 & LowBits; @@ -2158,36 +2330,38 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); } } - return; + break; case ISD::UREM: { if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { const APInt &RA = Rem->getAPIntValue(); if (RA.isPowerOf2()) { APInt LowBits = (RA - 1); - KnownZero |= ~LowBits; - ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne,Depth+1); - assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth + 1); + + // The upper bits are all zero, the lower ones are unchanged. + KnownZero = KnownZero2 | ~LowBits; + KnownOne = KnownOne2 & LowBits; break; } } // Since the result is less than or equal to either operand, any leading // zero bits in either operand must also exist in the result. - ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); uint32_t Leaders = std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); KnownOne.clearAllBits(); KnownZero = APInt::getHighBitsSet(BitWidth, Leaders); - return; + break; } case ISD::FrameIndex: case ISD::TargetFrameIndex: if (unsigned Align = InferPtrAlignment(Op)) { // The low bits are known zero if the pointer is aligned. KnownZero = APInt::getLowBitsSet(BitWidth, Log2_32(Align)); - return; + break; } break; @@ -2199,9 +2373,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_VOID: // Allow the target to implement this method for its nodes. 
- TLI->computeMaskedBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth); - return; + TLI->computeKnownBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth); + break; } + + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); } /// ComputeNumSignBits - Return the number of times the sign bit of the @@ -2275,7 +2451,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ FirstAnswer = std::min(Tmp, Tmp2); // We computed what we know about the sign bits as our first // answer. Now proceed to the generic code that uses - // ComputeMaskedBits, and pick whichever answer is better. + // computeKnownBits, and pick whichever answer is better. } break; @@ -2294,9 +2470,16 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ if (Op.getResNo() != 1) break; // The boolean result conforms to getBooleanContents. Fall through. + // If setcc returns 0/-1, all bits are sign bits. + // We know that we have an integer-based boolean since these operations + // are only available for integer. + if (TLI->getBooleanContents(Op.getValueType().isVector(), false) == + TargetLowering::ZeroOrNegativeOneBooleanContent) + return VTBits; + break; case ISD::SETCC: // If setcc returns 0/-1, all bits are sign bits. - if (TLI->getBooleanContents(Op.getValueType().isVector()) == + if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == TargetLowering::ZeroOrNegativeOneBooleanContent) return VTBits; break; @@ -2325,7 +2508,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1))) if (CRHS->isAllOnesValue()) { APInt KnownZero, KnownOne; - ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. 
@@ -2350,7 +2533,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) if (CLHS->isNullValue()) { APInt KnownZero, KnownOne; - ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue()) @@ -2397,14 +2580,14 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || Op.getOpcode() == ISD::INTRINSIC_VOID) { - unsigned NumBits = TLI->ComputeNumSignBitsForTargetNode(Op, Depth); + unsigned NumBits = TLI->ComputeNumSignBitsForTargetNode(Op, *this, Depth); if (NumBits > 1) FirstAnswer = std::max(FirstAnswer, NumBits); } // Finally, if we can prove that the top bits of the result are 0's or 1's, // use this information. APInt KnownZero, KnownOne; - ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); + computeKnownBits(Op, KnownZero, KnownOne, Depth); APInt Mask; if (KnownZero.isNegative()) { // sign bit is 0 @@ -2492,8 +2675,8 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { /// SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) { FoldingSetNodeID ID; - AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0); - void *IP = 0; + AddNodeIDNode(ID, Opcode, getVTList(VT), None); + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -2510,17 +2693,23 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) { SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue Operand) { - // Constant fold unary operations with an integer constant operand. + // Constant fold unary operations with an integer constant operand. 
Even + // opaque constant will be folded, because the folding of unary operations + // doesn't create new constants with different values. Nevertheless, the + // opaque flag is preserved during folding to prevent future folding with + // other constants. if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getNode())) { const APInt &Val = C->getAPIntValue(); switch (Opcode) { default: break; case ISD::SIGN_EXTEND: - return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT); + return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT, + C->isTargetOpcode(), C->isOpaque()); case ISD::ANY_EXTEND: case ISD::ZERO_EXTEND: case ISD::TRUNCATE: - return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT); + return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT, + C->isTargetOpcode(), C->isOpaque()); case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: { APFloat apf(EVTToAPFloatSemantics(VT), @@ -2537,15 +2726,19 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, return getConstantFP(APFloat(APFloat::IEEEdouble, Val), VT); break; case ISD::BSWAP: - return getConstant(Val.byteSwap(), VT); + return getConstant(Val.byteSwap(), VT, C->isTargetOpcode(), + C->isOpaque()); case ISD::CTPOP: - return getConstant(Val.countPopulation(), VT); + return getConstant(Val.countPopulation(), VT, C->isTargetOpcode(), + C->isOpaque()); case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: - return getConstant(Val.countLeadingZeros(), VT); + return getConstant(Val.countLeadingZeros(), VT, C->isTargetOpcode(), + C->isOpaque()); case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: - return getConstant(Val.countTrailingZeros(), VT); + return getConstant(Val.countTrailingZeros(), VT, C->isTargetOpcode(), + C->isOpaque()); } } @@ -2608,6 +2801,31 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, } } + // Constant fold unary operations with a vector integer operand. 
+ if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand.getNode())) { + if (BV->isConstant()) { + switch (Opcode) { + default: + // FIXME: Entirely reasonable to perform folding of other unary + // operations here as the need arises. + break; + case ISD::UINT_TO_FP: + case ISD::SINT_TO_FP: { + SmallVector<SDValue, 8> Ops; + for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) { + SDValue OpN = BV->getOperand(i); + // Let the above scalar folding handle the conversion of each + // element. + OpN = getNode(ISD::SINT_TO_FP, DL, VT.getVectorElementType(), + OpN); + Ops.push_back(OpN); + } + return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); + } + } + } + } + unsigned OpOpcode = Operand.getNode()->getOpcode(); switch (Opcode) { case ISD::TokenFactor: @@ -2754,8 +2972,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, if (VT != MVT::Glue) { // Don't CSE flag producing nodes FoldingSetNodeID ID; SDValue Ops[1] = { Operand }; - AddNodeIDNode(ID, Opcode, VTs, Ops, 1); - void *IP = 0; + AddNodeIDNode(ID, Opcode, VTs, Ops); + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -2776,16 +2994,25 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT, SDNode *Cst1, SDNode *Cst2) { + // If the opcode is a target-specific ISD node, there's nothing we can + // do here and the operand rules may not line up with the below, so + // bail early. + if (Opcode >= ISD::BUILTIN_OP_END) + return SDValue(); + SmallVector<std::pair<ConstantSDNode *, ConstantSDNode *>, 4> Inputs; SmallVector<SDValue, 4> Outputs; EVT SVT = VT.getScalarType(); ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1); ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2); - if (Scalar1 && Scalar2) { + if (Scalar1 && Scalar2 && (Scalar1->isOpaque() || Scalar2->isOpaque())) + return SDValue(); + + if (Scalar1 && Scalar2) // Scalar instruction. 
Inputs.push_back(std::make_pair(Scalar1, Scalar2)); - } else { + else { // For vectors extract each constant element into Inputs so we can constant // fold them individually. BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1); @@ -2801,6 +3028,9 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT, if (!V1 || !V2) // Not a constant, bail. return SDValue(); + if (V1->isOpaque() || V2->isOpaque()) + return SDValue(); + // Avoid BUILD_VECTOR nodes that perform implicit truncation. // FIXME: This is valid and could be handled by truncating the APInts. if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT) @@ -2874,17 +3104,22 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT, } } + assert((Scalar1 && Scalar2) || (VT.getVectorNumElements() == Outputs.size() && + "Expected a scalar or vector!")); + // Handle the scalar case first. - if (Scalar1 && Scalar2) + if (!VT.isVector()) return Outputs.back(); - // Otherwise build a big vector out of the scalar elements we generated. - return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs.data(), - Outputs.size()); + // We may have a vector type but a scalar result. Create a splat. + Outputs.resize(VT.getVectorNumElements(), Outputs.back()); + + // Build a big vector out of the scalar elements we generated. 
+ return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, - SDValue N2) { + SDValue N2, bool nuw, bool nsw, bool exact) { ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); switch (Opcode) { @@ -2910,7 +3145,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end()); Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end()); - return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size()); + return getNode(ISD::BUILD_VECTOR, DL, VT, Elts); } break; case ISD::AND: @@ -3324,22 +3559,25 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, } // Memoize this node if possible. - SDNode *N; + BinarySDNode *N; SDVTList VTs = getVTList(VT); + const bool BinOpHasFlags = isBinOpWithFlags(Opcode); if (VT != MVT::Glue) { - SDValue Ops[] = { N1, N2 }; + SDValue Ops[] = {N1, N2}; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opcode, VTs, Ops, 2); - void *IP = 0; + AddNodeIDNode(ID, Opcode, VTs, Ops); + if (BinOpHasFlags) + AddBinaryNodeIDCustom(ID, Opcode, nuw, nsw, exact); + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTs, N1, N2); + N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact); + CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTs, N1, N2); + + N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact); } AllNodes.push_back(N); @@ -3379,7 +3617,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, N1.getNode()->op_end()); Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end()); Elts.append(N3.getNode()->op_begin(), N3.getNode()->op_end()); - return 
getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size()); + return getNode(ISD::BUILD_VECTOR, DL, VT, Elts); } break; case ISD::SETCC: { @@ -3436,8 +3674,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, if (VT != MVT::Glue) { SDValue Ops[] = { N1, N2, N3 }; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opcode, VTs, Ops, 3); - void *IP = 0; + AddNodeIDNode(ID, Opcode, VTs, Ops); + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -3460,14 +3698,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3, SDValue N4) { SDValue Ops[] = { N1, N2, N3, N4 }; - return getNode(Opcode, DL, VT, Ops, 4); + return getNode(Opcode, DL, VT, Ops); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3, SDValue N4, SDValue N5) { SDValue Ops[] = { N1, N2, N3, N4, N5 }; - return getNode(Opcode, DL, VT, Ops, 5); + return getNode(Opcode, DL, VT, Ops); } /// getStackArgumentTokenFactor - Compute a TokenFactor to force all @@ -3489,8 +3727,7 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) { ArgChains.push_back(SDValue(L, 1)); // Build a tokenfactor for all the chains. 
- return getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, - &ArgChains[0], ArgChains.size()); + return getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains); } /// getMemsetValue - Vectorized representation of the memset value @@ -3528,7 +3765,7 @@ static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG, if (Str.empty()) { if (VT.isInteger()) return DAG.getConstant(0, VT); - else if (VT == MVT::f32 || VT == MVT::f64) + else if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128) return DAG.getConstantFP(0.0, VT); else if (VT.isVector()) { unsigned NumElts = VT.getVectorNumElements(); @@ -3554,12 +3791,12 @@ static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG, Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8; } - // If the "cost" of materializing the integer immediate is 1 or free, then - // it is cost effective to turn the load into the immediate. - const TargetTransformInfo *TTI = DAG.getTargetTransformInfo(); - if (TTI->getIntImmCost(Val, VT.getTypeForEVT(*DAG.getContext())) < 2) + // If the "cost" of materializing the integer immediate is less than the cost + // of a load, then it is cost effective to turn the load into the immediate. 
+ Type *Ty = VT.getTypeForEVT(*DAG.getContext()); + if (TLI.shouldConvertConstantLoadToIntImm(Val, Ty)) return DAG.getConstant(Val, VT); - return SDValue(0, 0); + return SDValue(nullptr, 0); } /// getMemBasePlusOffset - Returns base and offset node for the @@ -3575,7 +3812,7 @@ static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, SDLoc dl, /// static bool isMemSrcFromString(SDValue Src, StringRef &Str) { unsigned SrcDelta = 0; - GlobalAddressSDNode *G = NULL; + GlobalAddressSDNode *G = nullptr; if (Src.getOpcode() == ISD::GlobalAddress) G = cast<GlobalAddressSDNode>(Src); else if (Src.getOpcode() == ISD::ADD && @@ -3617,8 +3854,9 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, DAG.getMachineFunction()); if (VT == MVT::Other) { - if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment() || - TLI.allowsUnalignedMemoryAccesses(VT)) { + unsigned AS = 0; + if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment(AS) || + TLI.allowsUnalignedMemoryAccesses(VT, AS)) { VT = TLI.getPointerTy(); } else { switch (DstAlign & 7) { @@ -3675,9 +3913,10 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, // FIXME: Only does this for 64-bit or more since we don't have proper // cost model for unaligned load / store. 
bool Fast; + unsigned AS = 0; if (NumMemOps && AllowOverlap && VTSize >= 8 && NewVTSize < Size && - TLI.allowsUnalignedMemoryAccesses(VT, &Fast) && Fast) + TLI.allowsUnalignedMemoryAccesses(VT, AS, &Fast) && Fast) VTSize = Size; else { VT = NewVT; @@ -3809,8 +4048,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, Size -= VTSize; } - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &OutChains[0], OutChains.size()); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, @@ -3875,8 +4113,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, LoadChains.push_back(Value.getValue(1)); SrcOff += VTSize; } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &LoadChains[0], LoadChains.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); OutChains.clear(); for (unsigned i = 0; i < NumMemOps; i++) { EVT VT = MemOps[i]; @@ -3890,8 +4127,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, DstOff += VTSize; } - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &OutChains[0], OutChains.size()); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } /// \brief Lower the call to 'memset' intrinsic function into a series of store @@ -3992,8 +4228,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, Size -= VTSize; } - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &OutChains[0], OutChains.size()); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, @@ -4052,15 +4287,13 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); // FIXME: pass in SDLoc - TargetLowering:: - CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()), - false, false, false, false, 0, - 
TLI->getLibcallCallingConv(RTLIB::MEMCPY), - /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/false, - getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY), - TLI->getPointerTy()), - Args, *this, dl); + TargetLowering::CallLoweringInfo CLI(*this); + CLI.setDebugLoc(dl).setChain(Chain) + .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY), + Type::getVoidTy(*getContext()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY), + TLI->getPointerTy()), std::move(Args), 0) + .setDiscardResult(); std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; @@ -4110,15 +4343,13 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); // FIXME: pass in SDLoc - TargetLowering:: - CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()), - false, false, false, false, 0, - TLI->getLibcallCallingConv(RTLIB::MEMMOVE), - /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/false, - getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), - TLI->getPointerTy()), - Args, *this, dl); + TargetLowering::CallLoweringInfo CLI(*this); + CLI.setDebugLoc(dl).setChain(Chain) + .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE), + Type::getVoidTy(*getContext()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), + TLI->getPointerTy()), std::move(Args), 0) + .setDiscardResult(); std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; @@ -4174,31 +4405,31 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, Entry.Ty = IntPtrTy; Entry.isSExt = false; Args.push_back(Entry); + // FIXME: pass in SDLoc - TargetLowering:: - CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()), - false, false, false, false, 0, - TLI->getLibcallCallingConv(RTLIB::MEMSET), - /*isTailCall=*/false, - /*doesNotReturn*/false, /*isReturnValueUsed=*/false, - 
getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), - TLI->getPointerTy()), - Args, *this, dl); - std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); + TargetLowering::CallLoweringInfo CLI(*this); + CLI.setDebugLoc(dl).setChain(Chain) + .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET), + Type::getVoidTy(*getContext()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), + TLI->getPointerTy()), std::move(Args), 0) + .setDiscardResult(); + std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, - SDVTList VTList, SDValue* Ops, unsigned NumOps, + SDVTList VTList, ArrayRef<SDValue> Ops, MachineMemOperand *MMO, - AtomicOrdering Ordering, + AtomicOrdering SuccessOrdering, + AtomicOrdering FailureOrdering, SynchronizationScope SynchScope) { FoldingSetNodeID ID; ID.AddInteger(MemVT.getRawBits()); - AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + AddNodeIDNode(ID, Opcode, VTList, Ops); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); - void* IP = 0; + void* IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<AtomicSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); @@ -4209,59 +4440,70 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, // the node is deallocated, but recovered when the allocator is released. // If the number of operands is less than 5 we use AtomicSDNode's internal // storage. - SDUse *DynOps = NumOps > 4 ? OperandAllocator.Allocate<SDUse>(NumOps) : 0; + unsigned NumOps = Ops.size(); + SDUse *DynOps = NumOps > 4 ? 
OperandAllocator.Allocate<SDUse>(NumOps) + : nullptr; SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(), dl.getDebugLoc(), VTList, MemVT, - Ops, DynOps, NumOps, MMO, - Ordering, SynchScope); + Ops.data(), DynOps, NumOps, MMO, + SuccessOrdering, FailureOrdering, + SynchScope); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); } SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, - SDValue Chain, SDValue Ptr, SDValue Cmp, - SDValue Swp, MachinePointerInfo PtrInfo, - unsigned Alignment, + SDVTList VTList, ArrayRef<SDValue> Ops, + MachineMemOperand *MMO, AtomicOrdering Ordering, SynchronizationScope SynchScope) { + return getAtomic(Opcode, dl, MemVT, VTList, Ops, MMO, Ordering, + Ordering, SynchScope); +} + +SDValue SelectionDAG::getAtomicCmpSwap( + unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTs, SDValue Chain, + SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo, + unsigned Alignment, AtomicOrdering SuccessOrdering, + AtomicOrdering FailureOrdering, SynchronizationScope SynchScope) { + assert(Opcode == ISD::ATOMIC_CMP_SWAP || + Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS); + assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); + if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(MemVT); MachineFunction &MF = getMachineFunction(); - // All atomics are load and store, except for ATMOIC_LOAD and ATOMIC_STORE. - // For now, atomics are considered to be volatile always. // FIXME: Volatile isn't really correct; we should keep track of atomic // orderings in the memoperand. 
unsigned Flags = MachineMemOperand::MOVolatile; - if (Opcode != ISD::ATOMIC_STORE) - Flags |= MachineMemOperand::MOLoad; - if (Opcode != ISD::ATOMIC_LOAD) - Flags |= MachineMemOperand::MOStore; + Flags |= MachineMemOperand::MOLoad; + Flags |= MachineMemOperand::MOStore; MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment); - return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO, - Ordering, SynchScope); + return getAtomicCmpSwap(Opcode, dl, MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO, + SuccessOrdering, FailureOrdering, SynchScope); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, - SDValue Chain, - SDValue Ptr, SDValue Cmp, - SDValue Swp, MachineMemOperand *MMO, - AtomicOrdering Ordering, - SynchronizationScope SynchScope) { - assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op"); +SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, SDLoc dl, EVT MemVT, + SDVTList VTs, SDValue Chain, SDValue Ptr, + SDValue Cmp, SDValue Swp, + MachineMemOperand *MMO, + AtomicOrdering SuccessOrdering, + AtomicOrdering FailureOrdering, + SynchronizationScope SynchScope) { + assert(Opcode == ISD::ATOMIC_CMP_SWAP || + Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS); assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); - EVT VT = Cmp.getValueType(); - - SDVTList VTs = getVTList(VT, MVT::Other); SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; - return getAtomic(Opcode, dl, MemVT, VTs, Ops, 4, MMO, Ordering, SynchScope); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, + SuccessOrdering, FailureOrdering, SynchScope); } SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, @@ -4320,38 +4562,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? 
getVTList(MVT::Other) : getVTList(VT, MVT::Other); SDValue Ops[] = {Chain, Ptr, Val}; - return getAtomic(Opcode, dl, MemVT, VTs, Ops, 3, MMO, Ordering, SynchScope); -} - -SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, - EVT VT, SDValue Chain, - SDValue Ptr, - const Value* PtrVal, - unsigned Alignment, - AtomicOrdering Ordering, - SynchronizationScope SynchScope) { - if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = getEVTAlignment(MemVT); - - MachineFunction &MF = getMachineFunction(); - // An atomic store does not load. An atomic load does not store. - // (An atomicrmw obviously both loads and stores.) - // For now, atomics are considered to be volatile always, and they are - // chained as such. - // FIXME: Volatile isn't really correct; we should keep track of atomic - // orderings in the memoperand. - unsigned Flags = MachineMemOperand::MOVolatile; - if (Opcode != ISD::ATOMIC_STORE) - Flags |= MachineMemOperand::MOLoad; - if (Opcode != ISD::ATOMIC_LOAD) - Flags |= MachineMemOperand::MOStore; - - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags, - MemVT.getStoreSize(), Alignment); - - return getAtomic(Opcode, dl, MemVT, VT, Chain, Ptr, MMO, - Ordering, SynchScope); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, Ordering, SynchScope); } SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, @@ -4364,38 +4575,24 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTs = getVTList(VT, MVT::Other); SDValue Ops[] = {Chain, Ptr}; - return getAtomic(Opcode, dl, MemVT, VTs, Ops, 2, MMO, Ordering, SynchScope); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, Ordering, SynchScope); } /// getMergeValues - Create a MERGE_VALUES node from the given operands. 
-SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps, - SDLoc dl) { - if (NumOps == 1) +SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, SDLoc dl) { + if (Ops.size() == 1) return Ops[0]; SmallVector<EVT, 4> VTs; - VTs.reserve(NumOps); - for (unsigned i = 0; i < NumOps; ++i) + VTs.reserve(Ops.size()); + for (unsigned i = 0; i < Ops.size(); ++i) VTs.push_back(Ops[i].getValueType()); - return getNode(ISD::MERGE_VALUES, dl, getVTList(&VTs[0], NumOps), - Ops, NumOps); -} - -SDValue -SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, - const EVT *VTs, unsigned NumVTs, - const SDValue *Ops, unsigned NumOps, - EVT MemVT, MachinePointerInfo PtrInfo, - unsigned Align, bool Vol, - bool ReadMem, bool WriteMem) { - return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps, - MemVT, PtrInfo, Align, Vol, - ReadMem, WriteMem); + return getNode(ISD::MERGE_VALUES, dl, getVTList(VTs), Ops); } SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, - const SDValue *Ops, unsigned NumOps, + ArrayRef<SDValue> Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, bool Vol, bool ReadMem, bool WriteMem) { @@ -4413,13 +4610,13 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Align); - return getMemIntrinsicNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO); + return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO); } SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, - const SDValue *Ops, unsigned NumOps, - EVT MemVT, MachineMemOperand *MMO) { + ArrayRef<SDValue> Ops, EVT MemVT, + MachineMemOperand *MMO) { assert((Opcode == ISD::INTRINSIC_VOID || Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::PREFETCH || @@ -4433,9 +4630,9 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, MemIntrinsicSDNode *N; if 
(VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; - AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + AddNodeIDNode(ID, Opcode, VTList, Ops); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); @@ -4443,12 +4640,12 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(), dl.getDebugLoc(), VTList, Ops, - NumOps, MemVT, MMO); + MemVT, MMO); CSEMap.InsertNode(N, IP); } else { N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(), dl.getDebugLoc(), VTList, Ops, - NumOps, MemVT, MMO); + MemVT, MMO); } AllNodes.push_back(N); return SDValue(N, 0); @@ -4511,7 +4708,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, // If we don't have a PtrInfo, infer the trivial frame index case to simplify // clients. 
- if (PtrInfo.V == 0) + if (PtrInfo.V.isNull()) PtrInfo = InferPointerInfo(Ptr, Offset); MachineFunction &MF = getMachineFunction(); @@ -4551,13 +4748,13 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other); SDValue Ops[] = { Chain, Ptr, Offset }; FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3); + AddNodeIDNode(ID, ISD::LOAD, VTs, Ops); ID.AddInteger(MemVT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<LoadSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); @@ -4638,7 +4835,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, if (isNonTemporal) Flags |= MachineMemOperand::MONonTemporal; - if (PtrInfo.V == 0) + if (PtrInfo.V.isNull()) PtrInfo = InferPointerInfo(Ptr); MachineFunction &MF = getMachineFunction(); @@ -4659,12 +4856,12 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Undef = getUNDEF(Ptr.getValueType()); SDValue Ops[] = { Chain, Val, Ptr, Undef }; FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); + AddNodeIDNode(ID, ISD::STORE, VTs, Ops); ID.AddInteger(VT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); @@ -4693,7 +4890,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, if (isNonTemporal) Flags |= MachineMemOperand::MONonTemporal; - if (PtrInfo.V == 0) + if (PtrInfo.V.isNull()) PtrInfo = InferPointerInfo(Ptr); MachineFunction 
&MF = getMachineFunction(); @@ -4728,12 +4925,12 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Undef = getUNDEF(Ptr.getValueType()); SDValue Ops[] = { Chain, Val, Ptr, Undef }; FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); + AddNodeIDNode(ID, ISD::STORE, VTs, Ops); ID.AddInteger(SVT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); @@ -4755,11 +4952,11 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base, SDVTList VTs = getVTList(Base.getValueType(), MVT::Other); SDValue Ops[] = { ST->getChain(), ST->getValue(), Base, Offset }; FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); + AddNodeIDNode(ID, ISD::STORE, VTs, Ops); ID.AddInteger(ST->getMemoryVT().getRawBits()); ID.AddInteger(ST->getRawSubclassData()); ID.AddInteger(ST->getPointerInfo().getAddrSpace()); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -4778,14 +4975,14 @@ SDValue SelectionDAG::getVAArg(EVT VT, SDLoc dl, SDValue SV, unsigned Align) { SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, MVT::i32) }; - return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 4); + return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, - const SDUse *Ops, unsigned NumOps) { - switch (NumOps) { + ArrayRef<SDUse> Ops) { + switch (Ops.size()) { case 0: return getNode(Opcode, DL, VT); - case 1: return getNode(Opcode, DL, VT, Ops[0]); + case 1: return getNode(Opcode, DL, VT, static_cast<const SDValue>(Ops[0])); case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]); case 3: return 
getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]); default: break; @@ -4793,12 +4990,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, // Copy from an SDUse array into an SDValue array for use with // the regular getNode logic. - SmallVector<SDValue, 8> NewOps(Ops, Ops + NumOps); - return getNode(Opcode, DL, VT, &NewOps[0], NumOps); + SmallVector<SDValue, 8> NewOps(Ops.begin(), Ops.end()); + return getNode(Opcode, DL, VT, NewOps); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<SDValue> Ops) { + unsigned NumOps = Ops.size(); switch (NumOps) { case 0: return getNode(Opcode, DL, VT); case 1: return getNode(Opcode, DL, VT, Ops[0]); @@ -4833,18 +5031,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, if (VT != MVT::Glue) { FoldingSetNodeID ID; - AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps); - void *IP = 0; + AddNodeIDNode(ID, Opcode, VTs, Ops); + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), - VTs, Ops, NumOps); + VTs, Ops); CSEMap.InsertNode(N, IP); } else { N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), - VTs, Ops, NumOps); + VTs, Ops); } AllNodes.push_back(N); @@ -4855,24 +5053,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, - ArrayRef<EVT> ResultTys, - const SDValue *Ops, unsigned NumOps) { - return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()), - Ops, NumOps); -} - -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, - const EVT *VTs, unsigned NumVTs, - const SDValue *Ops, unsigned NumOps) { - if (NumVTs == 1) - return getNode(Opcode, DL, VTs[0], Ops, NumOps); - return getNode(Opcode, DL, makeVTList(VTs, NumVTs), Ops, NumOps); + ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops) { + return getNode(Opcode, DL, 
getVTList(ResultTys), Ops); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<SDValue> Ops) { if (VTList.NumVTs == 1) - return getNode(Opcode, DL, VTList.VTs[0], Ops, NumOps); + return getNode(Opcode, DL, VTList.VTs[0], Ops); #if 0 switch (Opcode) { @@ -4899,10 +5087,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, // Memoize the node unless it returns a flag. SDNode *N; + unsigned NumOps = Ops.size(); if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; - AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); - void *IP = 0; + AddNodeIDNode(ID, Opcode, VTList, Ops); + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -4919,7 +5108,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, Ops[1], Ops[2]); } else { N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), - VTList, Ops, NumOps); + VTList, Ops); } CSEMap.InsertNode(N, IP); } else { @@ -4936,7 +5125,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, Ops[1], Ops[2]); } else { N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), - VTList, Ops, NumOps); + VTList, Ops); } } AllNodes.push_back(N); @@ -4947,39 +5136,39 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList) { - return getNode(Opcode, DL, VTList, 0, 0); + return getNode(Opcode, DL, VTList, ArrayRef<SDValue>()); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, SDValue N1) { SDValue Ops[] = { N1 }; - return getNode(Opcode, DL, VTList, Ops, 1); + return getNode(Opcode, DL, VTList, Ops); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, SDValue N1, SDValue N2) { SDValue Ops[] = { N1, N2 }; - return getNode(Opcode, DL, VTList, Ops, 2); + return 
getNode(Opcode, DL, VTList, Ops); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, SDValue N1, SDValue N2, SDValue N3) { SDValue Ops[] = { N1, N2, N3 }; - return getNode(Opcode, DL, VTList, Ops, 3); + return getNode(Opcode, DL, VTList, Ops); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, SDValue N1, SDValue N2, SDValue N3, SDValue N4) { SDValue Ops[] = { N1, N2, N3, N4 }; - return getNode(Opcode, DL, VTList, Ops, 4); + return getNode(Opcode, DL, VTList, Ops); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, SDValue N1, SDValue N2, SDValue N3, SDValue N4, SDValue N5) { SDValue Ops[] = { N1, N2, N3, N4, N5 }; - return getNode(Opcode, DL, VTList, Ops, 5); + return getNode(Opcode, DL, VTList, Ops); } SDVTList SelectionDAG::getVTList(EVT VT) { @@ -4992,9 +5181,9 @@ SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) { ID.AddInteger(VT1.getRawBits()); ID.AddInteger(VT2.getRawBits()); - void *IP = 0; + void *IP = nullptr; SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); - if (Result == NULL) { + if (!Result) { EVT *Array = Allocator.Allocate<EVT>(2); Array[0] = VT1; Array[1] = VT2; @@ -5011,9 +5200,9 @@ SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) { ID.AddInteger(VT2.getRawBits()); ID.AddInteger(VT3.getRawBits()); - void *IP = 0; + void *IP = nullptr; SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); - if (Result == NULL) { + if (!Result) { EVT *Array = Allocator.Allocate<EVT>(3); Array[0] = VT1; Array[1] = VT2; @@ -5032,9 +5221,9 @@ SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) { ID.AddInteger(VT3.getRawBits()); ID.AddInteger(VT4.getRawBits()); - void *IP = 0; + void *IP = nullptr; SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); - if (Result == NULL) { + if (!Result) { EVT *Array = Allocator.Allocate<EVT>(4); Array[0] = VT1; Array[1] = VT2; @@ -5046,18 +5235,19 @@ SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, 
EVT VT3, EVT VT4) { return Result->getSDVTList(); } -SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) { +SDVTList SelectionDAG::getVTList(ArrayRef<EVT> VTs) { + unsigned NumVTs = VTs.size(); FoldingSetNodeID ID; ID.AddInteger(NumVTs); for (unsigned index = 0; index < NumVTs; index++) { ID.AddInteger(VTs[index].getRawBits()); } - void *IP = 0; + void *IP = nullptr; SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); - if (Result == NULL) { + if (!Result) { EVT *Array = Allocator.Allocate<EVT>(NumVTs); - std::copy(VTs, VTs + NumVTs, Array); + std::copy(VTs.begin(), VTs.end(), Array); Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, NumVTs); VTListMap.InsertNode(Result, IP); } @@ -5078,14 +5268,14 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) { if (Op == N->getOperand(0)) return N; // See if the modified node already exists. - void *InsertPos = 0; + void *InsertPos = nullptr; if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos)) return Existing; // Nope it doesn't. Remove the node from its current place in the maps. if (InsertPos) if (!RemoveNodeFromCSEMaps(N)) - InsertPos = 0; + InsertPos = nullptr; // Now we update the operands. N->OperandList[0].set(Op); @@ -5103,14 +5293,14 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) { return N; // No operands changed, just return the input node. // See if the modified node already exists. - void *InsertPos = 0; + void *InsertPos = nullptr; if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos)) return Existing; // Nope it doesn't. Remove the node from its current place in the maps. if (InsertPos) if (!RemoveNodeFromCSEMaps(N)) - InsertPos = 0; + InsertPos = nullptr; // Now we update the operands. 
if (N->OperandList[0] != Op1) @@ -5126,25 +5316,26 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) { SDNode *SelectionDAG:: UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3) { SDValue Ops[] = { Op1, Op2, Op3 }; - return UpdateNodeOperands(N, Ops, 3); + return UpdateNodeOperands(N, Ops); } SDNode *SelectionDAG:: UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3, SDValue Op4) { SDValue Ops[] = { Op1, Op2, Op3, Op4 }; - return UpdateNodeOperands(N, Ops, 4); + return UpdateNodeOperands(N, Ops); } SDNode *SelectionDAG:: UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3, SDValue Op4, SDValue Op5) { SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 }; - return UpdateNodeOperands(N, Ops, 5); + return UpdateNodeOperands(N, Ops); } SDNode *SelectionDAG:: -UpdateNodeOperands(SDNode *N, const SDValue *Ops, unsigned NumOps) { +UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) { + unsigned NumOps = Ops.size(); assert(N->getNumOperands() == NumOps && "Update with wrong number of operands"); @@ -5161,14 +5352,14 @@ UpdateNodeOperands(SDNode *N, const SDValue *Ops, unsigned NumOps) { if (!AnyChange) return N; // See if the modified node already exists. - void *InsertPos = 0; - if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, NumOps, InsertPos)) + void *InsertPos = nullptr; + if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, InsertPos)) return Existing; // Nope it doesn't. Remove the node from its current place in the maps. if (InsertPos) if (!RemoveNodeFromCSEMaps(N)) - InsertPos = 0; + InsertPos = nullptr; // Now we update the operands. 
for (unsigned i = 0; i != NumOps; ++i) @@ -5197,14 +5388,14 @@ void SDNode::DropOperands() { SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT) { SDVTList VTs = getVTList(VT); - return SelectNodeTo(N, MachineOpc, VTs, 0, 0); + return SelectNodeTo(N, MachineOpc, VTs, None); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT, SDValue Op1) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1 }; - return SelectNodeTo(N, MachineOpc, VTs, Ops, 1); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, @@ -5212,7 +5403,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, SDValue Op2) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2 }; - return SelectNodeTo(N, MachineOpc, VTs, Ops, 2); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, @@ -5220,41 +5411,39 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2, Op3 }; - return SelectNodeTo(N, MachineOpc, VTs, Ops, 3); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - EVT VT, const SDValue *Ops, - unsigned NumOps) { + EVT VT, ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT); - return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - EVT VT1, EVT VT2, const SDValue *Ops, - unsigned NumOps) { + EVT VT1, EVT VT2, ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT1, VT2); - return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1, EVT VT2) { SDVTList VTs = getVTList(VT1, VT2); - return SelectNodeTo(N, MachineOpc, VTs, 
(SDValue *)0, 0); + return SelectNodeTo(N, MachineOpc, VTs, None); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1, EVT VT2, EVT VT3, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT1, VT2, VT3); - return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1, EVT VT2, EVT VT3, EVT VT4, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); - return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, @@ -5262,7 +5451,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, SDValue Op1) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1 }; - return SelectNodeTo(N, MachineOpc, VTs, Ops, 1); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, @@ -5270,7 +5459,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2 }; - return SelectNodeTo(N, MachineOpc, VTs, Ops, 2); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, @@ -5279,7 +5468,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2, Op3 }; - return SelectNodeTo(N, MachineOpc, VTs, Ops, 3); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, @@ -5288,13 +5477,12 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2, VT3); SDValue Ops[] = { Op1, Op2, Op3 }; - return 
SelectNodeTo(N, MachineOpc, VTs, Ops, 3); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - SDVTList VTs, const SDValue *Ops, - unsigned NumOps) { - N = MorphNodeTo(N, ~MachineOpc, VTs, Ops, NumOps); + SDVTList VTs,ArrayRef<SDValue> Ops) { + N = MorphNodeTo(N, ~MachineOpc, VTs, Ops); // Reset the NodeID to -1. N->setNodeId(-1); return N; @@ -5331,19 +5519,19 @@ SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, SDLoc OLoc) { /// the node's users. /// SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - SDVTList VTs, const SDValue *Ops, - unsigned NumOps) { + SDVTList VTs, ArrayRef<SDValue> Ops) { + unsigned NumOps = Ops.size(); // If an identical node already exists, use it. - void *IP = 0; + void *IP = nullptr; if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, VTs, Ops, NumOps); + AddNodeIDNode(ID, Opc, VTs, Ops); if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) return UpdadeSDLocOnMergedSDNode(ON, SDLoc(N)); } if (!RemoveNodeFromCSEMaps(N)) - IP = 0; + IP = nullptr; // Start the morphing. N->NodeType = Opc; @@ -5363,7 +5551,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N)) { // Initialize the memory references information. - MN->setMemRefs(0, 0); + MN->setMemRefs(nullptr, nullptr); // If NumOps is larger than the # of operands we can have in a // MachineSDNode, reallocate the operand list. if (NumOps > MN->NumOperands || !MN->OperandsNeedDelete) { @@ -5374,22 +5562,22 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, // remainder of the current SelectionDAG iteration, so we can allocate // the operands directly out of a pool with no recycling metadata. 
MN->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps), - Ops, NumOps); + Ops.data(), NumOps); else - MN->InitOperands(MN->LocalOperands, Ops, NumOps); + MN->InitOperands(MN->LocalOperands, Ops.data(), NumOps); MN->OperandsNeedDelete = false; } else - MN->InitOperands(MN->OperandList, Ops, NumOps); + MN->InitOperands(MN->OperandList, Ops.data(), NumOps); } else { // If NumOps is larger than the # of operands we currently have, reallocate // the operand list. if (NumOps > N->NumOperands) { if (N->OperandsNeedDelete) delete[] N->OperandList; - N->InitOperands(new SDUse[NumOps], Ops, NumOps); + N->InitOperands(new SDUse[NumOps], Ops.data(), NumOps); N->OperandsNeedDelete = true; } else - N->InitOperands(N->OperandList, Ops, NumOps); + N->InitOperands(N->OperandList, Ops.data(), NumOps); } // Delete any nodes that are still dead after adding the uses for the @@ -5528,7 +5716,7 @@ MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops) { - SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size()); + SDVTList VTs = getVTList(ResultTys); return getMachineNode(Opcode, dl, VTs, Ops); } @@ -5537,14 +5725,14 @@ SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs, ArrayRef<SDValue> OpsArray) { bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue; MachineSDNode *N; - void *IP = 0; + void *IP = nullptr; const SDValue *Ops = OpsArray.data(); unsigned NumOps = OpsArray.size(); if (DoCSE) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps); - IP = 0; + AddNodeIDNode(ID, ~Opcode, VTs, OpsArray); + IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { return cast<MachineSDNode>(UpdadeSDLocOnMergedSDNode(E, DL)); } @@ -5600,34 +5788,42 @@ SelectionDAG::getTargetInsertSubreg(int SRIdx, SDLoc DL, EVT VT, /// getNodeIfExists - Get the specified node if it's already available, or /// else return NULL. 
SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, - const SDValue *Ops, unsigned NumOps) { - if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { + ArrayRef<SDValue> Ops, bool nuw, bool nsw, + bool exact) { + if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) { FoldingSetNodeID ID; - AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); - void *IP = 0; + AddNodeIDNode(ID, Opcode, VTList, Ops); + if (isBinOpWithFlags(Opcode)) + AddBinaryNodeIDCustom(ID, nuw, nsw, exact); + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return E; } - return NULL; + return nullptr; } /// getDbgValue - Creates a SDDbgValue node. /// +/// SDNode SDDbgValue * -SelectionDAG::getDbgValue(MDNode *MDPtr, SDNode *N, unsigned R, uint64_t Off, +SelectionDAG::getDbgValue(MDNode *MDPtr, SDNode *N, unsigned R, + bool IsIndirect, uint64_t Off, DebugLoc DL, unsigned O) { - return new (Allocator) SDDbgValue(MDPtr, N, R, Off, DL, O); + return new (Allocator) SDDbgValue(MDPtr, N, R, IsIndirect, Off, DL, O); } +/// Constant SDDbgValue * -SelectionDAG::getDbgValue(MDNode *MDPtr, const Value *C, uint64_t Off, - DebugLoc DL, unsigned O) { +SelectionDAG::getConstantDbgValue(MDNode *MDPtr, const Value *C, + uint64_t Off, + DebugLoc DL, unsigned O) { return new (Allocator) SDDbgValue(MDPtr, C, Off, DL, O); } +/// FrameIndex SDDbgValue * -SelectionDAG::getDbgValue(MDNode *MDPtr, unsigned FI, uint64_t Off, - DebugLoc DL, unsigned O) { +SelectionDAG::getFrameIndexDbgValue(MDNode *MDPtr, unsigned FI, uint64_t Off, + DebugLoc DL, unsigned O) { return new (Allocator) SDDbgValue(MDPtr, FI, Off, DL, O); } @@ -5641,7 +5837,7 @@ class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener { SDNode::use_iterator &UI; SDNode::use_iterator &UE; - virtual void NodeDeleted(SDNode *N, SDNode *E) { + void NodeDeleted(SDNode *N, SDNode *E) override { // Increment the iterator as needed. 
while (UI != UE && N == *UI) ++UI; @@ -5945,7 +6141,7 @@ unsigned SelectionDAG::AssignTopologicalOrder() { // count of outstanding operands. for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) { SDNode *N = I++; - checkForCycles(N); + checkForCycles(N, this); unsigned Degree = N->getNumOperands(); if (Degree == 0) { // A node with no uses, add it to the result array immediately. @@ -5965,7 +6161,7 @@ unsigned SelectionDAG::AssignTopologicalOrder() { // such that by the time the end is reached all nodes will be sorted. for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I) { SDNode *N = I; - checkForCycles(N); + checkForCycles(N, this); // N is in sorted position, so all its uses have one less operand // that needs to be sorted. for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); @@ -5990,9 +6186,11 @@ unsigned SelectionDAG::AssignTopologicalOrder() { #ifndef NDEBUG SDNode *S = ++I; dbgs() << "Overran sorted position:\n"; - S->dumprFull(); + S->dumprFull(this); dbgs() << "\n"; + dbgs() << "Checking if this is due to cycles\n"; + checkForCycles(this, true); #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } } @@ -6033,6 +6231,7 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { SDDbgValue *Dbg = *I; if (Dbg->getKind() == SDDbgValue::SDNODE) { SDDbgValue *Clone = getDbgValue(Dbg->getMDPtr(), ToNode, To.getResNo(), + Dbg->isIndirect(), Dbg->getOffset(), Dbg->getDebugLoc(), Dbg->getOrder()); ClonedDVs.push_back(Clone); @@ -6076,9 +6275,8 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, } MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, - const SDValue *Ops, unsigned NumOps, EVT memvt, - MachineMemOperand *mmo) - : SDNode(Opc, Order, dl, VTs, Ops, NumOps), + ArrayRef<SDValue> Ops, EVT memvt, MachineMemOperand *mmo) + : SDNode(Opc, Order, dl, VTs, Ops), MemoryVT(memvt), MMO(mmo) { SubclassData = encodeMemSDNodeFlags(0, 
ISD::UNINDEXED, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant()); @@ -6297,12 +6495,10 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { switch (N->getOpcode()) { default: - Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, - &Operands[0], Operands.size())); + Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands)); break; case ISD::VSELECT: - Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, - &Operands[0], Operands.size())); + Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, Operands)); break; case ISD::SHL: case ISD::SRA: @@ -6327,8 +6523,7 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { Scalars.push_back(getUNDEF(EltVT)); return getNode(ISD::BUILD_VECTOR, dl, - EVT::getVectorVT(*getContext(), EltVT, ResNE), - &Scalars[0], Scalars.size()); + EVT::getVectorVT(*getContext(), EltVT, ResNE), Scalars); } @@ -6362,8 +6557,8 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes) return true; - const GlobalValue *GV1 = NULL; - const GlobalValue *GV2 = NULL; + const GlobalValue *GV1 = nullptr; + const GlobalValue *GV2 = nullptr; int64_t Offset1 = 0; int64_t Offset2 = 0; const TargetLowering *TLI = TM.getTargetLowering(); @@ -6385,8 +6580,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { unsigned PtrWidth = TLI->getPointerTypeSizeInBits(GV->getType()); APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); - llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne, - TLI->getDataLayout()); + llvm::computeKnownBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne, + TLI->getDataLayout()); unsigned AlignBits = KnownZero.countTrailingOnes(); unsigned Align = AlignBits ? 
1 << std::min(31U, AlignBits) : 0; if (Align) @@ -6448,6 +6643,22 @@ SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, return std::make_pair(Lo, Hi); } +void SelectionDAG::ExtractVectorElements(SDValue Op, + SmallVectorImpl<SDValue> &Args, + unsigned Start, unsigned Count) { + EVT VT = Op.getValueType(); + if (Count == 0) + Count = VT.getVectorNumElements(); + + EVT EltVT = VT.getVectorElementType(); + EVT IdxTy = TLI->getVectorIdxTy(); + SDLoc SL(Op); + for (unsigned i = Start, e = Start + Count; i != e; ++i) { + Args.push_back(getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, + Op, getConstant(i, IdxTy))); + } +} + // getAddressSpace - Return the address space this GlobalAddress belongs to. unsigned GlobalAddressSDNode::getAddressSpace() const { return getGlobal()->getType()->getAddressSpace(); @@ -6465,7 +6676,7 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits, - bool isBigEndian) { + bool isBigEndian) const { EVT VT = getValueType(0); assert(VT.isVector() && "Expected a vector type"); unsigned sz = VT.getSizeInBits(); @@ -6526,6 +6737,54 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, return true; } +SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const { + if (UndefElements) { + UndefElements->clear(); + UndefElements->resize(getNumOperands()); + } + SDValue Splatted; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + SDValue Op = getOperand(i); + if (Op.getOpcode() == ISD::UNDEF) { + if (UndefElements) + (*UndefElements)[i] = true; + } else if (!Splatted) { + Splatted = Op; + } else if (Splatted != Op) { + return SDValue(); + } + } + + if (!Splatted) { + assert(getOperand(0).getOpcode() == ISD::UNDEF && + "Can only have a splat without a constant for all undefs."); + return getOperand(0); + } + + return Splatted; +} + +ConstantSDNode * +BuildVectorSDNode::getConstantSplatNode(BitVector *UndefElements) const { + 
return dyn_cast_or_null<ConstantSDNode>( + getSplatValue(UndefElements).getNode()); +} + +ConstantFPSDNode * +BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const { + return dyn_cast_or_null<ConstantFPSDNode>( + getSplatValue(UndefElements).getNode()); +} + +bool BuildVectorSDNode::isConstant() const { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + unsigned Opc = getOperand(i).getOpcode(); + if (Opc != ISD::UNDEF && Opc != ISD::Constant && Opc != ISD::ConstantFP) + return false; + } + return true; +} + bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { // Find the first non-undef value in the shuffle mask. unsigned i, e; @@ -6542,10 +6801,11 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { return true; } -#ifdef XDEBUG +#ifndef NDEBUG static void checkForCyclesHelper(const SDNode *N, SmallPtrSet<const SDNode*, 32> &Visited, - SmallPtrSet<const SDNode*, 32> &Checked) { + SmallPtrSet<const SDNode*, 32> &Checked, + const llvm::SelectionDAG *DAG) { // If this node has already been checked, don't check it again. if (Checked.count(N)) return; @@ -6553,29 +6813,37 @@ static void checkForCyclesHelper(const SDNode *N, // If a node has already been visited on this depth-first walk, reject it as // a cycle. 
if (!Visited.insert(N)) { - dbgs() << "Offending node:\n"; - N->dumprFull(); errs() << "Detected cycle in SelectionDAG\n"; + dbgs() << "Offending node:\n"; + N->dumprFull(DAG); dbgs() << "\n"; abort(); } for(unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked); + checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked, DAG); Checked.insert(N); Visited.erase(N); } #endif -void llvm::checkForCycles(const llvm::SDNode *N) { +void llvm::checkForCycles(const llvm::SDNode *N, + const llvm::SelectionDAG *DAG, + bool force) { +#ifndef NDEBUG + bool check = force; #ifdef XDEBUG - assert(N && "Checking nonexistent SDNode"); - SmallPtrSet<const SDNode*, 32> visited; - SmallPtrSet<const SDNode*, 32> checked; - checkForCyclesHelper(N, visited, checked); -#endif + check = true; +#endif // XDEBUG + if (check) { + assert(N && "Checking nonexistent SDNode"); + SmallPtrSet<const SDNode*, 32> visited; + SmallPtrSet<const SDNode*, 32> checked; + checkForCyclesHelper(N, visited, checked, DAG); + } +#endif // !NDEBUG } -void llvm::checkForCycles(const llvm::SelectionDAG *DAG) { - checkForCycles(DAG->getRoot().getNode()); +void llvm::checkForCycles(const llvm::SelectionDAG *DAG, bool force) { + checkForCycles(DAG->getRoot().getNode(), DAG, force); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 41662a9..c07b5e6 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "isel" #include "SelectionDAGBuilder.h" #include "SDNodeDbgValue.h" #include "llvm/ADT/BitVector.h" @@ -34,10 +33,10 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/StackMaps.h" 
-#include "llvm/DebugInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" @@ -62,6 +61,8 @@ #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "isel" + /// LimitFloatPrecision - Generate low-precision inline sequences for /// some float libcalls (6, 8 or 12 bits). static unsigned LimitFloatPrecision; @@ -168,7 +169,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, SDValue Lo, Hi; Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]); Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]); - if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(ValueVT)) std::swap(Lo, Hi); Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi); } else { @@ -214,6 +215,20 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, llvm_unreachable("Unknown mismatch!"); } +static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V, + const Twine &ErrMsg) { + const Instruction *I = dyn_cast_or_null<Instruction>(V); + if (!V) + return Ctx.emitError(ErrMsg); + + const char *AsmError = ", possible invalid constraint for vector type"; + if (const CallInst *CI = dyn_cast<CallInst>(I)) + if (isa<InlineAsm>(CI->getCalledValue())) + return Ctx.emitError(I, ErrMsg + AsmError); + + return Ctx.emitError(I, ErrMsg); +} + /// getCopyFromPartsVector - Create a value that contains the specified legal /// parts combined into the value they represent. If the parts combine to a /// type larger then ValueVT then AssertOp can be used to specify whether the @@ -262,9 +277,9 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the // intermediate operands. - Val = DAG.getNode(IntermediateVT.isVector() ? 
- ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL, - ValueVT, &Ops[0], NumIntermediates); + Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS + : ISD::BUILD_VECTOR, + DL, ValueVT, Ops); } // There is now one part, held in Val. Correct it to match ValueVT. @@ -306,16 +321,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, // Handle cases such as i8 -> <1 x i1> if (ValueVT.getVectorNumElements() != 1) { - LLVMContext &Ctx = *DAG.getContext(); - Twine ErrMsg("non-trivial scalar-to-vector conversion"); - if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) { - if (const CallInst *CI = dyn_cast<CallInst>(I)) - if (isa<InlineAsm>(CI->getCalledValue())) - ErrMsg = ErrMsg + ", possible invalid constraint for vector type"; - Ctx.emitError(I, ErrMsg); - } else { - Ctx.emitError(ErrMsg); - } + diagnosePossiblyInvalidConstraint(*DAG.getContext(), V, + "non-trivial scalar-to-vector conversion"); return DAG.getUNDEF(ValueVT); } @@ -397,18 +404,9 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, "Failed to tile the value with PartVT!"); if (NumParts == 1) { - if (PartEVT != ValueVT) { - LLVMContext &Ctx = *DAG.getContext(); - Twine ErrMsg("scalar-to-vector conversion failed"); - if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) { - if (const CallInst *CI = dyn_cast<CallInst>(I)) - if (isa<InlineAsm>(CI->getCalledValue())) - ErrMsg = ErrMsg + ", possible invalid constraint for vector type"; - Ctx.emitError(I, ErrMsg); - } else { - Ctx.emitError(ErrMsg); - } - } + if (PartEVT != ValueVT) + diagnosePossiblyInvalidConstraint(*DAG.getContext(), V, + "scalar-to-vector conversion failed"); Parts[0] = Val; return; @@ -498,7 +496,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, e = PartVT.getVectorNumElements(); i != e; ++i) Ops.push_back(DAG.getUNDEF(ElementVT)); - Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size()); + Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, Ops); // FIXME: Use 
CONCAT for 2x -> 4x. @@ -627,16 +625,6 @@ namespace { } } - /// areValueTypesLegal - Return true if types of all the values are legal. - bool areValueTypesLegal(const TargetLowering &TLI) { - for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - MVT RegisterVT = RegVTs[Value]; - if (!TLI.isTypeLegal(RegisterVT)) - return false; - } - return true; - } - /// append - Add the specified values to this one. void append(const RegsForValue &RHS) { ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); @@ -651,7 +639,7 @@ namespace { SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, SDLoc dl, SDValue &Chain, SDValue *Flag, - const Value *V = 0) const; + const Value *V = nullptr) const; /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the /// specified value into the registers specified by this object. This uses @@ -697,7 +685,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, Parts.resize(NumRegs); for (unsigned i = 0; i != NumRegs; ++i) { SDValue P; - if (Flag == 0) { + if (!Flag) { P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT); } else { P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag); @@ -765,9 +753,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, Parts.clear(); } - return DAG.getNode(ISD::MERGE_VALUES, dl, - DAG.getVTList(&ValueVTs[0], ValueVTs.size()), - &Values[0], ValueVTs.size()); + return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values); } /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the @@ -798,7 +784,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SmallVector<SDValue, 8> Chains(NumRegs); for (unsigned i = 0; i != NumRegs; ++i) { SDValue Part; - if (Flag == 0) { + if (!Flag) { Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]); } else { Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag); @@ -821,7 +807,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG 
&DAG, SDLoc dl, // = op c3, ..., f2 Chain = Chains[NumRegs-1]; else - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); } /// AddInlineAsmOperands - Add this value to the specified inlineasm node @@ -874,7 +860,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, AA = &aa; GFI = gfi; LibInfo = li; - TD = DAG.getTarget().getDataLayout(); + DL = DAG.getTarget().getDataLayout(); Context = DAG.getContext(); LPadToCallSiteMap.clear(); } @@ -890,8 +876,9 @@ void SelectionDAGBuilder::clear() { UnusedArgNodeMap.clear(); PendingLoads.clear(); PendingExports.clear(); - CurInst = NULL; + CurInst = nullptr; HasTailCall = false; + SDNodeOrder = LowestSDNodeOrder; } /// clearDanglingDebugInfo - Clear the dangling debug information @@ -922,7 +909,7 @@ SDValue SelectionDAGBuilder::getRoot() { // Otherwise, we have to make a token factor node. SDValue Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, - &PendingLoads[0], PendingLoads.size()); + PendingLoads); PendingLoads.clear(); DAG.setRoot(Root); return Root; @@ -952,8 +939,7 @@ SDValue SelectionDAGBuilder::getControlRoot() { } Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, - &PendingExports[0], - PendingExports.size()); + PendingExports); PendingExports.clear(); DAG.setRoot(Root); return Root; @@ -973,7 +959,7 @@ void SelectionDAGBuilder::visit(const Instruction &I) { if (!isa<TerminatorInst>(&I) && !HasTailCall) CopyToExportRegsIfNeeded(&I); - CurInst = NULL; + CurInst = nullptr; } void SelectionDAGBuilder::visitPHI(const PHINode &) { @@ -1003,11 +989,14 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); MDNode *Variable = DI->getVariable(); uint64_t Offset = DI->getOffset(); + // A dbg.value for an alloca is always indirect. 
+ bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; SDDbgValue *SDV; if (Val.getNode()) { - if (!EmitFuncArgumentDbgValue(V, Variable, Offset, Val)) { + if (!EmitFuncArgumentDbgValue(V, Variable, Offset, IsIndirect, Val)) { SDV = DAG.getDbgValue(Variable, Val.getNode(), - Val.getResNo(), Offset, dl, DbgSDNodeOrder); + Val.getResNo(), IsIndirect, + Offset, dl, DbgSDNodeOrder); DAG.AddDbgValue(SDV, Val.getNode(), false); } } else @@ -1032,7 +1021,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { RegsForValue RFV(*DAG.getContext(), *TM.getTargetLowering(), InReg, V->getType()); SDValue Chain = DAG.getEntryNode(); - N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, NULL, V); + N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); resolveDanglingDebugInfo(V, N); return N; } @@ -1103,8 +1092,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { Constants.push_back(SDValue(Val, i)); } - return DAG.getMergeValues(&Constants[0], Constants.size(), - getCurSDLoc()); + return DAG.getMergeValues(Constants, getCurSDLoc()); } if (const ConstantDataSequential *CDS = @@ -1119,9 +1107,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } if (isa<ArrayType>(CDS->getType())) - return DAG.getMergeValues(&Ops[0], Ops.size(), getCurSDLoc()); + return DAG.getMergeValues(Ops, getCurSDLoc()); return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), - VT, &Ops[0], Ops.size()); + VT, Ops); } if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { @@ -1144,8 +1132,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { Constants[i] = DAG.getConstant(0, EltVT); } - return DAG.getMergeValues(&Constants[0], NumElts, - getCurSDLoc()); + return DAG.getMergeValues(Constants, getCurSDLoc()); } if (const BlockAddress *BA = dyn_cast<BlockAddress>(C)) @@ -1173,8 +1160,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } // Create a BUILD_VECTOR node. 
- return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), - VT, &Ops[0], Ops.size()); + return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops); } // If this is a static alloca, generate it as the frameindex instead of @@ -1191,7 +1177,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { unsigned InReg = FuncInfo.InitializeRegForValue(Inst); RegsForValue RFV(*DAG.getContext(), *TLI, InReg, Inst->getType()); SDValue Chain = DAG.getEntryNode(); - return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, NULL, V); + return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); } llvm_unreachable("Can't get register for value!"); @@ -1235,7 +1221,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { } Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), - MVT::Other, &Chains[0], NumValues); + MVT::Other, Chains); } else if (I.getNumOperands() != 0) { SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*TLI, I.getOperand(0)->getType(), ValueVTs); @@ -1392,7 +1378,9 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, - MachineBasicBlock *SwitchBB) { + MachineBasicBlock *SwitchBB, + uint32_t TWeight, + uint32_t FWeight) { const BasicBlock *BB = CurBB->getBasicBlock(); // If the leaf of the tree is a comparison, merge the condition into @@ -1416,8 +1404,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, llvm_unreachable("Unknown compare instruction"); } - CaseBlock CB(Condition, BOp->getOperand(0), - BOp->getOperand(1), NULL, TBB, FBB, CurBB); + CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr, + TBB, FBB, CurBB, TWeight, FWeight); SwitchCases.push_back(CB); return; } @@ -1425,17 +1413,26 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, // Create a CaseBlock record representing this branch. 
CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()), - NULL, TBB, FBB, CurBB); + nullptr, TBB, FBB, CurBB, TWeight, FWeight); SwitchCases.push_back(CB); } +/// Scale down both weights to fit into uint32_t. +static void ScaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { + uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse; + uint32_t Scale = (NewMax / UINT32_MAX) + 1; + NewTrue = NewTrue / Scale; + NewFalse = NewFalse / Scale; +} + /// FindMergedConditions - If Cond is an expression like void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, - unsigned Opc) { + unsigned Opc, uint32_t TWeight, + uint32_t FWeight) { // If this node is not part of the or/and tree, emit it as a branch. const Instruction *BOp = dyn_cast<Instruction>(Cond); if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) || @@ -1443,7 +1440,8 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, BOp->getParent() != CurBB->getBasicBlock() || !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { - EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB); + EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, + TWeight, FWeight); return; } @@ -1455,6 +1453,7 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, if (Opc == Instruction::Or) { // Codegen X | Y as: + // BB1: // jmp_if_X TBB // jmp TmpBB // TmpBB: @@ -1462,14 +1461,34 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, // jmp FBB // + // We have flexibility in setting Prob for BB1 and Prob for TmpBB. + // The requirement is that + // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) + // = TrueProb for orignal BB. + // Assuming the orignal weights are A and B, one choice is to set BB1's + // weights to A and A+2B, and set TmpBB's weights to A and 2B. 
This choice + // assumes that + // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. + // Another choice is to assume TrueProb for BB1 equals to TrueProb for + // TmpBB, but the math is more complicated. + + uint64_t NewTrueWeight = TWeight; + uint64_t NewFalseWeight = (uint64_t)TWeight + 2 * (uint64_t)FWeight; + ScaleWeights(NewTrueWeight, NewFalseWeight); // Emit the LHS condition. - FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc); + FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc, + NewTrueWeight, NewFalseWeight); + NewTrueWeight = TWeight; + NewFalseWeight = 2 * (uint64_t)FWeight; + ScaleWeights(NewTrueWeight, NewFalseWeight); // Emit the RHS condition into TmpBB. - FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc); + FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, + NewTrueWeight, NewFalseWeight); } else { assert(Opc == Instruction::And && "Unknown merge op!"); // Codegen X & Y as: + // BB1: // jmp_if_X TmpBB // jmp FBB // TmpBB: @@ -1478,11 +1497,28 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, // // This requires creation of TmpBB after CurBB. + // We have flexibility in setting Prob for BB1 and Prob for TmpBB. + // The requirement is that + // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) + // = FalseProb for orignal BB. + // Assuming the orignal weights are A and B, one choice is to set BB1's + // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice + // assumes that + // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB. + + uint64_t NewTrueWeight = 2 * (uint64_t)TWeight + (uint64_t)FWeight; + uint64_t NewFalseWeight = FWeight; + ScaleWeights(NewTrueWeight, NewFalseWeight); // Emit the LHS condition. 
- FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc); + FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc, + NewTrueWeight, NewFalseWeight); + NewTrueWeight = 2 * (uint64_t)TWeight; + NewFalseWeight = FWeight; + ScaleWeights(NewTrueWeight, NewFalseWeight); // Emit the RHS condition into TmpBB. - FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc); + FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, + NewTrueWeight, NewFalseWeight); } } @@ -1524,7 +1560,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; // Figure out which block is immediately after the current one. - MachineBasicBlock *NextBlock = 0; + MachineBasicBlock *NextBlock = nullptr; MachineFunction::iterator BBI = BrMBB; if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; @@ -1533,8 +1569,9 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // Update machine-CFG edges. BrMBB->addSuccessor(Succ0MBB); - // If this is not a fall-through branch, emit the branch. - if (Succ0MBB != NextBlock) + // If this is not a fall-through branch or optimizations are switched off, + // emit the branch. + if (Succ0MBB != NextBlock || TM.getOptLevel() == CodeGenOpt::None) DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(Succ0MBB))); @@ -1569,7 +1606,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { (BOp->getOpcode() == Instruction::And || BOp->getOpcode() == Instruction::Or)) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, - BOp->getOpcode()); + BOp->getOpcode(), getEdgeWeight(BrMBB, Succ0MBB), + getEdgeWeight(BrMBB, Succ1MBB)); // If the compares in later blocks need to use values not currently // exported from this block, export them now. This block should always // be the first entry. 
@@ -1599,7 +1637,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // Create a CaseBlock record representing this branch. CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()), - NULL, Succ0MBB, Succ1MBB, BrMBB); + nullptr, Succ0MBB, Succ1MBB, BrMBB); // Use visitSwitchCase to actually insert the fast branch sequence for this // cond branch. @@ -1615,7 +1653,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, SDLoc dl = getCurSDLoc(); // Build the setcc now. - if (CB.CmpMHS == NULL) { + if (!CB.CmpMHS) { // Fold "(X == true)" to X and "(X == false)" to !X to // handle common cases produced by branch lowering. if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) && @@ -1656,7 +1694,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = 0; + MachineBasicBlock *NextBlock = nullptr; MachineFunction::iterator BBI = SwitchBB; if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; @@ -1734,7 +1772,7 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. 
- MachineBasicBlock *NextBlock = 0; + MachineBasicBlock *NextBlock = nullptr; MachineFunction::iterator BBI = SwitchBB; if (++BBI != FuncInfo.MF->end()) @@ -1817,8 +1855,8 @@ void SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { const TargetLowering *TLI = TM.getTargetLowering(); SDValue Chain = TLI->makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, - MVT::isVoid, 0, 0, false, getCurSDLoc(), - false, false).second; + MVT::isVoid, nullptr, 0, false, + getCurSDLoc(), false, false).second; DAG.setRoot(Chain); } @@ -1865,7 +1903,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = 0; + MachineBasicBlock *NextBlock = nullptr; MachineFunction::iterator BBI = SwitchBB; if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; @@ -1939,7 +1977,7 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = 0; + MachineBasicBlock *NextBlock = nullptr; MachineFunction::iterator BBI = SwitchBB; if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; @@ -2019,8 +2057,7 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { // Merge into one. SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), - DAG.getVTList(&ValueVTs[0], ValueVTs.size()), - &Ops[0], 2); + DAG.getVTList(ValueVTs), Ops); setValue(&LP, Res); } @@ -2041,7 +2078,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, MachineFunction *CurMF = FuncInfo.MF; // Figure out which block is immediately after the current one. 
- MachineBasicBlock *NextBlock = 0; + MachineBasicBlock *NextBlock = nullptr; MachineFunction::iterator BBI = CR.CaseBB; if (++BBI != FuncInfo.MF->end()) @@ -2152,7 +2189,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, if (I->High == I->Low) { // This is just small small case range :) containing exactly 1 case CC = ISD::SETEQ; - LHS = SV; RHS = I->High; MHS = NULL; + LHS = SV; RHS = I->High; MHS = nullptr; } else { CC = ISD::SETLE; LHS = I->Low; MHS = SV; RHS = I->High; @@ -2359,7 +2396,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, volatile double RDensity = (double)RSize.roundToDouble() / (Last - RBegin + 1ULL).roundToDouble(); - double Metric = Range.logBase2()*(LDensity+RDensity); + volatile double Metric = Range.logBase2()*(LDensity+RDensity); // Should always split in some non-trivial place DEBUG(dbgs() <<"=>Step\n" << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n' @@ -2387,7 +2424,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, CaseRange LHSR(CR.Range.first, Pivot); CaseRange RHSR(Pivot, CR.Range.second); const Constant *C = Pivot->Low; - MachineBasicBlock *FalseBB = 0, *TrueBB = 0; + MachineBasicBlock *FalseBB = nullptr, *TrueBB = nullptr; // We know that we branch to the LHS if the Value being switched on is // less than the Pivot value, C. We use this to optimize our binary @@ -2429,7 +2466,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, // Create a CaseBlock record representing a conditional branch to // the LHS node if the value being switched on SV is less than C. // Otherwise, branch to LHS. 
- CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB); + CaseBlock CB(ISD::SETLT, SV, C, nullptr, TrueBB, FalseBB, CR.CaseBB); if (CR.CaseBB == SwitchBB) visitSwitchCase(CB, SwitchBB); @@ -2598,7 +2635,7 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, if (Cases.size() >= 2) // Must recompute end() each iteration because it may be // invalidated by erase if we hold on to it - for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin()); + for (CaseItr I = Cases.begin(), J = std::next(Cases.begin()); J != Cases.end(); ) { const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue(); const APInt& currentValue = cast<ConstantInt>(I->High)->getValue(); @@ -2642,7 +2679,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { MachineBasicBlock *SwitchMBB = FuncInfo.MBB; // Figure out which block is immediately after the current one. - MachineBasicBlock *NextBlock = 0; + MachineBasicBlock *NextBlock = nullptr; MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()]; // If there is only the default destination, branch to it if it is not the @@ -2676,7 +2713,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { // Push the initial CaseRec onto the worklist CaseRecVector WorkList; - WorkList.push_back(CaseRec(SwitchMBB,0,0, + WorkList.push_back(CaseRec(SwitchMBB,nullptr,nullptr, CaseRange(Cases.begin(),Cases.end()))); while (!WorkList.empty()) { @@ -2725,6 +2762,11 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { getValue(I.getAddress()))); } +void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) { + if (DAG.getTarget().Options.TrapUnreachable) + DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); +} + void SelectionDAGBuilder::visitFSub(const User &I) { // -0.0 - X --> fneg Type *Ty = I.getType(); @@ -2742,8 +2784,22 @@ void SelectionDAGBuilder::visitFSub(const User &I) { void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { 
SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - setValue(&I, DAG.getNode(OpCode, getCurSDLoc(), - Op1.getValueType(), Op1, Op2)); + + bool nuw = false; + bool nsw = false; + bool exact = false; + if (const OverflowingBinaryOperator *OFBinOp = + dyn_cast<const OverflowingBinaryOperator>(&I)) { + nuw = OFBinOp->hasNoUnsignedWrap(); + nsw = OFBinOp->hasNoSignedWrap(); + } + if (const PossiblyExactOperator *ExactOp = + dyn_cast<const PossiblyExactOperator>(&I)) + exact = ExactOp->isExact(); + + SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), + Op1, Op2, nuw, nsw, exact); + setValue(&I, BinNodeValue); } void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { @@ -2774,8 +2830,25 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32); } - setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), - Op1.getValueType(), Op1, Op2)); + bool nuw = false; + bool nsw = false; + bool exact = false; + + if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) { + + if (const OverflowingBinaryOperator *OFBinOp = + dyn_cast<const OverflowingBinaryOperator>(&I)) { + nuw = OFBinOp->hasNoUnsignedWrap(); + nsw = OFBinOp->hasNoSignedWrap(); + } + if (const PossiblyExactOperator *ExactOp = + dyn_cast<const PossiblyExactOperator>(&I)) + exact = ExactOp->isExact(); + } + + SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2, + nuw, nsw, exact); + setValue(&I, Res); } void SelectionDAGBuilder::visitSDiv(const User &I) { @@ -2847,8 +2920,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) { FalseVal.getResNo() + i)); setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), - DAG.getVTList(&ValueVTs[0], NumValues), - &Values[0], NumValues)); + DAG.getVTList(ValueVTs), Values)); } void SelectionDAGBuilder::visitTrunc(const User &I) { @@ -2944,6 +3016,13 @@ void SelectionDAGBuilder::visitBitCast(const User &I) { if 
(DestVT != N.getValueType()) setValue(&I, DAG.getNode(ISD::BITCAST, getCurSDLoc(), DestVT, N)); // convert types. + // Check if the original LLVM IR Operand was a ConstantInt, because getValue() + // might fold any kind of constant expression to an integer constant and that + // is not what we are looking for. Only regcognize a bitcast of a genuine + // constant integer as an opaque constant. + else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0))) + setValue(&I, DAG.getConstant(C->getValue(), DestVT, /*isTarget=*/false, + /*isOpaque*/true)); else setValue(&I, N); // noop cast. } @@ -3050,11 +3129,9 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { MOps2[0] = Src2; Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, - getCurSDLoc(), VT, - &MOps1[0], NumConcat); + getCurSDLoc(), VT, MOps1); Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, - getCurSDLoc(), VT, - &MOps2[0], NumConcat); + getCurSDLoc(), VT, MOps2); // Readjust mask for new input vector length. 
SmallVector<int, 8> MappedOps; @@ -3172,8 +3249,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { Ops.push_back(Res); } - setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), - VT, &Ops[0], Ops.size())); + setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops)); } void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { @@ -3215,8 +3291,7 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { SDValue(Agg.getNode(), Agg.getResNo() + i); setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), - DAG.getVTList(&AggValueVTs[0], NumAggValues), - &Values[0], NumAggValues)); + DAG.getVTList(AggValueVTs), Values)); } void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { @@ -3250,8 +3325,7 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { SDValue(Agg.getNode(), Agg.getResNo() + i); setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), - DAG.getVTList(&ValValueVTs[0], NumValValues), - &Values[0], NumValValues)); + DAG.getVTList(ValValueVTs), Values)); } void SelectionDAGBuilder::visitGetElementPtr(const User &I) { @@ -3269,7 +3343,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue(); if (Field) { // N = N + Offset - uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field); + uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field); N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, DAG.getConstant(Offset, N.getValueType())); } @@ -3283,7 +3357,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { if (CI->isZero()) continue; uint64_t Offs = - TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); + DL->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); SDValue OffsVal; EVT PTy = TLI->getPointerTy(AS); unsigned PtrBits = PTy.getSizeInBits(); @@ -3300,7 
+3374,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // N = N + Idx * ElementSize; APInt ElementSize = APInt(TLI->getPointerSizeInBits(AS), - TD->getTypeAllocSize(Ty)); + DL->getTypeAllocSize(Ty)); SDValue IdxN = getValue(Idx); // If the index is smaller or larger than intptr_t, truncate or extend @@ -3373,8 +3447,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) }; SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); - SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurSDLoc(), - VTs, Ops, 3); + SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurSDLoc(), VTs, Ops); setValue(&I, DSA); DAG.setRoot(DSA.getValue(1)); @@ -3391,8 +3464,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Type *Ty = I.getType(); bool isVolatile = I.isVolatile(); - bool isNonTemporal = I.getMetadata("nontemporal") != 0; - bool isInvariant = I.getMetadata("invariant.load") != 0; + bool isNonTemporal = I.getMetadata("nontemporal") != nullptr; + bool isInvariant = I.getMetadata("invariant.load") != nullptr; unsigned Alignment = I.getAlignment(); const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); @@ -3406,7 +3479,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { SDValue Root; bool ConstantMemory = false; - if (I.isVolatile() || NumValues > MaxParallelChains) + if (isVolatile || NumValues > MaxParallelChains) // Serialize volatile loads with other side effects. 
Root = getRoot(); else if (AA->pointsToConstantMemory( @@ -3419,6 +3492,10 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Root = DAG.getRoot(); } + const TargetLowering *TLI = TM.getTargetLowering(); + if (isVolatile) + Root = TLI->prepareVolatileOrAtomicLoad(Root, getCurSDLoc(), DAG); + SmallVector<SDValue, 4> Values(NumValues); SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains), NumValues)); @@ -3433,8 +3510,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { // (MaxParallelChains should always remain as failsafe). if (ChainI == MaxParallelChains) { assert(PendingLoads.empty() && "PendingLoads must be serialized first"); - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), - MVT::Other, &Chains[0], ChainI); + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + makeArrayRef(Chains.data(), ChainI)); Root = Chain; ChainI = 0; } @@ -3451,8 +3528,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { } if (!ConstantMemory) { - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), - MVT::Other, &Chains[0], ChainI); + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + makeArrayRef(Chains.data(), ChainI)); if (isVolatile) DAG.setRoot(Chain); else @@ -3460,8 +3537,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { } setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), - DAG.getVTList(&ValueVTs[0], NumValues), - &Values[0], NumValues)); + DAG.getVTList(ValueVTs), Values)); } void SelectionDAGBuilder::visitStore(const StoreInst &I) { @@ -3489,7 +3565,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { NumValues)); EVT PtrVT = Ptr.getValueType(); bool isVolatile = I.isVolatile(); - bool isNonTemporal = I.getMetadata("nontemporal") != 0; + bool isNonTemporal = I.getMetadata("nontemporal") != nullptr; unsigned Alignment = I.getAlignment(); const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); @@ -3497,8 +3573,8 @@ void 
SelectionDAGBuilder::visitStore(const StoreInst &I) { for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { // See visitLoad comments. if (ChainI == MaxParallelChains) { - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), - MVT::Other, &Chains[0], ChainI); + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + makeArrayRef(Chains.data(), ChainI)); Root = Chain; ChainI = 0; } @@ -3511,8 +3587,8 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { Chains[ChainI] = St; } - SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), - MVT::Other, &Chains[0], ChainI); + SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + makeArrayRef(Chains.data(), ChainI)); DAG.setRoot(StoreNode); } @@ -3525,48 +3601,48 @@ static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order, if (Before) { if (Order == AcquireRelease || Order == SequentiallyConsistent) Order = Release; - else if (Order == Acquire || Order == Monotonic) + else if (Order == Acquire || Order == Monotonic || Order == Unordered) return Chain; } else { if (Order == AcquireRelease) Order = Acquire; - else if (Order == Release || Order == Monotonic) + else if (Order == Release || Order == Monotonic || Order == Unordered) return Chain; } SDValue Ops[3]; Ops[0] = Chain; Ops[1] = DAG.getConstant(Order, TLI.getPointerTy()); Ops[2] = DAG.getConstant(Scope, TLI.getPointerTy()); - return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3); + return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops); } void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { SDLoc dl = getCurSDLoc(); - AtomicOrdering Order = I.getOrdering(); + AtomicOrdering SuccessOrder = I.getSuccessOrdering(); + AtomicOrdering FailureOrder = I.getFailureOrdering(); SynchronizationScope Scope = I.getSynchScope(); SDValue InChain = getRoot(); const TargetLowering *TLI = TM.getTargetLowering(); if (TLI->getInsertFencesForAtomic()) - InChain = 
InsertFenceForAtomic(InChain, Order, Scope, true, dl, + InChain = InsertFenceForAtomic(InChain, SuccessOrder, Scope, true, dl, DAG, *TLI); - SDValue L = - DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, - getValue(I.getCompareOperand()).getSimpleValueType(), - InChain, - getValue(I.getPointerOperand()), - getValue(I.getCompareOperand()), - getValue(I.getNewValOperand()), - MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */, - TLI->getInsertFencesForAtomic() ? Monotonic : Order, - Scope); + MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType(); + SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other); + SDValue L = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain, + getValue(I.getPointerOperand()), getValue(I.getCompareOperand()), + getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()), + 0 /* Alignment */, + TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder, + TLI->getInsertFencesForAtomic() ? Monotonic : FailureOrder, Scope); - SDValue OutChain = L.getValue(1); + SDValue OutChain = L.getValue(2); if (TLI->getInsertFencesForAtomic()) - OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, + OutChain = InsertFenceForAtomic(OutChain, SuccessOrder, Scope, false, dl, DAG, *TLI); setValue(&I, L); @@ -3627,7 +3703,7 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) { Ops[0] = getRoot(); Ops[1] = DAG.getConstant(I.getOrdering(), TLI->getPointerTy()); Ops[2] = DAG.getConstant(I.getSynchScope(), TLI->getPointerTy()); - DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3)); + DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); } void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { @@ -3643,12 +3719,21 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic load"); + MachineMemOperand *MMO = + DAG.getMachineFunction(). 
+ getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), + MachineMemOperand::MOVolatile | + MachineMemOperand::MOLoad, + VT.getStoreSize(), + I.getAlignment() ? I.getAlignment() : + DAG.getEVTAlignment(VT)); + + InChain = TLI->prepareVolatileOrAtomicLoad(InChain, dl, DAG); SDValue L = - DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain, - getValue(I.getPointerOperand()), - I.getPointerOperand(), I.getAlignment(), - TLI->getInsertFencesForAtomic() ? Monotonic : Order, - Scope); + DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain, + getValue(I.getPointerOperand()), MMO, + TLI->getInsertFencesForAtomic() ? Monotonic : Order, + Scope); SDValue OutChain = L.getValue(1); @@ -3734,27 +3819,23 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, if (HasChain) ValueVTs.push_back(MVT::Other); - SDVTList VTs = DAG.getVTList(ValueVTs.data(), ValueVTs.size()); + SDVTList VTs = DAG.getVTList(ValueVTs); // Create the node. SDValue Result; if (IsTgtIntrinsic) { // This is target intrinsic that touches memory Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), - VTs, &Ops[0], Ops.size(), - Info.memVT, + VTs, Ops, Info.memVT, MachinePointerInfo(Info.ptrVal, Info.offset), Info.align, Info.vol, Info.readMem, Info.writeMem); } else if (!HasChain) { - Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), - VTs, &Ops[0], Ops.size()); + Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); } else if (!I.getType()->isVoidTy()) { - Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), - VTs, &Ops[0], Ops.size()); + Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops); } else { - Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), - VTs, &Ops[0], Ops.size()); + Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops); } if (HasChain) { @@ -4476,7 +4557,7 @@ static unsigned getTruncatedArgReg(const SDValue &N) { /// At the end of instruction selection, they will be inserted to the entry BB. 
bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, - int64_t Offset, + int64_t Offset, bool IsIndirect, const SDValue &N) { const Argument *Arg = dyn_cast<Argument>(V); if (!Arg) @@ -4528,8 +4609,6 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, if (!Op) return false; - // FIXME: This does not handle register-indirect values at offset 0. - bool IsIndirect = Offset != 0; if (Op->isReg()) FuncInfo.ArgDbgValues.push_back(BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE), @@ -4565,18 +4644,34 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { default: // By default, turn this into a target intrinsic node. visitTargetIntrinsic(I, Intrinsic); - return 0; - case Intrinsic::vastart: visitVAStart(I); return 0; - case Intrinsic::vaend: visitVAEnd(I); return 0; - case Intrinsic::vacopy: visitVACopy(I); return 0; + return nullptr; + case Intrinsic::vastart: visitVAStart(I); return nullptr; + case Intrinsic::vaend: visitVAEnd(I); return nullptr; + case Intrinsic::vacopy: visitVACopy(I); return nullptr; case Intrinsic::returnaddress: setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI->getPointerTy(), getValue(I.getArgOperand(0)))); - return 0; + return nullptr; case Intrinsic::frameaddress: setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI->getPointerTy(), getValue(I.getArgOperand(0)))); - return 0; + return nullptr; + case Intrinsic::read_register: { + Value *Reg = I.getArgOperand(0); + SDValue RegName = DAG.getMDNode(cast<MDNode>(Reg)); + EVT VT = TM.getTargetLowering()->getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::READ_REGISTER, sdl, VT, RegName)); + return nullptr; + } + case Intrinsic::write_register: { + Value *Reg = I.getArgOperand(0); + Value *RegValue = I.getArgOperand(1); + SDValue Chain = getValue(RegValue).getOperand(0); + SDValue RegName = DAG.getMDNode(cast<MDNode>(Reg)); + DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, 
Chain, + RegName, getValue(RegValue))); + return nullptr; + } case Intrinsic::setjmp: return &"_setjmp"[!TLI->usesUnderscoreSetJmp()]; case Intrinsic::longjmp: @@ -4599,7 +4694,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.setRoot(DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, false, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1)))); - return 0; + return nullptr; } case Intrinsic::memset: { // Assert for address < 256 since we support only user defined address @@ -4616,7 +4711,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); DAG.setRoot(DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, MachinePointerInfo(I.getArgOperand(0)))); - return 0; + return nullptr; } case Intrinsic::memmove: { // Assert for address < 256 since we support only user defined address @@ -4636,7 +4731,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.setRoot(DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1)))); - return 0; + return nullptr; } case Intrinsic::dbg_declare: { const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); @@ -4647,14 +4742,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { "Variable in DbgDeclareInst should be either null or a DIVariable."); if (!Address || !DIVar) { DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); - return 0; + return nullptr; } // Check if address has undef value. 
if (isa<UndefValue>(Address) || (Address->use_empty() && !isa<Argument>(Address))) { DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); - return 0; + return nullptr; } SDValue &N = NodeMap[Address]; @@ -4676,29 +4771,29 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode()); if (FINode) // Byval parameter. We have a frame index at this point. - SDV = DAG.getDbgValue(Variable, FINode->getIndex(), - 0, dl, SDNodeOrder); + SDV = DAG.getFrameIndexDbgValue(Variable, FINode->getIndex(), + 0, dl, SDNodeOrder); else { // Address is an argument, so try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - EmitFuncArgumentDbgValue(Address, Variable, 0, N); - return 0; + EmitFuncArgumentDbgValue(Address, Variable, 0, false, N); + return nullptr; } } else if (AI) SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(), - 0, dl, SDNodeOrder); + true, 0, dl, SDNodeOrder); else { // Can't do anything with other non-AI cases yet. DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t"); DEBUG(Address->dump()); - return 0; + return nullptr; } DAG.AddDbgValue(SDV, N.getNode(), isParameter); } else { // If Address is an argument then try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - if (!EmitFuncArgumentDbgValue(Address, Variable, 0, N)) { + if (!EmitFuncArgumentDbgValue(Address, Variable, 0, false, N)) { // If variable is pinned by a alloca in dominating bb then // use StaticAllocaMap. 
if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) { @@ -4706,17 +4801,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DenseMap<const AllocaInst*, int>::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) { - SDV = DAG.getDbgValue(Variable, SI->second, - 0, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, 0, false); - return 0; + SDV = DAG.getFrameIndexDbgValue(Variable, SI->second, + 0, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, nullptr, false); + return nullptr; } } } DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); } } - return 0; + return nullptr; } case Intrinsic::dbg_value: { const DbgValueInst &DI = cast<DbgValueInst>(I); @@ -4724,18 +4819,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { assert((!DIVar || DIVar.isVariable()) && "Variable in DbgValueInst should be either null or a DIVariable."); if (!DIVar) - return 0; + return nullptr; MDNode *Variable = DI.getVariable(); uint64_t Offset = DI.getOffset(); const Value *V = DI.getValue(); if (!V) - return 0; + return nullptr; SDDbgValue *SDV; if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) { - SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, 0, false); + SDV = DAG.getConstantDbgValue(Variable, V, Offset, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, nullptr, false); } else { // Do not use getValue() in here; we don't want to generate code at // this point if it hasn't been done yet. @@ -4744,9 +4839,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Check unused arguments map. N = UnusedArgNodeMap[V]; if (N.getNode()) { - if (!EmitFuncArgumentDbgValue(V, Variable, Offset, N)) { + // A dbg.value for an alloca is always indirect. 
+ bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; + if (!EmitFuncArgumentDbgValue(V, Variable, Offset, IsIndirect, N)) { SDV = DAG.getDbgValue(Variable, N.getNode(), - N.getResNo(), Offset, dl, SDNodeOrder); + N.getResNo(), IsIndirect, + Offset, dl, SDNodeOrder); DAG.AddDbgValue(SDV, N.getNode(), false); } } else if (!V->use_empty() ) { @@ -4769,18 +4867,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!AI) { DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n"); DEBUG(dbgs() << " Last seen at:\n " << *V << "\n"); - return 0; + return nullptr; } DenseMap<const AllocaInst*, int>::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI == FuncInfo.StaticAllocaMap.end()) - return 0; // VLAs. - int FI = SI->second; - - MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo()) - MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc()); - return 0; + return nullptr; // VLAs. 
+ return nullptr; } case Intrinsic::eh_typeid_for: { @@ -4789,7 +4882,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV); Res = DAG.getConstant(TypeID, MVT::i32); setValue(&I, Res); - return 0; + return nullptr; } case Intrinsic::eh_return_i32: @@ -4800,10 +4893,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getControlRoot(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); - return 0; + return nullptr; case Intrinsic::eh_unwind_init: DAG.getMachineFunction().getMMI().setCallsUnwindInit(true); - return 0; + return nullptr; case Intrinsic::eh_dwarf_cfa: { SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl, TLI->getPointerTy()); @@ -4817,7 +4910,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.getConstant(0, TLI->getPointerTy())); setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(), FA, Offset)); - return 0; + return nullptr; } case Intrinsic::eh_sjlj_callsite: { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); @@ -4826,7 +4919,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!"); MMI.setCurrentCallSite(CI->getZExtValue()); - return 0; + return nullptr; } case Intrinsic::eh_sjlj_functioncontext: { // Get and store the index of the function context. 
@@ -4835,23 +4928,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts()); int FI = FuncInfo.StaticAllocaMap[FnCtx]; MFI->setFunctionContextIndex(FI); - return 0; + return nullptr; } case Intrinsic::eh_sjlj_setjmp: { SDValue Ops[2]; Ops[0] = getRoot(); Ops[1] = getValue(I.getArgOperand(0)); SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, sdl, - DAG.getVTList(MVT::i32, MVT::Other), - Ops, 2); + DAG.getVTList(MVT::i32, MVT::Other), Ops); setValue(&I, Op.getValue(0)); DAG.setRoot(Op.getValue(1)); - return 0; + return nullptr; } case Intrinsic::eh_sjlj_longjmp: { DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other, getRoot(), getValue(I.getArgOperand(0)))); - return 0; + return nullptr; } case Intrinsic::x86_mmx_pslli_w: @@ -4865,7 +4957,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue ShAmt = getValue(I.getArgOperand(1)); if (isa<ConstantSDNode>(ShAmt)) { visitTargetIntrinsic(I, Intrinsic); - return 0; + return nullptr; } unsigned NewIntrinsic = 0; EVT ShAmtVT = MVT::v2i32; @@ -4904,14 +4996,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue ShOps[2]; ShOps[0] = ShAmt; ShOps[1] = DAG.getConstant(0, MVT::i32); - ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, &ShOps[0], 2); + ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps); EVT DestVT = TLI->getValueType(I.getType()); ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, DAG.getConstant(NewIntrinsic, MVT::i32), getValue(I.getArgOperand(0)), ShAmt); setValue(&I, Res); - return 0; + return nullptr; } case Intrinsic::x86_avx_vinsertf128_pd_256: case Intrinsic::x86_avx_vinsertf128_ps_256: @@ -4926,7 +5018,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(1)), DAG.getConstant(Idx, TLI->getVectorIdxTy())); 
setValue(&I, Res); - return 0; + return nullptr; } case Intrinsic::x86_avx_vextractf128_pd_256: case Intrinsic::x86_avx_vextractf128_ps_256: @@ -4939,7 +5031,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)), DAG.getConstant(Idx, TLI->getVectorIdxTy())); setValue(&I, Res); - return 0; + return nullptr; } case Intrinsic::convertff: case Intrinsic::convertfsi: @@ -4972,31 +5064,31 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(2)), Code); setValue(&I, Res); - return 0; + return nullptr; } case Intrinsic::powi: setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG)); - return 0; + return nullptr; case Intrinsic::log: setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); - return 0; + return nullptr; case Intrinsic::log2: setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); - return 0; + return nullptr; case Intrinsic::log10: setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); - return 0; + return nullptr; case Intrinsic::exp: setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); - return 0; + return nullptr; case Intrinsic::exp2: setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); - return 0; + return nullptr; case Intrinsic::pow: setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG, *TLI)); - return 0; + return nullptr; case Intrinsic::sqrt: case Intrinsic::fabs: case Intrinsic::sin: @@ -5025,21 +5117,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, DAG.getNode(Opcode, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); - return 0; + return nullptr; } case Intrinsic::copysign: setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, getValue(I.getArgOperand(0)).getValueType(), 
getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); - return 0; + return nullptr; case Intrinsic::fma: setValue(&I, DAG.getNode(ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)))); - return 0; + return nullptr; case Intrinsic::fmuladd: { EVT VT = TLI->getValueType(I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && @@ -5060,42 +5152,45 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(2))); setValue(&I, Add); } - return 0; + return nullptr; } case Intrinsic::convert_to_fp16: - setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, sdl, - MVT::i16, getValue(I.getArgOperand(0)))); - return 0; + setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16, + DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16, + getValue(I.getArgOperand(0)), + DAG.getTargetConstant(0, MVT::i32)))); + return nullptr; case Intrinsic::convert_from_fp16: - setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, sdl, - MVT::f32, getValue(I.getArgOperand(0)))); - return 0; + setValue(&I, + DAG.getNode(ISD::FP_EXTEND, sdl, TLI->getValueType(I.getType()), + DAG.getNode(ISD::BITCAST, sdl, MVT::f16, + getValue(I.getArgOperand(0))))); + return nullptr; case Intrinsic::pcmarker: { SDValue Tmp = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp)); - return 0; + return nullptr; } case Intrinsic::readcyclecounter: { SDValue Op = getRoot(); Res = DAG.getNode(ISD::READCYCLECOUNTER, sdl, - DAG.getVTList(MVT::i64, MVT::Other), - &Op, 1); + DAG.getVTList(MVT::i64, MVT::Other), Op); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); - return 0; + return nullptr; } case Intrinsic::bswap: setValue(&I, DAG.getNode(ISD::BSWAP, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); - return 0; + return nullptr; case Intrinsic::cttz: { SDValue Arg = getValue(I.getArgOperand(0)); ConstantInt *CI 
= cast<ConstantInt>(I.getArgOperand(1)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF, sdl, Ty, Arg)); - return 0; + return nullptr; } case Intrinsic::ctlz: { SDValue Arg = getValue(I.getArgOperand(0)); @@ -5103,26 +5198,26 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF, sdl, Ty, Arg)); - return 0; + return nullptr; } case Intrinsic::ctpop: { SDValue Arg = getValue(I.getArgOperand(0)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg)); - return 0; + return nullptr; } case Intrinsic::stacksave: { SDValue Op = getRoot(); Res = DAG.getNode(ISD::STACKSAVE, sdl, - DAG.getVTList(TLI->getPointerTy(), MVT::Other), &Op, 1); + DAG.getVTList(TLI->getPointerTy(), MVT::Other), Op); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); - return 0; + return nullptr; } case Intrinsic::stackrestore: { Res = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res)); - return 0; + return nullptr; } case Intrinsic::stackprotector: { // Emit code into the DAG to store the stack guard onto the stack. @@ -5144,7 +5239,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { true, false, 0); setValue(&I, Res); DAG.setRoot(Res); - return 0; + return nullptr; } case Intrinsic::objectsize: { // If we don't know by now, we're never going to know. 
@@ -5161,16 +5256,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Res = DAG.getConstant(0, Ty); setValue(&I, Res); - return 0; + return nullptr; } case Intrinsic::annotation: case Intrinsic::ptr_annotation: // Drop the intrinsic, but forward the value setValue(&I, getValue(I.getOperand(0))); - return 0; + return nullptr; case Intrinsic::var_annotation: // Discard annotate attributes - return 0; + return nullptr; case Intrinsic::init_trampoline: { const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts()); @@ -5183,16 +5278,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Ops[4] = DAG.getSrcValue(I.getArgOperand(0)); Ops[5] = DAG.getSrcValue(F); - Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops, 6); + Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops); DAG.setRoot(Res); - return 0; + return nullptr; } case Intrinsic::adjust_trampoline: { setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl, TLI->getPointerTy(), getValue(I.getArgOperand(0)))); - return 0; + return nullptr; } case Intrinsic::gcroot: if (GFI) { @@ -5202,18 +5297,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode()); GFI->addStackRoot(FI->getIndex(), TypeMap); } - return 0; + return nullptr; case Intrinsic::gcread: case Intrinsic::gcwrite: llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); case Intrinsic::flt_rounds: setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32)); - return 0; + return nullptr; case Intrinsic::expect: { // Just replace __builtin_expect(exp, c) with EXP. setValue(&I, getValue(I.getArgOperand(0))); - return 0; + return nullptr; } case Intrinsic::debugtrap: @@ -5223,20 +5318,19 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? 
ISD::TRAP : ISD::DEBUGTRAP; DAG.setRoot(DAG.getNode(Op, sdl,MVT::Other, getRoot())); - return 0; + return nullptr; } TargetLowering::ArgListTy Args; - TargetLowering:: - CallLoweringInfo CLI(getRoot(), I.getType(), - false, false, false, false, 0, CallingConv::C, - /*isTailCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed=*/true, - DAG.getExternalSymbol(TrapFuncName.data(), - TLI->getPointerTy()), - Args, DAG, sdl); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(sdl).setChain(getRoot()) + .setCallee(CallingConv::C, I.getType(), + DAG.getExternalSymbol(TrapFuncName.data(), TLI->getPointerTy()), + std::move(Args), 0); + std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI); DAG.setRoot(Result.second); - return 0; + return nullptr; } case Intrinsic::uadd_with_overflow: @@ -5260,7 +5354,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2)); - return 0; + return nullptr; } case Intrinsic::prefetch: { SDValue Ops[5]; @@ -5271,25 +5365,24 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Ops[3] = getValue(I.getArgOperand(2)); Ops[4] = getValue(I.getArgOperand(3)); DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl, - DAG.getVTList(MVT::Other), - &Ops[0], 5, + DAG.getVTList(MVT::Other), Ops, EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)), 0, /* align */ false, /* volatile */ rw==0, /* read */ rw==1)); /* write */ - return 0; + return nullptr; } case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: { bool IsStart = (Intrinsic == Intrinsic::lifetime_start); // Stack coloring is not enabled in O0, discard region information. 
if (TM.getOptLevel() == CodeGenOpt::None) - return 0; + return nullptr; SmallVector<Value *, 4> Allocas; - GetUnderlyingObjects(I.getArgOperand(1), Allocas, TD); + GetUnderlyingObjects(I.getArgOperand(1), Allocas, DL); for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(), E = Allocas.end(); Object != E; ++Object) { @@ -5306,18 +5399,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Ops[1] = DAG.getFrameIndex(FI, TLI->getPointerTy(), true); unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); - Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops, 2); + Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops); DAG.setRoot(Res); } - return 0; + return nullptr; } case Intrinsic::invariant_start: // Discard region information. setValue(&I, DAG.getUNDEF(TLI->getPointerTy())); - return 0; + return nullptr; case Intrinsic::invariant_end: // Discard region information. - return 0; + return nullptr; case Intrinsic::stackprotectorcheck: { // Do not actually emit anything for this basic block. Instead we initialize // the stack protector descriptor and export the guard variable so we can @@ -5328,19 +5421,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Flush our exports since we are going to process a terminator. 
(void)getControlRoot(); - return 0; + return nullptr; } + case Intrinsic::clear_cache: + return TLI->getClearCacheBuiltinName(); case Intrinsic::donothing: // ignore - return 0; + return nullptr; case Intrinsic::experimental_stackmap: { visitStackmap(I); - return 0; + return nullptr; } case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: { visitPatchpoint(I); - return 0; + return nullptr; } } } @@ -5348,53 +5443,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool isTailCall, MachineBasicBlock *LandingPad) { + const TargetLowering *TLI = TM.getTargetLowering(); PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); FunctionType *FTy = cast<FunctionType>(PT->getElementType()); Type *RetTy = FTy->getReturnType(); MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - MCSymbol *BeginLabel = 0; + MCSymbol *BeginLabel = nullptr; TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Args.reserve(CS.arg_size()); - // Check whether the function can return without sret-demotion. 
- SmallVector<ISD::OutputArg, 4> Outs; - const TargetLowering *TLI = TM.getTargetLowering(); - GetReturnInfo(RetTy, CS.getAttributes(), Outs, *TLI); - - bool CanLowerReturn = TLI->CanLowerReturn(CS.getCallingConv(), - DAG.getMachineFunction(), - FTy->isVarArg(), Outs, - FTy->getContext()); - - SDValue DemoteStackSlot; - int DemoteStackIdx = -100; - - if (!CanLowerReturn) { - uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize( - FTy->getReturnType()); - unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment( - FTy->getReturnType()); - MachineFunction &MF = DAG.getMachineFunction(); - DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); - Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType()); - - DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI->getPointerTy()); - Entry.Node = DemoteStackSlot; - Entry.Ty = StackSlotPtrType; - Entry.isSExt = false; - Entry.isZExt = false; - Entry.isInReg = false; - Entry.isSRet = true; - Entry.isNest = false; - Entry.isByVal = false; - Entry.isReturned = false; - Entry.Alignment = Align; - Args.push_back(Entry); - RetTy = Type::getVoidTy(FTy->getContext()); - } - for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { const Value *V = *i; @@ -5435,58 +5494,20 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // Check if target-independent constraints permit a tail call here. // Target-dependent constraints are checked within TLI->LowerCallTo. 
- if (isTailCall && !isInTailCallPosition(CS, *TLI)) + if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget())) isTailCall = false; - TargetLowering:: - CallLoweringInfo CLI(getRoot(), RetTy, FTy, isTailCall, Callee, Args, DAG, - getCurSDLoc(), CS); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) + .setCallee(RetTy, FTy, Callee, std::move(Args), CS).setTailCall(isTailCall); + std::pair<SDValue,SDValue> Result = TLI->LowerCallTo(CLI); assert((isTailCall || Result.second.getNode()) && "Non-null chain expected with non-tail call!"); assert((Result.second.getNode() || !Result.first.getNode()) && "Null value expected with tail call!"); - if (Result.first.getNode()) { + if (Result.first.getNode()) setValue(CS.getInstruction(), Result.first); - } else if (!CanLowerReturn && Result.second.getNode()) { - // The instruction result is the result of loading from the - // hidden sret parameter. - SmallVector<EVT, 1> PVTs; - Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType()); - - ComputeValueVTs(*TLI, PtrRetTy, PVTs); - assert(PVTs.size() == 1 && "Pointers should fit in one register"); - EVT PtrVT = PVTs[0]; - - SmallVector<EVT, 4> RetTys; - SmallVector<uint64_t, 4> Offsets; - RetTy = FTy->getReturnType(); - ComputeValueVTs(*TLI, RetTy, RetTys, &Offsets); - - unsigned NumValues = RetTys.size(); - SmallVector<SDValue, 4> Values(NumValues); - SmallVector<SDValue, 4> Chains(NumValues); - - for (unsigned i = 0; i < NumValues; ++i) { - SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), PtrVT, - DemoteStackSlot, - DAG.getConstant(Offsets[i], PtrVT)); - SDValue L = DAG.getLoad(RetTys[i], getCurSDLoc(), Result.second, Add, - MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), - false, false, false, 1); - Values[i] = L; - Chains[i] = L.getValue(1); - } - - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), - MVT::Other, &Chains[0], NumValues); - PendingLoads.push_back(Chain); - - 
setValue(CS.getInstruction(), - DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), - DAG.getVTList(&RetTys[0], RetTys.size()), - &Values[0], Values.size())); - } if (!Result.second.getNode()) { // As a special case, a null chain means that a tail call has been emitted @@ -5514,9 +5535,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, /// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the /// value is equal or not-equal to zero. static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) { - for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); - UI != E; ++UI) { - if (const ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) + for (const User *U : V->users()) { + if (const ICmpInst *IC = dyn_cast<ICmpInst>(U)) if (IC->isEquality()) if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1))) if (C->isNullValue()) @@ -5540,7 +5560,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput), - Builder.TD)) + Builder.DL)) return Builder.getValue(LoadCst); } @@ -5626,7 +5646,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { switch (CSize->getZExtValue()) { default: LoadVT = MVT::Other; - LoadTy = 0; + LoadTy = nullptr; ActuallyDoIt = false; break; case 2: @@ -5659,9 +5679,13 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { // bloat the code. const TargetLowering *TLI = TM.getTargetLowering(); if (ActuallyDoIt && CSize->getZExtValue() > 4) { + unsigned DstAS = LHS->getType()->getPointerAddressSpace(); + unsigned SrcAS = RHS->getType()->getPointerAddressSpace(); // TODO: Handle 5 byte compare as 4-byte + 1 byte. // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. 
- if (!TLI->isTypeLegal(LoadVT) ||!TLI->allowsUnalignedMemoryAccesses(LoadVT)) + if (!TLI->isTypeLegal(LoadVT) || + !TLI->allowsUnalignedMemoryAccesses(LoadVT, SrcAS) || + !TLI->allowsUnalignedMemoryAccesses(LoadVT, DstAS)) ActuallyDoIt = false; } @@ -5849,7 +5873,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); ComputeUsesVAFloatArgument(I, &MMI); - const char *RenameFn = 0; + const char *RenameFn = nullptr; if (Function *F = I.getCalledFunction()) { if (F->isDeclaration()) { if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) { @@ -6024,7 +6048,7 @@ public: RegsForValue AssignedRegs; explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info) - : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) { + : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr,0) { } /// getCallOperandValEVT - Return the EVT of the Value* that this operand @@ -6032,8 +6056,8 @@ public: /// MVT::Other. EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI, - const DataLayout *TD) const { - if (CallOperandVal == 0) return MVT::Other; + const DataLayout *DL) const { + if (!CallOperandVal) return MVT::Other; if (isa<BasicBlock>(CallOperandVal)) return TLI.getPointerTy(); @@ -6058,7 +6082,7 @@ public: // If OpTy is not a single value, it may be a struct/union that we // can tile with integers. if (!OpTy->isSingleValueType() && OpTy->isSized()) { - unsigned BitSize = TD->getTypeSizeInBits(OpTy); + unsigned BitSize = DL->getTypeSizeInBits(OpTy); switch (BitSize) { default: break; case 1: @@ -6114,7 +6138,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, // types are identical size, use a bitcast to convert (e.g. two differing // vector types). 
MVT RegVT = *PhysReg.second->vt_begin(); - if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) { + if (RegVT.getSizeInBits() == OpInfo.CallOperand.getValueSizeInBits()) { OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; @@ -6247,7 +6271,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); } - OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), *TLI, TD). + OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), *TLI, DL). getSimpleVT(); } @@ -6354,7 +6378,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // There is no longer a Value* corresponding to this operand. - OpInfo.CallOperandVal = 0; + OpInfo.CallOperandVal = nullptr; // It is now an indirect operand. OpInfo.isIndirect = true; @@ -6643,8 +6667,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (Flag.getNode()) AsmNodeOperands.push_back(Flag); Chain = DAG.getNode(ISD::INLINEASM, getCurSDLoc(), - DAG.getVTList(MVT::Other, MVT::Glue), - &AsmNodeOperands[0], AsmNodeOperands.size()); + DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); Flag = Chain.getValue(1); // If this asm returns a register value, copy the result from that register @@ -6707,8 +6730,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } if (!OutChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, - &OutChains[0], OutChains.size()); + Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains); DAG.setRoot(Chain); } @@ -6722,11 +6744,11 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) { void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { const TargetLowering *TLI = TM.getTargetLowering(); - const DataLayout &TD = *TLI->getDataLayout(); + const DataLayout &DL = *TLI->getDataLayout(); SDValue V = DAG.getVAArg(TLI->getValueType(I.getType()), getCurSDLoc(), getRoot(), 
getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)), - TD.getABITypeAlignment(I.getType())); + DL.getABITypeAlignment(I.getType())); setValue(&I, V); DAG.setRoot(V.getValue(1)); } @@ -6778,15 +6800,51 @@ SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx, } Type *retTy = useVoidTy ? Type::getVoidTy(*DAG.getContext()) : CI.getType(); - TargetLowering::CallLoweringInfo CLI(getRoot(), retTy, /*retSExt*/ false, - /*retZExt*/ false, /*isVarArg*/ false, /*isInReg*/ false, NumArgs, - CI.getCallingConv(), /*isTailCall*/ false, /*doesNotReturn*/ false, - /*isReturnValueUsed*/ CI.use_empty(), Callee, Args, DAG, getCurSDLoc()); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) + .setCallee(CI.getCallingConv(), retTy, Callee, std::move(Args), NumArgs) + .setDiscardResult(!CI.use_empty()); const TargetLowering *TLI = TM.getTargetLowering(); return TLI->LowerCallTo(CLI); } +/// \brief Add a stack map intrinsic call's live variable operands to a stackmap +/// or patchpoint target node's operand list. +/// +/// Constants are converted to TargetConstants purely as an optimization to +/// avoid constant materialization and register allocation. +/// +/// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not +/// generate address computation nodes, and so ExpandISelPseudo can convert the +/// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids +/// address materialization and register allocation, but may also be required +/// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an +/// alloca in the entry block, then the runtime may assume that the alloca's +/// StackMap location can be read immediately after compilation and that the +/// location is valid at any point during execution (this is similar to the +/// assumption made by the llvm.gcroot intrinsic). 
If the alloca's location were +/// only available in a register, then the runtime would need to trap when +/// execution reaches the StackMap in order to read the alloca's location. +static void addStackMapLiveVars(const CallInst &CI, unsigned StartIdx, + SmallVectorImpl<SDValue> &Ops, + SelectionDAGBuilder &Builder) { + for (unsigned i = StartIdx, e = CI.getNumArgOperands(); i != e; ++i) { + SDValue OpVal = Builder.getValue(CI.getArgOperand(i)); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) { + Ops.push_back( + Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); + Ops.push_back( + Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); + } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) { + const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); + Ops.push_back( + Builder.DAG.getTargetFrameIndex(FI->getIndex(), TLI.getPointerTy())); + } else + Ops.push_back(OpVal); + } +} + /// \brief Lower llvm.experimental.stackmap directly to its target opcode. void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { // void @llvm.experimental.stackmap(i32 <id>, i32 <numShadowBytes>, @@ -6794,61 +6852,64 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value."); - SDValue Callee = getValue(CI.getCalledValue()); + SDValue Chain, InFlag, Callee, NullPtr; + SmallVector<SDValue, 32> Ops; - // Lower into a call sequence with no args and no return value. - std::pair<SDValue, SDValue> Result = LowerCallOperands(CI, 0, 0, Callee); - // Set the root to the target-lowered call chain. - SDValue Chain = Result.second; - DAG.setRoot(Chain); + SDLoc DL = getCurSDLoc(); + Callee = getValue(CI.getCalledValue()); + NullPtr = DAG.getIntPtrConstant(0, true); - /// Get a call instruction from the call sequence chain. - /// Tail calls are not allowed. 
- SDNode *CallEnd = Chain.getNode(); - assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && - "Expected a callseq node."); - SDNode *Call = CallEnd->getOperand(0).getNode(); - bool hasGlue = Call->getGluedNode(); + // The stackmap intrinsic only records the live variables (the arguments + // passed to it) and emits NOPS (if requested). Unlike the patchpoint + // intrinsic, this won't be lowered to a function call. This means we don't + // have to worry about calling conventions and target specific lowering code. + // Instead we perform the call lowering right here. + // + // chain, flag = CALLSEQ_START(chain, 0) + // chain, flag = STACKMAP(id, nbytes, ..., chain, flag) + // chain, flag = CALLSEQ_END(chain, 0, 0, flag) + // + Chain = DAG.getCALLSEQ_START(getRoot(), NullPtr, DL); + InFlag = Chain.getValue(1); - // Replace the target specific call node with the stackmap intrinsic. - SmallVector<SDValue, 8> Ops; + // Add the <id> and <numBytes> constants. + SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64)); + SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32)); - // Add the <id> and <numShadowBytes> constants. - for (unsigned i = 0; i < 2; ++i) { - SDValue tmp = getValue(CI.getOperand(i)); - Ops.push_back(DAG.getTargetConstant( - cast<ConstantSDNode>(tmp)->getZExtValue(), MVT::i32)); - } // Push live variables for the stack map. - for (unsigned i = 2, e = CI.getNumArgOperands(); i != e; ++i) - Ops.push_back(getValue(CI.getArgOperand(i))); + addStackMapLiveVars(CI, 2, Ops, *this); - // Push the chain (this is originally the first operand of the call, but - // becomes now the last or second to last operand). 
- Ops.push_back(*(Call->op_begin())); + // We are not pushing any register mask info here on the operands list, + // because the stackmap doesn't clobber anything. - // Push the glue flag (last operand). - if (hasGlue) - Ops.push_back(*(Call->op_end()-1)); + // Push the chain and the glue flag. + Ops.push_back(Chain); + Ops.push_back(InFlag); + // Create the STACKMAP node. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + SDNode *SM = DAG.getMachineNode(TargetOpcode::STACKMAP, DL, NodeTys, Ops); + Chain = SDValue(SM, 0); + InFlag = Chain.getValue(1); - // Replace the target specific call node with a STACKMAP node. - MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::STACKMAP, getCurSDLoc(), - NodeTys, Ops); + Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL); - // StackMap generates no value, so nothing goes in the NodeMap. + // Stackmaps don't generate values, so nothing goes into the NodeMap. - // Fixup the consumers of the intrinsic. The chain and glue may be used in the - // call sequence. - DAG.ReplaceAllUsesWith(Call, MN); + // Set the root to the target-lowered call chain. + DAG.setRoot(Chain); - DAG.DeleteNode(Call); + // Inform the Frame Information that we have a stackmap in this function. + FuncInfo.MF->getFrameInfo()->setHasStackMap(); } /// \brief Lower llvm.experimental.patchpoint directly to its target opcode. 
void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { - // void|i64 @llvm.experimental.patchpoint.void|i64(i32 <id>, + // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>, // i32 <numBytes>, // i8* <target>, // i32 <numArgs>, @@ -6861,17 +6922,19 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { SDValue Callee = getValue(CI.getOperand(2)); // <target> // Get the real number of arguments participating in the call <numArgs> - unsigned NumArgs = - cast<ConstantSDNode>(getValue(CI.getArgOperand(3)))->getZExtValue(); + SDValue NArgVal = getValue(CI.getArgOperand(PatchPointOpers::NArgPos)); + unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue(); // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs> - assert(CI.getNumArgOperands() >= NumArgs + 4 && + // Intrinsics include all meta-operands up to but not including CC. + unsigned NumMetaOpers = PatchPointOpers::CCPos; + assert(CI.getNumArgOperands() >= NumMetaOpers + NumArgs && "Not enough arguments provided to the patchpoint intrinsic"); // For AnyRegCC the arguments are lowered later on manually. unsigned NumCallArgs = isAnyRegCC ? 0 : NumArgs; std::pair<SDValue, SDValue> Result = - LowerCallOperands(CI, 4, NumCallArgs, Callee, isAnyRegCC); + LowerCallOperands(CI, NumMetaOpers, NumCallArgs, Callee, isAnyRegCC); // Set the root to the target-lowered call chain. SDValue Chain = Result.second; @@ -6891,13 +6954,16 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { // Replace the target specific call node with the patchable intrinsic. SmallVector<SDValue, 8> Ops; - // Add the <id> and <numNopBytes> constants. - for (unsigned i = 0; i < 2; ++i) { - SDValue tmp = getValue(CI.getOperand(i)); - Ops.push_back(DAG.getTargetConstant( - cast<ConstantSDNode>(tmp)->getZExtValue(), MVT::i32)); - } + // Add the <id> and <numBytes> constants. 
+ SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64)); + SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32)); + // Assume that the Callee is a constant address. + // FIXME: handle function symbols in the future. Ops.push_back( DAG.getIntPtrConstant(cast<ConstantSDNode>(Callee)->getZExtValue(), /*isTarget=*/true)); @@ -6915,25 +6981,16 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { // Add the arguments we omitted previously. The register allocator should // place these in any free register. if (isAnyRegCC) - for (unsigned i = 4, e = NumArgs + 4; i != e; ++i) + for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) Ops.push_back(getValue(CI.getArgOperand(i))); - // Push the arguments from the call instruction. + // Push the arguments from the call instruction up to the register mask. SDNode::op_iterator e = hasGlue ? Call->op_end()-2 : Call->op_end()-1; for (SDNode::op_iterator i = Call->op_begin()+2; i != e; ++i) Ops.push_back(*i); // Push live variables for the stack map. - for (unsigned i = NumArgs + 4, e = CI.getNumArgOperands(); i != e; ++i) { - SDValue OpVal = getValue(CI.getArgOperand(i)); - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) { - Ops.push_back( - DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); - Ops.push_back( - DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); - } else - Ops.push_back(OpVal); - } + addStackMapLiveVars(CI, NumMetaOpers + NumArgs, Ops, *this); // Push the register mask info. 
if (hasGlue) @@ -6960,7 +7017,7 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { // There is always a chain and a glue type at the end ValueVTs.push_back(MVT::Other); ValueVTs.push_back(MVT::Glue); - NodeTys = DAG.getVTList(ValueVTs.data(), ValueVTs.size()); + NodeTys = DAG.getVTList(ValueVTs); } else NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); @@ -6987,6 +7044,24 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { } else DAG.ReplaceAllUsesWith(Call, MN); DAG.DeleteNode(Call); + + // Inform the Frame Information that we have a patchpoint in this function. + FuncInfo.MF->getFrameInfo()->setHasPatchPoint(); +} + +/// Returns an AttributeSet representing the attributes applied to the return +/// value of the given call. +static AttributeSet getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) { + SmallVector<Attribute::AttrKind, 2> Attrs; + if (CLI.RetSExt) + Attrs.push_back(Attribute::SExt); + if (CLI.RetZExt) + Attrs.push_back(Attribute::ZExt); + if (CLI.IsInReg) + Attrs.push_back(Attribute::InReg); + + return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex, + Attrs); } /// TargetLowering::LowerCallTo - This is the default LowerCallTo @@ -6997,43 +7072,85 @@ std::pair<SDValue, SDValue> TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // Handle the incoming return values from the call. 
CLI.Ins.clear(); + Type *OrigRetTy = CLI.RetTy; SmallVector<EVT, 4> RetTys; - ComputeValueVTs(*this, CLI.RetTy, RetTys); - for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { - EVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); - for (unsigned i = 0; i != NumRegs; ++i) { - ISD::InputArg MyFlags; - MyFlags.VT = RegisterVT; - MyFlags.ArgVT = VT; - MyFlags.Used = CLI.IsReturnValueUsed; - if (CLI.RetSExt) - MyFlags.Flags.setSExt(); - if (CLI.RetZExt) - MyFlags.Flags.setZExt(); - if (CLI.IsInReg) - MyFlags.Flags.setInReg(); - CLI.Ins.push_back(MyFlags); + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(*this, CLI.RetTy, RetTys, &Offsets); + + SmallVector<ISD::OutputArg, 4> Outs; + GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this); + + bool CanLowerReturn = + this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(), + CLI.IsVarArg, Outs, CLI.RetTy->getContext()); + + SDValue DemoteStackSlot; + int DemoteStackIdx = -100; + if (!CanLowerReturn) { + // FIXME: equivalent assert? 
+ // assert(!CS.hasInAllocaArgument() && + // "sret demotion is incompatible with inalloca"); + uint64_t TySize = getDataLayout()->getTypeAllocSize(CLI.RetTy); + unsigned Align = getDataLayout()->getPrefTypeAlignment(CLI.RetTy); + MachineFunction &MF = CLI.DAG.getMachineFunction(); + DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); + Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy); + + DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy()); + ArgListEntry Entry; + Entry.Node = DemoteStackSlot; + Entry.Ty = StackSlotPtrType; + Entry.isSExt = false; + Entry.isZExt = false; + Entry.isInReg = false; + Entry.isSRet = true; + Entry.isNest = false; + Entry.isByVal = false; + Entry.isReturned = false; + Entry.Alignment = Align; + CLI.getArgs().insert(CLI.getArgs().begin(), Entry); + CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); + } else { + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) { + ISD::InputArg MyFlags; + MyFlags.VT = RegisterVT; + MyFlags.ArgVT = VT; + MyFlags.Used = CLI.IsReturnValueUsed; + if (CLI.RetSExt) + MyFlags.Flags.setSExt(); + if (CLI.RetZExt) + MyFlags.Flags.setZExt(); + if (CLI.IsInReg) + MyFlags.Flags.setInReg(); + CLI.Ins.push_back(MyFlags); + } } } // Handle all of the outgoing arguments. 
CLI.Outs.clear(); CLI.OutVals.clear(); - ArgListTy &Args = CLI.Args; + ArgListTy &Args = CLI.getArgs(); for (unsigned i = 0, e = Args.size(); i != e; ++i) { SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*this, Args[i].Ty, ValueVTs); - for (unsigned Value = 0, NumValues = ValueVTs.size(); - Value != NumValues; ++Value) { + Type *FinalType = Args[i].Ty; + if (Args[i].isByVal) + FinalType = cast<PointerType>(Args[i].Ty)->getElementType(); + bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters( + FinalType, CLI.CallConv, CLI.IsVarArg); + for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; + ++Value) { EVT VT = ValueVTs[Value]; Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext()); SDValue Op = SDValue(Args[i].Node.getNode(), Args[i].Node.getResNo() + Value); ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = - getDataLayout()->getABITypeAlignment(ArgTy); + unsigned OriginalAlignment = getDataLayout()->getABITypeAlignment(ArgTy); if (Args[i].isZExt) Flags.setZExt(); @@ -7043,8 +7160,18 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setInReg(); if (Args[i].isSRet) Flags.setSRet(); - if (Args[i].isByVal) { + if (Args[i].isByVal) + Flags.setByVal(); + if (Args[i].isInAlloca) { + Flags.setInAlloca(); + // Set the byval flag for CCAssignFn callbacks that don't know about + // inalloca. This way we can know how many bytes we should've allocated + // and how many bytes a callee cleanup function will pop. If we port + // inalloca to more targets, we'll have to add custom inalloca handling + // in the various CC lowering callbacks. 
Flags.setByVal(); + } + if (Args[i].isByVal || Args[i].isInAlloca) { PointerType *Ty = cast<PointerType>(Args[i].Ty); Type *ElementTy = Ty->getElementType(); Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy)); @@ -7059,6 +7186,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } if (Args[i].isNest) Flags.setNest(); + if (NeedsRegBlock) + Flags.setInConsecutiveRegs(); Flags.setOrigAlign(OriginalAlignment); MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); @@ -7091,8 +7220,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setReturned(); } - getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, - PartVT, CLI.CS ? CLI.CS->getInstruction() : 0, ExtendKind); + getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, + CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind); for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 @@ -7104,6 +7233,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { else if (j != 0) MyFlags.Flags.setOrigAlign(1); + // Only mark the end at the last register of the last value. + if (NeedsRegBlock && Value == NumValues - 1 && j == NumParts - 1) + MyFlags.Flags.setInConsecutiveRegsLast(); + CLI.Outs.push_back(MyFlags); CLI.OutVals.push_back(Parts[j]); } @@ -7137,35 +7270,62 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { "LowerCall emitted a value with the wrong type!"); }); - // Collect the legal value parts into potentially illegal values - // that correspond to the original function's return values. 
- ISD::NodeType AssertOp = ISD::DELETED_NODE; - if (CLI.RetSExt) - AssertOp = ISD::AssertSext; - else if (CLI.RetZExt) - AssertOp = ISD::AssertZext; SmallVector<SDValue, 4> ReturnValues; - unsigned CurReg = 0; - for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { - EVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + if (!CanLowerReturn) { + // The instruction result is the result of loading from the + // hidden sret parameter. + SmallVector<EVT, 1> PVTs; + Type *PtrRetTy = PointerType::getUnqual(OrigRetTy); - ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], - NumRegs, RegisterVT, VT, NULL, - AssertOp)); - CurReg += NumRegs; - } + ComputeValueVTs(*this, PtrRetTy, PVTs); + assert(PVTs.size() == 1 && "Pointers should fit in one register"); + EVT PtrVT = PVTs[0]; + + unsigned NumValues = RetTys.size(); + ReturnValues.resize(NumValues); + SmallVector<SDValue, 4> Chains(NumValues); + + for (unsigned i = 0; i < NumValues; ++i) { + SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, + CLI.DAG.getConstant(Offsets[i], PtrVT)); + SDValue L = CLI.DAG.getLoad( + RetTys[i], CLI.DL, CLI.Chain, Add, + MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false, + false, false, 1); + ReturnValues[i] = L; + Chains[i] = L.getValue(1); + } + + CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains); + } else { + // Collect the legal value parts into potentially illegal values + // that correspond to the original function's return values. 
+ ISD::NodeType AssertOp = ISD::DELETED_NODE; + if (CLI.RetSExt) + AssertOp = ISD::AssertSext; + else if (CLI.RetZExt) + AssertOp = ISD::AssertZext; + unsigned CurReg = 0; + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + + ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], + NumRegs, RegisterVT, VT, nullptr, + AssertOp)); + CurReg += NumRegs; + } - // For a function returning void, there is no return value. We can't create - // such a node, so we just return a null return value in that case. In - // that case, nothing will actually look at the value. - if (ReturnValues.empty()) - return std::make_pair(SDValue(), CLI.Chain); + // For a function returning void, there is no return value. We can't create + // such a node, so we just return a null return value in that case. In + // that case, nothing will actually look at the value. 
+ if (ReturnValues.empty()) + return std::make_pair(SDValue(), CLI.Chain); + } SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL, - CLI.DAG.getVTList(&RetTys[0], RetTys.size()), - &ReturnValues[0], ReturnValues.size()); + CLI.DAG.getVTList(RetTys), ReturnValues); return std::make_pair(Res, CLI.Chain); } @@ -7192,7 +7352,7 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { const TargetLowering *TLI = TM.getTargetLowering(); RegsForValue RFV(V->getContext(), *TLI, Reg, V->getType()); SDValue Chain = DAG.getEntryNode(); - RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, 0, V); + RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V); PendingExports.push_back(Chain); } @@ -7208,12 +7368,10 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { return A->use_empty(); const BasicBlock *Entry = A->getParent()->begin(); - for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end(); - UI != E; ++UI) { - const User *U = *UI; + for (const User *U : A->users()) if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U)) return false; // Use not in entry block. 
- } + return true; } @@ -7221,7 +7379,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { SelectionDAG &DAG = SDB->DAG; SDLoc dl = SDB->getCurSDLoc(); const TargetLowering *TLI = getTargetLowering(); - const DataLayout *TD = TLI->getDataLayout(); + const DataLayout *DL = TLI->getDataLayout(); SmallVector<ISD::InputArg, 16> Ins; if (!FuncInfo->CanLowerReturn) { @@ -7247,13 +7405,17 @@ void SelectionDAGISel::LowerArguments(const Function &F) { ComputeValueVTs(*TLI, I->getType(), ValueVTs); bool isArgValueUsed = !I->use_empty(); unsigned PartBase = 0; + Type *FinalType = I->getType(); + if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) + FinalType = cast<PointerType>(FinalType)->getElementType(); + bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters( + FinalType, F.getCallingConv(), F.isVarArg()); for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++Value) { EVT VT = ValueVTs[Value]; Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = - TD->getABITypeAlignment(ArgTy); + unsigned OriginalAlignment = DL->getABITypeAlignment(ArgTy); if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) Flags.setZExt(); @@ -7263,11 +7425,21 @@ void SelectionDAGISel::LowerArguments(const Function &F) { Flags.setInReg(); if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet)) Flags.setSRet(); - if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) { + if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) Flags.setByVal(); + if (F.getAttributes().hasAttribute(Idx, Attribute::InAlloca)) { + Flags.setInAlloca(); + // Set the byval flag for CCAssignFn callbacks that don't know about + // inalloca. This way we can know how many bytes we should've allocated + // and how many bytes a callee cleanup function will pop. If we port + // inalloca to more targets, we'll have to add custom inalloca handling + // in the various CC lowering callbacks. 
+ Flags.setByVal(); + } + if (Flags.isByVal() || Flags.isInAlloca()) { PointerType *Ty = cast<PointerType>(I->getType()); Type *ElementTy = Ty->getElementType(); - Flags.setByValSize(TD->getTypeAllocSize(ElementTy)); + Flags.setByValSize(DL->getTypeAllocSize(ElementTy)); // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. unsigned FrameAlign; @@ -7279,6 +7451,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { } if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) Flags.setNest(); + if (NeedsRegBlock) + Flags.setInConsecutiveRegs(); Flags.setOrigAlign(OriginalAlignment); MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); @@ -7291,6 +7465,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // if it isn't first piece, alignment must be 1 else if (i > 0) MyFlags.Flags.setOrigAlign(1); + + // Only mark the end at the last register of the last value. + if (NeedsRegBlock && Value == NumValues - 1 && i == NumRegs - 1) + MyFlags.Flags.setInConsecutiveRegsLast(); + Ins.push_back(MyFlags); } PartBase += VT.getStoreSize(); @@ -7332,7 +7511,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT); ISD::NodeType AssertOp = ISD::DELETED_NODE; SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, - RegVT, VT, NULL, AssertOp); + RegVT, VT, nullptr, AssertOp); MachineFunction& MF = SDB->DAG.getMachineFunction(); MachineRegisterInfo& RegInfo = MF.getRegInfo(); @@ -7379,7 +7558,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts, PartVT, VT, - NULL, AssertOp)); + nullptr, AssertOp)); } i += NumParts; @@ -7394,7 +7573,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode())) FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); - SDValue Res = 
DAG.getMergeValues(&ArgValues[0], NumValues, + SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues), SDB->getCurSDLoc()); SDB->setValue(I, Res); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 835f643..84679f9 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -18,9 +18,8 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" -#include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" #include <vector> @@ -57,6 +56,7 @@ class MachineBasicBlock; class MachineInstr; class MachineRegisterInfo; class MDNode; +class MVT; class PHINode; class PtrToIntInst; class ReturnInst; @@ -96,7 +96,7 @@ class SelectionDAGBuilder { DebugLoc dl; unsigned SDNodeOrder; public: - DanglingDebugInfo() : DI(0), dl(DebugLoc()), SDNodeOrder(0) { } + DanglingDebugInfo() : DI(nullptr), dl(DebugLoc()), SDNodeOrder(0) { } DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO) : DI(di), dl(DL), SDNodeOrder(SDNO) { } const DbgValueInst* getDI() { return DI; } @@ -135,7 +135,7 @@ private: MachineBasicBlock* BB; uint32_t ExtraWeight; - Case() : Low(0), High(0), BB(0), ExtraWeight(0) { } + Case() : Low(nullptr), High(nullptr), BB(nullptr), ExtraWeight(0) { } Case(const Constant *low, const Constant *high, MachineBasicBlock *bb, uint32_t extraweight) : Low(low), High(high), BB(bb), ExtraWeight(extraweight) { } @@ -320,7 +320,7 @@ private: /// 1. Preserve the architecture independence of stack protector generation. /// /// 2. 
Preserve the normal IR level stack protector check for platforms like - /// OpenBSD for which we support platform specific stack protector + /// OpenBSD for which we support platform-specific stack protector /// generation. /// /// The main problem that guided the present solution is that one can not @@ -338,7 +338,7 @@ private: /// basic block (where the return inst is placed) and then move it back /// later at SelectionDAG/MI time before the stack protector check if the /// tail call optimization failed. The MI level option was nixed - /// immediately since it would require platform specific pattern + /// immediately since it would require platform-specific pattern /// matching. The SelectionDAG level option was nixed because /// SelectionDAG only processes one IR level basic block at a time /// implying one could not create a DAG Combine to move the callinst. @@ -396,8 +396,8 @@ private: /// the same function, use the same failure basic block). class StackProtectorDescriptor { public: - StackProtectorDescriptor() : ParentMBB(0), SuccessMBB(0), FailureMBB(0), - Guard(0) { } + StackProtectorDescriptor() : ParentMBB(nullptr), SuccessMBB(nullptr), + FailureMBB(nullptr), Guard(nullptr) { } ~StackProtectorDescriptor() { } /// Returns true if all fields of the stack protector descriptor are @@ -432,8 +432,8 @@ private: /// parent mbb after we create the stack protector check (SuccessMBB). This /// BB is visited only on stack protector check success. void resetPerBBState() { - ParentMBB = 0; - SuccessMBB = 0; + ParentMBB = nullptr; + SuccessMBB = nullptr; } /// Reset state that only changes when we switch functions. @@ -446,8 +446,8 @@ private: /// 2.The guard variable since the guard variable we are checking against is /// always the same. void resetPerFunctionState() { - FailureMBB = 0; - Guard = 0; + FailureMBB = nullptr; + Guard = nullptr; } MachineBasicBlock *getParentMBB() { return ParentMBB; } @@ -482,14 +482,18 @@ private: /// block will be created. 
MachineBasicBlock *AddSuccessorMBB(const BasicBlock *BB, MachineBasicBlock *ParentMBB, - MachineBasicBlock *SuccMBB = 0); + MachineBasicBlock *SuccMBB = nullptr); }; private: const TargetMachine &TM; public: + /// Lowest valid SDNodeOrder. The special case 0 is reserved for scheduling + /// nodes without a corresponding SDNode. + static const unsigned LowestSDNodeOrder = 1; + SelectionDAG &DAG; - const DataLayout *TD; + const DataLayout *DL; AliasAnalysis *AA; const TargetLibraryInfo *LibInfo; @@ -534,7 +538,7 @@ public: SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, CodeGenOpt::Level ol) - : CurInst(NULL), SDNodeOrder(0), TM(dag.getTarget()), + : CurInst(nullptr), SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag), FuncInfo(funcinfo), OptLevel(ol), HasTailCall(false) { } @@ -596,29 +600,31 @@ public: void setValue(const Value *V, SDValue NewN) { SDValue &N = NodeMap[V]; - assert(N.getNode() == 0 && "Already set a value for this node!"); + assert(!N.getNode() && "Already set a value for this node!"); N = NewN; } void setUnusedArgValue(const Value *V, SDValue NewN) { SDValue &N = UnusedArgNodeMap[V]; - assert(N.getNode() == 0 && "Already set a value for this node!"); + assert(!N.getNode() && "Already set a value for this node!"); N = NewN; } void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, - MachineBasicBlock *SwitchBB, unsigned Opc); + MachineBasicBlock *SwitchBB, unsigned Opc, + uint32_t TW, uint32_t FW); void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, - MachineBasicBlock *SwitchBB); + MachineBasicBlock *SwitchBB, + uint32_t TW, uint32_t FW); bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases); bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB); void CopyToExportRegsIfNeeded(const Value *V); void ExportFromCurrentBlock(const Value *V); void 
LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall, - MachineBasicBlock *LandingPad = NULL); + MachineBasicBlock *LandingPad = nullptr); std::pair<SDValue, SDValue> LowerCallOperands(const CallInst &CI, unsigned ArgIdx, @@ -627,7 +633,7 @@ public: bool useVoidTy = false); /// UpdateSplitBlock - When an MBB was split during scheduling, update the - /// references that ned to refer to the last resulting block. + /// references that need to refer to the last resulting block. void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last); private: @@ -636,7 +642,7 @@ private: void visitBr(const BranchInst &I); void visitSwitch(const SwitchInst &I); void visitIndirectBr(const IndirectBrInst &I); - void visitUnreachable(const UnreachableInst &I) { /* noop */ } + void visitUnreachable(const UnreachableInst &I); // Helpers for visitSwitch bool handleSmallSwitchRange(CaseRec& CR, @@ -779,7 +785,8 @@ private: /// corresponding DBG_VALUE machine instruction for it now. At the end of /// instruction selection, they will be inserted to the entry BB. 
bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, - int64_t Offset, const SDValue &N); + int64_t Offset, bool IsIndirect, + const SDValue &N); }; } // end namespace llvm diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index c04a08d..a71cc68 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -14,11 +14,10 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "ScheduleDAGSDNodes.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/DebugInfo.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" @@ -56,6 +55,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::PREFETCH: return "Prefetch"; case ISD::ATOMIC_FENCE: return "AtomicFence"; case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: return "AtomicCmpSwapWithSuccess"; case ISD::ATOMIC_SWAP: return "AtomicSwap"; case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd"; case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub"; @@ -82,7 +82,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::VALUETYPE: return "ValueType"; case ISD::Register: return "Register"; case ISD::RegisterMask: return "RegisterMask"; - case ISD::Constant: return "Constant"; + case ISD::Constant: + if (cast<ConstantSDNode>(this)->isOpaque()) + return "OpaqueConstant"; + return "Constant"; case ISD::ConstantFP: return "ConstantFP"; case ISD::GlobalAddress: return "GlobalAddress"; case ISD::GlobalTLSAddress: return "GlobalTLSAddress"; @@ -91,6 +94,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case 
ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; case ISD::RETURNADDR: return "RETURNADDR"; case ISD::FRAMEADDR: return "FRAMEADDR"; + case ISD::READ_REGISTER: return "READ_REGISTER"; + case ISD::WRITE_REGISTER: return "WRITE_REGISTER"; case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; case ISD::EH_RETURN: return "EH_RETURN"; case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; @@ -112,7 +117,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { } case ISD::BUILD_VECTOR: return "BUILD_VECTOR"; - case ISD::TargetConstant: return "TargetConstant"; + case ISD::TargetConstant: + if (cast<ConstantSDNode>(this)->isOpaque()) + return "OpaqueTargetConstant"; + return "TargetConstant"; case ISD::TargetConstantFP: return "TargetConstantFP"; case ISD::TargetGlobalAddress: return "TargetGlobalAddress"; case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress"; @@ -213,6 +221,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::ZERO_EXTEND: return "zero_extend"; case ISD::ANY_EXTEND: return "any_extend"; case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg"; + case ISD::ANY_EXTEND_VECTOR_INREG: return "any_extend_vector_inreg"; + case ISD::SIGN_EXTEND_VECTOR_INREG: return "sign_extend_vector_inreg"; + case ISD::ZERO_EXTEND_VECTOR_INREG: return "zero_extend_vector_inreg"; case ISD::TRUNCATE: return "truncate"; case ISD::FP_ROUND: return "fp_round"; case ISD::FLT_ROUNDS_: return "flt_rounds"; @@ -225,8 +236,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FP_TO_UINT: return "fp_to_uint"; case ISD::BITCAST: return "bitcast"; case ISD::ADDRSPACECAST: return "addrspacecast"; - case ISD::FP16_TO_FP32: return "fp16_to_fp32"; - case ISD::FP32_TO_FP16: return "fp32_to_fp16"; + case ISD::FP16_TO_FP: return "fp16_to_fp"; + case ISD::FP_TO_FP16: return "fp_to_fp16"; case ISD::CONVERT_RNDSAT: { switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) { @@ -325,7 +336,7 @@ const char 
*SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { } } -void SDNode::dump() const { dump(0); } +void SDNode::dump() const { dump(nullptr); } void SDNode::dump(const SelectionDAG *G) const { print(dbgs(), G); dbgs() << '\n'; @@ -352,7 +363,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(), e = MN->memoperands_end(); i != e; ++i) { OS << **i; - if (llvm::next(i) != e) + if (std::next(i) != e) OS << " "; } OS << ">"; @@ -385,7 +396,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { dyn_cast<GlobalAddressSDNode>(this)) { int64_t offset = GADN->getOffset(); OS << '<'; - WriteAsOperand(OS, GADN->getGlobal()); + GADN->getGlobal()->printAsOperand(OS); OS << '>'; if (offset > 0) OS << " + " << offset; @@ -422,7 +433,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << LBB->getName() << " "; OS << (const void*)BBDN->getBasicBlock() << ">"; } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) { - OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0); + OS << ' ' << PrintReg(R->getReg(), G ? 
G->getTarget().getRegisterInfo() :nullptr); } else if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(this)) { OS << "'" << ES->getSymbol() << "'"; @@ -476,9 +487,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { dyn_cast<BlockAddressSDNode>(this)) { int64_t offset = BA->getOffset(); OS << "<"; - WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false); + BA->getBlockAddress()->getFunction()->printAsOperand(OS, false); OS << ", "; - WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false); + BA->getBlockAddress()->getBasicBlock()->printAsOperand(OS, false); OS << ">"; if (offset > 0) OS << " + " << offset; @@ -590,7 +601,7 @@ static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, void SDNode::dumpr() const { VisitedSDNodeSet once; - DumpNodesr(dbgs(), this, 0, 0, once); + DumpNodesr(dbgs(), this, 0, nullptr, once); } void SDNode::dumpr(const SelectionDAG *G) const { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index b8b4db4..57e22e2 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "isel" #include "llvm/CodeGen/SelectionDAGISel.h" #include "ScheduleDAGSDNodes.h" #include "SelectionDAGBuilder.h" @@ -20,7 +19,6 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" @@ -33,8 +31,8 @@ #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/DebugInfo.h" #include 
"llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" @@ -59,6 +57,8 @@ #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "isel" + STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on"); STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected"); STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel"); @@ -141,6 +141,25 @@ STATISTIC(NumFastIselFailShuffleVector,"Fast isel fails on ShuffleVector"); STATISTIC(NumFastIselFailExtractValue,"Fast isel fails on ExtractValue"); STATISTIC(NumFastIselFailInsertValue,"Fast isel fails on InsertValue"); STATISTIC(NumFastIselFailLandingPad,"Fast isel fails on LandingPad"); + +// Intrinsic instructions... +STATISTIC(NumFastIselFailIntrinsicCall, "Fast isel fails on Intrinsic call"); +STATISTIC(NumFastIselFailSAddWithOverflow, + "Fast isel fails on sadd.with.overflow"); +STATISTIC(NumFastIselFailUAddWithOverflow, + "Fast isel fails on uadd.with.overflow"); +STATISTIC(NumFastIselFailSSubWithOverflow, + "Fast isel fails on ssub.with.overflow"); +STATISTIC(NumFastIselFailUSubWithOverflow, + "Fast isel fails on usub.with.overflow"); +STATISTIC(NumFastIselFailSMulWithOverflow, + "Fast isel fails on smul.with.overflow"); +STATISTIC(NumFastIselFailUMulWithOverflow, + "Fast isel fails on umul.with.overflow"); +STATISTIC(NumFastIselFailFrameaddress, "Fast isel fails on Frameaddress"); +STATISTIC(NumFastIselFailSqrt, "Fast isel fails on sqrt call"); +STATISTIC(NumFastIselFailStackMap, "Fast isel fails on StackMap call"); +STATISTIC(NumFastIselFailPatchPoint, "Fast isel fails on PatchPoint call"); #endif static cl::opt<bool> @@ -213,7 +232,7 @@ MachinePassRegistry RegisterScheduler::Registry; static cl::opt<RegisterScheduler::FunctionPassCtor, false, RegisterPassParser<RegisterScheduler> > ISHeuristic("pre-RA-sched", - cl::init(&createDefaultScheduler), + 
cl::init(&createDefaultScheduler), cl::Hidden, cl::desc("Instruction schedulers available (before register" " allocation):")); @@ -300,7 +319,7 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, "'usesCustomInserter', it must implement " "TargetLowering::EmitInstrWithCustomInserter!"; #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, @@ -357,7 +376,7 @@ static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) { // Loop for blocks with phi nodes. for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { PHINode *PN = dyn_cast<PHINode>(BB->begin()); - if (PN == 0) continue; + if (!PN) continue; ReprocessBlock: // For each block with a PHI node, check to see if any of the input values @@ -367,7 +386,7 @@ static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) { for (BasicBlock::iterator I = BB->begin(); (PN = dyn_cast<PHINode>(I)); ++I) for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i)); - if (CE == 0 || !CE->canTrap()) continue; + if (!CE || !CE->canTrap()) continue; // The only case we have to worry about is when the edge is critical. // Since this block has a PHI Node, we assume it has multiple input @@ -400,8 +419,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { RegInfo = &MF->getRegInfo(); AA = &getAnalysis<AliasAnalysis>(); LibInfo = &getAnalysis<TargetLibraryInfo>(); - TTI = getAnalysisIfAvailable<TargetTransformInfo>(); - GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0; + GFI = Fn.hasGC() ? 
&getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr; TargetSubtargetInfo &ST = const_cast<TargetSubtargetInfo&>(TM.getSubtarget<TargetSubtargetInfo>()); @@ -418,13 +436,13 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this); - CurDAG->init(*MF, TTI, TLI); + CurDAG->init(*MF, TLI); FuncInfo->set(Fn, *MF, CurDAG); if (UseMBPI && OptLevel != CodeGenOpt::None) FuncInfo->BPI = &getAnalysis<BranchProbabilityInfo>(); else - FuncInfo->BPI = 0; + FuncInfo->BPI = nullptr; SDB->init(GFI, *AA, LibInfo); @@ -449,7 +467,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) { MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1]; bool hasFI = MI->getOperand(0).isFI(); - unsigned Reg = hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg(); + unsigned Reg = + hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) EntryMBB->insert(EntryMBB->begin(), MI); else { @@ -457,7 +476,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { if (Def) { MachineBasicBlock::iterator InsertPos = Def; // FIXME: VR def may not be in entry block. - Def->getParent()->insert(llvm::next(InsertPos), MI); + Def->getParent()->insert(std::next(InsertPos), MI); } else DEBUG(dbgs() << "Dropping debug info for dead vreg" << TargetRegisterInfo::virtReg2Index(Reg) << "\n"); @@ -483,16 +502,17 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // If this vreg is directly copied into an exported register then // that COPY instructions also need DBG_VALUE, if it is the only // user of LDI->second. 
- MachineInstr *CopyUseMI = NULL; - for (MachineRegisterInfo::use_iterator - UI = RegInfo->use_begin(LDI->second); - MachineInstr *UseMI = UI.skipInstruction();) { + MachineInstr *CopyUseMI = nullptr; + for (MachineRegisterInfo::use_instr_iterator + UI = RegInfo->use_instr_begin(LDI->second), + E = RegInfo->use_instr_end(); UI != E; ) { + MachineInstr *UseMI = &*(UI++); if (UseMI->isDebugValue()) continue; if (UseMI->isCopy() && !CopyUseMI && UseMI->getParent() == EntryMBB) { CopyUseMI = UseMI; continue; } // Otherwise this is another use or second copy use. - CopyUseMI = NULL; break; + CopyUseMI = nullptr; break; } if (CopyUseMI) { MachineInstr *NewMI = @@ -509,21 +529,17 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // Determine if there are any calls in this machine function. MachineFrameInfo *MFI = MF->getFrameInfo(); - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; - ++I) { - + for (const auto &MBB : *MF) { if (MFI->hasCalls() && MF->hasInlineAsm()) break; - const MachineBasicBlock *MBB = I; - for (MachineBasicBlock::const_iterator II = MBB->begin(), IE = MBB->end(); - II != IE; ++II) { - const MCInstrDesc &MCID = TM.getInstrInfo()->get(II->getOpcode()); + for (const auto &MI : MBB) { + const MCInstrDesc &MCID = TM.getInstrInfo()->get(MI.getOpcode()); if ((MCID.isCall() && !MCID.isReturn()) || - II->isStackAligningInlineAsm()) { + MI.isStackAligningInlineAsm()) { MFI->setHasCalls(true); } - if (II->isInlineAsm()) { + if (MI.isInlineAsm()) { MF->setHasInlineAsm(true); } } @@ -564,6 +580,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // at this point. 
FuncInfo->clear(); + DEBUG(dbgs() << "*** MachineFunction at end of ISel ***\n"); + DEBUG(MF->print(dbgs())); + return true; } @@ -621,7 +640,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { continue; unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src); - CurDAG->ComputeMaskedBits(Src, KnownZero, KnownOne); + CurDAG->computeKnownBits(Src, KnownZero, KnownOne); FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne); } while (!Worklist.empty()); } @@ -801,7 +820,7 @@ public: /// NodeDeleted - Handle nodes deleted from the graph. If the node being /// deleted is the current ISelPosition node, update ISelPosition. /// - virtual void NodeDeleted(SDNode *N, SDNode *E) { + void NodeDeleted(SDNode *N, SDNode *E) override { if (ISelPosition == SelectionDAG::allnodes_iterator(N)) ++ISelPosition; } @@ -974,7 +993,37 @@ static void collectFailStats(const Instruction *I) { case Instruction::FCmp: NumFastIselFailFCmp++; return; case Instruction::PHI: NumFastIselFailPHI++; return; case Instruction::Select: NumFastIselFailSelect++; return; - case Instruction::Call: NumFastIselFailCall++; return; + case Instruction::Call: { + if (auto const *Intrinsic = dyn_cast<IntrinsicInst>(I)) { + switch (Intrinsic->getIntrinsicID()) { + default: + NumFastIselFailIntrinsicCall++; return; + case Intrinsic::sadd_with_overflow: + NumFastIselFailSAddWithOverflow++; return; + case Intrinsic::uadd_with_overflow: + NumFastIselFailUAddWithOverflow++; return; + case Intrinsic::ssub_with_overflow: + NumFastIselFailSSubWithOverflow++; return; + case Intrinsic::usub_with_overflow: + NumFastIselFailUSubWithOverflow++; return; + case Intrinsic::smul_with_overflow: + NumFastIselFailSMulWithOverflow++; return; + case Intrinsic::umul_with_overflow: + NumFastIselFailUMulWithOverflow++; return; + case Intrinsic::frameaddress: + NumFastIselFailFrameaddress++; return; + case Intrinsic::sqrt: + NumFastIselFailSqrt++; return; + case Intrinsic::experimental_stackmap: + 
NumFastIselFailStackMap++; return; + case Intrinsic::experimental_patchpoint_void: // fall-through + case Intrinsic::experimental_patchpoint_i64: + NumFastIselFailPatchPoint++; return; + } + } + NumFastIselFailCall++; + return; + } case Instruction::Shl: NumFastIselFailShl++; return; case Instruction::LShr: NumFastIselFailLShr++; return; case Instruction::AShr: NumFastIselFailAShr++; return; @@ -991,7 +1040,7 @@ static void collectFailStats(const Instruction *I) { void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. - FastISel *FastIS = 0; + FastISel *FastIS = nullptr; if (TM.Options.EnableFastISel) FastIS = getTargetLowering()->createFastISel(*FuncInfo, LibInfo); @@ -1064,15 +1113,15 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // where they are, so we can be sure to emit subsequent instructions // after them. if (FuncInfo->InsertPt != FuncInfo->MBB->begin()) - FastIS->setLastLocalValue(llvm::prior(FuncInfo->InsertPt)); + FastIS->setLastLocalValue(std::prev(FuncInfo->InsertPt)); else - FastIS->setLastLocalValue(0); + FastIS->setLastLocalValue(nullptr); } unsigned NumFastIselRemaining = std::distance(Begin, End); // Do FastISel on as many instructions as possible. for (; BI != Begin; --BI) { - const Instruction *Inst = llvm::prior(BI); + const Instruction *Inst = std::prev(BI); // If we no longer require this instruction, skip it. if (isFoldedOrDeadInstruction(Inst, FuncInfo)) { @@ -1093,7 +1142,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Try to fold the load if so. 
const Instruction *BeforeInst = Inst; while (BeforeInst != Begin) { - BeforeInst = llvm::prior(BasicBlock::const_iterator(BeforeInst)); + BeforeInst = std::prev(BasicBlock::const_iterator(BeforeInst)); if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo)) break; } @@ -1101,7 +1150,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { BeforeInst->hasOneUse() && FastIS->tryToFoldLoad(cast<LoadInst>(BeforeInst), Inst)) { // If we succeeded, don't re-select the load. - BI = llvm::next(BasicBlock::const_iterator(BeforeInst)); + BI = std::next(BasicBlock::const_iterator(BeforeInst)); --NumFastIselRemaining; ++NumFastIselSuccess; } @@ -1604,7 +1653,7 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, APInt NeededMask = DesiredMask & ~ActualMask; APInt KnownZero, KnownOne; - CurDAG->ComputeMaskedBits(LHS, KnownZero, KnownOne); + CurDAG->computeKnownBits(LHS, KnownZero, KnownOne); // If all the missing bits in the or are already known to be set, match! if ((NeededMask & KnownOne) == NeededMask) @@ -1673,7 +1722,7 @@ static SDNode *findGlueUse(SDNode *N) { if (Use.getResNo() == FlagResNo) return Use.getUser(); } - return NULL; + return nullptr; } /// findNonImmUse - Return true if "Use" is a non-immediate use of "Def". 
@@ -1780,7 +1829,7 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, EVT VT = Root->getValueType(Root->getNumValues()-1); while (VT == MVT::Glue) { SDNode *GU = findGlueUse(Root); - if (GU == NULL) + if (!GU) break; Root = GU; VT = Root->getValueType(Root->getNumValues()-1); @@ -1802,12 +1851,39 @@ SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { SelectInlineAsmMemoryOperands(Ops); EVT VTs[] = { MVT::Other, MVT::Glue }; - SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), - VTs, &Ops[0], Ops.size()); + SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), VTs, Ops); + New->setNodeId(-1); + return New.getNode(); +} + +SDNode +*SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) { + SDLoc dl(Op); + MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(0)); + const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); + unsigned Reg = getTargetLowering()->getRegisterByName( + RegStr->getString().data(), Op->getValueType(0)); + SDValue New = CurDAG->getCopyFromReg( + CurDAG->getEntryNode(), dl, Reg, Op->getValueType(0)); New->setNodeId(-1); return New.getNode(); } +SDNode +*SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) { + SDLoc dl(Op); + MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); + const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); + unsigned Reg = getTargetLowering()->getRegisterByName( + RegStr->getString().data(), Op->getOperand(2).getValueType()); + SDValue New = CurDAG->getCopyToReg( + CurDAG->getEntryNode(), dl, Reg, Op->getOperand(2)); + New->setNodeId(-1); + return New.getNode(); +} + + + SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) { return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF,N->getValueType(0)); } @@ -1843,7 +1919,7 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain, // Now that all the normal results are replaced, we replace the chain and // glue results if present. 
if (!ChainNodesMatched.empty()) { - assert(InputChain.getNode() != 0 && + assert(InputChain.getNode() && "Matched input chains but didn't produce a chain"); // Loop over all of the nodes we matched that produced a chain result. // Replace all the chain results with the final chain we ended up with. @@ -1874,7 +1950,7 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain, // If the result produces glue, update any glue results in the matched // pattern with the glue result. - if (InputGlue.getNode() != 0) { + if (InputGlue.getNode()) { // Handle any interior nodes explicitly marked. for (unsigned i = 0, e = GlueResultNodesMatched.size(); i != e; ++i) { SDNode *FRN = GlueResultNodesMatched[i]; @@ -2077,13 +2153,13 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched, if (InputChains.size() == 1) return InputChains[0]; return CurDAG->getNode(ISD::TokenFactor, SDLoc(ChainNodesMatched[0]), - MVT::Other, &InputChains[0], InputChains.size()); + MVT::Other, InputChains); } /// MorphNode - Handle morphing a node in place for the selector. SDNode *SelectionDAGISel:: MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, - const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo) { + ArrayRef<SDValue> Ops, unsigned EmitNodeInfo) { // It is possible we're using MorphNodeTo to replace a node with no // normal results with one that has a normal result (or we could be // adding a chain) and the input could have glue and chains as well. @@ -2103,7 +2179,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, // Call the underlying SelectionDAG routine to do the transmogrification. Note // that this deletes operands of the old node that become dead. - SDNode *Res = CurDAG->MorphNodeTo(Node, ~TargetOpc, VTList, Ops, NumOps); + SDNode *Res = CurDAG->MorphNodeTo(Node, ~TargetOpc, VTList, Ops); // MorphNodeTo can operate in two ways: if an existing node with the // specified operands exists, it can just return it. 
Otherwise, it @@ -2195,8 +2271,7 @@ CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SDValue N, const TargetLowering *TLI, - unsigned ChildNo) { + SDValue N, const TargetLowering *TLI, unsigned ChildNo) { if (ChildNo >= N.getNumOperands()) return false; // Match fails if out of range child #. return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI); @@ -2228,7 +2303,15 @@ CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, Val = GetVBR(Val, MatcherTable, MatcherIndex); ConstantSDNode *C = dyn_cast<ConstantSDNode>(N); - return C != 0 && C->getSExtValue() == Val; + return C && C->getSExtValue() == Val; +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, unsigned ChildNo) { + if (ChildNo >= N.getNumOperands()) + return false; // Match fails if out of range child #. 
+ return ::CheckInteger(MatcherTable, MatcherIndex, N.getOperand(ChildNo)); } LLVM_ATTRIBUTE_ALWAYS_INLINE static bool @@ -2241,7 +2324,7 @@ CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, if (N->getOpcode() != ISD::AND) return false; ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); - return C != 0 && SDISel.CheckAndMask(N.getOperand(0), C, Val); + return C && SDISel.CheckAndMask(N.getOperand(0), C, Val); } LLVM_ATTRIBUTE_ALWAYS_INLINE static bool @@ -2254,7 +2337,7 @@ CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, if (N->getOpcode() != ISD::OR) return false; ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); - return C != 0 && SDISel.CheckOrMask(N.getOperand(0), C, Val); + return C && SDISel.CheckOrMask(N.getOperand(0), C, Val); } /// IsPredicateKnownToFail - If we know how and can do so without pushing a @@ -2314,6 +2397,14 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, case SelectionDAGISel::OPC_CheckInteger: Result = !::CheckInteger(Table, Index, N); return Index; + case SelectionDAGISel::OPC_CheckChild0Integer: + case SelectionDAGISel::OPC_CheckChild1Integer: + case SelectionDAGISel::OPC_CheckChild2Integer: + case SelectionDAGISel::OPC_CheckChild3Integer: + case SelectionDAGISel::OPC_CheckChild4Integer: + Result = !::CheckChildInteger(Table, Index, N, + Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Integer); + return Index; case SelectionDAGISel::OPC_CheckAndImm: Result = !::CheckAndImm(Table, Index, N, SDISel); return Index; @@ -2378,13 +2469,15 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case ISD::LIFETIME_START: case ISD::LIFETIME_END: NodeToMatch->setNodeId(-1); // Mark selected. 
- return 0; + return nullptr; case ISD::AssertSext: case ISD::AssertZext: CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0), NodeToMatch->getOperand(0)); - return 0; + return nullptr; case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch); + case ISD::READ_REGISTER: return Select_READ_REGISTER(NodeToMatch); + case ISD::WRITE_REGISTER: return Select_WRITE_REGISTER(NodeToMatch); case ISD::UNDEF: return Select_UNDEF(NodeToMatch); } @@ -2530,7 +2623,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, } case OPC_RecordNode: { // Remember this node, it may end up being an operand in the pattern. - SDNode *Parent = 0; + SDNode *Parent = nullptr; if (NodeStack.size() > 1) Parent = NodeStack[NodeStack.size()-2].getNode(); RecordedNodes.push_back(std::make_pair(N, Parent)); @@ -2694,6 +2787,12 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_CheckInteger: if (!::CheckInteger(MatcherTable, MatcherIndex, N)) break; continue; + case OPC_CheckChild0Integer: case OPC_CheckChild1Integer: + case OPC_CheckChild2Integer: case OPC_CheckChild3Integer: + case OPC_CheckChild4Integer: + if (!::CheckChildInteger(MatcherTable, MatcherIndex, N, + Opcode-OPC_CheckChild0Integer)) break; + continue; case OPC_CheckAndImm: if (!::CheckAndImm(MatcherTable, MatcherIndex, N, *this)) break; continue; @@ -2731,7 +2830,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (Val & 128) Val = GetVBR(Val, MatcherTable, MatcherIndex); RecordedNodes.push_back(std::pair<SDValue, SDNode*>( - CurDAG->getTargetConstant(Val, VT), (SDNode*)0)); + CurDAG->getTargetConstant(Val, VT), nullptr)); continue; } case OPC_EmitRegister: { @@ -2739,7 +2838,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; unsigned RegNo = MatcherTable[MatcherIndex++]; RecordedNodes.push_back(std::pair<SDValue, SDNode*>( - CurDAG->getRegister(RegNo, VT), 
(SDNode*)0)); + CurDAG->getRegister(RegNo, VT), nullptr)); continue; } case OPC_EmitRegister2: { @@ -2751,7 +2850,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, unsigned RegNo = MatcherTable[MatcherIndex++]; RegNo |= MatcherTable[MatcherIndex++] << 8; RecordedNodes.push_back(std::pair<SDValue, SDNode*>( - CurDAG->getRegister(RegNo, VT), (SDNode*)0)); + CurDAG->getRegister(RegNo, VT), nullptr)); continue; } @@ -2776,7 +2875,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_EmitMergeInputChains1_0: // OPC_EmitMergeInputChains, 1, 0 case OPC_EmitMergeInputChains1_1: { // OPC_EmitMergeInputChains, 1, 1 // These are space-optimized forms of OPC_EmitMergeInputChains. - assert(InputChain.getNode() == 0 && + assert(!InputChain.getNode() && "EmitMergeInputChains should be the first chain producing node"); assert(ChainNodesMatched.empty() && "Should only have one EmitMergeInputChains per match"); @@ -2797,13 +2896,13 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // Merge the input chains if they are not intra-pattern references. InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); - if (InputChain.getNode() == 0) + if (!InputChain.getNode()) break; // Failed to merge. continue; } case OPC_EmitMergeInputChains: { - assert(InputChain.getNode() == 0 && + assert(!InputChain.getNode() && "EmitMergeInputChains should be the first chain producing node"); // This node gets a list of nodes we matched in the input that have // chains. We want to token factor all of the input chains to these nodes @@ -2839,7 +2938,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // Merge the input chains if they are not intra-pattern references. InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); - if (InputChain.getNode() == 0) + if (!InputChain.getNode()) break; // Failed to merge. 
continue; @@ -2850,7 +2949,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg"); unsigned DestPhysReg = MatcherTable[MatcherIndex++]; - if (InputChain.getNode() == 0) + if (!InputChain.getNode()) InputChain = CurDAG->getEntryNode(); InputChain = CurDAG->getCopyToReg(InputChain, SDLoc(NodeToMatch), @@ -2866,7 +2965,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid EmitNodeXForm"); SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo); - RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, (SDNode*) 0)); + RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, nullptr)); continue; } @@ -2898,7 +2997,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, else if (VTs.size() == 2) VTList = CurDAG->getVTList(VTs[0], VTs[1]); else - VTList = CurDAG->getVTList(VTs.data(), VTs.size()); + VTList = CurDAG->getVTList(VTs); // Get the operand list. unsigned NumOps = MatcherTable[MatcherIndex++]; @@ -2932,11 +3031,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // If this has chain/glue inputs, add them. if (EmitNodeInfo & OPFL_Chain) Ops.push_back(InputChain); - if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != 0) + if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != nullptr) Ops.push_back(InputGlue); // Create the node. - SDNode *Res = 0; + SDNode *Res = nullptr; if (Opcode != OPC_MorphNodeTo) { // If this is a normal EmitNode command, just create the new node and // add the results to the RecordedNodes list. 
@@ -2947,17 +3046,16 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, for (unsigned i = 0, e = VTs.size(); i != e; ++i) { if (VTs[i] == MVT::Other || VTs[i] == MVT::Glue) break; RecordedNodes.push_back(std::pair<SDValue,SDNode*>(SDValue(Res, i), - (SDNode*) 0)); + nullptr)); } } else if (NodeToMatch->getOpcode() != ISD::DELETED_NODE) { - Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops.data(), Ops.size(), - EmitNodeInfo); + Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops, EmitNodeInfo); } else { // NodeToMatch was eliminated by CSE when the target changed the DAG. // We will visit the equivalent node later. DEBUG(dbgs() << "Node was eliminated by CSE\n"); - return 0; + return nullptr; } // If the node had chain/glue results, update our notion of the current @@ -3087,7 +3185,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // FIXME: We just return here, which interacts correctly with SelectRoot // above. We should fix this to not return an SDNode* anymore. 
- return 0; + return nullptr; } } @@ -3099,7 +3197,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, while (1) { if (MatchScopes.empty()) { CannotYetSelect(NodeToMatch); - return 0; + return nullptr; } // Restore the interpreter state back to the point where the scope was diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index b752b482..4df5ede 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -15,12 +15,11 @@ #include "ScheduleDAGSDNodes.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" @@ -28,6 +27,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "dag-printer" + namespace llvm { template<> struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits { @@ -125,9 +126,9 @@ namespace llvm { static void addCustomGraphFeatures(SelectionDAG *G, GraphWriter<SelectionDAG*> &GW) { - GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot"); + GW.emitSimpleNode(nullptr, "plaintext=circle", "GraphRoot"); if (G->getRoot().getNode()) - GW.emitEdge(0, -1, G->getRoot().getNode(), G->getRoot().getResNo(), + GW.emitEdge(nullptr, -1, G->getRoot().getNode(), G->getRoot().getResNo(), "color=blue,style=dashed"); } }; @@ -290,10 +291,10 @@ std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const { void ScheduleDAGSDNodes::getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const { if (DAG) { // Draw a special "GraphRoot" node to 
indicate the root of the graph. - GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot"); + GW.emitSimpleNode(nullptr, "plaintext=circle", "GraphRoot"); const SDNode *N = DAG->getRoot().getNode(); if (N && N->getNodeId() != -1) - GW.emitEdge(0, -1, &SUnits[N->getNodeId()], -1, + GW.emitEdge(nullptr, -1, &SUnits[N->getNodeId()], -1, "color=blue,style=dashed"); } } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 82b068d..05ace41 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/Support/CommandLine.h" @@ -39,7 +40,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm, : TargetLoweringBase(tm, tlof) {} const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { - return NULL; + return nullptr; } /// Check whether a given call node is in tail position within its function. 
If @@ -74,6 +75,7 @@ void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS, isSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet); isNest = CS->paramHasAttr(AttrIdx, Attribute::Nest); isByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal); + isInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca); isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned); Alignment = CS->getParamAlignment(AttrIdx); } @@ -101,12 +103,11 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy()); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); - TargetLowering:: - CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, - false, 0, getLibcallCallingConv(LC), - /*isTailCall=*/false, - doesNotReturn, isReturnValueUsed, Callee, Args, - DAG, dl); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) + .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setNoReturn(doesNotReturn).setDiscardResult(!isReturnValueUsed) + .setSExtResult(isSigned).setZExtResult(!isSigned); return LowerCallTo(CLI); } @@ -224,7 +225,7 @@ unsigned TargetLowering::getJumpTableEncoding() const { return MachineJumpTableInfo::EK_BlockAddress; // In PIC mode, if the target supports a GPRel32 directive, use it. - if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != 0) + if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr) return MachineJumpTableInfo::EK_GPRel32BlockAddress; // Otherwise, use a label difference. @@ -326,6 +327,10 @@ TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, assert(Op.getNode()->getNumValues() == 1 && "ShrinkDemandedOp only supports nodes with one result!"); + // Early return, as this function cannot handle vector types. + if (Op.getValueType().isVector()) + return false; + // Don't do this if the node has another user, which may require the // full value. 
if (!Op.getNode()->hasOneUse()) @@ -384,7 +389,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (Depth != 0) { // If not at the root, Just compute the KnownZero/KnownOne bits to // simplify things downstream. - TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); + TLO.DAG.computeKnownBits(Op, KnownZero, KnownOne, Depth); return false; } // If this is the root being simplified, allow it to have multiple uses, @@ -414,7 +419,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { APInt LHSZero, LHSOne; // Do not increment Depth here; that can cause an infinite loop. - TLO.DAG.ComputeMaskedBits(Op.getOperand(0), LHSZero, LHSOne, Depth); + TLO.DAG.computeKnownBits(Op.getOperand(0), LHSZero, LHSOne, Depth); // If the LHS already has zeros where RHSC does, this and is dead. if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) return TLO.CombineTo(Op, Op.getOperand(0)); @@ -846,6 +851,31 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } break; } + case ISD::BUILD_PAIR: { + EVT HalfVT = Op.getOperand(0).getValueType(); + unsigned HalfBitWidth = HalfVT.getScalarSizeInBits(); + + APInt MaskLo = NewMask.getLoBits(HalfBitWidth).trunc(HalfBitWidth); + APInt MaskHi = NewMask.getHiBits(HalfBitWidth).trunc(HalfBitWidth); + + APInt KnownZeroLo, KnownOneLo; + APInt KnownZeroHi, KnownOneHi; + + if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownZeroLo, + KnownOneLo, TLO, Depth + 1)) + return true; + + if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownZeroHi, + KnownOneHi, TLO, Depth + 1)) + return true; + + KnownZero = KnownZeroLo.zext(BitWidth) | + KnownZeroHi.zext(BitWidth).shl(HalfBitWidth); + + KnownOne = KnownOneLo.zext(BitWidth) | + KnownOneHi.zext(BitWidth).shl(HalfBitWidth); + break; + } case ISD::ZERO_EXTEND: { unsigned OperandBitWidth = Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); @@ -1038,8 +1068,8 @@ bool 
TargetLowering::SimplifyDemandedBits(SDValue Op, } // FALL THROUGH default: - // Just use ComputeMaskedBits to compute output bits. - TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); + // Just use computeKnownBits to compute output bits. + TLO.DAG.computeKnownBits(Op, KnownZero, KnownOne, Depth); break; } @@ -1051,14 +1081,14 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return false; } -/// computeMaskedBitsForTargetNode - Determine which of the bits specified +/// computeKnownBitsForTargetNode - Determine which of the bits specified /// in Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. -void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth) const { +void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || @@ -1072,6 +1102,7 @@ void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, /// targets that want to expose additional information about sign bits to the /// DAG Combiner. unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, + const SelectionDAG &, unsigned Depth) const { assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || @@ -1083,7 +1114,7 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, } /// ValueHasExactlyOneBitSet - Test if the given value is known to have exactly -/// one bit set. This differs from ComputeMaskedBits in that it doesn't need to +/// one bit set. This differs from computeKnownBits in that it doesn't need to /// determine which bit is set. 
/// static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { @@ -1106,15 +1137,69 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { // More could be done here, though the above checks are enough // to handle some common cases. - // Fall back to ComputeMaskedBits to catch other known cases. + // Fall back to computeKnownBits to catch other known cases. EVT OpVT = Val.getValueType(); unsigned BitWidth = OpVT.getScalarType().getSizeInBits(); APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(Val, KnownZero, KnownOne); + DAG.computeKnownBits(Val, KnownZero, KnownOne); return (KnownZero.countPopulation() == BitWidth - 1) && (KnownOne.countPopulation() == 1); } +bool TargetLowering::isConstTrueVal(const SDNode *N) const { + if (!N) + return false; + + const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); + if (!CN) { + const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N); + if (!BV) + return false; + + BitVector UndefElements; + CN = BV->getConstantSplatNode(&UndefElements); + // Only interested in constant splats, and we don't try to handle undef + // elements in identifying boolean constants. + if (!CN || UndefElements.none()) + return false; + } + + switch (getBooleanContents(N->getValueType(0))) { + case UndefinedBooleanContent: + return CN->getAPIntValue()[0]; + case ZeroOrOneBooleanContent: + return CN->isOne(); + case ZeroOrNegativeOneBooleanContent: + return CN->isAllOnesValue(); + } + + llvm_unreachable("Invalid boolean contents"); +} + +bool TargetLowering::isConstFalseVal(const SDNode *N) const { + if (!N) + return false; + + const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); + if (!CN) { + const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N); + if (!BV) + return false; + + BitVector UndefElements; + CN = BV->getConstantSplatNode(&UndefElements); + // Only interested in constant splats, and we don't try to handle undef + // elements in identifying boolean constants. 
+ if (!CN || UndefElements.none()) + return false; + } + + if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent) + return !CN->getAPIntValue()[0]; + + return CN->isNullValue(); +} + /// SimplifySetCC - Try to simplify a setcc built with the specified operands /// and cc. If it is unable to simplify it, return a null SDValue. SDValue @@ -1130,7 +1215,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, case ISD::SETFALSE2: return DAG.getConstant(0, VT); case ISD::SETTRUE: case ISD::SETTRUE2: { - TargetLowering::BooleanContent Cnt = getBooleanContents(VT.isVector()); + TargetLowering::BooleanContent Cnt = + getBooleanContents(N0->getValueType(0)); return DAG.getConstant( Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT); } @@ -1331,10 +1417,14 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, EVT newVT = N0.getOperand(0).getValueType(); if (DCI.isBeforeLegalizeOps() || (isOperationLegal(ISD::SETCC, newVT) && - getCondCodeAction(Cond, newVT.getSimpleVT())==Legal)) - return DAG.getSetCC(dl, VT, N0.getOperand(0), - DAG.getConstant(C1.trunc(InSize), newVT), - Cond); + getCondCodeAction(Cond, newVT.getSimpleVT()) == Legal)) { + EVT NewSetCCVT = getSetCCResultType(*DAG.getContext(), newVT); + SDValue NewConst = DAG.getConstant(C1.trunc(InSize), newVT); + + SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0), + NewConst, Cond); + return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType()); + } break; } default: @@ -1417,7 +1507,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } else if (N1C->getAPIntValue() == 1 && (VT == MVT::i1 || - getBooleanContents(false) == ZeroOrOneBooleanContent)) { + getBooleanContents(N0->getValueType(0)) == + ZeroOrOneBooleanContent)) { SDValue Op0 = N0; if (Op0.getOpcode() == ISD::TRUNCATE) Op0 = Op0.getOperand(0); @@ -1468,18 +1559,32 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Canonicalize GE/LE comparisons to use 
GT/LT comparisons. if (Cond == ISD::SETGE || Cond == ISD::SETUGE) { if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true - // X >= C0 --> X > (C0-1) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(C1-1, N1.getValueType()), - (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT); + // X >= C0 --> X > (C0 - 1) + APInt C = C1 - 1; + ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT; + if ((DCI.isBeforeLegalizeOps() || + isCondCodeLegal(NewCC, VT.getSimpleVT())) && + (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 && + isLegalICmpImmediate(C.getSExtValue())))) { + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C, N1.getValueType()), + NewCC); + } } if (Cond == ISD::SETLE || Cond == ISD::SETULE) { if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true - // X <= C0 --> X < (C0+1) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(C1+1, N1.getValueType()), - (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT); + // X <= C0 --> X < (C0 + 1) + APInt C = C1 + 1; + ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT; + if ((DCI.isBeforeLegalizeOps() || + isCondCodeLegal(NewCC, VT.getSimpleVT())) && + (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 && + isLegalICmpImmediate(C.getSExtValue())))) { + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C, N1.getValueType()), + NewCC); + } } if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal) @@ -1535,7 +1640,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, N0.getOpcode() == ISD::AND) if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - EVT ShiftTy = DCI.isBeforeLegalizeOps() ? + EVT ShiftTy = DCI.isBeforeLegalize() ? getPointerTy() : getShiftAmountTy(N0.getValueType()); if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 // Perform the xform if the AND RHS is a single bit. 
@@ -1565,7 +1670,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, const APInt &AndRHSC = AndRHS->getAPIntValue(); if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { unsigned ShiftBits = AndRHSC.countTrailingZeros(); - EVT ShiftTy = DCI.isBeforeLegalizeOps() ? + EVT ShiftTy = DCI.isBeforeLegalize() ? getPointerTy() : getShiftAmountTy(N0.getValueType()); EVT CmpTy = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0), @@ -1593,7 +1698,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } NewC = NewC.lshr(ShiftBits); if (ShiftBits && isLegalICmpImmediate(NewC.getSExtValue())) { - EVT ShiftTy = DCI.isBeforeLegalizeOps() ? + EVT ShiftTy = DCI.isBeforeLegalize() ? getPointerTy() : getShiftAmountTy(N0.getValueType()); EVT CmpTy = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0, @@ -1674,7 +1779,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // The sext(setcc()) => setcc() optimization relies on the appropriate // constant being emitted. 
uint64_t EqVal = 0; - switch (getBooleanContents(N0.getValueType().isVector())) { + switch (getBooleanContents(N0.getValueType())) { case UndefinedBooleanContent: case ZeroOrOneBooleanContent: EqVal = ISD::isTrueWhenEqual(Cond); @@ -1988,7 +2093,7 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{ return "r"; if (ConstraintVT.isFloatingPoint()) return "f"; // works for many targets - return 0; + return nullptr; } /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops @@ -2022,12 +2127,12 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, if (Op.getOpcode() == ISD::ADD) { C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0)); - if (C == 0 || GA == 0) { + if (!C || !GA) { C = dyn_cast<ConstantSDNode>(Op.getOperand(0)); GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1)); } - if (C == 0 || GA == 0) - C = 0, GA = 0; + if (!C || !GA) + C = nullptr, GA = nullptr; } // If we find a valid operand, map to the TargetXXX version so that the @@ -2062,14 +2167,14 @@ std::pair<unsigned, const TargetRegisterClass*> TargetLowering:: getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const { if (Constraint.empty() || Constraint[0] != '{') - return std::make_pair(0u, static_cast<TargetRegisterClass*>(0)); + return std::make_pair(0u, static_cast<TargetRegisterClass*>(nullptr)); assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?"); // Remove the braces from around the name. StringRef RegName(Constraint.data()+1, Constraint.size()-2); std::pair<unsigned, const TargetRegisterClass*> R = - std::make_pair(0u, static_cast<const TargetRegisterClass*>(0)); + std::make_pair(0u, static_cast<const TargetRegisterClass*>(nullptr)); // Figure out which register class contains this reg. 
const TargetRegisterInfo *RI = getTargetMachine().getRegisterInfo(); @@ -2364,7 +2469,7 @@ TargetLowering::ConstraintWeight Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. - if (CallOperandVal == NULL) + if (!CallOperandVal) return CW_Default; // Look at the constraint type. switch (*constraint) { @@ -2520,7 +2625,8 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl, if (ShAmt) { // TODO: For UDIV use SRL instead of SRA. SDValue Amt = DAG.getConstant(ShAmt, getShiftAmountTy(Op1.getValueType())); - Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt); + Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, false, false, + true); d = d.ashr(ShAmt); } @@ -2537,9 +2643,9 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. See: /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> -SDValue TargetLowering:: -BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, - std::vector<SDNode*> *Created) const { +SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, + SelectionDAG &DAG, bool IsAfterLegalization, + std::vector<SDNode *> *Created) const { EVT VT = N->getValueType(0); SDLoc dl(N); @@ -2548,8 +2654,7 @@ BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, if (!isTypeLegal(VT)) return SDValue(); - APInt d = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue(); - APInt::ms magics = d.magic(); + APInt::ms magics = Divisor.magic(); // Multiply the numerator (operand 0) by the magic value // FIXME: We should support doing a MUL in a wider type @@ -2566,13 +2671,13 @@ BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, else return SDValue(); // No mulhs or equvialent // If d > 0 and m < 0, add the numerator - if (d.isStrictlyPositive() && 
magics.m.isNegative()) { + if (Divisor.isStrictlyPositive() && magics.m.isNegative()) { Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0)); if (Created) Created->push_back(Q.getNode()); } // If d < 0 and m > 0, subtract the numerator. - if (d.isNegative() && magics.m.isStrictlyPositive()) { + if (Divisor.isNegative() && magics.m.isStrictlyPositive()) { Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0)); if (Created) Created->push_back(Q.getNode()); @@ -2585,9 +2690,9 @@ BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, Created->push_back(Q.getNode()); } // Extract the sign bit and add it to the quotient - SDValue T = - DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(VT.getSizeInBits()-1, - getShiftAmountTy(Q.getValueType()))); + SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, + DAG.getConstant(VT.getScalarSizeInBits() - 1, + getShiftAmountTy(Q.getValueType()))); if (Created) Created->push_back(T.getNode()); return DAG.getNode(ISD::ADD, dl, VT, Q, T); @@ -2597,9 +2702,9 @@ BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. See: /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> -SDValue TargetLowering:: -BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, - std::vector<SDNode*> *Created) const { +SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, + SelectionDAG &DAG, bool IsAfterLegalization, + std::vector<SDNode *> *Created) const { EVT VT = N->getValueType(0); SDLoc dl(N); @@ -2610,22 +2715,21 @@ BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, // FIXME: We should use a narrower constant when the upper // bits are known to be zero. 
- const APInt &N1C = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue(); - APInt::mu magics = N1C.magicu(); + APInt::mu magics = Divisor.magicu(); SDValue Q = N->getOperand(0); // If the divisor is even, we can avoid using the expensive fixup by shifting // the divided value upfront. - if (magics.a != 0 && !N1C[0]) { - unsigned Shift = N1C.countTrailingZeros(); + if (magics.a != 0 && !Divisor[0]) { + unsigned Shift = Divisor.countTrailingZeros(); Q = DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(Shift, getShiftAmountTy(Q.getValueType()))); if (Created) Created->push_back(Q.getNode()); // Get magic number for the shifted divisor. - magics = N1C.lshr(Shift).magicu(Shift); + magics = Divisor.lshr(Shift).magicu(Shift); assert(magics.a == 0 && "Should use cheap fixup now"); } @@ -2644,7 +2748,7 @@ BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, Created->push_back(Q.getNode()); if (magics.a == 0) { - assert(magics.s < N1C.getBitWidth() && + assert(magics.s < Divisor.getBitWidth() && "We shouldn't generate an undefined shift!"); return DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType()))); @@ -2663,3 +2767,183 @@ BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, DAG.getConstant(magics.s-1, getShiftAmountTy(NPQ.getValueType()))); } } + +bool TargetLowering:: +verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const { + if (!isa<ConstantSDNode>(Op.getOperand(0))) { + DAG.getContext()->emitError("argument to '__builtin_return_address' must " + "be a constant integer"); + return true; + } + + return false; +} + +//===----------------------------------------------------------------------===// +// Legalization Utilities +//===----------------------------------------------------------------------===// + +bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, + SelectionDAG &DAG, SDValue LL, SDValue LH, + SDValue RL, SDValue RH) const { + EVT VT = 
N->getValueType(0); + SDLoc dl(N); + + bool HasMULHS = isOperationLegalOrCustom(ISD::MULHS, HiLoVT); + bool HasMULHU = isOperationLegalOrCustom(ISD::MULHU, HiLoVT); + bool HasSMUL_LOHI = isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT); + bool HasUMUL_LOHI = isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT); + if (HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI) { + unsigned OuterBitSize = VT.getSizeInBits(); + unsigned InnerBitSize = HiLoVT.getSizeInBits(); + unsigned LHSSB = DAG.ComputeNumSignBits(N->getOperand(0)); + unsigned RHSSB = DAG.ComputeNumSignBits(N->getOperand(1)); + + // LL, LH, RL, and RH must be either all NULL or all set to a value. + assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) || + (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode())); + + if (!LL.getNode() && !RL.getNode() && + isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) { + LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, N->getOperand(0)); + RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, N->getOperand(1)); + } + + if (!LL.getNode()) + return false; + + APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize); + if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) && + DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) { + // The inputs are both zero-extended. + if (HasUMUL_LOHI) { + // We can emit a umul_lohi. + Lo = DAG.getNode(ISD::UMUL_LOHI, dl, + DAG.getVTList(HiLoVT, HiLoVT), LL, RL); + Hi = SDValue(Lo.getNode(), 1); + return true; + } + if (HasMULHU) { + // We can emit a mulhu+mul. + Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL); + Hi = DAG.getNode(ISD::MULHU, dl, HiLoVT, LL, RL); + return true; + } + } + if (LHSSB > InnerBitSize && RHSSB > InnerBitSize) { + // The input values are both sign-extended. + if (HasSMUL_LOHI) { + // We can emit a smul_lohi. + Lo = DAG.getNode(ISD::SMUL_LOHI, dl, + DAG.getVTList(HiLoVT, HiLoVT), LL, RL); + Hi = SDValue(Lo.getNode(), 1); + return true; + } + if (HasMULHS) { + // We can emit a mulhs+mul. 
+ Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL); + Hi = DAG.getNode(ISD::MULHS, dl, HiLoVT, LL, RL); + return true; + } + } + + if (!LH.getNode() && !RH.getNode() && + isOperationLegalOrCustom(ISD::SRL, VT) && + isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) { + unsigned ShiftAmt = VT.getSizeInBits() - HiLoVT.getSizeInBits(); + SDValue Shift = DAG.getConstant(ShiftAmt, getShiftAmountTy(VT)); + LH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(0), Shift); + LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH); + RH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(1), Shift); + RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH); + } + + if (!LH.getNode()) + return false; + + if (HasUMUL_LOHI) { + // Lo,Hi = umul LHS, RHS. + SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl, + DAG.getVTList(HiLoVT, HiLoVT), LL, RL); + Lo = UMulLOHI; + Hi = UMulLOHI.getValue(1); + RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH); + LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL); + Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH); + Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH); + return true; + } + if (HasMULHU) { + Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL); + Hi = DAG.getNode(ISD::MULHU, dl, HiLoVT, LL, RL); + RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH); + LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL); + Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH); + Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH); + return true; + } + } + return false; +} + +bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, + SelectionDAG &DAG) const { + EVT VT = Node->getOperand(0).getValueType(); + EVT NVT = Node->getValueType(0); + SDLoc dl(SDValue(Node, 0)); + + // FIXME: Only f32 to i64 conversions are supported. 
+ if (VT != MVT::f32 || NVT != MVT::i64) + return false; + + // Expand f32 -> i64 conversion + // This algorithm comes from compiler-rt's implementation of fixsfdi: + // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), + VT.getSizeInBits()); + SDValue ExponentMask = DAG.getConstant(0x7F800000, IntVT); + SDValue ExponentLoBit = DAG.getConstant(23, IntVT); + SDValue Bias = DAG.getConstant(127, IntVT); + SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()), + IntVT); + SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, IntVT); + SDValue MantissaMask = DAG.getConstant(0x007FFFFF, IntVT); + + SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0)); + + SDValue ExponentBits = DAG.getNode(ISD::SRL, dl, IntVT, + DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask), + DAG.getZExtOrTrunc(ExponentLoBit, dl, getShiftAmountTy(IntVT))); + SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias); + + SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT, + DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask), + DAG.getZExtOrTrunc(SignLowBit, dl, getShiftAmountTy(IntVT))); + Sign = DAG.getSExtOrTrunc(Sign, dl, NVT); + + SDValue R = DAG.getNode(ISD::OR, dl, IntVT, + DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask), + DAG.getConstant(0x00800000, IntVT)); + + R = DAG.getZExtOrTrunc(R, dl, NVT); + + + R = DAG.getSelectCC(dl, Exponent, ExponentLoBit, + DAG.getNode(ISD::SHL, dl, NVT, R, + DAG.getZExtOrTrunc( + DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit), + dl, getShiftAmountTy(IntVT))), + DAG.getNode(ISD::SRL, dl, NVT, R, + DAG.getZExtOrTrunc( + DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent), + dl, getShiftAmountTy(IntVT))), + ISD::SETGT); + + SDValue Ret = DAG.getNode(ISD::SUB, dl, NVT, + DAG.getNode(ISD::XOR, dl, NVT, R, Sign), + Sign); + + Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, IntVT), + DAG.getConstant(0, 
NVT), Ret, ISD::SETLT); + return true; +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp index f769b44..0e89bad 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp @@ -15,8 +15,8 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; -TargetSelectionDAGInfo::TargetSelectionDAGInfo(const TargetMachine &TM) - : TD(TM.getDataLayout()) { +TargetSelectionDAGInfo::TargetSelectionDAGInfo(const DataLayout *DL) + : DL(DL) { } TargetSelectionDAGInfo::~TargetSelectionDAGInfo() { |