diff options
Diffstat (limited to 'contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp')
-rw-r--r-- | contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2537 |
1 files changed, 1638 insertions, 899 deletions
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2e09ec0..43f72c5 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -35,6 +35,8 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> using namespace llvm; @@ -43,6 +45,7 @@ STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); +STATISTIC(SlicedLoads, "Number of load sliced"); namespace { static cl::opt<bool> @@ -53,6 +56,14 @@ namespace { CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Include global information in alias analysis")); + /// Hidden option to stress test load slicing, i.e., when this option + /// is enabled, load slicing bypasses most of its profitability guards. + static cl::opt<bool> + StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, + cl::desc("Bypass the profitability model of load " + "slicing"), + cl::init(false)); + //------------------------------ DAGCombiner ---------------------------------// class DAGCombiner { @@ -62,6 +73,7 @@ namespace { CodeGenOpt::Level OptLevel; bool LegalOperations; bool LegalTypes; + bool ForCodeSize; // Worklist of all of the nodes that need to be simplified. // @@ -144,6 +156,7 @@ namespace { bool CombineToPreIndexedLoadStore(SDNode *N); bool CombineToPostIndexedLoadStore(SDNode *N); + bool SliceUpLoad(SDNode *N); void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); @@ -154,8 +167,8 @@ namespace { SDValue PromoteExtend(SDValue Op); bool PromoteLoad(SDValue Op); - void ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs, - SDValue Trunc, SDValue ExtLoad, DebugLoc DL, + void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, + SDValue Trunc, SDValue ExtLoad, SDLoc DL, ISD::NodeType ExtType); /// combine - call the node-specific routine that knows how to fold each @@ -246,18 +259,18 @@ namespace { SDValue visitVECTOR_SHUFFLE(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); - SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS); + SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS); SDValue visitShiftByConstant(SDNode *N, unsigned Amt); bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); - SDValue SimplifySelect(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2); - SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2, + SDValue SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2); + SDValue SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, bool NotExtCompare = false); SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, - DebugLoc DL, bool foldBooleans = true); + SDLoc DL, bool foldBooleans = true); SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); @@ -267,7 +280,7 @@ namespace { SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); - SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL); + SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL); SDValue ReduceLoadWidth(SDNode *N); SDValue ReduceLoadOpStoreWidth(SDNode *N); SDValue TransformFPLoadStorePair(SDNode *N); @@ -279,15 +292,15 @@ namespace { /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, /// looking for aliasing nodes and adding them to the Aliases vector. void GatherAllAliases(SDNode *N, SDValue OriginalChain, - SmallVector<SDValue, 8> &Aliases); + SmallVectorImpl<SDValue> &Aliases); /// isAlias - Return true if there is any possibility that the two addresses /// overlap. - bool isAlias(SDValue Ptr1, int64_t Size1, + bool isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1, const Value *SrcValue1, int SrcValueOffset1, unsigned SrcValueAlign1, const MDNode *TBAAInfo1, - SDValue Ptr2, int64_t Size2, + SDValue Ptr2, int64_t Size2, bool IsVolatile2, const Value *SrcValue2, int SrcValueOffset2, unsigned SrcValueAlign2, const MDNode *TBAAInfo2) const; @@ -299,7 +312,7 @@ namespace { /// FindAliasInfo - Extracts the relevant alias information from the memory /// node. Returns true if the operand was a load. bool FindAliasInfo(SDNode *N, - SDValue &Ptr, int64_t &Size, + SDValue &Ptr, int64_t &Size, bool &IsVolatile, const Value *&SrcValue, int &SrcValueOffset, unsigned &SrcValueAlignment, const MDNode *&TBAAInfo) const; @@ -315,8 +328,15 @@ namespace { public: DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) - : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), - OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {} + : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), + OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) { + AttributeSet FnAttrs = + DAG.getMachineFunction().getFunction()->getAttributes(); + ForCodeSize = + FnAttrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize) || + FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); + } /// Run - runs the dag combiner on all nodes in the work list void Run(CombineLevel AtLevel); @@ -326,7 +346,11 @@ namespace { /// getShiftAmountTy - Returns a type large enough to hold any valid /// shift amount - before type legalization these can be huge. EVT getShiftAmountTy(EVT LHSTy) { - return LegalTypes ? TLI.getShiftAmountTy(LHSTy) : TLI.getPointerTy(); + assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); + if (LHSTy.isVector()) + return LHSTy; + return LegalTypes ? TLI.getScalarShiftAmountTy(LHSTy) + : TLI.getPointerTy(); } /// isTypeLegal - This method returns true if we are running before type @@ -335,6 +359,12 @@ namespace { if (!LegalTypes) return true; return TLI.isTypeLegal(VT); } + + /// getSetCCResultType - Convenience wrapper around + /// TargetLowering::getSetCCResultType + EVT getSetCCResultType(EVT VT) const { + return TLI.getSetCCResultType(*DAG.getContext(), VT); + } }; } @@ -482,12 +512,12 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, if (isNegatibleForFree(Op.getOperand(0), LegalOperations, DAG.getTargetLoweringInfo(), &DAG.getTarget().Options, Depth+1)) - return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), Op.getOperand(1)); // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) - return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(1), DAG, LegalOperations, Depth+1), Op.getOperand(0)); @@ -501,7 +531,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, return Op.getOperand(1); // fold (fneg (fsub A, B)) -> (fsub B, A) - return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(0)); case ISD::FMUL: @@ -512,24 +542,24 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, if (isNegatibleForFree(Op.getOperand(0), LegalOperations, DAG.getTargetLoweringInfo(), &DAG.getTarget().Options, Depth+1)) - return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), Op.getOperand(1)); // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) - return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Op.getOperand(0), GetNegatedExpression(Op.getOperand(1), DAG, LegalOperations, Depth+1)); case ISD::FP_EXTEND: case ISD::FSIN: - return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1)); case ISD::FP_ROUND: - return DAG.getNode(ISD::FP_ROUND, Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), Op.getOperand(1)); @@ -573,7 +603,7 @@ static bool isOneUseSetCC(SDValue N) { return false; } -SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, +SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, SDValue N0, SDValue N1) { EVT VT = N0.getValueType(); if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) { @@ -587,7 +617,7 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, } if (N0.hasOneUse()) { // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use - SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, + SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1); AddToWorkList(OpNode.getNode()); return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); @@ -605,7 +635,7 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, } if (N1.hasOneUse()) { // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use - SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, + SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0); AddToWorkList(OpNode.getNode()); return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); @@ -706,7 +736,7 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { } void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { - DebugLoc dl = Load->getDebugLoc(); + SDLoc dl(Load); EVT VT = Load->getValueType(0); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0)); @@ -725,7 +755,7 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { Replace = false; - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { EVT MemVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) @@ -735,9 +765,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { Replace = true; return DAG.getExtLoad(ExtType, dl, PVT, LD->getChain(), LD->getBasePtr(), - LD->getPointerInfo(), - MemVT, LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + MemVT, LD->getMemOperand()); } unsigned Opc = Op.getOpcode(); @@ -767,7 +795,7 @@ SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT)) return SDValue(); EVT OldVT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); bool Replace = false; SDValue NewOp = PromoteOperand(Op, PVT, Replace); if (NewOp.getNode() == 0) @@ -782,7 +810,7 @@ SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { EVT OldVT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); bool Replace = false; SDValue NewOp = PromoteOperand(Op, PVT, Replace); if (NewOp.getNode() == 0) @@ -845,7 +873,7 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); return DAG.getNode(ISD::TRUNCATE, dl, VT, DAG.getNode(Opc, dl, PVT, NN0, NN1)); } @@ -892,7 +920,7 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); return DAG.getNode(ISD::TRUNCATE, dl, VT, DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1))); } @@ -923,7 +951,7 @@ SDValue DAGCombiner::PromoteExtend(SDValue Op) { // fold (aext (sext x)) -> (sext x) DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); - return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), VT, Op.getOperand(0)); + return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0)); } return SDValue(); } @@ -948,7 +976,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { if (TLI.IsDesirableToPromoteOp(Op, PVT)) { assert(PVT != VT && "Don't know what type to promote to!"); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDNode *N = Op.getNode(); LoadSDNode *LD = cast<LoadSDNode>(N); EVT MemVT = LD->getMemoryVT(); @@ -958,9 +986,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { : LD->getExtensionType(); SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, LD->getChain(), LD->getBasePtr(), - LD->getPointerInfo(), - MemVT, LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + MemVT, LD->getMemOperand()); SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD); DEBUG(dbgs() << "\nPromoting "; @@ -1008,7 +1034,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // try and combine it. while (!WorkListContents.empty()) { SDNode *N; - // The WorkListOrder holds the SDNodes in order, but it may contain duplicates. + // The WorkListOrder holds the SDNodes in order, but it may contain + // duplicates. // In order to avoid a linear scan, we use a set (O(log N)) to hold what the // worklist *should* contain, and check the node we want to visit is should // actually be visited. @@ -1245,7 +1272,7 @@ static SDValue getInputChainForNode(SDNode *N) { if (unsigned NumOps = N->getNumOperands()) { if (N->getOperand(0).getValueType() == MVT::Other) return N->getOperand(0); - else if (N->getOperand(NumOps-1).getValueType() == MVT::Other) + if (N->getOperand(NumOps-1).getValueType() == MVT::Other) return N->getOperand(NumOps-1); for (unsigned i = 1; i < NumOps-1; ++i) if (N->getOperand(i).getValueType() == MVT::Other) @@ -1320,7 +1347,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { Result = DAG.getEntryNode(); } else { // New and improved token factor. - Result = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), + Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, &Ops[0], Ops.size()); } @@ -1350,7 +1377,7 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { } static -SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1, +SDValue combineShlAddConstant(SDLoc DL, SDValue N0, SDValue N1, SelectionDAG &DAG) { EVT VT = N0.getValueType(); SDValue N00 = N0.getOperand(0); @@ -1360,10 +1387,10 @@ SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1, if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() && isa<ConstantSDNode>(N00.getOperand(1))) { // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), ) - N0 = DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, - DAG.getNode(ISD::SHL, N00.getDebugLoc(), VT, + N0 = DAG.getNode(ISD::ADD, SDLoc(N0), VT, + DAG.getNode(ISD::SHL, SDLoc(N00), VT, N00.getOperand(0), N01), - DAG.getNode(ISD::SHL, N01.getDebugLoc(), VT, + DAG.getNode(ISD::SHL, SDLoc(N01), VT, N00.getOperand(1), N01)); return DAG.getNode(ISD::ADD, DL, VT, N0, N1); } @@ -1400,7 +1427,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C); // canonicalize constant to RHS if (N0C && !N1C) - return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0); + return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0); // fold (add x, 0) -> x if (N1C && N1C->isNullValue()) return N0; @@ -1408,28 +1435,28 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C && GA->getOpcode() == ISD::GlobalAddress) - return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT, + return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT, GA->getOffset() + (uint64_t)N1C->getSExtValue()); // fold ((c1-A)+c2) -> (c1+c2)-A if (N1C && N0.getOpcode() == ISD::SUB) if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0))) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(N1C->getAPIntValue()+ N0C->getAPIntValue(), VT), N0.getOperand(1)); // reassociate add - SDValue RADD = ReassociateOps(ISD::ADD, N->getDebugLoc(), N0, N1); + SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1); if (RADD.getNode() != 0) return RADD; // fold ((0-A) + B) -> B-A if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) && cast<ConstantSDNode>(N0.getOperand(0))->isNullValue()) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, N0.getOperand(1)); + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1)); // fold (A + (0-B)) -> A-B if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) && cast<ConstantSDNode>(N1.getOperand(0))->isNullValue()) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1.getOperand(1)); + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1)); // fold (A+(B-A)) -> B if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1)) return N1.getOperand(0); @@ -1439,18 +1466,18 @@ SDValue DAGCombiner::visitADD(SDNode *N) { // fold (A+(B-(A+C))) to (B-C) if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && N0 == N1.getOperand(1).getOperand(0)) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0), + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0), N1.getOperand(1).getOperand(1)); // fold (A+(B-(C+A))) to (B-C) if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && N0 == N1.getOperand(1).getOperand(1)) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0), + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0), N1.getOperand(1).getOperand(0)); // fold (A+((B-A)+or-C)) to (B+or-C) if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) && N1.getOperand(0).getOpcode() == ISD::SUB && N0 == N1.getOperand(0).getOperand(1)) - return DAG.getNode(N1.getOpcode(), N->getDebugLoc(), VT, + return DAG.getNode(N1.getOpcode(), SDLoc(N), VT, N1.getOperand(0).getOperand(0), N1.getOperand(1)); // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant @@ -1461,9 +1488,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N11 = N1.getOperand(1); if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10)) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, - DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, N00, N10), - DAG.getNode(ISD::ADD, N1.getDebugLoc(), VT, N01, N11)); + return DAG.getNode(ISD::SUB, SDLoc(N), VT, + DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10), + DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11)); } if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) @@ -1481,17 +1508,17 @@ SDValue DAGCombiner::visitADD(SDNode *N) { // If all possibly-set bits on the LHS are clear on the RHS, return an OR. // If all possibly-set bits on the RHS are clear on the LHS, return an OR. if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) - return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1); } } // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), ) if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) { - SDValue Result = combineShlAddConstant(N->getDebugLoc(), N0, N1, DAG); + SDValue Result = combineShlAddConstant(SDLoc(N), N0, N1, DAG); if (Result.getNode()) return Result; } if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) { - SDValue Result = combineShlAddConstant(N->getDebugLoc(), N1, N0, DAG); + SDValue Result = combineShlAddConstant(SDLoc(N), N1, N0, DAG); if (Result.getNode()) return Result; } @@ -1501,8 +1528,8 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0))) if (C->getAPIntValue() == 0) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, - DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, + DAG.getNode(ISD::SHL, SDLoc(N), VT, N1.getOperand(0).getOperand(1), N1.getOperand(1))); if (N0.getOpcode() == ISD::SHL && @@ -1510,8 +1537,8 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0))) if (C->getAPIntValue() == 0) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, - DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, + DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0).getOperand(1), N0.getOperand(1))); @@ -1524,7 +1551,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) // and similar xforms where the inner op is either ~0 or 0. if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) { - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0); } } @@ -1533,7 +1560,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.getOperand(0).getValueType() == MVT::i1 && !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) { - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); } @@ -1550,18 +1577,18 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { // If the flag result is dead, turn this into an ADD. if (!N->hasAnyUseOfValue(1)) - return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, N1), + return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1), DAG.getNode(ISD::CARRY_FALSE, - N->getDebugLoc(), MVT::Glue)); + SDLoc(N), MVT::Glue)); // canonicalize constant to RHS. if (N0C && !N1C) - return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0); + return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0); // fold (addc x, 0) -> x + no carry out if (N1C && N1C->isNullValue()) return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, - N->getDebugLoc(), MVT::Glue)); + SDLoc(N), MVT::Glue)); // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. APInt LHSZero, LHSOne; @@ -1574,9 +1601,9 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { // If all possibly-set bits on the LHS are clear on the RHS, return an OR. // If all possibly-set bits on the RHS are clear on the LHS, return an OR. if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) - return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1), + return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1), DAG.getNode(ISD::CARRY_FALSE, - N->getDebugLoc(), MVT::Glue)); + SDLoc(N), MVT::Glue)); } return SDValue(); @@ -1591,30 +1618,25 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { // canonicalize constant to RHS if (N0C && !N1C) - return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), + return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(), N1, N0, CarryIn); // fold (adde x, y, false) -> (addc x, y) if (CarryIn.getOpcode() == ISD::CARRY_FALSE) - return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N0, N1); + return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1); return SDValue(); } // Since it may not be valid to emit a fold to zero for vector initializers // check if we can before folding. -static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT, - SelectionDAG &DAG, bool LegalOperations) { - if (!VT.isVector()) { +static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT, + SelectionDAG &DAG, + bool LegalOperations, bool LegalTypes) { + if (!VT.isVector()) + return DAG.getConstant(0, VT); + if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) return DAG.getConstant(0, VT); - } - if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { - // Produce a vector of zeros. - SDValue El = DAG.getConstant(0, VT.getVectorElementType()); - std::vector<SDValue> Ops(VT.getVectorNumElements(), El); - return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, - &Ops[0], Ops.size()); - } return SDValue(); } @@ -1640,17 +1662,17 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // fold (sub x, x) -> 0 // FIXME: Refactor this and xor and other similar operations together. if (N0 == N1) - return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations); + return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); // fold (sub c1, c2) -> c1-c2 if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C); // fold (sub x, c) -> (add x, -c) if (N1C) - return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, + return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, DAG.getConstant(-N1C->getAPIntValue(), VT)); // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) if (N0C && N0C->isAllOnesValue()) - return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0); + return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); // fold A-(A-B) -> B if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0)) return N1.getOperand(1); @@ -1664,7 +1686,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N1.getOpcode() == ISD::ADD && N0C && N1C1) { SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(), VT); - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, NewC, N1.getOperand(0)); } // fold ((A+(B+or-C))-B) -> A+or-C @@ -1672,19 +1694,19 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { (N0.getOperand(1).getOpcode() == ISD::SUB || N0.getOperand(1).getOpcode() == ISD::ADD) && N0.getOperand(1).getOperand(0) == N1) - return DAG.getNode(N0.getOperand(1).getOpcode(), N->getDebugLoc(), VT, + return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1).getOperand(1)); // fold ((A+(C+B))-B) -> A+C if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD && N0.getOperand(1).getOperand(1) == N1) - return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, + return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1).getOperand(0)); // fold ((A-(B-C))-C) -> A-B if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB && N0.getOperand(1).getOperand(1) == N1) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1).getOperand(0)); // If either operand of a sub is undef, the result is undef @@ -1698,7 +1720,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) { // fold (sub Sym, c) -> Sym-c if (N1C && GA->getOpcode() == ISD::GlobalAddress) - return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT, + return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT, GA->getOffset() - (uint64_t)N1C->getSExtValue()); // fold (sub Sym+c1, Sym+c2) -> c1-c2 @@ -1720,25 +1742,25 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) { // If the flag result is dead, turn this into an SUB. if (!N->hasAnyUseOfValue(1)) - return CombineTo(N, DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1), - DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + return CombineTo(N, DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1), + DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); // fold (subc x, x) -> 0 + no borrow if (N0 == N1) return CombineTo(N, DAG.getConstant(0, VT), - DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); // fold (subc x, 0) -> x + no borrow if (N1C && N1C->isNullValue()) - return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow if (N0C && N0C->isAllOnesValue()) - return CombineTo(N, DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0), - DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0), + DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); return SDValue(); @@ -1751,63 +1773,102 @@ SDValue DAGCombiner::visitSUBE(SDNode *N) { // fold (sube x, y, false) -> (subc x, y) if (CarryIn.getOpcode() == ISD::CARRY_FALSE) - return DAG.getNode(ISD::SUBC, N->getDebugLoc(), N->getVTList(), N0, N1); + return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1); return SDValue(); } +/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose +/// elements are all the same constant or undefined. +static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { + BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N); + if (!C) + return false; + + APInt SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + EVT EltVT = N->getValueType(0).getVectorElementType(); + return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, + HasAnyUndefs) && + EltVT.getSizeInBits() >= SplatBitSize); +} + SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); + // fold (mul x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + + bool N0IsConst = false; + bool N1IsConst = false; + APInt ConstValue0, ConstValue1; // fold vector ops if (VT.isVector()) { SDValue FoldedVOp = SimplifyVBinOp(N); if (FoldedVOp.getNode()) return FoldedVOp; + + N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0); + N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1); + } else { + N0IsConst = dyn_cast<ConstantSDNode>(N0) != 0; + ConstValue0 = N0IsConst ? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue() + : APInt(); + N1IsConst = dyn_cast<ConstantSDNode>(N1) != 0; + ConstValue1 = N1IsConst ? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue() + : APInt(); } - // fold (mul x, undef) -> 0 - if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); // fold (mul c1, c2) -> c1*c2 - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0C, N1C); + if (N0IsConst && N1IsConst) + return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0.getNode(), N1.getNode()); + // canonicalize constant to RHS - if (N0C && !N1C) - return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, N1, N0); + if (N0IsConst && !N1IsConst) + return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); // fold (mul x, 0) -> 0 - if (N1C && N1C->isNullValue()) + if (N1IsConst && ConstValue1 == 0) return N1; + // We require a splat of the entire scalar bit width for non-contiguous + // bit patterns. + bool IsFullSplat = + ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits(); + // fold (mul x, 1) -> x + if (N1IsConst && ConstValue1 == 1 && IsFullSplat) + return N0; // fold (mul x, -1) -> 0-x - if (N1C && N1C->isAllOnesValue()) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + if (N1IsConst && ConstValue1.isAllOnesValue()) + return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), N0); // fold (mul x, (1 << c)) -> x << c - if (N1C && N1C->getAPIntValue().isPowerOf2()) - return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, - DAG.getConstant(N1C->getAPIntValue().logBase2(), + if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat) + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, + DAG.getConstant(ConstValue1.logBase2(), getShiftAmountTy(N0.getValueType()))); // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c - if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) { - unsigned Log2Val = (-N1C->getAPIntValue()).logBase2(); + if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) { + unsigned Log2Val = (-ConstValue1).logBase2(); // FIXME: If the input is something that is easily negated (e.g. a // single-use add), we should put the negate there. - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), - DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, + DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, DAG.getConstant(Log2Val, getShiftAmountTy(N0.getValueType())))); } + + APInt Val; // (mul (shl X, c1), c2) -> (mul X, c2 << c1) - if (N1C && N0.getOpcode() == ISD::SHL && - isa<ConstantSDNode>(N0.getOperand(1))) { - SDValue C3 = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + if (N1IsConst && N0.getOpcode() == ISD::SHL && + (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || + isa<ConstantSDNode>(N0.getOperand(1)))) { + SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1)); AddToWorkList(C3.getNode()); - return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3); } @@ -1816,7 +1877,9 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { { SDValue Sh(0,0), Y(0,0); // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). - if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) && + if (N0.getOpcode() == ISD::SHL && + (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || + isa<ConstantSDNode>(N0.getOperand(1))) && N0.getNode()->hasOneUse()) { Sh = N0; Y = N1; } else if (N1.getOpcode() == ISD::SHL && @@ -1826,24 +1889,25 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { } if (Sh.getNode()) { - SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y); - return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1)); } } // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) - if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && - isa<ConstantSDNode>(N0.getOperand(1))) - return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, - DAG.getNode(ISD::MUL, N0.getDebugLoc(), VT, + if (N1IsConst && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && + (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || + isa<ConstantSDNode>(N0.getOperand(1)))) + return DAG.getNode(ISD::ADD, SDLoc(N), VT, + DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1), - DAG.getNode(ISD::MUL, N1.getDebugLoc(), VT, + DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1)); // reassociate mul - SDValue RMUL = ReassociateOps(ISD::MUL, N->getDebugLoc(), N0, N1); + SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1); if (RMUL.getNode() != 0) return RMUL; @@ -1871,13 +1935,13 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return N0; // fold (sdiv X, -1) -> 0-X if (N1C && N1C->isAllOnesValue()) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), N0); // If we know the sign bits of both operands are zero, strength reduce to a // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 if (!VT.isVector()) { if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::UDIV, N->getDebugLoc(), N1.getValueType(), + return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(), N0, N1); } // fold (sdiv X, pow2) -> simple ops after legalize @@ -1892,19 +1956,19 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); // Splat the sign bit into the register - SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, + SDValue SGN = DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, DAG.getConstant(VT.getSizeInBits()-1, getShiftAmountTy(N0.getValueType()))); AddToWorkList(SGN.getNode()); // Add (N0 < 0) ? abs2 - 1 : 0; - SDValue SRL = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, SGN, + SDValue SRL = DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN, DAG.getConstant(VT.getSizeInBits() - lg2, getShiftAmountTy(SGN.getValueType()))); - SDValue ADD = DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, SRL); + SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL); AddToWorkList(SRL.getNode()); AddToWorkList(ADD.getNode()); // Divide by pow2 - SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, ADD, + SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD, DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType()))); // If we're dividing by a positive value, we're done. Otherwise, we must @@ -1913,7 +1977,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return SRA; AddToWorkList(SRA.getNode()); - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA); } @@ -1952,7 +2016,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C); // fold (udiv x, (1 << c)) -> x >>u c if (N1C && N1C->getAPIntValue().isPowerOf2()) - return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, DAG.getConstant(N1C->getAPIntValue().logBase2(), getShiftAmountTy(N0.getValueType()))); // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 @@ -1960,13 +2024,13 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { if (SHC->getAPIntValue().isPowerOf2()) { EVT ADDVT = N1.getOperand(1).getValueType(); - SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT, + SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N), ADDVT, N1.getOperand(1), DAG.getConstant(SHC->getAPIntValue() .logBase2(), ADDVT)); AddToWorkList(Add.getNode()); - return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, Add); + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add); } } } @@ -2000,19 +2064,19 @@ SDValue DAGCombiner::visitSREM(SDNode *N) { // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 if (!VT.isVector()) { if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::UREM, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::UREM, SDLoc(N), VT, N0, N1); } // If X/C can be simplified by the division-by-constant logic, lower // X%C to the equivalent of X-X/C*C. if (N1C && !N1C->isNullValue()) { - SDValue Div = DAG.getNode(ISD::SDIV, N->getDebugLoc(), VT, N0, N1); + SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1); AddToWorkList(Div.getNode()); SDValue OptimizedDiv = combine(Div.getNode()); if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { - SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, OptimizedDiv, N1); - SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul); + SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); AddToWorkList(Mul.getNode()); return Sub; } @@ -2040,18 +2104,18 @@ SDValue DAGCombiner::visitUREM(SDNode *N) { return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C); // fold (urem x, pow2) -> (and x, pow2-1) if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2()) - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, + return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, DAG.getConstant(N1C->getAPIntValue()-1,VT)); // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) if (N1.getOpcode() == ISD::SHL) { if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { if (SHC->getAPIntValue().isPowerOf2()) { SDValue Add = - DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, + DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT)); AddToWorkList(Add.getNode()); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, Add); + return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add); } } } @@ -2059,13 +2123,13 @@ SDValue DAGCombiner::visitUREM(SDNode *N) { // If X/C can be simplified by the division-by-constant logic, lower // X%C to the equivalent of X-X/C*C. if (N1C && !N1C->isNullValue()) { - SDValue Div = DAG.getNode(ISD::UDIV, N->getDebugLoc(), VT, N0, N1); + SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1); AddToWorkList(Div.getNode()); SDValue OptimizedDiv = combine(Div.getNode()); if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { - SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, OptimizedDiv, N1); - SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul); + SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); AddToWorkList(Mul.getNode()); return Sub; } @@ -2086,14 +2150,14 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N->getValueType(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // fold (mulhs x, 0) -> 0 if (N1C && N1C->isNullValue()) return N1; // fold (mulhs x, 1) -> (sra x, size(x)-1) if (N1C && N1C->getAPIntValue() == 1) - return DAG.getNode(ISD::SRA, N->getDebugLoc(), N0.getValueType(), N0, + return DAG.getNode(ISD::SRA, SDLoc(N), N0.getValueType(), N0, DAG.getConstant(N0.getValueType().getSizeInBits() - 1, getShiftAmountTy(N0.getValueType()))); // fold (mulhs x, undef) -> 0 @@ -2124,7 +2188,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N->getValueType(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // fold (mulhu x, 0) -> 0 if (N1C && N1C->isNullValue()) @@ -2166,7 +2230,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, if (!HiExists && (!LegalOperations || TLI.isOperationLegal(LoOp, N->getValueType(0)))) { - SDValue Res = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0), + SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->op_begin(), N->getNumOperands()); return CombineTo(N, Res, Res); } @@ -2176,7 +2240,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, if (!LoExists && (!LegalOperations || TLI.isOperationLegal(HiOp, N->getValueType(1)))) { - SDValue Res = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1), + SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->op_begin(), N->getNumOperands()); return CombineTo(N, Res, Res); } @@ -2187,7 +2251,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, // If the two computed results can be simplified separately, separate them. if (LoExists) { - SDValue Lo = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0), + SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->op_begin(), N->getNumOperands()); AddToWorkList(Lo.getNode()); SDValue LoOpt = combine(Lo.getNode()); @@ -2198,7 +2262,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, } if (HiExists) { - SDValue Hi = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1), + SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->op_begin(), N->getNumOperands()); AddToWorkList(Hi.getNode()); SDValue HiOpt = combine(Hi.getNode()); @@ -2216,7 +2280,7 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { if (Res.getNode()) return Res; EVT VT = N->getValueType(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // If the type twice as wide is legal, transform the mulhu to a wider multiply // plus a shift. @@ -2246,7 +2310,7 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { if (Res.getNode()) return Res; EVT VT = N->getValueType(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // If the type twice as wide is legal, transform the mulhu to a wider multiply // plus a shift. @@ -2275,7 +2339,7 @@ SDValue DAGCombiner::visitSMULO(SDNode *N) { // (smulo x, 2) -> (saddo x, x) if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) if (C2->getAPIntValue() == 2) - return DAG.getNode(ISD::SADDO, N->getDebugLoc(), N->getVTList(), + return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(), N->getOperand(0), N->getOperand(0)); return SDValue(); @@ -2285,7 +2349,7 @@ SDValue DAGCombiner::visitUMULO(SDNode *N) { // (umulo x, 2) -> (uaddo x, x) if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) if (C2->getAPIntValue() == 2) - return DAG.getNode(ISD::UADDO, N->getDebugLoc(), N->getVTList(), + return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(), N->getOperand(0), N->getOperand(0)); return SDValue(); @@ -2336,11 +2400,11 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { !VT.isVector() && Op0VT == N1.getOperand(0).getValueType() && (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) { - SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), + SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), N0.getOperand(0).getValueType(), N0.getOperand(0), N1.getOperand(0)); AddToWorkList(ORNode.getNode()); - return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, ORNode); + return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode); } // For each of OP in SHL/SRL/SRA/AND... @@ -2350,11 +2414,11 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) && N0.getOperand(1) == N1.getOperand(1)) { - SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), + SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), N0.getOperand(0).getValueType(), N0.getOperand(0), N1.getOperand(0)); AddToWorkList(ORNode.getNode()); - return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode, N0.getOperand(1)); } @@ -2372,7 +2436,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { SDValue In1 = N1.getOperand(0); EVT In0Ty = In0.getValueType(); EVT In1Ty = In1.getValueType(); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // If both incoming values are integers, and the original types are the // same. if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) { @@ -2414,10 +2478,10 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { } if (SameMask) { - SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT, + SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0.getOperand(0), N1.getOperand(0)); AddToWorkList(Op.getNode()); - return DAG.getVectorShuffle(VT, N->getDebugLoc(), Op, + return DAG.getVectorShuffle(VT, SDLoc(N), Op, DAG.getUNDEF(VT), &SVN0->getMask()[0]); } } @@ -2460,7 +2524,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C); // canonicalize constant to RHS if (N0C && !N1C) - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N1, N0); + return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); // fold (and x, -1) -> x if (N1C && N1C->isAllOnesValue()) return N0; @@ -2469,7 +2533,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { APInt::getAllOnesValue(BitWidth))) return DAG.getConstant(0, VT); // reassociate and - SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1); + SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1); if (RAND.getNode() != 0) return RAND; // fold (and (or x, C), D) -> D if (C & D) == D @@ -2483,7 +2547,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { APInt Mask = ~N1C->getAPIntValue(); Mask = Mask.trunc(N0Op0.getValueSizeInBits()); if (DAG.MaskedValueIsZero(N0Op0, Mask)) { - SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), + SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N0.getValueType(), N0Op0); // Replace uses of the AND with uses of the Zero extend node. @@ -2496,7 +2560,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } } - // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> + // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must // already be zero by virtue of the width of the base type of the load. // @@ -2573,7 +2637,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { SDValue NewLoad(Load, 0); if (Load->getExtensionType() == ISD::EXTLOAD) { NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD, - Load->getValueType(0), Load->getDebugLoc(), + Load->getValueType(0), SDLoc(Load), Load->getChain(), Load->getBasePtr(), Load->getOffset(), Load->getMemoryVT(), Load->getMemOperand()); @@ -2604,26 +2668,39 @@ SDValue DAGCombiner::visitAND(SDNode *N) { LL.getValueType().isInteger()) { // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) { - SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(), + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), LR.getValueType(), LL, RL); AddToWorkList(ORNode.getNode()); - return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); + return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); } // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { - SDValue ANDNode = DAG.getNode(ISD::AND, N0.getDebugLoc(), + SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), LR.getValueType(), LL, RL); AddToWorkList(ANDNode.getNode()); - return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1); + return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); } // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { - SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(), + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), LR.getValueType(), LL, RL); AddToWorkList(ORNode.getNode()); - return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); + return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); } } + // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2) + if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) && + Op0 == Op1 && LL.getValueType().isInteger() && + Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() && + cast<ConstantSDNode>(RR)->isAllOnesValue()) || + (cast<ConstantSDNode>(LR)->isAllOnesValue() && + cast<ConstantSDNode>(RR)->isNullValue()))) { + SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(), + LL, DAG.getConstant(1, LL.getValueType())); + AddToWorkList(ADDNode.getNode()); + return DAG.getSetCC(SDLoc(N), VT, ADDNode, + DAG.getConstant(2, LL.getValueType()), ISD::SETUGE); + } // canonicalize equivalent to ll == rl if (LL == RR && LR == RL) { Op1 = ISD::getSetCCSwappedOperands(Op1); @@ -2636,8 +2713,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { (!LegalOperations || (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && TLI.isOperationLegal(ISD::SETCC, - TLI.getSetCCResultType(N0.getSimpleValueType()))))) - return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(), + getSetCCResultType(N0.getSimpleValueType()))))) + return DAG.getSetCC(SDLoc(N), N0.getValueType(), LL, LR, Result); } } @@ -2665,11 +2742,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), MemVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + MemVT, LN0->getMemOperand()); AddToWorkList(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -2687,12 +2762,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, - LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - MemVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, + LN0->getChain(), LN0->getBasePtr(), + MemVT, LN0->getMemOperand()); AddToWorkList(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -2710,7 +2782,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0); if (LN0->getExtensionType() != ISD::SEXTLOAD && - LN0->isUnindexed() && N0.hasOneUse() && LN0->hasOneUse()) { + LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) { uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); @@ -2721,11 +2793,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; SDValue NewLoad = - DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, - LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), - ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, + LN0->getChain(), LN0->getBasePtr(), ExtVT, + LN0->getMemOperand()); AddToWorkList(N); CombineTo(LN0, NewLoad, NewLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -2748,7 +2818,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { unsigned LVTStoreBytes = LoadedVT.getStoreSize(); unsigned EVTStoreBytes = ExtVT.getStoreSize(); unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; - NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType, + NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), PtrType, NewPtr, DAG.getConstant(PtrOff, PtrType)); Alignment = MinAlign(Alignment, PtrOff); } @@ -2757,11 +2827,11 @@ SDValue DAGCombiner::visitAND(SDNode *N) { EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; SDValue Load = - DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, + DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr, LN0->getPointerInfo(), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), - Alignment); + Alignment, LN0->getTBAAInfo()); AddToWorkList(N); CombineTo(LN0, Load, Load.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -2786,7 +2856,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { ADDC |= Mask; if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { SDValue NewAdd = - DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, + DAG.getNode(ISD::ADD, SDLoc(N0), VT, N0.getOperand(0), DAG.getConstant(ADDC, VT)); CombineTo(N0.getNode(), NewAdd); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -2797,6 +2867,14 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } } + // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const) + if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) { + SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), + N0.getOperand(1), false); + if (BSwap.getNode()) + return BSwap; + } + return SDValue(); } @@ -2881,17 +2959,27 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, if (N00 != N10) return SDValue(); - // Make sure everything beyond the low halfword is zero since the SRL 16 - // will clear the top bits. + // Make sure everything beyond the low halfword gets set to zero since the SRL + // 16 will clear the top bits. unsigned OpSizeInBits = VT.getSizeInBits(); - if (DemandHighBits && OpSizeInBits > 16 && - (!LookPassAnd0 || !LookPassAnd1) && - !DAG.MaskedValueIsZero(N10, APInt::getHighBitsSet(OpSizeInBits, 16))) - return SDValue(); + if (DemandHighBits && OpSizeInBits > 16) { + // If the left-shift isn't masked out then the only way this is a bswap is + // if all bits beyond the low 8 are 0. In that case the entire pattern + // reduces to a left shift anyway: leave it for other parts of the combiner. + if (!LookPassAnd0) + return SDValue(); - SDValue Res = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT, N00); + // However, if the right shift isn't masked out then it might be because + // it's not needed. See if we can spot that too. + if (!LookPassAnd1 && + !DAG.MaskedValueIsZero( + N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16))) + return SDValue(); + } + + SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00); if (OpSizeInBits > 16) - Res = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Res, + Res = DAG.getNode(ISD::SRL, SDLoc(N), VT, Res, DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT))); return Res; } @@ -2899,7 +2987,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, /// isBSwapHWordElement - Return true if the specified node is an element /// that makes up a 32-bit packed halfword byteswap. i.e. /// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8) -static bool isBSwapHWordElement(SDValue N, SmallVector<SDNode*,4> &Parts) { +static bool isBSwapHWordElement(SDValue N, SmallVectorImpl<SDNode *> &Parts) { if (!N.getNode()->hasOneUse()) return false; @@ -3024,19 +3112,19 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) return SDValue(); - SDValue BSwap = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT, + SDValue BSwap = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, SDValue(Parts[0],0)); - // Result of the bswap should be rotated by 16. If it's not legal, than + // Result of the bswap should be rotated by 16. If it's not legal, then // do (x << 16) | (x >> 16). SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT)); if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) - return DAG.getNode(ISD::ROTL, N->getDebugLoc(), VT, BSwap, ShAmt); + return DAG.getNode(ISD::ROTL, SDLoc(N), VT, BSwap, ShAmt); if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) - return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, BSwap, ShAmt); - return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, - DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, BSwap, ShAmt), - DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, BSwap, ShAmt)); + return DAG.getNode(ISD::ROTR, SDLoc(N), VT, BSwap, ShAmt); + return DAG.getNode(ISD::OR, SDLoc(N), VT, + DAG.getNode(ISD::SHL, SDLoc(N), VT, BSwap, ShAmt), + DAG.getNode(ISD::SRL, SDLoc(N), VT, BSwap, ShAmt)); } SDValue DAGCombiner::visitOR(SDNode *N) { @@ -3076,7 +3164,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C); // canonicalize constant to RHS if (N0C && !N1C) - return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N1, N0); + return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); // fold (or x, 0) -> x if (N1C && N1C->isNullValue()) return N0; @@ -3096,7 +3184,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return BSwap; // reassociate or - SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1); + SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1); if (ROR.getNode() != 0) return ROR; // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) @@ -3105,8 +3193,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) { isa<ConstantSDNode>(N0.getOperand(1))) { ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1)); if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, - DAG.getNode(ISD::OR, N0.getDebugLoc(), VT, + return DAG.getNode(ISD::AND, SDLoc(N), VT, + DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)); } @@ -3121,19 +3209,19 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) if (cast<ConstantSDNode>(LR)->isNullValue() && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { - SDValue ORNode = DAG.getNode(ISD::OR, LR.getDebugLoc(), + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), LR.getValueType(), LL, RL); AddToWorkList(ORNode.getNode()); - return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); + return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); } // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) if (cast<ConstantSDNode>(LR)->isAllOnesValue() && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { - SDValue ANDNode = DAG.getNode(ISD::AND, LR.getDebugLoc(), + SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), LR.getValueType(), LL, RL); AddToWorkList(ANDNode.getNode()); - return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1); + return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); } } // canonicalize equivalent to ll == rl @@ -3148,8 +3236,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) { (!LegalOperations || (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && TLI.isOperationLegal(ISD::SETCC, - TLI.getSetCCResultType(N0.getValueType()))))) - return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(), + getSetCCResultType(N0.getValueType()))))) + return DAG.getSetCC(SDLoc(N), N0.getValueType(), LL, LR, Result); } } @@ -3176,15 +3264,15 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { - SDValue X = DAG.getNode(ISD::OR, N0.getDebugLoc(), VT, + SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1.getOperand(0)); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X, + return DAG.getNode(ISD::AND, SDLoc(N), VT, X, DAG.getConstant(LHSMask | RHSMask, VT)); } } // See if this is some rotate idiom. - if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc())) + if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) return SDValue(Rot, 0); // Simplify the operands using demanded-bits information. @@ -3217,7 +3305,7 @@ static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { // MatchRotate - Handle an 'or' of two operands. If this is one of the many // idioms for rotate, and if the target supports rotation instructions, generate // a rot[lr]. -SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { +SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { // Must be a legal type. Expanded 'n promoted things won't work with rotates. EVT VT = LHS.getValueType(); if (!TLI.isTypeLegal(VT)) return 0; @@ -3292,33 +3380,9 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { if (LHSMask.getNode() || RHSMask.getNode()) return 0; - // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y) - // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y)) - if (RHSShiftAmt.getOpcode() == ISD::SUB && - LHSShiftAmt == RHSShiftAmt.getOperand(1)) { - if (ConstantSDNode *SUBC = - dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) { - if (SUBC->getAPIntValue() == OpSizeInBits) { - return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, - HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode(); - } - } - } - - // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotr x, y) - // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotl x, (sub 32, y)) - if (LHSShiftAmt.getOpcode() == ISD::SUB && - RHSShiftAmt == LHSShiftAmt.getOperand(1)) { - if (ConstantSDNode *SUBC = - dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) { - if (SUBC->getAPIntValue() == OpSizeInBits) { - return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg, - HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode(); - } - } - } - - // Look for sign/zext/any-extended or truncate cases: + // If the shift amount is sign/zext/any-extended just peel it off. + SDValue LExtOp0 = LHSShiftAmt; + SDValue RExtOp0 = RHSShiftAmt; if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || @@ -3327,37 +3391,31 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) { - SDValue LExtOp0 = LHSShiftAmt.getOperand(0); - SDValue RExtOp0 = RHSShiftAmt.getOperand(0); - if (RExtOp0.getOpcode() == ISD::SUB && - RExtOp0.getOperand(1) == LExtOp0) { - // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> - // (rotl x, y) - // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> - // (rotr x, (sub 32, y)) - if (ConstantSDNode *SUBC = - dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) { - if (SUBC->getAPIntValue() == OpSizeInBits) { - return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, - LHSShiftArg, - HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode(); - } - } - } else if (LExtOp0.getOpcode() == ISD::SUB && - RExtOp0 == LExtOp0.getOperand(1)) { - // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> - // (rotr x, y) - // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> - // (rotl x, (sub 32, y)) - if (ConstantSDNode *SUBC = - dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) { - if (SUBC->getAPIntValue() == OpSizeInBits) { - return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, - LHSShiftArg, - HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode(); - } - } - } + LExtOp0 = LHSShiftAmt.getOperand(0); + RExtOp0 = RHSShiftAmt.getOperand(0); + } + + if (RExtOp0.getOpcode() == ISD::SUB && RExtOp0.getOperand(1) == LExtOp0) { + // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> + // (rotl x, y) + // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> + // (rotr x, (sub 32, y)) + if (ConstantSDNode *SUBC = + dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) + if (SUBC->getAPIntValue() == OpSizeInBits) + return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, + HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode(); + } else if (LExtOp0.getOpcode() == ISD::SUB && + RExtOp0 == LExtOp0.getOperand(1)) { + // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> + // (rotr x, y) + // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> + // (rotl x, (sub 32, y)) + if (ConstantSDNode *SUBC = + dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) + if (SUBC->getAPIntValue() == OpSizeInBits) + return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg, + HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode(); } return 0; @@ -3396,12 +3454,12 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C); // canonicalize constant to RHS if (N0C && !N1C) - return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0); + return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); // fold (xor x, 0) -> x if (N1C && N1C->isNullValue()) return N0; // reassociate xor - SDValue RXOR = ReassociateOps(ISD::XOR, N->getDebugLoc(), N0, N1); + SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1); if (RXOR.getNode() != 0) return RXOR; @@ -3417,9 +3475,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { default: llvm_unreachable("Unhandled SetCC Equivalent!"); case ISD::SETCC: - return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC); + return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC); case ISD::SELECT_CC: - return DAG.getSelectCC(N->getDebugLoc(), LHS, RHS, N0.getOperand(2), + return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2), N0.getOperand(3), NotCC); } } @@ -3430,10 +3488,10 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { N0.getNode()->hasOneUse() && isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){ SDValue V = N0.getOperand(0); - V = DAG.getNode(ISD::XOR, N0.getDebugLoc(), V.getValueType(), V, + V = DAG.getNode(ISD::XOR, SDLoc(N0), V.getValueType(), V, DAG.getConstant(1, V.getValueType())); AddToWorkList(V.getNode()); - return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, V); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V); } // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc @@ -3442,10 +3500,10 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) { unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND; - LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS - RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS + LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS + RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode()); - return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS); + return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS); } } // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants @@ -3454,28 +3512,36 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) { unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND; - LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS - RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS + LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS + RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode()); - return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS); + return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS); } } + // fold (xor (and x, y), y) -> (and (not x), y) + if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && + N0->getOperand(1) == N1) { + SDValue X = N0->getOperand(0); + SDValue NotX = DAG.getNOT(SDLoc(X), X, VT); + AddToWorkList(NotX.getNode()); + return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1); + } // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2)) if (N1C && N0.getOpcode() == ISD::XOR) { ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0)); ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); if (N00C) - return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(1), + return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(1), DAG.getConstant(N1C->getAPIntValue() ^ N00C->getAPIntValue(), VT)); if (N01C) - return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(0), DAG.getConstant(N1C->getAPIntValue() ^ N01C->getAPIntValue(), VT)); } // fold (xor x, x) -> 0 if (N0 == N1) - return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations); + return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) if (N0.getOpcode() == N1.getOpcode()) { @@ -3548,17 +3614,17 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { } // Fold the constants, shifting the binop RHS by the shift amount. - SDValue NewRHS = DAG.getNode(N->getOpcode(), LHS->getOperand(1).getDebugLoc(), + SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)), N->getValueType(0), LHS->getOperand(1), N->getOperand(1)); // Create the new shift. SDValue NewShift = DAG.getNode(N->getOpcode(), - LHS->getOperand(0).getDebugLoc(), + SDLoc(LHS->getOperand(0)), VT, LHS->getOperand(0), N->getOperand(1)); // Create the new binop. - return DAG.getNode(LHS->getOpcode(), N->getDebugLoc(), VT, NewShift, NewRHS); + return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS); } SDValue DAGCombiner::visitSHL(SDNode *N) { @@ -3569,6 +3635,12 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + // fold (shl c1, c2) -> c1<<c2 if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C); @@ -3598,10 +3670,10 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); TruncC = TruncC.trunc(TruncVT.getSizeInBits()); - return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, - DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT, + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, + DAG.getNode(ISD::AND, SDLoc(N), TruncVT, DAG.getNode(ISD::TRUNCATE, - N->getDebugLoc(), + SDLoc(N), TruncVT, N100), DAG.getConstant(TruncC, TruncVT))); } @@ -3617,7 +3689,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { uint64_t c2 = N1C->getZExtValue(); if (c1 + c2 >= OpSizeInBits) return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), DAG.getConstant(c1 + c2, N1.getValueType())); } @@ -3639,13 +3711,34 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (c2 >= OpSizeInBits - InnerShiftSize) { if (c1 + c2 >= OpSizeInBits) return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SHL, N0->getDebugLoc(), VT, - DAG.getNode(N0.getOpcode(), N0->getDebugLoc(), VT, + return DAG.getNode(ISD::SHL, SDLoc(N0), VT, + DAG.getNode(N0.getOpcode(), SDLoc(N0), VT, N0.getOperand(0)->getOperand(0)), DAG.getConstant(c1 + c2, N1.getValueType())); } } + // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C)) + // Only fold this if the inner zext has no other uses to avoid increasing + // the total number of instructions. + if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() && + N0.getOperand(0).getOpcode() == ISD::SRL && + isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) { + uint64_t c1 = + cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue(); + if (c1 < VT.getSizeInBits()) { + uint64_t c2 = N1C->getZExtValue(); + if (c1 == c2) { + SDValue NewOp0 = N0.getOperand(0); + EVT CountVT = NewOp0.getOperand(1).getValueType(); + SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(), + NewOp0, DAG.getConstant(c2, CountVT)); + AddToWorkList(NewSHL.getNode()); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL); + } + } + } + // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or // (and (srl x, (sub c1, c2), MASK) // Only fold this if the inner shift has no other uses -- if it does, folding @@ -3660,14 +3753,14 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue Shift; if (c2 > c1) { Mask = Mask.shl(c2-c1); - Shift = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0), + Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), DAG.getConstant(c2-c1, N1.getValueType())); } else { Mask = Mask.lshr(c1-c2); - Shift = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), + Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), DAG.getConstant(c1-c2, N1.getValueType())); } - return DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, Shift, + return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift, DAG.getConstant(Mask, VT)); } } @@ -3678,7 +3771,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { VT.getSizeInBits() - N1C->getZExtValue()), VT); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), HiBitsMask); } @@ -3699,6 +3792,12 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + // fold (sra c1, c2) -> (sra c1, c2) if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C); @@ -3724,7 +3823,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { ExtVT, VT.getVectorNumElements()); if ((!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT))) - return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0), DAG.getValueType(ExtVT)); } @@ -3733,7 +3832,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { unsigned Sum = N1C->getZExtValue() + C1->getZExtValue(); if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1; - return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0), DAG.getConstant(Sum, N1C->getValueType(0))); } } @@ -3765,11 +3864,11 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue Amt = DAG.getConstant(ShiftAmt, getShiftAmountTy(N0.getOperand(0).getValueType())); - SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, + SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), VT, N0.getOperand(0), Amt); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), TruncVT, + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), TruncVT, Shift); - return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), + return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), N->getValueType(0), Trunc); } } @@ -3785,11 +3884,11 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits()); - return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, - DAG.getNode(ISD::AND, N->getDebugLoc(), + return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, + DAG.getNode(ISD::AND, SDLoc(N), TruncVT, DAG.getNode(ISD::TRUNCATE, - N->getDebugLoc(), + SDLoc(N), TruncVT, N100), DAG.getConstant(TruncC, TruncVT))); } @@ -3812,9 +3911,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue Amt = DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(), getShiftAmountTy(N0.getOperand(0).getOperand(0).getValueType())); - SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), LargeVT, + SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT, N0.getOperand(0).getOperand(0), Amt); - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, SRA); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA); } } @@ -3825,7 +3924,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // If the sign bit is known to be zero, switch this to a SRL. if (DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); if (N1C) { SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue()); @@ -3844,6 +3943,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + // fold (srl c1, c2) -> c1 >>u c2 if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C); @@ -3868,7 +3973,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { uint64_t c2 = N1C->getZExtValue(); if (c1 + c2 >= OpSizeInBits) return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), DAG.getConstant(c1 + c2, N1.getValueType())); } @@ -3886,8 +3991,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (c1 + OpSizeInBits == InnerShiftSize) { if (c1 + c2 >= InnerShiftSize) return DAG.getConstant(0, VT); - return DAG.getNode(ISD::TRUNCATE, N0->getDebugLoc(), VT, - DAG.getNode(ISD::SRL, N0->getDebugLoc(), InnerShiftVT, + return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, + DAG.getNode(ISD::SRL, SDLoc(N0), InnerShiftVT, N0.getOperand(0)->getOperand(0), DAG.getConstant(c1 + c2, ShiftCountVT))); } @@ -3897,12 +4002,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 && N0.getValueSizeInBits() <= 64) { uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits(); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), DAG.getConstant(~0ULL >> ShAmt, VT)); } - - // fold (srl (anyextend x), c) -> (anyextend (srl x, c)) + // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask) if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { // Shifting in all undef bits? EVT SmallVT = N0.getOperand(0).getValueType(); @@ -3911,11 +4015,14 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { uint64_t ShiftAmt = N1C->getZExtValue(); - SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT, + SDValue SmallShift = DAG.getNode(ISD::SRL, SDLoc(N0), SmallVT, N0.getOperand(0), DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT))); AddToWorkList(SmallShift.getNode()); - return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift); + APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()).lshr(ShiftAmt); + return DAG.getNode(ISD::AND, SDLoc(N), VT, + DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift), + DAG.getConstant(Mask, VT)); } } @@ -3923,7 +4030,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // bit, which is unmodified by sra. if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) { if (N0.getOpcode() == ISD::SRA) - return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), N1); + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1); } // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit). @@ -3951,12 +4058,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue Op = N0.getOperand(0); if (ShAmt) { - Op = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, Op, + Op = DAG.getNode(ISD::SRL, SDLoc(N0), VT, Op, DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType()))); AddToWorkList(Op.getNode()); } - return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, + return DAG.getNode(ISD::XOR, SDLoc(N), VT, Op, DAG.getConstant(1, VT)); } } @@ -3971,11 +4078,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); TruncC = TruncC.trunc(TruncVT.getSizeInBits()); - return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, - DAG.getNode(ISD::AND, N->getDebugLoc(), + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, + DAG.getNode(ISD::AND, SDLoc(N), TruncVT, DAG.getNode(ISD::TRUNCATE, - N->getDebugLoc(), + SDLoc(N), TruncVT, N100), DAG.getConstant(TruncC, TruncVT))); } @@ -4035,7 +4142,7 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) { // fold (ctlz c1) -> c2 if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::CTLZ, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0); return SDValue(); } @@ -4045,7 +4152,7 @@ SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { // fold (ctlz_zero_undef c1) -> c2 if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0); return SDValue(); } @@ -4055,7 +4162,7 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) { // fold (cttz c1) -> c2 if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::CTTZ, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0); return SDValue(); } @@ -4065,7 +4172,7 @@ SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { // fold (cttz_zero_undef c1) -> c2 if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); return SDValue(); } @@ -4075,7 +4182,7 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) { // fold (ctpop c1) -> c2 if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0); return SDValue(); } @@ -4100,7 +4207,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { return N2; // fold (select C, 1, X) -> (or C, X) if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1) - return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2); + return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); // fold (select C, 0, 1) -> (xor C, 1) if (VT.isInteger() && (VT0 == MVT::i1 || @@ -4110,38 +4217,38 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) { SDValue XORNode; if (VT == VT0) - return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT0, + return DAG.getNode(ISD::XOR, SDLoc(N), VT0, N0, DAG.getConstant(1, VT0)); - XORNode = DAG.getNode(ISD::XOR, N0.getDebugLoc(), VT0, + XORNode = DAG.getNode(ISD::XOR, SDLoc(N0), VT0, N0, DAG.getConstant(1, VT0)); AddToWorkList(XORNode.getNode()); if (VT.bitsGT(VT0)) - return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, XORNode); - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, XORNode); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode); } // fold (select C, 0, X) -> (and (not C), X) if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) { - SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT); + SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); AddToWorkList(NOTNode.getNode()); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, NOTNode, N2); + return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2); } // fold (select C, X, 1) -> (or (not C), X) if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) { - SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT); + SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); AddToWorkList(NOTNode.getNode()); - return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, NOTNode, N1); + return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1); } // fold (select C, X, 0) -> (and C, X) if (VT == MVT::i1 && N2C && N2C->isNullValue()) - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); // fold (select X, X, Y) -> (or X, Y) // fold (select X, 1, Y) -> (or X, Y) if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1))) - return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2); + return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); // fold (select X, Y, X) -> (and X, Y) // fold (select X, Y, 0) -> (and X, Y) if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0))) - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); // If we can fold this based on the true/false value, do so. if (SimplifySelectOps(N, N1, N2)) @@ -4155,20 +4262,37 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // about, since there is no way to mark an opcode illegal at all value types if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) - return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), N1, N2, N0.getOperand(2)); - return SimplifySelect(N->getDebugLoc(), N0, N1, N2); + return SimplifySelect(SDLoc(N), N0, N1, N2); } return SDValue(); } +static +std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) { + SDLoc DL(N); + EVT LoVT, HiVT; + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + + // Split the inputs. + SDValue Lo, Hi, LL, LH, RL, RH; + llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); + llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); + + Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); + Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); + + return std::make_pair(Lo, Hi); +} + SDValue DAGCombiner::visitVSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // Canonicalize integer abs. // vselect (setg[te] X, 0), X, -X -> @@ -4201,6 +4325,34 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { } } + // If the VSELECT result requires splitting and the mask is provided by a + // SETCC, then split both nodes and its operands before legalization. This + // prevents the type legalizer from unrolling SETCC into scalar comparisons + // and enables future optimizations (e.g. min/max pattern matching on X86). + if (N0.getOpcode() == ISD::SETCC) { + EVT VT = N->getValueType(0); + + // Check if any splitting is required. + if (TLI.getTypeAction(*DAG.getContext(), VT) != + TargetLowering::TypeSplitVector) + return SDValue(); + + SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH; + llvm::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG); + llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 1); + llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 2); + + Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL); + Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH); + + // Add the new VSELECT nodes to the work list in case they need to be split + // again. + AddToWorkList(Lo.getNode()); + AddToWorkList(Hi.getNode()); + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); + } + return SDValue(); } @@ -4217,35 +4369,37 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { return N2; // Determine if the condition we're dealing with is constant - SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()), - N0, N1, CC, N->getDebugLoc(), false); - if (SCC.getNode()) AddToWorkList(SCC.getNode()); + SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), + N0, N1, CC, SDLoc(N), false); + if (SCC.getNode()) { + AddToWorkList(SCC.getNode()); - if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) { - if (!SCCC->isNullValue()) - return N2; // cond always true -> true val - else - return N3; // cond always false -> false val - } + if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) { + if (!SCCC->isNullValue()) + return N2; // cond always true -> true val + else + return N3; // cond always false -> false val + } - // Fold to a simpler select_cc - if (SCC.getNode() && SCC.getOpcode() == ISD::SETCC) - return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N2.getValueType(), - SCC.getOperand(0), SCC.getOperand(1), N2, N3, - SCC.getOperand(2)); + // Fold to a simpler select_cc + if (SCC.getOpcode() == ISD::SETCC) + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(), + SCC.getOperand(0), SCC.getOperand(1), N2, N3, + SCC.getOperand(2)); + } // If we can fold this based on the true/false value, do so. if (SimplifySelectOps(N, N2, N3)) return SDValue(N, 0); // Don't revisit N. // fold select_cc into other things, such as min/max/abs - return SimplifySelectCC(N->getDebugLoc(), N0, N1, N2, N3, CC); + return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC); } SDValue DAGCombiner::visitSETCC(SDNode *N) { return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1), cast<CondCodeSDNode>(N->getOperand(2))->get(), - N->getDebugLoc()); + SDLoc(N)); } // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: @@ -4254,7 +4408,7 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) { // mentioned transformation is profitable. static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, unsigned ExtOpc, - SmallVector<SDNode*, 4> &ExtendNodes, + SmallVectorImpl<SDNode *> &ExtendNodes, const TargetLowering &TLI) { bool HasCopyToRegUses = false; bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType()); @@ -4312,8 +4466,8 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, return true; } -void DAGCombiner::ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs, - SDValue Trunc, SDValue ExtLoad, DebugLoc DL, +void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, + SDValue Trunc, SDValue ExtLoad, SDLoc DL, ISD::NodeType ExtType) { // Extend SetCC uses if necessary. for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { @@ -4340,12 +4494,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // fold (sext c1) -> c1 if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N0); // fold (sext (sext x)) -> (sext x) // fold (sext (aext x)) -> (sext x) if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) - return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N0.getOperand(0)); if (N0.getOpcode() == ISD::TRUNCATE) { @@ -4379,22 +4533,22 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign // bits, just sext from i32. if (NumSignBits > OpBits-MidBits) - return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, Op); + return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op); } else { // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign // bits, just truncate to i32. if (NumSignBits > OpBits-MidBits) - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); } // fold (sext (truncate x)) -> (sextinreg x). if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, N0.getValueType())) { if (OpBits < DestBits) - Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op); + Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op); else if (OpBits > DestBits) - Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op); - return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op, + Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op, DAG.getValueType(N0.getValueType())); } } @@ -4412,17 +4566,15 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - N0.getValueType(), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), N0.getValueType(), + LN0->getMemOperand()); CombineTo(N, ExtLoad); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::SIGN_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -4436,15 +4588,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) { - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - MemVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), MemVT, + LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), - DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad), ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -4467,23 +4617,20 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND, SetCCs, TLI); if (DoXform) { - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, LN0->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT, LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), LN0->getMemoryVT(), - LN0->isVolatile(), - LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getMemOperand()); APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.sext(VT.getSizeInBits()); - SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ExtLoad, DAG.getConstant(Mask, VT)); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, - N0.getOperand(0).getDebugLoc(), + SDLoc(N0.getOperand(0)), N0.getOperand(0).getValueType(), ExtLoad); CombineTo(N, And); CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::SIGN_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -4494,13 +4641,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // sext(setcc) -> sext_in_reg(vsetcc) for vectors. // Only do this before legalize for now. if (VT.isVector() && !LegalOperations && - TLI.getBooleanContents(true) == + TLI.getBooleanContents(true) == TargetLowering::ZeroOrNegativeOneBooleanContent) { EVT N0VT = N0.getOperand(0).getValueType(); // On some architectures (such as SSE/NEON/etc) the SETCC result type is // of the same size as the compared operands. Only optimize sext(setcc()) // if this is the case. - EVT SVT = TLI.getSetCCResultType(N0VT); + EVT SVT = getSetCCResultType(N0VT); // We know that the # elements of the results is the same as the // # elements of the compare (and the # elements of the compare result @@ -4508,24 +4655,19 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // we know that the element size of the sext'd result matches the // element size of the compare operands. if (VT.getSizeInBits() == SVT.getSizeInBits()) - return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); + // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then // truncate/sign extend - EVT MatchingElementType = - EVT::getIntegerVT(*DAG.getContext(), - N0VT.getScalarType().getSizeInBits()); - EVT MatchingVectorType = - EVT::getVectorVT(*DAG.getContext(), MatchingElementType, - N0VT.getVectorNumElements()); - + EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger(); if (SVT == MatchingVectorType) { - SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, + SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT); } } @@ -4534,24 +4676,26 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue NegOne = DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT); SDValue SCC = - SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), + SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1), NegOne, DAG.getConstant(0, VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; - if (!VT.isVector() && (!LegalOperations || - TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT)))) - return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, - DAG.getSetCC(N->getDebugLoc(), - TLI.getSetCCResultType(VT), - N0.getOperand(0), N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()), - NegOne, DAG.getConstant(0, VT)); + if (!VT.isVector() && + (!LegalOperations || + TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(VT)))) { + return DAG.getSelect(SDLoc(N), VT, + DAG.getSetCC(SDLoc(N), + getSetCCResultType(VT), + N0.getOperand(0), N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()), + NegOne, DAG.getConstant(0, VT)); + } } // fold (sext x) -> (zext x) if the sign bit is known zero. if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0); return SDValue(); } @@ -4600,11 +4744,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext c1) -> c1 if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0); // fold (zext (zext x)) -> (zext x) // fold (zext (aext x)) -> (zext x) if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) - return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0)); // fold (zext (truncate x)) -> (zext x) or @@ -4623,9 +4767,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { VT.getSizeInBits())); if (TruncatedBits == (KnownZero & TruncatedBits)) { if (VT.bitsGT(Op.getValueType())) - return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, Op); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op); if (VT.bitsLT(Op.getValueType())) - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); return Op; } @@ -4665,13 +4809,13 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue Op = N0.getOperand(0); if (Op.getValueType().bitsLT(VT)) { - Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op); + Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op); AddToWorkList(Op.getNode()); } else if (Op.getValueType().bitsGT(VT)) { - Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); + Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); AddToWorkList(Op.getNode()); } - return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), + return DAG.getZeroExtendInReg(Op, SDLoc(N), N0.getValueType().getScalarType()); } @@ -4685,13 +4829,13 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { !TLI.isZExtFree(N0.getValueType(), VT))) { SDValue X = N0.getOperand(0).getOperand(0); if (X.getValueType().bitsLT(VT)) { - X = DAG.getNode(ISD::ANY_EXTEND, X.getDebugLoc(), VT, X); + X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X); } else if (X.getValueType().bitsGT(VT)) { - X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X); + X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); } APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + return DAG.getNode(ISD::AND, SDLoc(N), VT, X, DAG.getConstant(Mask, VT)); } @@ -4708,18 +4852,16 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - N0.getValueType(), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), N0.getValueType(), + LN0->getMemOperand()); CombineTo(N, ExtLoad); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -4741,23 +4883,20 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND, SetCCs, TLI); if (DoXform) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT, LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), LN0->getMemoryVT(), - LN0->isVolatile(), - LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getMemOperand()); APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); - SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ExtLoad, DAG.getConstant(Mask, VT)); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, - N0.getOperand(0).getDebugLoc(), + SDLoc(N0.getOperand(0)), N0.getOperand(0).getValueType(), ExtLoad); CombineTo(N, And); CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -4772,15 +4911,13 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - MemVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), MemVT, + LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), - DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), + DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad), ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -4801,11 +4938,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // for that matter). Check to see that they are the same size. If so, // we know that the element size of the sext'd result matches the // element size of the compare operands. - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, - DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getNode(ISD::AND, SDLoc(N), VT, + DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()), - DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, + DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &OneOps[0], OneOps.size())); // If the desired elements are smaller or larger than the source @@ -4818,18 +4955,18 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { EVT::getVectorVT(*DAG.getContext(), MatchingElementType, N0VT.getVectorNumElements()); SDValue VsetCC = - DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), + DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, - DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT), - DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, + return DAG.getNode(ISD::AND, SDLoc(N), VT, + DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT), + DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &OneOps[0], OneOps.size())); } // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc SDValue SCC = - SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), + SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, VT), DAG.getConstant(0, VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; @@ -4852,7 +4989,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { return SDValue(); } - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // Ensure that the shift amount is wide enough for the shifted value. if (VT.getSizeInBits() >= 256) @@ -4872,14 +5009,14 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // fold (aext c1) -> c1 if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, N0); // fold (aext (aext x)) -> (aext x) // fold (aext (zext x)) -> (zext x) // fold (aext (sext x)) -> (sext x) if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND) - return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0)); + return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0)); // fold (aext (truncate (load x))) -> (aext (smaller load x)) // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n))) @@ -4902,8 +5039,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { if (TruncOp.getValueType() == VT) return TruncOp; // x iff x size == zext size. if (TruncOp.getValueType().bitsGT(VT)) - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, TruncOp); - return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, TruncOp); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp); + return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp); } // Fold (aext (and (trunc x), cst)) -> (and x, cst) @@ -4915,13 +5052,13 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { N0.getValueType())) { SDValue X = N0.getOperand(0).getOperand(0); if (X.getValueType().bitsLT(VT)) { - X = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, X); + X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X); } else if (X.getValueType().bitsGT(VT)) { - X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X); + X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X); } APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + return DAG.getNode(ISD::AND, SDLoc(N), VT, X, DAG.getConstant(Mask, VT)); } @@ -4938,17 +5075,15 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - N0.getValueType(), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), N0.getValueType(), + LN0->getMemOperand()); CombineTo(N, ExtLoad); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ANY_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -4962,14 +5097,12 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { N0.hasOneUse()) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); EVT MemVT = LN0->getMemoryVT(); - SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), MemVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + MemVT, LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), - DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad), ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -4986,7 +5119,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // we know that the element size of the sext'd result matches the // element size of the compare operands. if (VT.getSizeInBits() == N0VT.getSizeInBits()) - return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); // If the desired elements are smaller or larger than the source @@ -5000,16 +5133,16 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { EVT::getVectorVT(*DAG.getContext(), MatchingElementType, N0VT.getVectorNumElements()); SDValue VsetCC = - DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), + DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT); } } // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc SDValue SCC = - SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), + SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, VT), DAG.getConstant(0, VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); if (SCC.getNode()) @@ -5030,9 +5163,8 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { assert(CV != 0 && "Const value should be ConstSDNode."); const APInt &CVal = CV->getAPIntValue(); APInt NewVal = CVal & Mask; - if (NewVal != CVal) { + if (NewVal != CVal) return DAG.getConstant(NewVal, V.getValueType()); - } break; } case ISD::OR: @@ -5056,7 +5188,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { APInt NewMask = Mask << Amt; SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask); if (SimplifyLHS.getNode()) - return DAG.getNode(ISD::SRL, V.getDebugLoc(), V.getValueType(), + return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS, V.getOperand(1)); } } @@ -5160,12 +5292,19 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // For the transform to be legal, the load must produce only two values // (the value loaded and the chain). Don't transform a pre-increment - // load, for example, which produces an extra value. Otherwise the + // load, for example, which produces an extra value. Otherwise the // transformation is not equivalent, and the downstream logic to replace // uses gets things wrong. if (LN0->getNumValues() > 2) return SDValue(); + // If the load that we're shrinking is an extload and we're not just + // discarding the extension we can't simply shrink the load. Bail. + // TODO: It would be possible to merge the extensions in some cases. + if (LN0->getExtensionType() != ISD::NON_EXTLOAD && + LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt) + return SDValue(); + EVT PtrType = N0.getOperand(1).getValueType(); if (PtrType == MVT::Untyped || PtrType.isExtended()) @@ -5182,22 +5321,22 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { uint64_t PtrOff = ShAmt / 8; unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); - SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), + SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), PtrType, LN0->getBasePtr(), DAG.getConstant(PtrOff, PtrType)); AddToWorkList(NewPtr.getNode()); SDValue Load; if (ExtType == ISD::NON_EXTLOAD) - Load = DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, + Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), NewAlign); + LN0->isInvariant(), NewAlign, LN0->getTBAAInfo()); else - Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr, + Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), - NewAlign); + NewAlign, LN0->getTBAAInfo()); // Replace the old load's chain with the new load's chain. WorkListRemover DeadNodes(*this); @@ -5216,7 +5355,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { if (ShLeftAmt >= VT.getSizeInBits()) Result = DAG.getConstant(0, VT); else - Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, + Result = DAG.getNode(ISD::SHL, SDLoc(N0), VT, Result, DAG.getConstant(ShLeftAmt, ShImmTy)); } @@ -5234,7 +5373,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // fold (sext_in_reg c1) -> c1 if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF) - return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1); // If the input is already sign extended, just drop the extension. if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1) @@ -5242,10 +5381,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && - EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) { - return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, + EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0), N1); - } // fold (sext_in_reg (sext x)) -> (sext x) // fold (sext_in_reg (aext x)) -> (sext x) @@ -5254,12 +5392,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue N00 = N0.getOperand(0); if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits && (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) - return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1); + return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1); } // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero. if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits))) - return DAG.getZeroExtendInReg(N0, N->getDebugLoc(), EVT); + return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT); // fold operands of sext_in_reg based on knowledge that the top bits are not // demanded. @@ -5282,7 +5420,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // extended enough. unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0)); if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits) - return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1)); } } @@ -5294,12 +5432,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - EVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), EVT, + LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); AddToWorkList(ExtLoad.getNode()); @@ -5312,12 +5448,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - EVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), EVT, + LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -5328,7 +5462,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), N0.getOperand(1), false); if (BSwap.getNode() != 0) - return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1); } @@ -5345,21 +5479,21 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { return N0; // fold (truncate c1) -> c1 if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0); // fold (truncate (truncate x)) -> (truncate x) if (N0.getOpcode() == ISD::TRUNCATE) - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0)); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); // fold (truncate (ext x)) -> (ext x) or (truncate x) or x if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { if (N0.getOperand(0).getValueType().bitsLT(VT)) // if the source is smaller than the dest, we still need an extend - return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0)); if (N0.getOperand(0).getValueType().bitsGT(VT)) // if the source is larger than the dest, than we just need the truncate - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0)); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); // if the source and dest are the same type, we can drop both the extend // and the truncate. return N0.getOperand(0); @@ -5391,14 +5525,14 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue EltNo = N0->getOperand(1); if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) { int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); - EVT IndexTy = N0->getOperand(1).getValueType(); + EVT IndexTy = TLI.getVectorIdxTy(); int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); - SDValue V = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, N0.getOperand(0)); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - N->getDebugLoc(), TrTy, V, + SDLoc(N), TrTy, V, DAG.getConstant(Index, IndexTy)); } } @@ -5430,7 +5564,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset) Opnds.push_back(BuildVect.getOperand(i)); - return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, &Opnds[0], + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Opnds[0], Opnds.size()); } } @@ -5445,7 +5579,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits())); if (Shorter.getNode()) - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter); } // fold (truncate (load x)) -> (smaller load x) // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) @@ -5488,11 +5622,11 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { Opnds.push_back(DAG.getUNDEF(VTs[i])); continue; } - SDValue NV = DAG.getNode(ISD::TRUNCATE, V.getDebugLoc(), VTs[i], V); + SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V); AddToWorkList(NV.getNode()); Opnds.push_back(NV); } - return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, &Opnds[0], Opnds.size()); } } @@ -5538,7 +5672,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { if (NewAlign <= Align && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) - return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(), + return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(), LD1->getPointerInfo(), false, false, false, Align); } @@ -5575,7 +5709,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // If the input is a constant, let getNode fold it. if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { - SDValue Res = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, N0); + SDValue Res = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0); if (Res.getNode() != N) { if (!LegalOperations || TLI.isOperationLegal(Res.getNode()->getOpcode(), VT)) @@ -5592,7 +5726,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // (conv (conv x, t1), t2) -> (conv x, t2) if (N0.getOpcode() == ISD::BITCAST) - return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, + return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0.getOperand(0)); // fold (conv (load x)) -> (load (conv*)x) @@ -5600,20 +5734,22 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && // Do not change the width of a volatile load. !cast<LoadSDNode>(N0)->isVolatile() && - (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) { + (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && + TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); unsigned Align = TLI.getDataLayout()-> getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); unsigned OrigAlign = LN0->getAlignment(); if (Align <= OrigAlign) { - SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(), + SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(), LN0->getPointerInfo(), LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), OrigAlign); + LN0->isInvariant(), OrigAlign, + LN0->getTBAAInfo()); AddToWorkList(N); CombineTo(N0.getNode(), - DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), + DAG.getNode(ISD::BITCAST, SDLoc(N0), N0.getValueType(), Load), Load.getValue(1)); return Load; @@ -5623,20 +5759,20 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) // This often reduces constant pool loads. - if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) || - (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) && + if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) || + (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) && N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector() && !N0.getValueType().isVector()) { - SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT, + SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT, N0.getOperand(0)); AddToWorkList(NewConv.getNode()); APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); if (N0.getOpcode() == ISD::FNEG) - return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, + return DAG.getNode(ISD::XOR, SDLoc(N), VT, NewConv, DAG.getConstant(SignBit, VT)); assert(N0.getOpcode() == ISD::FABS); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + return DAG.getNode(ISD::AND, SDLoc(N), VT, NewConv, DAG.getConstant(~SignBit, VT)); } @@ -5650,38 +5786,38 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); if (isTypeLegal(IntXVT)) { - SDValue X = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), + SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0), IntXVT, N0.getOperand(1)); AddToWorkList(X.getNode()); // If X has a different width than the result/lhs, sext it or truncate it. unsigned VTWidth = VT.getSizeInBits(); if (OrigXWidth < VTWidth) { - X = DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, X); + X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X); AddToWorkList(X.getNode()); } else if (OrigXWidth > VTWidth) { // To get the sign bit in the right place, we have to shift it right // before truncating. - X = DAG.getNode(ISD::SRL, X.getDebugLoc(), + X = DAG.getNode(ISD::SRL, SDLoc(X), X.getValueType(), X, DAG.getConstant(OrigXWidth-VTWidth, X.getValueType())); AddToWorkList(X.getNode()); - X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X); + X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); AddToWorkList(X.getNode()); } APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); - X = DAG.getNode(ISD::AND, X.getDebugLoc(), VT, + X = DAG.getNode(ISD::AND, SDLoc(X), VT, X, DAG.getConstant(SignBit, VT)); AddToWorkList(X.getNode()); - SDValue Cst = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), + SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT, N0.getOperand(0)); - Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT, + Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT, Cst, DAG.getConstant(~SignBit, VT)); AddToWorkList(Cst.getNode()); - return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, X, Cst); + return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst); } } @@ -5722,8 +5858,8 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // Due to the FP element handling below calling this routine recursively, // we can end up with a scalar-to-vector node here. if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR) - return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT, - DAG.getNode(ISD::BITCAST, BV->getDebugLoc(), + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT, + DAG.getNode(ISD::BITCAST, SDLoc(BV), DstEltVT, BV->getOperand(0))); SmallVector<SDValue, 8> Ops; @@ -5732,12 +5868,12 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // If the vector element type is not legal, the BUILD_VECTOR operands // are promoted and implicitly truncated. Make that explicit here. if (Op.getValueType() != SrcEltVT) - Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op); - Ops.push_back(DAG.getNode(ISD::BITCAST, BV->getDebugLoc(), + Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op); + Ops.push_back(DAG.getNode(ISD::BITCAST, SDLoc(BV), DstEltVT, Op)); AddToWorkList(Ops.back().getNode()); } - return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, &Ops[0], Ops.size()); } @@ -5794,7 +5930,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { } EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); - return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, &Ops[0], Ops.size()); } @@ -5821,7 +5957,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { Ops.push_back(DAG.getConstant(ThisVal, DstEltVT)); if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal) // Simply turn this into a SCALAR_TO_VECTOR of the new type. - return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT, + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT, Ops[0]); OpVal = OpVal.lshr(DstBitSize); } @@ -5831,7 +5967,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); } - return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, &Ops[0], Ops.size()); } @@ -5850,10 +5986,10 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // fold (fadd c1, c2) -> c1 + c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N1); // canonicalize constant to RHS if (N0CFP && !N1CFP) - return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0); + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0); // fold (fadd A, 0) -> A if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) @@ -5861,20 +5997,20 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // fold (fadd A, (fneg B)) -> (fsub A, B) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2) - return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, + return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); // fold (fadd (fneg A), B) -> (fsub B, A) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2) - return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1, + return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N1, GetNegatedExpression(N0, DAG, LegalOperations)); // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1))) - return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0), - DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0), + DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(1), N1)); // No FP constant should be created after legalization as Instruction @@ -5883,22 +6019,20 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // We don't need test this condition for transformation like following, as // the DAG being transformed implies it is legal to take FP constant as // operand. - // + // // (fadd (fmul c, x), x) -> (fmul c+1, x) - // + // bool AllowNewFpConst = (Level < AfterLegalizeDAG); // If allow, fold (fadd (fneg x), x) -> 0.0 if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath && - N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) { + N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) return DAG.getConstantFP(0.0, VT); - } // If allow, fold (fadd x, (fneg x)) -> 0.0 if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath && - N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) { + N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) return DAG.getConstantFP(0.0, VT); - } // In unsafe math mode, we can fold chains of FADD's of the same value // into multiplications. This transform is not safe in general because @@ -5910,43 +6044,43 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); - // (fadd (fmul c, x), x) -> (fmul c+1, x) + // (fadd (fmul c, x), x) -> (fmul x, c+1) if (CFP00 && !CFP01 && N0.getOperand(1) == N1) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP00, 0), DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, NewCFP); } - // (fadd (fmul x, c), x) -> (fmul c+1, x) + // (fadd (fmul x, c), x) -> (fmul x, c+1) if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP01, 0), DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, NewCFP); } - // (fadd (fmul c, x), (fadd x, x)) -> (fmul c+2, x) + // (fadd (fmul c, x), (fadd x, x)) -> (fmul x, c+2) if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(1) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP00, 0), DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(1), NewCFP); } - // (fadd (fmul x, c), (fadd x, x)) -> (fmul c+2, x) + // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP01, 0), DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), NewCFP); } } @@ -5955,98 +6089,93 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1)); - // (fadd x, (fmul c, x)) -> (fmul c+1, x) + // (fadd x, (fmul c, x)) -> (fmul x, c+1) if (CFP10 && !CFP11 && N1.getOperand(1) == N0) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP10, 0), DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, NewCFP); } - // (fadd x, (fmul x, c)) -> (fmul c+1, x) + // (fadd x, (fmul x, c)) -> (fmul x, c+1) if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP11, 0), DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, NewCFP); } - // (fadd (fadd x, x), (fmul c, x)) -> (fmul c+2, x) - if (CFP10 && !CFP11 && N1.getOpcode() == ISD::FADD && - N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(1) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + // (fadd (fadd x, x), (fmul c, x)) -> (fmul x, c+2) + if (CFP10 && !CFP11 && N0.getOpcode() == ISD::FADD && + N0.getOperand(0) == N0.getOperand(1) && + N1.getOperand(1) == N0.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP10, 0), DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, - N0.getOperand(1), NewCFP); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, + N1.getOperand(1), NewCFP); } - // (fadd (fadd x, x), (fmul x, c)) -> (fmul c+2, x) - if (CFP11 && !CFP10 && N1.getOpcode() == ISD::FADD && - N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(0) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) + if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD && + N0.getOperand(0) == N0.getOperand(1) && + N1.getOperand(0) == N0.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP11, 0), DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, - N0.getOperand(0), NewCFP); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, + N1.getOperand(0), NewCFP); } } if (N0.getOpcode() == ISD::FADD && AllowNewFpConst) { ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); - // (fadd (fadd x, x), x) -> (fmul 3.0, x) + // (fadd (fadd x, x), x) -> (fmul x, 3.0) if (!CFP && N0.getOperand(0) == N0.getOperand(1) && - (N0.getOperand(0) == N1)) { - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + (N0.getOperand(0) == N1)) + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, DAG.getConstantFP(3.0, VT)); - } } if (N1.getOpcode() == ISD::FADD && AllowNewFpConst) { ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); - // (fadd x, (fadd x, x)) -> (fmul 3.0, x) + // (fadd x, (fadd x, x)) -> (fmul x, 3.0) if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && - N1.getOperand(0) == N0) { - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N1.getOperand(0) == N0) + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, DAG.getConstantFP(3.0, VT)); - } } - // (fadd (fadd x, x), (fadd x, x)) -> (fmul 4.0, x) + // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0) if (AllowNewFpConst && N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(0) == N1.getOperand(0)) { - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(0) == N1.getOperand(0)) + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), DAG.getConstantFP(4.0, VT)); - } } // FADD -> FMA combines: if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || DAG.getTarget().Options.UnsafeFPMath) && - DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && - TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { + DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { // fold (fadd (fmul x, y), z) -> (fma x, y, z) - if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { - return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, + if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), N1); - } // fold (fadd x, (fmul y, z)) -> (fma y, z, x) // Note: Commutes FADD operands. - if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { - return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, + if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1.getOperand(0), N1.getOperand(1), N0); - } } return SDValue(); @@ -6058,7 +6187,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // fold vector ops if (VT.isVector()) { @@ -6068,7 +6197,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // fold (fsub c1, c2) -> c1-c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, N1); // fold (fsub A, 0) -> A if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) @@ -6101,8 +6230,9 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &DAG.getTarget().Options)) return GetNegatedExpression(N11, DAG, LegalOperations); - else if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, - &DAG.getTarget().Options)) + + if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, + &DAG.getTarget().Options)) return GetNegatedExpression(N10, DAG, LegalOperations); } } @@ -6110,27 +6240,25 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // FSUB -> FMA combines: if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || DAG.getTarget().Options.UnsafeFPMath) && - DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && - TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { + DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) - if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { + if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) return DAG.getNode(ISD::FMA, dl, VT, N0.getOperand(0), N0.getOperand(1), DAG.getNode(ISD::FNEG, dl, VT, N1)); - } // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) // Note: Commutes FSUB operands. - if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { + if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) return DAG.getNode(ISD::FMA, dl, VT, DAG.getNode(ISD::FNEG, dl, VT, N1.getOperand(0)), N1.getOperand(1), N0); - } - // fold (fsub (-(fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) - if (N0.getOpcode() == ISD::FNEG && + // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) + if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0).getOpcode() == ISD::FMUL && N0->hasOneUse() && N0.getOperand(0).hasOneUse()) { SDValue N00 = N0.getOperand(0).getOperand(0); @@ -6160,10 +6288,10 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // fold (fmul c1, c2) -> c1*c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1); // canonicalize constant to RHS if (N0CFP && !N1CFP) - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0); // fold (fmul A, 0) -> 0 if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) @@ -6177,21 +6305,21 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { return N0; // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) - return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0); + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N0); // fold (fmul X, -1.0) -> (fneg X) if (N1CFP && N1CFP->isExactlyValue(-1.0)) if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) { // Both can be negated for free, check to see if at least one is cheaper // negated. if (LHSNeg == 2 || RHSNeg == 2) - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, GetNegatedExpression(N0, DAG, LegalOperations), GetNegatedExpression(N1, DAG, LegalOperations)); } @@ -6201,8 +6329,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL && N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1))) - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0), - DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), + DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(1), N1)); return SDValue(); @@ -6215,7 +6343,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (DAG.getTarget().Options.UnsafeFPMath) { if (N0CFP && N0CFP->isZero()) @@ -6224,13 +6352,13 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { return N2; } if (N0CFP && N0CFP->isExactlyValue(1.0)) - return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N2); + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2); if (N1CFP && N1CFP->isExactlyValue(1.0)) - return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N2); + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2); // Canonicalize (fma c, x, y) -> (fma x, c, y) if (N0CFP && !N1CFP) - return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, N1, N0, N2); + return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2); // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && @@ -6267,21 +6395,17 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { } // (fma x, c, x) -> (fmul x, (c+1)) - if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2) { - return DAG.getNode(ISD::FMUL, dl, VT, - N0, + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2) + return DAG.getNode(ISD::FMUL, dl, VT, N0, DAG.getNode(ISD::FADD, dl, VT, N1, DAG.getConstantFP(1.0, VT))); - } // (fma x, c, (fneg x)) -> (fmul x, (c-1)) if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && - N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { - return DAG.getNode(ISD::FMUL, dl, VT, - N0, + N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) + return DAG.getNode(ISD::FMUL, dl, VT, N0, DAG.getNode(ISD::FADD, dl, VT, N1, DAG.getConstantFP(-1.0, VT))); - } return SDValue(); @@ -6303,7 +6427,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // fold (fdiv c1, c2) -> c1/c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1); // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. if (N1CFP && DAG.getTarget().Options.UnsafeFPMath) { @@ -6320,7 +6444,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || TLI.isFPImmLegal(Recip, VT))) - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, DAG.getConstantFP(Recip, VT)); } @@ -6332,7 +6456,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // Both can be negated for free, check to see if at least one is cheaper // negated. if (LHSNeg == 2 || RHSNeg == 2) - return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FDIV, SDLoc(N), VT, GetNegatedExpression(N0, DAG, LegalOperations), GetNegatedExpression(N1, DAG, LegalOperations)); } @@ -6350,7 +6474,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { // fold (frem c1, c2) -> fmod(c1,c2) if (N0CFP && N1CFP) - return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1); return SDValue(); } @@ -6363,7 +6487,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { EVT VT = N->getValueType(0); if (N0CFP && N1CFP) // Constant fold - return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1); if (N1CFP) { const APFloat& V = N1CFP->getValueAPF(); @@ -6371,11 +6495,11 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1) if (!V.isNegative()) { if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT)) - return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); } else { if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, - DAG.getNode(ISD::FABS, N0.getDebugLoc(), VT, N0)); + return DAG.getNode(ISD::FNEG, SDLoc(N), VT, + DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0)); } } @@ -6384,22 +6508,22 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { // copysign(copysign(x,z), y) -> copysign(x, y) if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) - return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1); // copysign(x, abs(y)) -> abs(x) if (N1.getOpcode() == ISD::FABS) - return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); // copysign(x, copysign(y,z)) -> copysign(x, z) if (N1.getOpcode() == ISD::FCOPYSIGN) - return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1)); // copysign(x, fp_extend(y)) -> copysign(x, y) // copysign(x, fp_round(y)) -> copysign(x, y) if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND) - return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0)); return SDValue(); @@ -6416,7 +6540,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) - return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); // If the input is a legal type, and SINT_TO_FP is not legal on this target, // but UINT_TO_FP is legal on this target, try to convert. @@ -6424,7 +6548,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) { // If the sign bit is known to be zero, we can change this to UINT_TO_FP. if (DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); } // The next optimizations are desireable only if SELECT_CC can be lowered. @@ -6442,7 +6566,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { { N0.getOperand(0), N0.getOperand(1), DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT), N0.getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5); } // fold (sint_to_fp (zext (setcc x, y, cc))) -> @@ -6455,7 +6579,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1), DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT), N0.getOperand(0).getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5); } } @@ -6473,7 +6597,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) - return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); // If the input is a legal type, and UINT_TO_FP is not legal on this target, // but SINT_TO_FP is legal on this target, try to convert. @@ -6481,7 +6605,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) { // If the sign bit is known to be zero, we can change this to SINT_TO_FP. if (DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); } // The next optimizations are desireable only if SELECT_CC can be lowered. @@ -6499,7 +6623,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { { N0.getOperand(0), N0.getOperand(1), DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT), N0.getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5); } } @@ -6513,7 +6637,7 @@ SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { // fold (fp_to_sint c1fp) -> c1 if (N0CFP) - return DAG.getNode(ISD::FP_TO_SINT, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0); return SDValue(); } @@ -6525,7 +6649,7 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { // fold (fp_to_uint c1fp) -> c1 if (N0CFP) - return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0); return SDValue(); } @@ -6538,7 +6662,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { // fold (fp_round c1fp) -> c1fp if (N0CFP) - return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1); // fold (fp_round (fp_extend x)) -> x if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType()) @@ -6549,16 +6673,16 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { // This is a value preserving truncation if both round's are. bool IsTrunc = N->getConstantOperandVal(1) == 1 && N0.getNode()->getConstantOperandVal(1) == 1; - return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0), DAG.getIntPtrConstant(IsTrunc)); } // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y) if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) { - SDValue Tmp = DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), VT, + SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT, N0.getOperand(0), N1); AddToWorkList(Tmp.getNode()); - return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, Tmp, N0.getOperand(1)); } @@ -6574,7 +6698,7 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { // fold (fp_round_inreg c1fp) -> c1fp if (N0CFP && isTypeLegal(EVT)) { SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT); - return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round); + return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Round); } return SDValue(); @@ -6592,7 +6716,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { // fold (fp_extend c1fp) -> c1fp if (N0CFP) - return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0); // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the // value of X. @@ -6601,25 +6725,23 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue In = N0.getOperand(0); if (In.getValueType() == VT) return In; if (VT.bitsLT(In.getValueType())) - return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, In, N0.getOperand(1)); - return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, In); + return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In); } // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) - if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() && + if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - N0.getValueType(), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), N0.getValueType(), + LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), - DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), + DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)), ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -6650,10 +6772,10 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue Int = N0.getOperand(0); EVT IntVT = Int.getValueType(); if (IntVT.isInteger() && !IntVT.isVector()) { - Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int, + Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int, DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); AddToWorkList(Int.getNode()); - return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int); } } @@ -6661,12 +6783,11 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { // (fneg (fmul c, x)) -> (fmul -c, x) if (N0.getOpcode() == ISD::FMUL) { ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); - if (CFP1) { - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + if (CFP1) + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), - DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, + DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1))); - } } return SDValue(); @@ -6679,7 +6800,7 @@ SDValue DAGCombiner::visitFCEIL(SDNode *N) { // fold (fceil c1) -> fceil(c1) if (N0CFP) - return DAG.getNode(ISD::FCEIL, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0); return SDValue(); } @@ -6691,7 +6812,7 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) { // fold (ftrunc c1) -> ftrunc(c1) if (N0CFP) - return DAG.getNode(ISD::FTRUNC, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0); return SDValue(); } @@ -6703,7 +6824,7 @@ SDValue DAGCombiner::visitFFLOOR(SDNode *N) { // fold (ffloor c1) -> ffloor(c1) if (N0CFP) - return DAG.getNode(ISD::FFLOOR, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0); return SDValue(); } @@ -6720,28 +6841,28 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { // fold (fabs c1) -> fabs(c1) if (N0CFP) - return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); // fold (fabs (fabs x)) -> (fabs x) if (N0.getOpcode() == ISD::FABS) return N->getOperand(0); // fold (fabs (fneg x)) -> (fabs x) // fold (fabs (fcopysign x, y)) -> (fabs x) if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) - return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0.getOperand(0)); + return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0)); // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading // constant pool values. - if (!TLI.isFAbsFree(VT) && + if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && N0.getOperand(0).getValueType().isInteger() && !N0.getOperand(0).getValueType().isVector()) { SDValue Int = N0.getOperand(0); EVT IntVT = Int.getValueType(); if (IntVT.isInteger() && !IntVT.isVector()) { - Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int, + Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int, DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); AddToWorkList(Int.getNode()); - return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int); } } @@ -6765,7 +6886,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (N1.getOpcode() == ISD::SETCC && TLI.isOperationLegalOrCustom(ISD::BR_CC, N1.getOperand(0).getValueType())) { - return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, + return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, Chain, N1.getOperand(2), N1.getOperand(0), N1.getOperand(1), N2); } @@ -6811,12 +6932,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (AndConst.isPowerOf2() && cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) { SDValue SetCC = - DAG.getSetCC(N->getDebugLoc(), - TLI.getSetCCResultType(Op0.getValueType()), + DAG.getSetCC(SDLoc(N), + getSetCCResultType(Op0.getValueType()), Op0, DAG.getConstant(0, Op0.getValueType()), ISD::SETNE); - SDValue NewBRCond = DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + SDValue NewBRCond = DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, SetCC, N2); // Don't add the new BRCond into the worklist or else SimplifySelectCC // will convert it back to (X & C1) >> C2. @@ -6861,7 +6982,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { DAG.ReplaceAllUsesOfValueWith(N1, Tmp); removeFromWorkList(TheXor); DAG.DeleteNode(TheXor); - return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, Tmp, N2); } @@ -6882,8 +7003,8 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { EVT SetCCVT = N1.getValueType(); if (LegalTypes) - SetCCVT = TLI.getSetCCResultType(SetCCVT); - SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(), + SetCCVT = getSetCCResultType(SetCCVT); + SDValue SetCC = DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1, Equal ? ISD::SETEQ : ISD::SETNE); @@ -6892,7 +7013,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { DAG.ReplaceAllUsesOfValueWith(N1, SetCC); removeFromWorkList(N1.getNode()); DAG.DeleteNode(N1.getNode()); - return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, SetCC, N2); } } @@ -6913,14 +7034,14 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) { // MachineBasicBlock CFG, which is awkward. // Use SimplifySetCC to simplify SETCC's. - SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()), - CondLHS, CondRHS, CC->get(), N->getDebugLoc(), + SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()), + CondLHS, CondRHS, CC->get(), SDLoc(N), false); if (Simp.getNode()) AddToWorkList(Simp.getNode()); // fold to a simpler setcc if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) - return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, + return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, N->getOperand(0), Simp.getOperand(2), Simp.getOperand(0), Simp.getOperand(1), N->getOperand(4)); @@ -7118,10 +7239,10 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { SDValue Result; if (isLoad) - Result = DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(), + Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N), BasePtr, Offset, AM); else - Result = DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(), + Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N), BasePtr, Offset, AM); ++PreIndexedNodes; ++NodesCombined; @@ -7156,7 +7277,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // x0 * offset0 + y0 * ptr0 = t0 // knowing that // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store) - // + // // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the // indexed load/store and the expresion that needs to be re-written. // @@ -7186,7 +7307,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0); SDValue NewUse = DAG.getNode(Opcode, - OtherUses[i]->getDebugLoc(), + SDLoc(OtherUses[i]), OtherUses[i]->getValueType(0), NewOp1, NewOp2); DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse); removeFromWorkList(OtherUses[i]); @@ -7278,7 +7399,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { for (SDNode::use_iterator III = Use->use_begin(), EEE = Use->use_end(); III != EEE; ++III) { SDNode *UseUse = *III; - if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) + if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) RealUse = true; } @@ -7295,9 +7416,9 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { // Check for #2 if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) { SDValue Result = isLoad - ? DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(), + ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N), BasePtr, Offset, AM) - : DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(), + : DAG.getIndexedStore(SDValue(N,0), SDLoc(N), BasePtr, Offset, AM); ++PostIndexedNodes; ++NodesCombined; @@ -7403,17 +7524,20 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > LD->getMemOperand()->getBaseAlignment()) { SDValue NewLoad = - DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), + DAG.getExtLoad(LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr, LD->getPointerInfo(), LD->getMemoryVT(), - LD->isVolatile(), LD->isNonTemporal(), Align); + LD->isVolatile(), LD->isNonTemporal(), Align, + LD->getTBAAInfo()); return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); } } } - if (CombinerAA) { + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : + TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); + if (UseAA) { // Walk up chain skipping non-aliasing memory nodes. SDValue BetterChain = FindBetterChain(N, Chain); @@ -7423,22 +7547,17 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // Replace the chain to void dependency. if (LD->getExtensionType() == ISD::NON_EXTLOAD) { - ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(), - BetterChain, Ptr, LD->getPointerInfo(), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment()); + ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD), + BetterChain, Ptr, LD->getMemOperand()); } else { - ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), + ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), LD->getValueType(0), - BetterChain, Ptr, LD->getPointerInfo(), - LD->getMemoryVT(), - LD->isVolatile(), - LD->isNonTemporal(), - LD->getAlignment()); + BetterChain, Ptr, LD->getMemoryVT(), + LD->getMemOperand()); } // Create token factor to keep old chain connected. - SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), + SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Chain, ReplLoad.getValue(1)); // Make sure the new and old chains are cleaned up. @@ -7454,9 +7573,562 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) return SDValue(N, 0); + // Try to slice up N to more direct loads if the slices are mapped to + // different register banks or pairing can take place. + if (SliceUpLoad(N)) + return SDValue(N, 0); + return SDValue(); } +namespace { +/// \brief Helper structure used to slice a load in smaller loads. +/// Basically a slice is obtained from the following sequence: +/// Origin = load Ty1, Base +/// Shift = srl Ty1 Origin, CstTy Amount +/// Inst = trunc Shift to Ty2 +/// +/// Then, it will be rewriten into: +/// Slice = load SliceTy, Base + SliceOffset +/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2 +/// +/// SliceTy is deduced from the number of bits that are actually used to +/// build Inst. +struct LoadedSlice { + /// \brief Helper structure used to compute the cost of a slice. + struct Cost { + /// Are we optimizing for code size. + bool ForCodeSize; + /// Various cost. + unsigned Loads; + unsigned Truncates; + unsigned CrossRegisterBanksCopies; + unsigned ZExts; + unsigned Shift; + + Cost(bool ForCodeSize = false) + : ForCodeSize(ForCodeSize), Loads(0), Truncates(0), + CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {} + + /// \brief Get the cost of one isolated slice. + Cost(const LoadedSlice &LS, bool ForCodeSize = false) + : ForCodeSize(ForCodeSize), Loads(1), Truncates(0), + CrossRegisterBanksCopies(0), ZExts(0), Shift(0) { + EVT TruncType = LS.Inst->getValueType(0); + EVT LoadedType = LS.getLoadedType(); + if (TruncType != LoadedType && + !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType)) + ZExts = 1; + } + + /// \brief Account for slicing gain in the current cost. + /// Slicing provide a few gains like removing a shift or a + /// truncate. This method allows to grow the cost of the original + /// load with the gain from this slice. + void addSliceGain(const LoadedSlice &LS) { + // Each slice saves a truncate. + const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo(); + if (!TLI.isTruncateFree(LS.Inst->getValueType(0), + LS.Inst->getOperand(0).getValueType())) + ++Truncates; + // If there is a shift amount, this slice gets rid of it. + if (LS.Shift) + ++Shift; + // If this slice can merge a cross register bank copy, account for it. + if (LS.canMergeExpensiveCrossRegisterBankCopy()) + ++CrossRegisterBanksCopies; + } + + Cost &operator+=(const Cost &RHS) { + Loads += RHS.Loads; + Truncates += RHS.Truncates; + CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies; + ZExts += RHS.ZExts; + Shift += RHS.Shift; + return *this; + } + + bool operator==(const Cost &RHS) const { + return Loads == RHS.Loads && Truncates == RHS.Truncates && + CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies && + ZExts == RHS.ZExts && Shift == RHS.Shift; + } + + bool operator!=(const Cost &RHS) const { return !(*this == RHS); } + + bool operator<(const Cost &RHS) const { + // Assume cross register banks copies are as expensive as loads. + // FIXME: Do we want some more target hooks? + unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies; + unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies; + // Unless we are optimizing for code size, consider the + // expensive operation first. + if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS) + return ExpensiveOpsLHS < ExpensiveOpsRHS; + return (Truncates + ZExts + Shift + ExpensiveOpsLHS) < + (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS); + } + + bool operator>(const Cost &RHS) const { return RHS < *this; } + + bool operator<=(const Cost &RHS) const { return !(RHS < *this); } + + bool operator>=(const Cost &RHS) const { return !(*this < RHS); } + }; + // The last instruction that represent the slice. This should be a + // truncate instruction. + SDNode *Inst; + // The original load instruction. + LoadSDNode *Origin; + // The right shift amount in bits from the original load. + unsigned Shift; + // The DAG from which Origin came from. + // This is used to get some contextual information about legal types, etc. + SelectionDAG *DAG; + + LoadedSlice(SDNode *Inst = NULL, LoadSDNode *Origin = NULL, + unsigned Shift = 0, SelectionDAG *DAG = NULL) + : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {} + + LoadedSlice(const LoadedSlice &LS) + : Inst(LS.Inst), Origin(LS.Origin), Shift(LS.Shift), DAG(LS.DAG) {} + + /// \brief Get the bits used in a chunk of bits \p BitWidth large. + /// \return Result is \p BitWidth and has used bits set to 1 and + /// not used bits set to 0. + APInt getUsedBits() const { + // Reproduce the trunc(lshr) sequence: + // - Start from the truncated value. + // - Zero extend to the desired bit width. + // - Shift left. + assert(Origin && "No original load to compare against."); + unsigned BitWidth = Origin->getValueSizeInBits(0); + assert(Inst && "This slice is not bound to an instruction"); + assert(Inst->getValueSizeInBits(0) <= BitWidth && + "Extracted slice is bigger than the whole type!"); + APInt UsedBits(Inst->getValueSizeInBits(0), 0); + UsedBits.setAllBits(); + UsedBits = UsedBits.zext(BitWidth); + UsedBits <<= Shift; + return UsedBits; + } + + /// \brief Get the size of the slice to be loaded in bytes. + unsigned getLoadedSize() const { + unsigned SliceSize = getUsedBits().countPopulation(); + assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte."); + return SliceSize / 8; + } + + /// \brief Get the type that will be loaded for this slice. + /// Note: This may not be the final type for the slice. + EVT getLoadedType() const { + assert(DAG && "Missing context"); + LLVMContext &Ctxt = *DAG->getContext(); + return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8); + } + + /// \brief Get the alignment of the load used for this slice. + unsigned getAlignment() const { + unsigned Alignment = Origin->getAlignment(); + unsigned Offset = getOffsetFromBase(); + if (Offset != 0) + Alignment = MinAlign(Alignment, Alignment + Offset); + return Alignment; + } + + /// \brief Check if this slice can be rewritten with legal operations. + bool isLegal() const { + // An invalid slice is not legal. + if (!Origin || !Inst || !DAG) + return false; + + // Offsets are for indexed load only, we do not handle that. + if (Origin->getOffset().getOpcode() != ISD::UNDEF) + return false; + + const TargetLowering &TLI = DAG->getTargetLoweringInfo(); + + // Check that the type is legal. + EVT SliceType = getLoadedType(); + if (!TLI.isTypeLegal(SliceType)) + return false; + + // Check that the load is legal for this type. + if (!TLI.isOperationLegal(ISD::LOAD, SliceType)) + return false; + + // Check that the offset can be computed. + // 1. Check its type. + EVT PtrType = Origin->getBasePtr().getValueType(); + if (PtrType == MVT::Untyped || PtrType.isExtended()) + return false; + + // 2. Check that it fits in the immediate. + if (!TLI.isLegalAddImmediate(getOffsetFromBase())) + return false; + + // 3. Check that the computation is legal. + if (!TLI.isOperationLegal(ISD::ADD, PtrType)) + return false; + + // Check that the zext is legal if it needs one. + EVT TruncateType = Inst->getValueType(0); + if (TruncateType != SliceType && + !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType)) + return false; + + return true; + } + + /// \brief Get the offset in bytes of this slice in the original chunk of + /// bits. + /// \pre DAG != NULL. + uint64_t getOffsetFromBase() const { + assert(DAG && "Missing context."); + bool IsBigEndian = + DAG->getTargetLoweringInfo().getDataLayout()->isBigEndian(); + assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported."); + uint64_t Offset = Shift / 8; + unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8; + assert(!(Origin->getValueSizeInBits(0) & 0x7) && + "The size of the original loaded type is not a multiple of a" + " byte."); + // If Offset is bigger than TySizeInBytes, it means we are loading all + // zeros. This should have been optimized before in the process. + assert(TySizeInBytes > Offset && + "Invalid shift amount for given loaded size"); + if (IsBigEndian) + Offset = TySizeInBytes - Offset - getLoadedSize(); + return Offset; + } + + /// \brief Generate the sequence of instructions to load the slice + /// represented by this object and redirect the uses of this slice to + /// this new sequence of instructions. + /// \pre this->Inst && this->Origin are valid Instructions and this + /// object passed the legal check: LoadedSlice::isLegal returned true. + /// \return The last instruction of the sequence used to load the slice. + SDValue loadSlice() const { + assert(Inst && Origin && "Unable to replace a non-existing slice."); + const SDValue &OldBaseAddr = Origin->getBasePtr(); + SDValue BaseAddr = OldBaseAddr; + // Get the offset in that chunk of bytes w.r.t. the endianess. + int64_t Offset = static_cast<int64_t>(getOffsetFromBase()); + assert(Offset >= 0 && "Offset too big to fit in int64_t!"); + if (Offset) { + // BaseAddr = BaseAddr + Offset. + EVT ArithType = BaseAddr.getValueType(); + BaseAddr = DAG->getNode(ISD::ADD, SDLoc(Origin), ArithType, BaseAddr, + DAG->getConstant(Offset, ArithType)); + } + + // Create the type of the loaded slice according to its size. + EVT SliceType = getLoadedType(); + + // Create the load for the slice. + SDValue LastInst = DAG->getLoad( + SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr, + Origin->getPointerInfo().getWithOffset(Offset), Origin->isVolatile(), + Origin->isNonTemporal(), Origin->isInvariant(), getAlignment()); + // If the final type is not the same as the loaded type, this means that + // we have to pad with zero. Create a zero extend for that. + EVT FinalType = Inst->getValueType(0); + if (SliceType != FinalType) + LastInst = + DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst); + return LastInst; + } + + /// \brief Check if this slice can be merged with an expensive cross register + /// bank copy. E.g., + /// i = load i32 + /// f = bitcast i32 i to float + bool canMergeExpensiveCrossRegisterBankCopy() const { + if (!Inst || !Inst->hasOneUse()) + return false; + SDNode *Use = *Inst->use_begin(); + if (Use->getOpcode() != ISD::BITCAST) + return false; + assert(DAG && "Missing context"); + const TargetLowering &TLI = DAG->getTargetLoweringInfo(); + EVT ResVT = Use->getValueType(0); + const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT()); + const TargetRegisterClass *ArgRC = + TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT()); + if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT)) + return false; + + // At this point, we know that we perform a cross-register-bank copy. + // Check if it is expensive. + const TargetRegisterInfo *TRI = TLI.getTargetMachine().getRegisterInfo(); + // Assume bitcasts are cheap, unless both register classes do not + // explicitly share a common sub class. + if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC)) + return false; + + // Check if it will be merged with the load. + // 1. Check the alignment constraint. + unsigned RequiredAlignment = TLI.getDataLayout()->getABITypeAlignment( + ResVT.getTypeForEVT(*DAG->getContext())); + + if (RequiredAlignment > getAlignment()) + return false; + + // 2. Check that the load is a legal operation for that type. + if (!TLI.isOperationLegal(ISD::LOAD, ResVT)) + return false; + + // 3. Check that we do not have a zext in the way. + if (Inst->getValueType(0) != getLoadedType()) + return false; + + return true; + } +}; +} + +/// \brief Sorts LoadedSlice according to their offset. +struct LoadedSliceSorter { + bool operator()(const LoadedSlice &LHS, const LoadedSlice &RHS) { + assert(LHS.Origin == RHS.Origin && "Different bases not implemented."); + return LHS.getOffsetFromBase() < RHS.getOffsetFromBase(); + } +}; + +/// \brief Check that all bits set in \p UsedBits form a dense region, i.e., +/// \p UsedBits looks like 0..0 1..1 0..0. +static bool areUsedBitsDense(const APInt &UsedBits) { + // If all the bits are one, this is dense! + if (UsedBits.isAllOnesValue()) + return true; + + // Get rid of the unused bits on the right. + APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros()); + // Get rid of the unused bits on the left. + if (NarrowedUsedBits.countLeadingZeros()) + NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits()); + // Check that the chunk of bits is completely used. + return NarrowedUsedBits.isAllOnesValue(); +} + +/// \brief Check whether or not \p First and \p Second are next to each other +/// in memory. This means that there is no hole between the bits loaded +/// by \p First and the bits loaded by \p Second. +static bool areSlicesNextToEachOther(const LoadedSlice &First, + const LoadedSlice &Second) { + assert(First.Origin == Second.Origin && First.Origin && + "Unable to match different memory origins."); + APInt UsedBits = First.getUsedBits(); + assert((UsedBits & Second.getUsedBits()) == 0 && + "Slices are not supposed to overlap."); + UsedBits |= Second.getUsedBits(); + return areUsedBitsDense(UsedBits); +} + +/// \brief Adjust the \p GlobalLSCost according to the target +/// paring capabilities and the layout of the slices. +/// \pre \p GlobalLSCost should account for at least as many loads as +/// there is in the slices in \p LoadedSlices. +static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices, + LoadedSlice::Cost &GlobalLSCost) { + unsigned NumberOfSlices = LoadedSlices.size(); + // If there is less than 2 elements, no pairing is possible. + if (NumberOfSlices < 2) + return; + + // Sort the slices so that elements that are likely to be next to each + // other in memory are next to each other in the list. + std::sort(LoadedSlices.begin(), LoadedSlices.end(), LoadedSliceSorter()); + const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo(); + // First (resp. Second) is the first (resp. Second) potentially candidate + // to be placed in a paired load. + const LoadedSlice *First = NULL; + const LoadedSlice *Second = NULL; + for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice, + // Set the beginning of the pair. + First = Second) { + + Second = &LoadedSlices[CurrSlice]; + + // If First is NULL, it means we start a new pair. + // Get to the next slice. + if (!First) + continue; + + EVT LoadedType = First->getLoadedType(); + + // If the types of the slices are different, we cannot pair them. + if (LoadedType != Second->getLoadedType()) + continue; + + // Check if the target supplies paired loads for this type. + unsigned RequiredAlignment = 0; + if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) { + // move to the next pair, this type is hopeless. + Second = NULL; + continue; + } + // Check if we meet the alignment requirement. + if (RequiredAlignment > First->getAlignment()) + continue; + + // Check that both loads are next to each other in memory. + if (!areSlicesNextToEachOther(*First, *Second)) + continue; + + assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!"); + --GlobalLSCost.Loads; + // Move to the next pair. + Second = NULL; + } +} + +/// \brief Check the profitability of all involved LoadedSlice. +/// Currently, it is considered profitable if there is exactly two +/// involved slices (1) which are (2) next to each other in memory, and +/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3). +/// +/// Note: The order of the elements in \p LoadedSlices may be modified, but not +/// the elements themselves. +/// +/// FIXME: When the cost model will be mature enough, we can relax +/// constraints (1) and (2). +static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices, + const APInt &UsedBits, bool ForCodeSize) { + unsigned NumberOfSlices = LoadedSlices.size(); + if (StressLoadSlicing) + return NumberOfSlices > 1; + + // Check (1). + if (NumberOfSlices != 2) + return false; + + // Check (2). + if (!areUsedBitsDense(UsedBits)) + return false; + + // Check (3). + LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize); + // The original code has one big load. + OrigCost.Loads = 1; + for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) { + const LoadedSlice &LS = LoadedSlices[CurrSlice]; + // Accumulate the cost of all the slices. + LoadedSlice::Cost SliceCost(LS, ForCodeSize); + GlobalSlicingCost += SliceCost; + + // Account as cost in the original configuration the gain obtained + // with the current slices. + OrigCost.addSliceGain(LS); + } + + // If the target supports paired load, adjust the cost accordingly. + adjustCostForPairing(LoadedSlices, GlobalSlicingCost); + return OrigCost > GlobalSlicingCost; +} + +/// \brief If the given load, \p LI, is used only by trunc or trunc(lshr) +/// operations, split it in the various pieces being extracted. +/// +/// This sort of thing is introduced by SROA. +/// This slicing takes care not to insert overlapping loads. +/// \pre LI is a simple load (i.e., not an atomic or volatile load). +bool DAGCombiner::SliceUpLoad(SDNode *N) { + if (Level < AfterLegalizeDAG) + return false; + + LoadSDNode *LD = cast<LoadSDNode>(N); + if (LD->isVolatile() || !ISD::isNormalLoad(LD) || + !LD->getValueType(0).isInteger()) + return false; + + // Keep track of already used bits to detect overlapping values. + // In that case, we will just abort the transformation. + APInt UsedBits(LD->getValueSizeInBits(0), 0); + + SmallVector<LoadedSlice, 4> LoadedSlices; + + // Check if this load is used as several smaller chunks of bits. + // Basically, look for uses in trunc or trunc(lshr) and record a new chain + // of computation for each trunc. + for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end(); + UI != UIEnd; ++UI) { + // Skip the uses of the chain. + if (UI.getUse().getResNo() != 0) + continue; + + SDNode *User = *UI; + unsigned Shift = 0; + + // Check if this is a trunc(lshr). + if (User->getOpcode() == ISD::SRL && User->hasOneUse() && + isa<ConstantSDNode>(User->getOperand(1))) { + Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue(); + User = *User->use_begin(); + } + + // At this point, User is a Truncate, iff we encountered, trunc or + // trunc(lshr). + if (User->getOpcode() != ISD::TRUNCATE) + return false; + + // The width of the type must be a power of 2 and greater than 8-bits. + // Otherwise the load cannot be represented in LLVM IR. + // Moreover, if we shifted with a non 8-bits multiple, the slice + // will be accross several bytes. We do not support that. + unsigned Width = User->getValueSizeInBits(0); + if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7)) + return 0; + + // Build the slice for this chain of computations. + LoadedSlice LS(User, LD, Shift, &DAG); + APInt CurrentUsedBits = LS.getUsedBits(); + + // Check if this slice overlaps with another. + if ((CurrentUsedBits & UsedBits) != 0) + return false; + // Update the bits used globally. + UsedBits |= CurrentUsedBits; + + // Check if the new slice would be legal. + if (!LS.isLegal()) + return false; + + // Record the slice. + LoadedSlices.push_back(LS); + } + + // Abort slicing if it does not seem to be profitable. + if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize)) + return false; + + ++SlicedLoads; + + // Rewrite each chain to use an independent load. + // By construction, each chain can be represented by a unique load. + + // Prepare the argument for the new token factor for all the slices. + SmallVector<SDValue, 8> ArgChains; + for (SmallVectorImpl<LoadedSlice>::const_iterator + LSIt = LoadedSlices.begin(), + LSItEnd = LoadedSlices.end(); + LSIt != LSItEnd; ++LSIt) { + SDValue SliceInst = LSIt->loadSlice(); + CombineTo(LSIt->Inst, SliceInst, true); + if (SliceInst.getNode()->getOpcode() != ISD::LOAD) + SliceInst = SliceInst.getOperand(0); + assert(SliceInst->getOpcode() == ISD::LOAD && + "It takes more than a zext to get to the loaded slice!!"); + ArgChains.push_back(SliceInst.getValue(1)); + } + + SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, + &ArgChains[0], ArgChains.size()); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); + return true; +} + /// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the /// load is having specific bytes cleared out. If so, return the byte size /// being masked out and the shift amount. @@ -7500,9 +8172,9 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { // 0 and the bits being kept are 1. Use getSExtValue so that leading bits // follow the sign bit for uniformity. uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue(); - unsigned NotMaskLZ = CountLeadingZeros_64(NotMask); + unsigned NotMaskLZ = countLeadingZeros(NotMask); if (NotMaskLZ & 7) return Result; // Must be multiple of a byte. - unsigned NotMaskTZ = CountTrailingZeros_64(NotMask); + unsigned NotMaskTZ = countTrailingZeros(NotMask); if (NotMaskTZ & 7) return Result; // Must be multiple of a byte. if (NotMaskLZ == 64) return Result; // All zero mask. @@ -7559,7 +8231,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, // Okay, we can do this! Replace the 'St' store with a store of IVal that is // shifted by ByteShift and truncated down to NumBytes. if (ByteShift) - IVal = DAG.getNode(ISD::SRL, IVal->getDebugLoc(), IVal.getValueType(), IVal, + IVal = DAG.getNode(ISD::SRL, SDLoc(IVal), IVal.getValueType(), IVal, DAG.getConstant(ByteShift*8, DC->getShiftAmountTy(IVal.getValueType()))); @@ -7574,16 +8246,16 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, SDValue Ptr = St->getBasePtr(); if (StOffset) { - Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(), + Ptr = DAG.getNode(ISD::ADD, SDLoc(IVal), Ptr.getValueType(), Ptr, DAG.getConstant(StOffset, Ptr.getValueType())); NewAlign = MinAlign(NewAlign, StOffset); } // Truncate down to the new size. - IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal); + IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal); ++OpsNarrowed; - return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr, + return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr, St->getPointerInfo().getWithOffset(StOffset), false, false, NewAlign).getNode(); } @@ -7684,17 +8356,18 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { if (NewAlign < TLI.getDataLayout()->getABITypeAlignment(NewVTTy)) return SDValue(); - SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(), + SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD), Ptr.getValueType(), Ptr, DAG.getConstant(PtrOff, Ptr.getValueType())); - SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(), + SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr, LD->getPointerInfo().getWithOffset(PtrOff), LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), NewAlign); - SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD, + LD->isInvariant(), NewAlign, + LD->getTBAAInfo()); + SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD, DAG.getConstant(NewImm, NewVT)); - SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(), + SDValue NewST = DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr, ST->getPointerInfo().getWithOffset(PtrOff), false, false, NewAlign); @@ -7747,12 +8420,12 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { if (LDAlign < ABIAlign || STAlign < ABIAlign) return SDValue(); - SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(), + SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), false, false, false, LDAlign); - SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(), + SDValue NewST = DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(), ST->getPointerInfo(), false, false, STAlign); @@ -7802,17 +8475,28 @@ struct BaseIndexOffset { static BaseIndexOffset match(SDValue Ptr) { bool IsIndexSignExt = false; - // Just Base or possibly anything else. + // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD + // instruction, then it could be just the BASE or everything else we don't + // know how to handle. Just use Ptr as BASE and give up. if (Ptr->getOpcode() != ISD::ADD) return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); - // Base + offset. + // We know that we have at least an ADD instruction. Try to pattern match + // the simple case of BASE + OFFSET. if (isa<ConstantSDNode>(Ptr->getOperand(1))) { int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue(); return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset, IsIndexSignExt); } + // Inside a loop the current BASE pointer is calculated using an ADD and a + // MUL instruction. In this case Ptr is the actual BASE pointer. + // (i64 add (i64 %array_ptr) + // (i64 mul (i64 %induction_var) + // (i64 %element_size))) + if (Ptr->getOperand(1)->getOpcode() == ISD::MUL) + return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); + // Look at Base + Index + Offset cases. SDValue Base = Ptr->getOperand(0); SDValue IndexOffset = Ptr->getOperand(1); @@ -7963,6 +8647,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { Index = STn; break; } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) { + if (Ldn->isVolatile()) { + Index = NULL; + break; + } + // Save the load node for later. Continue the scan. AliasLoadNodes.push_back(Ldn); NextInChain = Ldn->getChain().getNode(); @@ -8080,7 +8769,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // The earliest Node in the DAG. LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; - DebugLoc DL = StoreNodes[0].MemNode->getDebugLoc(); + SDLoc DL(StoreNodes[0].MemNode); SDValue StoredVal; if (UseVector) { @@ -8276,8 +8965,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), StoreBW); } - DebugLoc LoadDL = LoadNodes[0].MemNode->getDebugLoc(); - DebugLoc StoreDL = StoreNodes[0].MemNode->getDebugLoc(); + SDLoc LoadDL(LoadNodes[0].MemNode); + SDLoc StoreDL(StoreNodes[0].MemNode); LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, @@ -8338,9 +9027,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (Align <= OrigAlign && ((!LegalOperations && !ST->isVolatile()) || TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) - return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0), + return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, ST->getPointerInfo(), ST->isVolatile(), - ST->isNonTemporal(), OrigAlign); + ST->isNonTemporal(), OrigAlign, + ST->getTBAAInfo()); } // Turn 'store undef, Ptr' -> nothing. @@ -8355,7 +9045,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // transform should not be done in this case. if (Value.getOpcode() != ISD::TargetConstantFP) { SDValue Tmp; - switch (CFP->getValueType(0).getSimpleVT().SimpleTy) { + switch (CFP->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unknown FP type"); case MVT::f16: // We don't do this for these yet. case MVT::f80: @@ -8367,9 +9057,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). bitcastToAPInt().getZExtValue(), MVT::i32); - return DAG.getStore(Chain, N->getDebugLoc(), Tmp, - Ptr, ST->getPointerInfo(), ST->isVolatile(), - ST->isNonTemporal(), ST->getAlignment()); + return DAG.getStore(Chain, SDLoc(N), Tmp, + Ptr, ST->getMemOperand()); } break; case MVT::f64: @@ -8378,9 +9067,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). getZExtValue(), MVT::i64); - return DAG.getStore(Chain, N->getDebugLoc(), Tmp, - Ptr, ST->getPointerInfo(), ST->isVolatile(), - ST->isNonTemporal(), ST->getAlignment()); + return DAG.getStore(Chain, SDLoc(N), Tmp, + Ptr, ST->getMemOperand()); } if (!ST->isVolatile() && @@ -8396,19 +9084,20 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); + const MDNode *TBAAInfo = ST->getTBAAInfo(); - SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo, + SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, - ST->getAlignment()); - Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr, + ST->getAlignment(), TBAAInfo); + Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr, DAG.getConstant(4, Ptr.getValueType())); Alignment = MinAlign(Alignment, 4U); - SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi, + SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi, Ptr, ST->getPointerInfo().getWithOffset(4), isVolatile, isNonTemporal, - Alignment); - return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, + Alignment, TBAAInfo); + return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, St0, St1); } @@ -8421,9 +9110,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > ST->getAlignment()) - return DAG.getTruncStore(Chain, N->getDebugLoc(), Value, + return DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(), ST->getMemoryVT(), - ST->isVolatile(), ST->isNonTemporal(), Align); + ST->isVolatile(), ST->isNonTemporal(), Align, + ST->getTBAAInfo()); } } @@ -8433,7 +9123,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (NewST.getNode()) return NewST; - if (CombinerAA) { + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : + TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); + if (UseAA) { // Walk up chain skipping non-aliasing memory nodes. SDValue BetterChain = FindBetterChain(N, Chain); @@ -8443,19 +9135,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Replace the chain to avoid dependency. if (ST->isTruncatingStore()) { - ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr, - ST->getPointerInfo(), - ST->getMemoryVT(), ST->isVolatile(), - ST->isNonTemporal(), ST->getAlignment()); + ReplStore = DAG.getTruncStore(BetterChain, SDLoc(N), Value, Ptr, + ST->getMemoryVT(), ST->getMemOperand()); } else { - ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr, - ST->getPointerInfo(), - ST->isVolatile(), ST->isNonTemporal(), - ST->getAlignment()); + ReplStore = DAG.getStore(BetterChain, SDLoc(N), Value, Ptr, + ST->getMemOperand()); } // Create token to keep both nodes around. - SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), + SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Chain, ReplStore); // Make sure the new and old chains are cleaned up. @@ -8483,10 +9171,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { ST->getMemoryVT().getScalarType().getSizeInBits())); AddToWorkList(Value.getNode()); if (Shorter.getNode()) - return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter, - Ptr, ST->getPointerInfo(), ST->getMemoryVT(), - ST->isVolatile(), ST->isNonTemporal(), - ST->getAlignment()); + return DAG.getTruncStore(Chain, SDLoc(N), Shorter, + Ptr, ST->getMemoryVT(), ST->getMemOperand()); // Otherwise, see if we can simplify the operation with // SimplifyDemandedBits, which only works if the value has a single use. @@ -8516,10 +9202,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { && Value.getNode()->hasOneUse() && ST->isUnindexed() && TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), ST->getMemoryVT())) { - return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0), - Ptr, ST->getPointerInfo(), ST->getMemoryVT(), - ST->isVolatile(), ST->isNonTemporal(), - ST->getAlignment()); + return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), + Ptr, ST->getMemoryVT(), ST->getMemOperand()); } // Only perform this optimization before the types are legal, because we @@ -8547,7 +9231,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDValue InVec = N->getOperand(0); SDValue InVal = N->getOperand(1); SDValue EltNo = N->getOperand(2); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // If the inserted element is an UNDEF, just use the input vector. if (InVal.getOpcode() == ISD::UNDEF) @@ -8568,7 +9252,9 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { // be converted to a BUILD_VECTOR). Fill in the Ops vector with the // vector elements. SmallVector<SDValue, 8> Ops; - if (InVec.getOpcode() == ISD::BUILD_VECTOR) { + // Do not combine these two vectors if the output vector will not replace + // the input vector. + if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) { Ops.append(InVec.getNode()->op_begin(), InVec.getNode()->op_end()); } else if (InVec.getOpcode() == ISD::UNDEF) { @@ -8608,7 +9294,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { SDValue InOp = InVec.getOperand(0); if (InOp.getValueType() != NVT) { assert(InOp.getValueType().isInteger() && NVT.isInteger()); - return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT); + return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT); } return InOp; } @@ -8641,8 +9327,8 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { OrigElt -= NumElem; } - EVT IndexTy = N->getOperand(1).getValueType(); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), NVT, + EVT IndexTy = TLI.getVectorIdxTy(); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, InVec, DAG.getConstant(OrigElt, IndexTy)); } @@ -8756,7 +9442,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { EVT PtrType = NewPtr.getValueType(); if (TLI.isBigEndian()) PtrOff = VT.getSizeInBits() / 8 - PtrOff; - NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr, + NewPtr = DAG.getNode(ISD::ADD, SDLoc(N), PtrType, NewPtr, DAG.getConstant(PtrOff, PtrType)); } @@ -8773,20 +9459,21 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // extending load instead. ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD; - Load = DAG.getExtLoad(ExtType, N->getDebugLoc(), NVT, LN0->getChain(), + Load = DAG.getExtLoad(ExtType, SDLoc(N), NVT, LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), - LVT, LN0->isVolatile(), LN0->isNonTemporal(),Align); + LVT, LN0->isVolatile(), LN0->isNonTemporal(), + Align, LN0->getTBAAInfo()); Chain = Load.getValue(1); } else { - Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, + Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), Align); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->isInvariant(), Align, LN0->getTBAAInfo()); Chain = Load.getValue(1); if (NVT.bitsLT(LVT)) - Load = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Load); + Load = DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, Load); else - Load = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), NVT, Load); + Load = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, Load); } WorkListRemover DeadNodes(*this); SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) }; @@ -8816,7 +9503,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { return SDValue(); unsigned NumInScalars = N->getNumOperands(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT VT = N->getValueType(0); // Check to see if this is a BUILD_VECTOR of a bunch of values @@ -8918,7 +9605,7 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { EVT VT = N->getValueType(0); unsigned NumInScalars = N->getNumOperands(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT SrcVT = MVT::Other; unsigned Opcode = ISD::DELETED_NODE; @@ -8983,7 +9670,7 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { unsigned NumInScalars = N->getNumOperands(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT VT = N->getValueType(0); // A vector built entirely of undefs is undef. @@ -9119,8 +9806,35 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return N->getOperand(0); // Check if all of the operands are undefs. + EVT VT = N->getValueType(0); if (ISD::allOperandsUndef(N)) - return DAG.getUNDEF(N->getValueType(0)); + return DAG.getUNDEF(VT); + + // Optimize concat_vectors where one of the vectors is undef. + if (N->getNumOperands() == 2 && + N->getOperand(1)->getOpcode() == ISD::UNDEF) { + SDValue In = N->getOperand(0); + assert(In.getValueType().isVector() && "Must concat vectors"); + + // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr). + if (In->getOpcode() == ISD::BITCAST && + !In->getOperand(0)->getValueType(0).isVector()) { + SDValue Scalar = In->getOperand(0); + EVT SclTy = Scalar->getValueType(0); + + if (!SclTy.isFloatingPoint() && !SclTy.isInteger()) + return SDValue(); + + EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, + VT.getSizeInBits() / SclTy.getSizeInBits()); + if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType())) + return SDValue(); + + SDLoc dl = SDLoc(N); + SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar); + return DAG.getNode(ISD::BITCAST, dl, VT, Res); + } + } // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR // nodes often generate nop CONCAT_VECTOR nodes. @@ -9158,7 +9872,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // The extract index must be constant. if (!CS) return SDValue(); - + // Check that we are reading from the identity index. if (CS->getZExtValue() != IdentityIndex) return SDValue(); @@ -9166,7 +9880,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (SingleSource.getNode()) return SingleSource; - + return SDValue(); } @@ -9179,7 +9893,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { // (extract_subvec (concat V1, V2, ...), i) // Into: // Vi if possible - // Only operand 0 is checked as 'concat' assumes all inputs of the same type. + // Only operand 0 is checked as 'concat' assumes all inputs of the same + // type. if (V->getOperand(0).getValueType() != NVT) return SDValue(); unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); @@ -9194,7 +9909,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { V = V.getOperand(0); if (V->getOpcode() == ISD::INSERT_SUBVECTOR) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Handle only simple case where vector being inserted and vector // being extracted are of same type, and are half size of larger vectors. EVT BigVT = V->getOperand(0).getValueType(); @@ -9246,22 +9961,36 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { for (unsigned I = 0; I != NumConcats; ++I) { // Make sure we're dealing with a copy. unsigned Begin = I * NumElemsPerConcat; - if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0) - return SDValue(); + bool AllUndef = true, NoUndef = true; + for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) { + if (SVN->getMaskElt(J) >= 0) + AllUndef = false; + else + NoUndef = false; + } - for (unsigned J = 1; J != NumElemsPerConcat; ++J) { - if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J)) + if (NoUndef) { + if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0) return SDValue(); - } - unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat; - if (FirstElt < N0.getNumOperands()) - Ops.push_back(N0.getOperand(FirstElt)); - else - Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands())); + for (unsigned J = 1; J != NumElemsPerConcat; ++J) + if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J)) + return SDValue(); + + unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat; + if (FirstElt < N0.getNumOperands()) + Ops.push_back(N0.getOperand(FirstElt)); + else + Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands())); + + } else if (AllUndef) { + Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType())); + } else { // Mixed with general masks and undefs, can't do optimization. + return SDValue(); + } } - return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, Ops.data(), + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops.data(), Ops.size()); } @@ -9288,7 +10017,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (Idx >= (int)NumElts) Idx -= NumElts; NewMask.push_back(Idx); } - return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, DAG.getUNDEF(VT), + return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), &NewMask[0]); } @@ -9298,14 +10027,14 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { for (unsigned i = 0; i != NumElts; ++i) { int Idx = SVN->getMaskElt(i); if (Idx >= 0) { - if (Idx < (int)NumElts) - Idx += NumElts; - else + if (Idx >= (int)NumElts) Idx -= NumElts; + else + Idx = -1; // remove reference to lhs } NewMask.push_back(Idx); } - return DAG.getVectorShuffle(VT, N->getDebugLoc(), N1, DAG.getUNDEF(VT), + return DAG.getVectorShuffle(VT, SDLoc(N), N1, DAG.getUNDEF(VT), &NewMask[0]); } @@ -9322,7 +10051,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { NewMask.push_back(Idx); } if (Changed) - return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, N1, &NewMask[0]); + return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, &NewMask[0]); } // If it is a splat, check if the argument vector is another splat or a @@ -9419,7 +10148,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { /// vector_shuffle V, Zero, <0, 4, 2, 4> SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); if (N->getOpcode() == ISD::AND) { @@ -9450,7 +10179,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { EVT EltVT = RVT.getVectorElementType(); SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(), DAG.getConstant(0, EltVT)); - SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), RVT, &ZeroOps[0], ZeroOps.size()); LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS); SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); @@ -9506,13 +10235,13 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { // legalization, the types may not match between the two BUILD_VECTORS. // Truncate one of the operands to make them match. if (RVT.getSizeInBits() > VT.getSizeInBits()) { - RHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, RHSOp); + RHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, RHSOp); } else { - LHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), RVT, LHSOp); + LHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), RVT, LHSOp); VT = RVT; } } - SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT, + SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(LHS), VT, LHSOp, RHSOp); if (FoldOp.getOpcode() != ISD::UNDEF && FoldOp.getOpcode() != ISD::Constant && @@ -9523,7 +10252,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { } if (Ops.size() == LHS.getNumOperands()) - return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), &Ops[0], Ops.size()); } @@ -9548,7 +10277,7 @@ SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) { Op.getOpcode() != ISD::ConstantFP) break; EVT EltVT = Op.getValueType(); - SDValue FoldOp = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), EltVT, Op); + SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(N0), EltVT, Op); if (FoldOp.getOpcode() != ISD::UNDEF && FoldOp.getOpcode() != ISD::ConstantFP) break; @@ -9559,11 +10288,11 @@ SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) { if (Ops.size() != N0.getNumOperands()) return SDValue(); - return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N0.getValueType(), &Ops[0], Ops.size()); } -SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0, +SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2){ assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!"); @@ -9577,13 +10306,13 @@ SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0, // Check to see if we got a select_cc back (to turn into setcc/select). // Otherwise, just return whatever node we got back, like fabs. if (SCC.getOpcode() == ISD::SELECT_CC) { - SDValue SETCC = DAG.getNode(ISD::SETCC, N0.getDebugLoc(), + SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0), N0.getValueType(), SCC.getOperand(0), SCC.getOperand(1), SCC.getOperand(4)); AddToWorkList(SETCC.getNode()); - return DAG.getNode(ISD::SELECT, SCC.getDebugLoc(), SCC.getValueType(), - SCC.getOperand(2), SCC.getOperand(3), SETCC); + return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), + SCC.getOperand(2), SCC.getOperand(3), SETCC); } return SCC; @@ -9652,10 +10381,10 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD)) return false; - Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(), - LLD->getBasePtr().getValueType(), - TheSelect->getOperand(0), LLD->getBasePtr(), - RLD->getBasePtr()); + Addr = DAG.getSelect(SDLoc(TheSelect), + LLD->getBasePtr().getValueType(), + TheSelect->getOperand(0), LLD->getBasePtr(), + RLD->getBasePtr()); } else { // Otherwise SELECT_CC SDNode *CondLHS = TheSelect->getOperand(0).getNode(); SDNode *CondRHS = TheSelect->getOperand(1).getNode(); @@ -9666,7 +10395,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS)))) return false; - Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(), + Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect), LLD->getBasePtr().getValueType(), TheSelect->getOperand(0), TheSelect->getOperand(1), @@ -9677,17 +10406,17 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, SDValue Load; if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { Load = DAG.getLoad(TheSelect->getValueType(0), - TheSelect->getDebugLoc(), - // FIXME: Discards pointer info. + SDLoc(TheSelect), + // FIXME: Discards pointer and TBAA info. LLD->getChain(), Addr, MachinePointerInfo(), LLD->isVolatile(), LLD->isNonTemporal(), LLD->isInvariant(), LLD->getAlignment()); } else { Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType() : LLD->getExtensionType(), - TheSelect->getDebugLoc(), + SDLoc(TheSelect), TheSelect->getValueType(0), - // FIXME: Discards pointer info. + // FIXME: Discards pointer and TBAA info. LLD->getChain(), Addr, MachinePointerInfo(), LLD->getMemoryVT(), LLD->isVolatile(), LLD->isNonTemporal(), LLD->getAlignment()); @@ -9708,7 +10437,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, /// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3 /// where 'cond' is the comparison specified by CC. -SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, +SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, bool NotExtCompare) { // (x ? y : y) -> y. @@ -9720,7 +10449,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode()); // Determine if the condition we're dealing with is constant - SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()), + SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1, CC, DL, false); if (SCC.getNode()) AddToWorkList(SCC.getNode()); ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode()); @@ -9786,13 +10515,13 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue One = DAG.getIntPtrConstant(EltSize); SDValue Cond = DAG.getSetCC(DL, - TLI.getSetCCResultType(N0.getValueType()), + getSetCCResultType(N0.getValueType()), N0, N1, CC); AddToWorkList(Cond.getNode()); - SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(), - Cond, One, Zero); + SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), + Cond, One, Zero); AddToWorkList(CstOffset.getNode()); - CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx, + CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset); AddToWorkList(CPIdx.getNode()); return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, @@ -9817,7 +10546,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, ShCtV = XType.getSizeInBits()-ShCtV-1; SDValue ShCt = DAG.getConstant(ShCtV, getShiftAmountTy(N0.getValueType())); - SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), + SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), XType, N0, ShCt); AddToWorkList(Shift.getNode()); @@ -9829,7 +10558,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, return DAG.getNode(ISD::AND, DL, AType, Shift, N2); } - SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), + SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), XType, N0, DAG.getConstant(XType.getSizeInBits()-1, getShiftAmountTy(N0.getValueType()))); @@ -9862,14 +10591,14 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue ShlAmt = DAG.getConstant(AndMask.countLeadingZeros(), getShiftAmountTy(AndLHS.getValueType())); - SDValue Shl = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, AndLHS, ShlAmt); + SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt); // Now arithmetic right shift it all the way over, so the result is either // all-ones, or zero. SDValue ShrAmt = DAG.getConstant(AndMask.getBitWidth()-1, getShiftAmountTy(Shl.getValueType())); - SDValue Shr = DAG.getNode(ISD::SRA, N0.getDebugLoc(), VT, Shl, ShrAmt); + SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt); return DAG.getNode(ISD::AND, DL, VT, Shr, N3); } @@ -9889,21 +10618,21 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, // NOTE: Don't create a SETCC if it's not legal on this target. if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, - LegalTypes ? TLI.getSetCCResultType(N0.getValueType()) : MVT::i1)) { + LegalTypes ? getSetCCResultType(N0.getValueType()) : MVT::i1)) { SDValue Temp, SCC; // cast from setcc result type to select result type if (LegalTypes) { - SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()), + SCC = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC); if (N2.getValueType().bitsLT(SCC.getValueType())) - Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), + Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), N2.getValueType()); else - Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(), + Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), N2.getValueType(), SCC); } else { - SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC); - Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(), + SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC); + Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), N2.getValueType(), SCC); } @@ -9914,9 +10643,10 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, return Temp; // shl setcc result by log2 n2c - return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp, - DAG.getConstant(N2C->getAPIntValue().logBase2(), - getShiftAmountTy(Temp.getValueType()))); + return DAG.getNode( + ISD::SHL, DL, N2.getValueType(), Temp, + DAG.getConstant(N2C->getAPIntValue().logBase2(), + getShiftAmountTy(Temp.getValueType()))); } } @@ -9926,8 +10656,8 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) { EVT XType = N0.getValueType(); if (!LegalOperations || - TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) { - SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC); + TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(XType))) { + SDValue Res = DAG.getSetCC(DL, getSetCCResultType(XType), N0, N1, CC); if (Res.getValueType() != VT) Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res); return Res; @@ -9937,16 +10667,16 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, if (N1C && N1C->isNullValue() && CC == ISD::SETEQ && (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, XType))) { - SDValue Ctlz = DAG.getNode(ISD::CTLZ, N0.getDebugLoc(), XType, N0); + SDValue Ctlz = DAG.getNode(ISD::CTLZ, SDLoc(N0), XType, N0); return DAG.getNode(ISD::SRL, DL, XType, Ctlz, DAG.getConstant(Log2_32(XType.getSizeInBits()), getShiftAmountTy(Ctlz.getValueType()))); } // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1)) if (N1C && N1C->isNullValue() && CC == ISD::SETGT) { - SDValue NegN0 = DAG.getNode(ISD::SUB, N0.getDebugLoc(), + SDValue NegN0 = DAG.getNode(ISD::SUB, SDLoc(N0), XType, DAG.getConstant(0, XType), N0); - SDValue NotN0 = DAG.getNOT(N0.getDebugLoc(), N0, XType); + SDValue NotN0 = DAG.getNOT(SDLoc(N0), N0, XType); return DAG.getNode(ISD::SRL, DL, XType, DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0), DAG.getConstant(XType.getSizeInBits()-1, @@ -9954,7 +10684,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, } // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1)) if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) { - SDValue Sign = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0, + SDValue Sign = DAG.getNode(ISD::SRL, SDLoc(N0), XType, N0, DAG.getConstant(XType.getSizeInBits()-1, getShiftAmountTy(N0.getValueType()))); return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType)); @@ -9980,11 +10710,11 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, EVT XType = N0.getValueType(); if (SubC && SubC->isNullValue() && XType.isInteger()) { - SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, + SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), XType, N0, DAG.getConstant(XType.getSizeInBits()-1, getShiftAmountTy(N0.getValueType()))); - SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), + SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), XType, N0, Shift); AddToWorkList(Shift.getNode()); AddToWorkList(Add.getNode()); @@ -9998,7 +10728,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, /// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC. SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, - DebugLoc DL, bool foldBooleans) { + SDLoc DL, bool foldBooleans) { TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, Level, false, this); return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); @@ -10072,17 +10802,20 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, /// isAlias - Return true if there is any possibility that the two addresses /// overlap. -bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, +bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1, const Value *SrcValue1, int SrcValueOffset1, unsigned SrcValueAlign1, const MDNode *TBAAInfo1, - SDValue Ptr2, int64_t Size2, + SDValue Ptr2, int64_t Size2, bool IsVolatile2, const Value *SrcValue2, int SrcValueOffset2, unsigned SrcValueAlign2, const MDNode *TBAAInfo2) const { // If they are the same then they must be aliases. if (Ptr1 == Ptr2) return true; + // If they are both volatile then they cannot be reordered. + if (IsVolatile1 && IsVolatile2) return true; + // Gather base node and offset information. SDValue Base1, Base2; int64_t Offset1, Offset2; @@ -10127,7 +10860,9 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, return false; } - if (CombinerGlobalAA) { + bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 ? CombinerGlobalAA : + TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); + if (UseAA && SrcValue1 && SrcValue2) { // Use alias analysis information. int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2); int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset; @@ -10146,24 +10881,25 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) { SDValue Ptr0, Ptr1; int64_t Size0, Size1; + bool IsVolatile0, IsVolatile1; const Value *SrcValue0, *SrcValue1; int SrcValueOffset0, SrcValueOffset1; unsigned SrcValueAlign0, SrcValueAlign1; const MDNode *SrcTBAAInfo0, *SrcTBAAInfo1; - FindAliasInfo(Op0, Ptr0, Size0, SrcValue0, SrcValueOffset0, + FindAliasInfo(Op0, Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0, SrcValueAlign0, SrcTBAAInfo0); - FindAliasInfo(Op1, Ptr1, Size1, SrcValue1, SrcValueOffset1, + FindAliasInfo(Op1, Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1, SrcValueAlign1, SrcTBAAInfo1); - return isAlias(Ptr0, Size0, SrcValue0, SrcValueOffset0, + return isAlias(Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0, SrcValueAlign0, SrcTBAAInfo0, - Ptr1, Size1, SrcValue1, SrcValueOffset1, + Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1, SrcValueAlign1, SrcTBAAInfo1); } /// FindAliasInfo - Extracts the relevant alias information from the memory -/// node. Returns true if the operand was a load. +/// node. Returns true if the operand was a nonvolatile load. bool DAGCombiner::FindAliasInfo(SDNode *N, - SDValue &Ptr, int64_t &Size, + SDValue &Ptr, int64_t &Size, bool &IsVolatile, const Value *&SrcValue, int &SrcValueOffset, unsigned &SrcValueAlign, @@ -10172,29 +10908,31 @@ bool DAGCombiner::FindAliasInfo(SDNode *N, Ptr = LS->getBasePtr(); Size = LS->getMemoryVT().getSizeInBits() >> 3; + IsVolatile = LS->isVolatile(); SrcValue = LS->getSrcValue(); SrcValueOffset = LS->getSrcValueOffset(); SrcValueAlign = LS->getOriginalAlignment(); TBAAInfo = LS->getTBAAInfo(); - return isa<LoadSDNode>(LS); + return isa<LoadSDNode>(LS) && !IsVolatile; } /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, /// looking for aliasing nodes and adding them to the Aliases vector. void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, - SmallVector<SDValue, 8> &Aliases) { + SmallVectorImpl<SDValue> &Aliases) { SmallVector<SDValue, 8> Chains; // List of chains to visit. SmallPtrSet<SDNode *, 16> Visited; // Visited node set. // Get alias information for node. SDValue Ptr; int64_t Size; + bool IsVolatile; const Value *SrcValue; int SrcValueOffset; unsigned SrcValueAlign; const MDNode *SrcTBAAInfo; - bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset, - SrcValueAlign, SrcTBAAInfo); + bool IsLoad = FindAliasInfo(N, Ptr, Size, IsVolatile, SrcValue, + SrcValueOffset, SrcValueAlign, SrcTBAAInfo); // Starting off. Chains.push_back(OriginalChain); @@ -10235,20 +10973,21 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, // Get alias information for Chain. SDValue OpPtr; int64_t OpSize; + bool OpIsVolatile; const Value *OpSrcValue; int OpSrcValueOffset; unsigned OpSrcValueAlign; const MDNode *OpSrcTBAAInfo; bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize, - OpSrcValue, OpSrcValueOffset, + OpIsVolatile, OpSrcValue, OpSrcValueOffset, OpSrcValueAlign, OpSrcTBAAInfo); // If chain is alias then stop here. if (!(IsLoad && IsOpLoad) && - isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign, - SrcTBAAInfo, - OpPtr, OpSize, OpSrcValue, OpSrcValueOffset, + isAlias(Ptr, Size, IsVolatile, SrcValue, SrcValueOffset, + SrcValueAlign, SrcTBAAInfo, + OpPtr, OpSize, OpIsVolatile, OpSrcValue, OpSrcValueOffset, OpSrcValueAlign, OpSrcTBAAInfo)) { Aliases.push_back(Chain); } else { @@ -10298,7 +11037,7 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { return Aliases[0]; // Construct a custom tailored token factor. - return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, + return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, &Aliases[0], Aliases.size()); } |