diff options
Diffstat (limited to 'contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp')
-rw-r--r-- | contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2333 |
1 files changed, 1441 insertions, 892 deletions
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 5ecc6da..2c7bffe 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -16,14 +16,15 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -181,7 +182,7 @@ namespace { /// if things it uses can be simplified by bit propagation. /// If so, return true. bool SimplifyDemandedBits(SDValue Op) { - unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); + unsigned BitWidth = Op.getScalarValueSizeInBits(); APInt Demanded = APInt::getAllOnesValue(BitWidth); return SimplifyDemandedBits(Op, Demanded); } @@ -326,7 +327,7 @@ namespace { SDValue visitFADDForFMACombine(SDNode *N); SDValue visitFSUBForFMACombine(SDNode *N); - SDValue visitFMULForFMACombine(SDNode *N); + SDValue visitFMULForFMADistributiveCombine(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS, @@ -334,12 +335,15 @@ namespace { SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt); + SDValue foldSelectOfConstants(SDNode *N); bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2); SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, bool NotExtCompare = false); + SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1, + SDValue N2, SDValue N3, ISD::CondCode CC); SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &DL, bool foldBooleans = true); @@ -356,6 +360,7 @@ namespace { SDValue BuildSDIV(SDNode *N); SDValue BuildSDIVPow2(SDNode *N); SDValue BuildUDIV(SDNode *N); + SDValue BuildLogBase2(SDValue Op, const SDLoc &DL); SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags); SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags); SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags); @@ -374,9 +379,14 @@ namespace { SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); SDValue ReduceLoadWidth(SDNode *N); SDValue ReduceLoadOpStoreWidth(SDNode *N); + SDValue splitMergedValStore(StoreSDNode *ST); SDValue TransformFPLoadStorePair(SDNode *N); SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N); + SDValue reduceBuildVecToShuffle(SDNode *N); + SDValue createBuildVecShuffle(SDLoc DL, SDNode *N, ArrayRef<int> VectorMask, + SDValue VecIn1, SDValue VecIn2, + unsigned LeftIdx); SDValue GetDemandedBits(SDValue V, const APInt &Mask); @@ -444,10 +454,11 @@ namespace { /// This is a helper function for MergeConsecutiveStores. When the source /// elements of the consecutive stores are all constants or all extracted /// vector elements, try to merge them into one larger store. - /// \return True if a merged store was created. - bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes, - EVT MemVT, unsigned NumStores, - bool IsConstantSrc, bool UseVector); + /// \return number of stores that were merged into a merged store (always + /// a prefix of \p StoreNode). + bool MergeStoresOfConstantsOrVecElts( + SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores, + bool IsConstantSrc, bool UseVector); /// This is a helper function for MergeConsecutiveStores. /// Stores that may be merged are placed in StoreNodes. @@ -464,8 +475,10 @@ namespace { /// Merge consecutive store operations into a wide store. /// This optimization uses wide integers or vectors when possible. - /// \return True if some memory operations were changed. - bool MergeConsecutiveStores(StoreSDNode *N); + /// \return number of stores that were merged into a merged store (the + /// affected nodes are stored as a prefix in \p StoreNodes). + bool MergeConsecutiveStores(StoreSDNode *N, + SmallVectorImpl<MemOpLink> &StoreNodes); /// \brief Try to transform a truncation where C is a constant: /// (trunc (and X, C)) -> (and (trunc X), (trunc C)) @@ -536,10 +549,6 @@ void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { ((DAGCombiner*)DC)->AddToWorklist(N); } -void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) { - ((DAGCombiner*)DC)->removeFromWorklist(N); -} - SDValue TargetLowering::DAGCombinerInfo:: CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) { return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); @@ -620,7 +629,8 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, Depth + 1); case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros. - if (!Options->UnsafeFPMath) return 0; + if (!Options->UnsafeFPMath && !Op.getNode()->getFlags()->hasNoSignedZeros()) + return 0; // fold (fneg (fsub A, B)) -> (fsub B, A) return 1; @@ -683,9 +693,6 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, LegalOperations, Depth+1), Op.getOperand(0), Flags); case ISD::FSUB: - // We can't turn -(A-B) into B-A when we honor signed zeros. - assert(Options.UnsafeFPMath); - // fold (fneg (fsub 0, B)) -> B if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) if (N0CFP->isZero()) @@ -726,6 +733,15 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, } } +// APInts must be the same size for most operations, this helper +// function zero extends the shorter of the pair so that they match. +// We provide an Offset so that we can create bitwidths that won't overflow. +static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) { + unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth()); + LHS = LHS.zextOrSelf(Bits); + RHS = RHS.zextOrSelf(Bits); +} + // Return true if this node is a setcc, or is a select_cc // that selects between the target values used for true and false, making it // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to @@ -775,42 +791,61 @@ static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { return nullptr; } -// \brief Returns the SDNode if it is a constant splat BuildVector or constant -// int. -static ConstantSDNode *isConstOrConstSplat(SDValue N) { - if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) - return CN; - - if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { - BitVector UndefElements; - ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements); - - // BuildVectors can truncate their operands. Ignore that case here. - // FIXME: We blindly ignore splats which include undef which is overly - // pessimistic. - if (CN && UndefElements.none() && - CN->getValueType(0) == N.getValueType().getScalarType()) - return CN; +// Determines if it is a constant integer or a build vector of constant +// integers (and undefs). +// Do not permit build vector implicit truncation. +static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) { + if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N)) + return !(Const->isOpaque() && NoOpaques); + if (N.getOpcode() != ISD::BUILD_VECTOR) + return false; + unsigned BitWidth = N.getScalarValueSizeInBits(); + for (const SDValue &Op : N->op_values()) { + if (Op.isUndef()) + continue; + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op); + if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth || + (Const->isOpaque() && NoOpaques)) + return false; } - - return nullptr; + return true; } -// \brief Returns the SDNode if it is a constant splat BuildVector or constant -// float. -static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) { - if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) - return CN; +// Determines if it is a constant null integer or a splatted vector of a +// constant null integer (with no undefs). +// Build vector implicit truncation is not an issue for null values. +static bool isNullConstantOrNullSplatConstant(SDValue N) { + if (ConstantSDNode *Splat = isConstOrConstSplat(N)) + return Splat->isNullValue(); + return false; +} - if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { - BitVector UndefElements; - ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements); +// Determines if it is a constant integer of one or a splatted vector of a +// constant integer of one (with no undefs). +// Do not permit build vector implicit truncation. +static bool isOneConstantOrOneSplatConstant(SDValue N) { + unsigned BitWidth = N.getScalarValueSizeInBits(); + if (ConstantSDNode *Splat = isConstOrConstSplat(N)) + return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth; + return false; +} - if (CN && UndefElements.none()) - return CN; - } +// Determines if it is a constant integer of all ones or a splatted vector of a +// constant integer of all ones (with no undefs). +// Do not permit build vector implicit truncation. +static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) { + unsigned BitWidth = N.getScalarValueSizeInBits(); + if (ConstantSDNode *Splat = isConstOrConstSplat(N)) + return Splat->isAllOnesValue() && + Splat->getAPIntValue().getBitWidth() == BitWidth; + return false; +} - return nullptr; +// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with +// undef's. +static bool isAnyConstantBuildVector(const SDNode *N) { + return ISD::isBuildVectorOfConstantSDNodes(N) || + ISD::isBuildVectorOfConstantFPSDNodes(N); } SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, @@ -935,9 +970,9 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { } void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { - SDLoc dl(Load); + SDLoc DL(Load); EVT VT = Load->getValueType(0); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0)); DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); @@ -953,7 +988,7 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { Replace = false; - SDLoc dl(Op); + SDLoc DL(Op); if (ISD::isUNINDEXEDLoad(Op.getNode())) { LoadSDNode *LD = cast<LoadSDNode>(Op); EVT MemVT = LD->getMemoryVT(); @@ -962,7 +997,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { : ISD::EXTLOAD) : LD->getExtensionType(); Replace = true; - return DAG.getExtLoad(ExtType, dl, PVT, + return DAG.getExtLoad(ExtType, DL, PVT, LD->getChain(), LD->getBasePtr(), MemVT, LD->getMemOperand()); } @@ -971,30 +1006,30 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { switch (Opc) { default: break; case ISD::AssertSext: - return DAG.getNode(ISD::AssertSext, dl, PVT, + return DAG.getNode(ISD::AssertSext, DL, PVT, SExtPromoteOperand(Op.getOperand(0), PVT), Op.getOperand(1)); case ISD::AssertZext: - return DAG.getNode(ISD::AssertZext, dl, PVT, + return DAG.getNode(ISD::AssertZext, DL, PVT, ZExtPromoteOperand(Op.getOperand(0), PVT), Op.getOperand(1)); case ISD::Constant: { unsigned ExtOpc = Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; - return DAG.getNode(ExtOpc, dl, PVT, Op); + return DAG.getNode(ExtOpc, DL, PVT, Op); } } if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT)) return SDValue(); - return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op); + return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op); } SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT)) return SDValue(); EVT OldVT = Op.getValueType(); - SDLoc dl(Op); + SDLoc DL(Op); bool Replace = false; SDValue NewOp = PromoteOperand(Op, PVT, Replace); if (!NewOp.getNode()) @@ -1003,13 +1038,13 @@ SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { if (Replace) ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); - return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp, + return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp, DAG.getValueType(OldVT)); } SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { EVT OldVT = Op.getValueType(); - SDLoc dl(Op); + SDLoc DL(Op); bool Replace = false; SDValue NewOp = PromoteOperand(Op, PVT, Replace); if (!NewOp.getNode()) @@ -1018,7 +1053,7 @@ SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { if (Replace) ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); - return DAG.getZeroExtendInReg(NewOp, dl, OldVT); + return DAG.getZeroExtendInReg(NewOp, DL, OldVT); } /// Promote the specified integer binary operation if the target indicates it is @@ -1072,9 +1107,9 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); - SDLoc dl(Op); - return DAG.getNode(ISD::TRUNCATE, dl, VT, - DAG.getNode(Opc, dl, PVT, NN0, NN1)); + SDLoc DL(Op); + return DAG.getNode(ISD::TRUNCATE, DL, VT, + DAG.getNode(Opc, DL, PVT, NN0, NN1)); } return SDValue(); } @@ -1119,9 +1154,9 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); - SDLoc dl(Op); - return DAG.getNode(ISD::TRUNCATE, dl, VT, - DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1))); + SDLoc DL(Op); + return DAG.getNode(ISD::TRUNCATE, DL, VT, + DAG.getNode(Opc, DL, PVT, N0, Op.getOperand(1))); } return SDValue(); } @@ -1178,7 +1213,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { if (TLI.IsDesirableToPromoteOp(Op, PVT)) { assert(PVT != VT && "Don't know what type to promote to!"); - SDLoc dl(Op); + SDLoc DL(Op); SDNode *N = Op.getNode(); LoadSDNode *LD = cast<LoadSDNode>(N); EVT MemVT = LD->getMemoryVT(); @@ -1186,10 +1221,10 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) : LD->getExtensionType(); - SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, + SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT, LD->getChain(), LD->getBasePtr(), MemVT, LD->getMemOperand()); - SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD); + SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD); DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); @@ -1315,7 +1350,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { continue; assert(N->getOpcode() != ISD::DELETED_NODE && - RV.getNode()->getOpcode() != ISD::DELETED_NODE && + RV.getOpcode() != ISD::DELETED_NODE && "Node was deleted but visit returned new node!"); DEBUG(dbgs() << " ... into: "; @@ -1562,8 +1597,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { break; case ISD::TokenFactor: - if (Op.hasOneUse() && - std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) { + if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) { // Queue up for processing. TFs.push_back(Op.getNode()); // Clean up in case the token factor is removed. @@ -1571,7 +1605,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { Changed = true; break; } - // Fall thru + LLVM_FALLTHROUGH; default: // Only add if it isn't already in the list. @@ -1634,6 +1668,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); + SDLoc DL(N); // fold vector ops if (VT.isVector()) { @@ -1650,61 +1685,73 @@ SDValue DAGCombiner::visitADD(SDNode *N) { // fold (add x, undef) -> undef if (N0.isUndef()) return N0; + if (N1.isUndef()) return N1; + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) { // canonicalize constant to RHS if (!DAG.isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0); + return DAG.getNode(ISD::ADD, DL, VT, N1, N0); // fold (add c1, c2) -> c1+c2 - return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, - N0.getNode(), N1.getNode()); + return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(), + N1.getNode()); } + // fold (add x, 0) -> x if (isNullConstant(N1)) return N0; + // fold ((c1-A)+c2) -> (c1+c2)-A - if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1)) { + if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) { if (N0.getOpcode() == ISD::SUB) - if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) { - SDLoc DL(N); + if (isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) { return DAG.getNode(ISD::SUB, DL, VT, - DAG.getConstant(N1C->getAPIntValue()+ - N0C->getAPIntValue(), DL, VT), + DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)), N0.getOperand(1)); } } + // reassociate add - if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1)) + if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1)) return RADD; + // fold ((0-A) + B) -> B-A - if (N0.getOpcode() == ISD::SUB && isNullConstant(N0.getOperand(0))) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1)); + if (N0.getOpcode() == ISD::SUB && + isNullConstantOrNullSplatConstant(N0.getOperand(0))) + return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1)); + // fold (A + (0-B)) -> A-B - if (N1.getOpcode() == ISD::SUB && isNullConstant(N1.getOperand(0))) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1)); + if (N1.getOpcode() == ISD::SUB && + isNullConstantOrNullSplatConstant(N1.getOperand(0))) + return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1)); + // fold (A+(B-A)) -> B if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1)) return N1.getOperand(0); + // fold ((B-A)+A) -> B if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1)) return N0.getOperand(0); + // fold (A+(B-(A+C))) to (B-C) if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && N0 == N1.getOperand(1).getOperand(0)) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0), + return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0), N1.getOperand(1).getOperand(1)); + // fold (A+(B-(C+A))) to (B-C) if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && N0 == N1.getOperand(1).getOperand(1)) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0), + return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0), N1.getOperand(1).getOperand(0)); + // fold (A+((B-A)+or-C)) to (B+or-C) if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) && N1.getOperand(0).getOpcode() == ISD::SUB && N0 == N1.getOperand(0).getOperand(1)) - return DAG.getNode(N1.getOpcode(), SDLoc(N), VT, - N1.getOperand(0).getOperand(0), N1.getOperand(1)); + return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0), + N1.getOperand(1)); // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) { @@ -1713,52 +1760,50 @@ SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N10 = N1.getOperand(0); SDValue N11 = N1.getOperand(1); - if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10)) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, + if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10)) + return DAG.getNode(ISD::SUB, DL, VT, DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10), DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11)); } - if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) + if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); // fold (a+b) -> (a|b) iff a and b share no bits. if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) && - VT.isInteger() && !VT.isVector() && DAG.haveNoCommonBitsSet(N0, N1)) - return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1); + VT.isInteger() && DAG.haveNoCommonBitsSet(N0, N1)) + return DAG.getNode(ISD::OR, DL, VT, N0, N1); // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB && - isNullConstant(N1.getOperand(0).getOperand(0))) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, - DAG.getNode(ISD::SHL, SDLoc(N), VT, + isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0))) + return DAG.getNode(ISD::SUB, DL, VT, N0, + DAG.getNode(ISD::SHL, DL, VT, N1.getOperand(0).getOperand(1), N1.getOperand(1))); if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB && - isNullConstant(N0.getOperand(0).getOperand(0))) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, - DAG.getNode(ISD::SHL, SDLoc(N), VT, + isNullConstantOrNullSplatConstant(N0.getOperand(0).getOperand(0))) + return DAG.getNode(ISD::SUB, DL, VT, N1, + DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0).getOperand(1), N0.getOperand(1))); if (N1.getOpcode() == ISD::AND) { SDValue AndOp0 = N1.getOperand(0); unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0); - unsigned DestBits = VT.getScalarType().getSizeInBits(); + unsigned DestBits = VT.getScalarSizeInBits(); // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) // and similar xforms where the inner op is either ~0 or 0. - if (NumSignBits == DestBits && isOneConstant(N1->getOperand(1))) { - SDLoc DL(N); + if (NumSignBits == DestBits && + isOneConstantOrOneSplatConstant(N1->getOperand(1))) return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0); - } } // add (sext i1), X -> sub X, (zext i1) if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.getOperand(0).getValueType() == MVT::i1 && !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) { - SDLoc DL(N); SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); } @@ -1767,7 +1812,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) { VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1)); if (TN->getVT() == MVT::i1) { - SDLoc DL(N); SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), DAG.getConstant(1, DL, VT)); return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt); @@ -1853,6 +1897,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); + SDLoc DL(N); // fold vector ops if (VT.isVector()) { @@ -1867,62 +1912,97 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // fold (sub x, x) -> 0 // FIXME: Refactor this and xor and other similar operations together. if (N0 == N1) - return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); + return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes); if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && DAG.isConstantIntBuildVectorOrConstantInt(N1)) { // fold (sub c1, c2) -> c1-c2 - return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, - N0.getNode(), N1.getNode()); + return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(), + N1.getNode()); } - ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); + // fold (sub x, c) -> (add x, -c) if (N1C) { - SDLoc DL(N); return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getConstant(-N1C->getAPIntValue(), DL, VT)); } + + if (isNullConstantOrNullSplatConstant(N0)) { + unsigned BitWidth = VT.getScalarSizeInBits(); + // Right-shifting everything out but the sign bit followed by negation is + // the same as flipping arithmetic/logical shift type without the negation: + // -(X >>u 31) -> (X >>s 31) + // -(X >>s 31) -> (X >>u 31) + if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) { + ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1)); + if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) { + auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA; + if (!LegalOperations || TLI.isOperationLegal(NewSh, VT)) + return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1)); + } + } + + // 0 - X --> 0 if the sub is NUW. + if (N->getFlags()->hasNoUnsignedWrap()) + return N0; + + if (DAG.MaskedValueIsZero(N1, ~APInt::getSignBit(BitWidth))) { + // N1 is either 0 or the minimum signed value. If the sub is NSW, then + // N1 must be 0 because negating the minimum signed value is undefined. + if (N->getFlags()->hasNoSignedWrap()) + return N0; + + // 0 - X --> X if X is 0 or the minimum signed value. + return N1; + } + } + // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) - if (isAllOnesConstant(N0)) - return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); + if (isAllOnesConstantOrAllOnesSplatConstant(N0)) + return DAG.getNode(ISD::XOR, DL, VT, N1, N0); + // fold A-(A-B) -> B if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0)) return N1.getOperand(1); + // fold (A+B)-A -> B if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1) return N0.getOperand(1); + // fold (A+B)-B -> A if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) return N0.getOperand(0); + // fold C2-(A+C1) -> (C2-C1)-A - ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr : - dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode()); - if (N1.getOpcode() == ISD::ADD && N0C && N1C1) { - SDLoc DL(N); - SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(), - DL, VT); - return DAG.getNode(ISD::SUB, DL, VT, NewC, - N1.getOperand(0)); + if (N1.getOpcode() == ISD::ADD) { + SDValue N11 = N1.getOperand(1); + if (isConstantOrConstantVector(N0, /* NoOpaques */ true) && + isConstantOrConstantVector(N11, /* NoOpaques */ true)) { + SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11); + return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0)); + } } + // fold ((A+(B+or-C))-B) -> A+or-C if (N0.getOpcode() == ISD::ADD && (N0.getOperand(1).getOpcode() == ISD::SUB || N0.getOperand(1).getOpcode() == ISD::ADD) && N0.getOperand(1).getOperand(0) == N1) - return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1).getOperand(1)); + return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0), + N0.getOperand(1).getOperand(1)); + // fold ((A+(C+B))-B) -> A+C - if (N0.getOpcode() == ISD::ADD && - N0.getOperand(1).getOpcode() == ISD::ADD && + if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD && N0.getOperand(1).getOperand(1) == N1) - return DAG.getNode(ISD::ADD, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1).getOperand(0)); + return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), + N0.getOperand(1).getOperand(0)); + // fold ((A-(B-C))-C) -> A-B - if (N0.getOpcode() == ISD::SUB && - N0.getOperand(1).getOpcode() == ISD::SUB && + if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB && N0.getOperand(1).getOperand(1) == N1) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1).getOperand(0)); + return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), + N0.getOperand(1).getOperand(0)); // If either operand of a sub is undef, the result is undef if (N0.isUndef()) @@ -1937,19 +2017,18 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N1C && GA->getOpcode() == ISD::GlobalAddress) return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT, GA->getOffset() - - (uint64_t)N1C->getSExtValue()); + (uint64_t)N1C->getSExtValue()); // fold (sub Sym+c1, Sym+c2) -> c1-c2 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1)) if (GA->getGlobal() == GB->getGlobal()) return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(), - SDLoc(N), VT); + DL, VT); } // sub X, (sextinreg Y i1) -> add X, (and Y 1) if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) { VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1)); if (TN->getVT() == MVT::i1) { - SDLoc DL(N); SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), DAG.getConstant(1, DL, VT)); return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt); @@ -2048,7 +2127,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // We require a splat of the entire scalar bit width for non-contiguous // bit patterns. bool IsFullSplat = - ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits(); + ConstValue1.getBitWidth() == VT.getScalarSizeInBits(); // fold (mul x, 1) -> x if (N1IsConst && ConstValue1 == 1 && IsFullSplat) return N0; @@ -2080,28 +2159,27 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { getShiftAmountTy(N0.getValueType())))); } - APInt Val; // (mul (shl X, c1), c2) -> (mul X, c2 << c1) - if (N1IsConst && N0.getOpcode() == ISD::SHL && - (ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val) || - isa<ConstantSDNode>(N0.getOperand(1)))) { + if (N0.getOpcode() == ISD::SHL && + isConstantOrConstantVector(N1, /* NoOpaques */ true) && + isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) { SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1)); - AddToWorklist(C3.getNode()); - return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3); + if (isConstantOrConstantVector(C3)) + return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3); } // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one // use. { SDValue Sh(nullptr, 0), Y(nullptr, 0); + // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). if (N0.getOpcode() == ISD::SHL && - (ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val) || - isa<ConstantSDNode>(N0.getOperand(1))) && + isConstantOrConstantVector(N0.getOperand(1)) && N0.getNode()->hasOneUse()) { Sh = N0; Y = N1; } else if (N1.getOpcode() == ISD::SHL && - isa<ConstantSDNode>(N1.getOperand(1)) && + isConstantOrConstantVector(N1.getOperand(1)) && N1.getNode()->hasOneUse()) { Sh = N1; Y = N0; } @@ -2188,8 +2266,8 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) { SDValue Op1 = Node->getOperand(1); SDValue combined; for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), - UE = Op0.getNode()->use_end(); UI != UE; ++UI) { - SDNode *User = *UI; + UE = Op0.getNode()->use_end(); UI != UE;) { + SDNode *User = *UI++; if (User == Node || User->use_empty()) continue; // Convert the other matching node(s), too; @@ -2246,10 +2324,8 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // If we know the sign bits of both operands are zero, strength reduce to a // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 - if (!VT.isVector()) { - if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1); - } + if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1); // fold (sdiv X, pow2) -> simple ops after legalize // FIXME: We check for the exact bit here because the generic lowering gives @@ -2302,8 +2378,8 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return Op; // sdiv, srem -> sdivrem - // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true. - // Otherwise, we break the simplification logic in visitREM(). + // If the divisor is constant, then return DIVREM only if isIntDivCheap() is + // true. Otherwise, we break the simplification logic in visitREM(). if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) if (SDValue DivRem = useDivRem(N)) return DivRem; @@ -2337,25 +2413,33 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, N0C, N1C)) return Folded; + // fold (udiv x, (1 << c)) -> x >>u c - if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2()) - return DAG.getNode(ISD::SRL, DL, VT, N0, - DAG.getConstant(N1C->getAPIntValue().logBase2(), DL, - getShiftAmountTy(N0.getValueType()))); + if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && + DAG.isKnownToBeAPowerOfTwo(N1)) { + SDValue LogBase2 = BuildLogBase2(N1, DL); + AddToWorklist(LogBase2.getNode()); + + EVT ShiftVT = getShiftAmountTy(N0.getValueType()); + SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT); + AddToWorklist(Trunc.getNode()); + return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc); + } // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 if (N1.getOpcode() == ISD::SHL) { - if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { - if (SHC->getAPIntValue().isPowerOf2()) { - EVT ADDVT = N1.getOperand(1).getValueType(); - SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, - N1.getOperand(1), - DAG.getConstant(SHC->getAPIntValue() - .logBase2(), - DL, ADDVT)); - AddToWorklist(Add.getNode()); - return DAG.getNode(ISD::SRL, DL, VT, N0, Add); - } + SDValue N10 = N1.getOperand(0); + if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) && + DAG.isKnownToBeAPowerOfTwo(N10)) { + SDValue LogBase2 = BuildLogBase2(N10, DL); + AddToWorklist(LogBase2.getNode()); + + EVT ADDVT = N1.getOperand(1).getValueType(); + SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT); + AddToWorklist(Trunc.getNode()); + SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc); + AddToWorklist(Add.getNode()); + return DAG.getNode(ISD::SRL, DL, VT, N0, Add); } } @@ -2366,8 +2450,8 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { return Op; // sdiv, srem -> sdivrem - // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true. - // Otherwise, we break the simplification logic in visitREM(). + // If the divisor is constant, then return DIVREM only if isIntDivCheap() is + // true. Otherwise, we break the simplification logic in visitREM(). if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) if (SDValue DivRem = useDivRem(N)) return DivRem; @@ -2401,27 +2485,25 @@ SDValue DAGCombiner::visitREM(SDNode *N) { if (isSigned) { // If we know the sign bits of both operands are zero, strength reduce to a // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 - if (!VT.isVector()) { - if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::UREM, DL, VT, N0, N1); - } + if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::UREM, DL, VT, N0, N1); } else { // fold (urem x, pow2) -> (and x, pow2-1) - if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && - N1C->getAPIntValue().isPowerOf2()) { - return DAG.getNode(ISD::AND, DL, VT, N0, - DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT)); + if (DAG.isKnownToBeAPowerOfTwo(N1)) { + APInt NegOne = APInt::getAllOnesValue(VT.getScalarSizeInBits()); + SDValue Add = + DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT)); + AddToWorklist(Add.getNode()); + return DAG.getNode(ISD::AND, DL, VT, N0, Add); } // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) - if (N1.getOpcode() == ISD::SHL) { - ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0)); - if (SHC && SHC->getAPIntValue().isPowerOf2()) { - APInt NegOne = APInt::getAllOnesValue(VT.getSizeInBits()); - SDValue Add = - DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT)); - AddToWorklist(Add.getNode()); - return DAG.getNode(ISD::AND, DL, VT, N0, Add); - } + if (N1.getOpcode() == ISD::SHL && + DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) { + APInt NegOne = APInt::getAllOnesValue(VT.getScalarSizeInBits()); + SDValue Add = + DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT)); + AddToWorklist(Add.getNode()); + return DAG.getNode(ISD::AND, DL, VT, N0, Add); } } @@ -2477,8 +2559,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { if (isOneConstant(N1)) { SDLoc DL(N); return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0, - DAG.getConstant(N0.getValueType().getSizeInBits() - 1, - DL, + DAG.getConstant(N0.getValueSizeInBits() - 1, DL, getShiftAmountTy(N0.getValueType()))); } // fold (mulhs x, undef) -> 0 @@ -2706,7 +2787,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); // Bail early if none of these transforms apply. - if (N0.getNode()->getNumOperands() == 0) return SDValue(); + if (N0.getNumOperands() == 0) return SDValue(); // For each of OP in AND/OR/XOR: // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) @@ -2872,25 +2953,34 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, LL.getValueType().isInteger()) { // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) if (isNullConstant(LR) && Op1 == ISD::SETEQ) { - SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), - LR.getValueType(), LL, RL); - AddToWorklist(ORNode.getNode()); - return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + EVT CCVT = getSetCCResultType(LR.getValueType()); + if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } } if (isAllOnesConstant(LR)) { // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) if (Op1 == ISD::SETEQ) { - SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), - LR.getValueType(), LL, RL); - AddToWorklist(ANDNode.getNode()); - return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); + EVT CCVT = getSetCCResultType(LR.getValueType()); + if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) { + SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ANDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); + } } // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) if (Op1 == ISD::SETGT) { - SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), - LR.getValueType(), LL, RL); - AddToWorklist(ORNode.getNode()); - return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + EVT CCVT = getSetCCResultType(LR.getValueType()); + if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } } } } @@ -2899,14 +2989,17 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, Op0 == Op1 && LL.getValueType().isInteger() && Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) || (isAllOnesConstant(LR) && isNullConstant(RR)))) { - SDLoc DL(N0); - SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(), - LL, DAG.getConstant(1, DL, - LL.getValueType())); - AddToWorklist(ADDNode.getNode()); - return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode, - DAG.getConstant(2, DL, LL.getValueType()), - ISD::SETUGE); + EVT CCVT = getSetCCResultType(LL.getValueType()); + if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) { + SDLoc DL(N0); + SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(), + LL, DAG.getConstant(1, DL, + LL.getValueType())); + AddToWorklist(ADDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode, + DAG.getConstant(2, DL, LL.getValueType()), + ISD::SETUGE); + } } // canonicalize equivalent to ll == rl if (LL == RR && LR == RL) { @@ -2967,6 +3060,11 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, unsigned Size = VT.getSizeInBits(); const APInt &AndMask = CAnd->getAPIntValue(); unsigned ShiftBits = CShift->getZExtValue(); + + // Bail out, this node will probably disappear anyway. + if (ShiftBits == 0) + return SDValue(); + unsigned MaskBits = AndMask.countTrailingOnes(); EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2); @@ -2985,7 +3083,7 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, // extended to handle extensions mixed in. SDValue SL(N0); - assert(ShiftBits != 0 && MaskBits <= Size); + assert(MaskBits <= Size); // Extracting the highest bit of the low half. EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout()); @@ -3050,6 +3148,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N1 = N->getOperand(1); EVT VT = N1.getValueType(); + // x & x --> x + if (N0 == N1) + return N0; + // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N)) @@ -3058,16 +3160,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (and x, 0) -> 0, vector edition if (ISD::isBuildVectorAllZeros(N0.getNode())) // do not return N0, because undef node may exist in N0 - return DAG.getConstant( - APInt::getNullValue( - N0.getValueType().getScalarType().getSizeInBits()), - SDLoc(N), N0.getValueType()); + return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()), + SDLoc(N), N0.getValueType()); if (ISD::isBuildVectorAllZeros(N1.getNode())) // do not return N1, because undef node may exist in N1 - return DAG.getConstant( - APInt::getNullValue( - N1.getValueType().getScalarType().getSizeInBits()), - SDLoc(N), N1.getValueType()); + return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()), + SDLoc(N), N1.getValueType()); // fold (and x, -1) -> x, vector edition if (ISD::isBuildVectorAllOnes(N0.getNode())) @@ -3078,7 +3176,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (and c1, c2) -> c1&c2 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS @@ -3089,7 +3187,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (isAllOnesConstant(N1)) return N0; // if (and x, c) is known to be zero, return 0 - unsigned BitWidth = VT.getScalarType().getSizeInBits(); + unsigned BitWidth = VT.getScalarSizeInBits(); if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnesValue(BitWidth))) return DAG.getConstant(0, SDLoc(N), VT); @@ -3098,14 +3196,14 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return RAND; // fold (and (or x, C), D) -> D if (C & D) == D if (N1C && N0.getOpcode() == ISD::OR) - if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) + if (ConstantSDNode *ORI = isConstOrConstSplat(N0.getOperand(1))) if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) return N1; // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N0Op0 = N0.getOperand(0); APInt Mask = ~N1C->getAPIntValue(); - Mask = Mask.trunc(N0Op0.getValueSizeInBits()); + Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits()); if (DAG.MaskedValueIsZero(N0Op0, Mask)) { SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N0.getValueType(), N0Op0); @@ -3156,7 +3254,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // that will apply equally to all members of the vector, so AND all the // lanes of the constant together. EVT VT = Vector->getValueType(0); - unsigned BitWidth = VT.getVectorElementType().getSizeInBits(); + unsigned BitWidth = VT.getScalarSizeInBits(); // If the splat value has been compressed to a bitlength lower // than the size of the vector lane, we need to re-expand it to @@ -3187,8 +3285,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // Resize the constant to the same size as the original memory access before // extension. If it is still the AllOnesValue then this AND is completely // unneeded. - Constant = - Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits()); + Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits()); bool B; switch (Load->getExtensionType()) { @@ -3230,9 +3327,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (and (load x), 255) -> (zextload x, i8) // fold (and (extload x, i16), 255) -> (zextload x, i8) // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8) - if (N1C && (N0.getOpcode() == ISD::LOAD || - (N0.getOpcode() == ISD::ANY_EXTEND && - N0.getOperand(0).getOpcode() == ISD::LOAD))) { + if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD || + (N0.getOpcode() == ISD::ANY_EXTEND && + N0.getOperand(0).getOpcode() == ISD::LOAD))) { bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND; LoadSDNode *LN0 = HasAnyExt ? cast<LoadSDNode>(N0.getOperand(0)) @@ -3293,10 +3390,29 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) return Tmp; + // Masking the negated extension of a boolean is just the zero-extended + // boolean: + // and (sub 0, zext(bool X)), 1 --> zext(bool X) + // and (sub 0, sext(bool X)), 1 --> zext(bool X) + // + // Note: the SimplifyDemandedBits fold below can make an information-losing + // transform, and then we have no way to find this better fold. + if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) { + ConstantSDNode *SubLHS = isConstOrConstSplat(N0.getOperand(0)); + SDValue SubRHS = N0.getOperand(1); + if (SubLHS && SubLHS->isNullValue()) { + if (SubRHS.getOpcode() == ISD::ZERO_EXTEND && + SubRHS.getOperand(0).getScalarValueSizeInBits() == 1) + return SubRHS; + if (SubRHS.getOpcode() == ISD::SIGN_EXTEND && + SubRHS.getOperand(0).getScalarValueSizeInBits() == 1) + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0)); + } + } + // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) // fold (and (sra)) -> (and (srl)) when possible. - if (!VT.isVector() && - SimplifyDemandedBits(SDValue(N, 0))) + if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); // fold (zext_inreg (extload x)) -> (zextload x) @@ -3305,9 +3421,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); // If we zero all the possible extended bits, then we can turn this into // a zextload if we are running before legalize or the operation is legal. - unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); + unsigned BitWidth = N1.getScalarValueSizeInBits(); if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - MemVT.getScalarType().getSizeInBits())) && + BitWidth - MemVT.getScalarSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, @@ -3325,9 +3441,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); // If we zero all the possible extended bits, then we can turn this into // a zextload if we are running before legalize or the operation is legal. - unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); + unsigned BitWidth = N1.getScalarValueSizeInBits(); if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - MemVT.getScalarType().getSizeInBits())) && + BitWidth - MemVT.getScalarSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, @@ -3391,8 +3507,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, std::swap(N0, N1); if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) return SDValue(); - if (!N0.getNode()->hasOneUse() || - !N1.getNode()->hasOneUse()) + if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse()) return SDValue(); ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); @@ -3627,18 +3742,24 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) { // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { - SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), - LR.getValueType(), LL, RL); - AddToWorklist(ORNode.getNode()); - return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + EVT CCVT = getSetCCResultType(LR.getValueType()); + if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } } // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { - SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), - LR.getValueType(), LL, RL); - AddToWorklist(ANDNode.getNode()); - return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); + EVT CCVT = getSetCCResultType(LR.getValueType()); + if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) { + SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), + LR.getValueType(), LL, RL); + AddToWorklist(ANDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); + } } } // canonicalize equivalent to ll == rl @@ -3708,6 +3829,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) { SDValue N1 = N->getOperand(1); EVT VT = N1.getValueType(); + // x | x --> x + if (N0 == N1) + return N0; + // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N)) @@ -3723,15 +3848,13 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (ISD::isBuildVectorAllOnes(N0.getNode())) // do not return N0, because undef node may exist in N0 return DAG.getConstant( - APInt::getAllOnesValue( - N0.getValueType().getScalarType().getSizeInBits()), - SDLoc(N), N0.getValueType()); + APInt::getAllOnesValue(N0.getScalarValueSizeInBits()), SDLoc(N), + N0.getValueType()); if (ISD::isBuildVectorAllOnes(N1.getNode())) // do not return N1, because undef node may exist in N1 return DAG.getConstant( - APInt::getAllOnesValue( - N1.getValueType().getScalarType().getSizeInBits()), - SDLoc(N), N1.getValueType()); + APInt::getAllOnesValue(N1.getScalarValueSizeInBits()), SDLoc(N), + N1.getValueType()); // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask) // Do this only if the resulting shuffle is legal. @@ -4122,6 +4245,110 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { return nullptr; } +namespace { +/// Helper struct to parse and store a memory address as base + index + offset. +/// We ignore sign extensions when it is safe to do so. +/// The following two expressions are not equivalent. To differentiate we need +/// to store whether there was a sign extension involved in the index +/// computation. +/// (load (i64 add (i64 copyfromreg %c) +/// (i64 signextend (add (i8 load %index) +/// (i8 1)))) +/// vs +/// +/// (load (i64 add (i64 copyfromreg %c) +/// (i64 signextend (i32 add (i32 signextend (i8 load %index)) +/// (i32 1))))) +struct BaseIndexOffset { + SDValue Base; + SDValue Index; + int64_t Offset; + bool IsIndexSignExt; + + BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {} + + BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset, + bool IsIndexSignExt) : + Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {} + + bool equalBaseIndex(const BaseIndexOffset &Other) { + return Other.Base == Base && Other.Index == Index && + Other.IsIndexSignExt == IsIndexSignExt; + } + + /// Parses tree in Ptr for base, index, offset addresses. + static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG, + int64_t PartialOffset = 0) { + bool IsIndexSignExt = false; + + // Split up a folded GlobalAddress+Offset into its component parts. + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ptr)) + if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) { + return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(), + SDLoc(GA), + GA->getValueType(0), + /*Offset=*/PartialOffset, + /*isTargetGA=*/false, + GA->getTargetFlags()), + SDValue(), + GA->getOffset(), + IsIndexSignExt); + } + + // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD + // instruction, then it could be just the BASE or everything else we don't + // know how to handle. Just use Ptr as BASE and give up. + if (Ptr->getOpcode() != ISD::ADD) + return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); + + // We know that we have at least an ADD instruction. Try to pattern match + // the simple case of BASE + OFFSET. + if (isa<ConstantSDNode>(Ptr->getOperand(1))) { + int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue(); + return match(Ptr->getOperand(0), DAG, Offset + PartialOffset); + } + + // Inside a loop the current BASE pointer is calculated using an ADD and a + // MUL instruction. In this case Ptr is the actual BASE pointer. + // (i64 add (i64 %array_ptr) + // (i64 mul (i64 %induction_var) + // (i64 %element_size))) + if (Ptr->getOperand(1)->getOpcode() == ISD::MUL) + return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); + + // Look at Base + Index + Offset cases. + SDValue Base = Ptr->getOperand(0); + SDValue IndexOffset = Ptr->getOperand(1); + + // Skip signextends. + if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) { + IndexOffset = IndexOffset->getOperand(0); + IsIndexSignExt = true; + } + + // Either the case of Base + Index (no offset) or something else. + if (IndexOffset->getOpcode() != ISD::ADD) + return BaseIndexOffset(Base, IndexOffset, PartialOffset, IsIndexSignExt); + + // Now we have the case of Base + Index + offset. + SDValue Index = IndexOffset->getOperand(0); + SDValue Offset = IndexOffset->getOperand(1); + + if (!isa<ConstantSDNode>(Offset)) + return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); + + // Ignore signextends. + if (Index->getOpcode() == ISD::SIGN_EXTEND) { + Index = Index->getOperand(0); + IsIndexSignExt = true; + } else IsIndexSignExt = false; + + int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue(); + return BaseIndexOffset(Base, Index, Off + PartialOffset, IsIndexSignExt); + } +}; +} // namespace + SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -4317,16 +4544,20 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1)); if (!BinOpCst) return SDValue(); - // FIXME: disable this unless the input to the binop is a shift by a constant. - // If it is not a shift, it pessimizes some common cases like: - // - // void foo(int *X, int i) { X[i & 1235] = 1; } - // int bar(int *X, int i) { return X[i & 255]; } + // FIXME: disable this unless the input to the binop is a shift by a constant + // or is copy/select.Enable this in other cases when figure out it's exactly profitable. SDNode *BinOpLHSVal = LHS->getOperand(0).getNode(); - if ((BinOpLHSVal->getOpcode() != ISD::SHL && - BinOpLHSVal->getOpcode() != ISD::SRA && - BinOpLHSVal->getOpcode() != ISD::SRL) || - !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) + bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL || + BinOpLHSVal->getOpcode() == ISD::SRA || + BinOpLHSVal->getOpcode() == ISD::SRL; + bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg || + BinOpLHSVal->getOpcode() == ISD::SELECT; + + if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) && + !isCopyOrSelect) + return SDValue(); + + if (isCopyOrSelect && N->hasOneUse()) return SDValue(); EVT VT = N->getValueType(0); @@ -4366,19 +4597,15 @@ SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC) if (N->hasOneUse() && N->getOperand(0).hasOneUse()) { SDValue N01 = N->getOperand(0).getOperand(1); - - if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) { - if (!N01C->isOpaque()) { - EVT TruncVT = N->getValueType(0); - SDValue N00 = N->getOperand(0).getOperand(0); - APInt TruncC = N01C->getAPIntValue(); - TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits()); - SDLoc DL(N); - - return DAG.getNode(ISD::AND, DL, TruncVT, - DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00), - DAG.getConstant(TruncC, DL, TruncVT)); - } + if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) { + SDLoc DL(N); + EVT TruncVT = N->getValueType(0); + SDValue N00 = N->getOperand(0).getOperand(0); + SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00); + SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01); + AddToWorklist(Trunc00.getNode()); + AddToWorklist(Trunc01.getNode()); + return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01); } } @@ -4404,7 +4631,6 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { unsigned OpSizeInBits = VT.getScalarSizeInBits(); // fold vector ops - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; @@ -4425,12 +4651,12 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { N01CV, N1CV)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C); } - } else { - N1C = isConstOrConstSplat(N1); } } } + ConstantSDNode *N1C = isConstOrConstSplat(N1); + // fold (shl c1, c2) -> c1<<c2 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); if (N0C && N1C && !N1C->isOpaque()) @@ -4464,13 +4690,18 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2)) if (N1C && N0.getOpcode() == ISD::SHL) { if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { - uint64_t c1 = N0C1->getZExtValue(); - uint64_t c2 = N1C->getZExtValue(); SDLoc DL(N); - if (c1 + c2 >= OpSizeInBits) + APInt c1 = N0C1->getAPIntValue(); + APInt c2 = N1C->getAPIntValue(); + zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + + APInt Sum = c1 + c2; + if (Sum.uge(OpSizeInBits)) return DAG.getConstant(0, DL, VT); - return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), - DAG.getConstant(c1 + c2, DL, N1.getValueType())); + + return DAG.getNode( + ISD::SHL, DL, VT, N0.getOperand(0), + DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType())); } } @@ -4485,18 +4716,22 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { N0.getOperand(0).getOpcode() == ISD::SHL) { SDValue N0Op0 = N0.getOperand(0); if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) { - uint64_t c1 = N0Op0C1->getZExtValue(); - uint64_t c2 = N1C->getZExtValue(); + APInt c1 = N0Op0C1->getAPIntValue(); + APInt c2 = N1C->getAPIntValue(); + zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + EVT InnerShiftVT = N0Op0.getValueType(); uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); - if (c2 >= OpSizeInBits - InnerShiftSize) { + if (c2.uge(OpSizeInBits - InnerShiftSize)) { SDLoc DL(N0); - if (c1 + c2 >= OpSizeInBits) + APInt Sum = c1 + c2; + if (Sum.uge(OpSizeInBits)) return DAG.getConstant(0, DL, VT); - return DAG.getNode(ISD::SHL, DL, VT, - DAG.getNode(N0.getOpcode(), DL, VT, - N0Op0->getOperand(0)), - DAG.getConstant(c1 + c2, DL, N1.getValueType())); + + return DAG.getNode( + ISD::SHL, DL, VT, + DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)), + DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType())); } } } @@ -4508,8 +4743,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { N0.getOperand(0).getOpcode() == ISD::SRL) { SDValue N0Op0 = N0.getOperand(0); if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) { - uint64_t c1 = N0Op0C1->getZExtValue(); - if (c1 < VT.getScalarSizeInBits()) { + if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) { + uint64_t c1 = N0Op0C1->getZExtValue(); uint64_t c2 = N1C->getZExtValue(); if (c1 == c2) { SDValue NewOp0 = N0.getOperand(0); @@ -4569,37 +4804,37 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } } } + // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) - if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) { + if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) && + isConstantOrConstantVector(N1, /* No Opaques */ true)) { unsigned BitSize = VT.getScalarSizeInBits(); SDLoc DL(N); - SDValue HiBitsMask = - DAG.getConstant(APInt::getHighBitsSet(BitSize, - BitSize - N1C->getZExtValue()), - DL, VT); - return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), - HiBitsMask); + SDValue AllBits = DAG.getConstant(APInt::getAllOnesValue(BitSize), DL, VT); + SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1); + return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask); } // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) // Variant of version done on multiply, except mul by a power of 2 is turned // into a shift. - APInt Val; - if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && - (isa<ConstantSDNode>(N0.getOperand(1)) || - ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val))) { + if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && + isConstantOrConstantVector(N1, /* No Opaques */ true) && + isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) { SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1); SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1); + AddToWorklist(Shl0.getNode()); + AddToWorklist(Shl1.getNode()); return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1); } // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2) - if (N1C && N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse()) { - if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { - if (SDValue Folded = - DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, N0C1, N1C)) - return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Folded); - } + if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() && + isConstantOrConstantVector(N1, /* No Opaques */ true) && + isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) { + SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1); + if (isConstantOrConstantVector(Shl)) + return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl); } if (N1C && !N1C->isOpaque()) @@ -4613,16 +4848,18 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); - unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); + unsigned OpSizeInBits = VT.getScalarSizeInBits(); + + // Arithmetic shifting an all-sign-bit value is a no-op. + if (DAG.ComputeNumSignBits(N0) == OpSizeInBits) + return N0; // fold vector ops - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - if (VT.isVector()) { + if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; - N1C = isConstOrConstSplat(N1); - } + ConstantSDNode *N1C = isConstOrConstSplat(N1); // fold (sra c1, c2) -> (sra c1, c2) ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); @@ -4634,8 +4871,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // fold (sra -1, x) -> -1 if (isAllOnesConstant(N0)) return N0; - // fold (sra x, (setge c, size(x))) -> undef - if (N1C && N1C->getZExtValue() >= OpSizeInBits) + // fold (sra x, c >= size(x)) -> undef + if (N1C && N1C->getAPIntValue().uge(OpSizeInBits)) return DAG.getUNDEF(VT); // fold (sra x, 0) -> x if (N1C && N1C->isNullValue()) @@ -4656,13 +4893,19 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2)) if (N1C && N0.getOpcode() == ISD::SRA) { - if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) { - unsigned Sum = N1C->getZExtValue() + C1->getZExtValue(); - if (Sum >= OpSizeInBits) - Sum = OpSizeInBits - 1; + if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { SDLoc DL(N); - return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), - DAG.getConstant(Sum, DL, N1.getValueType())); + APInt c1 = N0C1->getAPIntValue(); + APInt c2 = N1C->getAPIntValue(); + zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + + APInt Sum = c1 + c2; + if (Sum.uge(OpSizeInBits)) + Sum = APInt(OpSizeInBits, OpSizeInBits - 1); + + return DAG.getNode( + ISD::SRA, DL, VT, N0.getOperand(0), + DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType())); } } @@ -4759,16 +5002,14 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); - unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); + unsigned OpSizeInBits = VT.getScalarSizeInBits(); // fold vector ops - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - if (VT.isVector()) { + if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; - N1C = isConstOrConstSplat(N1); - } + ConstantSDNode *N1C = isConstOrConstSplat(N1); // fold (srl c1, c2) -> c1 >>u c2 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); @@ -4778,7 +5019,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (isNullConstant(N0)) return N0; // fold (srl x, c >= size(x)) -> undef - if (N1C && N1C->getZExtValue() >= OpSizeInBits) + if (N1C && N1C->getAPIntValue().uge(OpSizeInBits)) return DAG.getUNDEF(VT); // fold (srl x, 0) -> x if (N1C && N1C->isNullValue()) @@ -4790,14 +5031,19 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) if (N1C && N0.getOpcode() == ISD::SRL) { - if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) { - uint64_t c1 = N01C->getZExtValue(); - uint64_t c2 = N1C->getZExtValue(); + if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { SDLoc DL(N); - if (c1 + c2 >= OpSizeInBits) + APInt c1 = N0C1->getAPIntValue(); + APInt c2 = N1C->getAPIntValue(); + zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + + APInt Sum = c1 + c2; + if (Sum.uge(OpSizeInBits)) return DAG.getConstant(0, DL, VT); - return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), - DAG.getConstant(c1 + c2, DL, N1.getValueType())); + + return DAG.getNode( + ISD::SRL, DL, VT, N0.getOperand(0), + DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType())); } } @@ -4810,7 +5056,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { uint64_t c2 = N1C->getZExtValue(); EVT InnerShiftVT = N0.getOperand(0).getValueType(); EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType(); - uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits(); + uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); // This is only valid if the OpSizeInBits + c1 = size of inner shift. if (c1 + OpSizeInBits == InnerShiftSize) { SDLoc DL(N0); @@ -4825,14 +5071,14 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { } // fold (srl (shl x, c), c) -> (and x, cst2) - if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) { - unsigned BitSize = N0.getScalarValueSizeInBits(); - if (BitSize <= 64) { - uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize; - SDLoc DL(N); - return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), - DAG.getConstant(~0ULL >> ShAmt, DL, VT)); - } + if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 && + isConstantOrConstantVector(N1, /* NoOpaques */ true)) { + SDLoc DL(N); + APInt AllBits = APInt::getAllOnesValue(N0.getScalarValueSizeInBits()); + SDValue Mask = + DAG.getNode(ISD::SRL, DL, VT, DAG.getConstant(AllBits, DL, VT), N1); + AddToWorklist(Mask.getNode()); + return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask); } // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask) @@ -5065,6 +5311,41 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, } } +// TODO: We should handle other cases of selecting between {-1,0,1} here. +SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { + SDValue Cond = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + EVT VT = N->getValueType(0); + EVT CondVT = Cond.getValueType(); + SDLoc DL(N); + + // fold (select Cond, 0, 1) -> (xor Cond, 1) + // We can't do this reliably if integer based booleans have different contents + // to floating point based booleans. This is because we can't tell whether we + // have an integer-based boolean or a floating-point-based boolean unless we + // can find the SETCC that produced it and inspect its operands. This is + // fairly easy if C is the SETCC node, but it can potentially be + // undiscoverable (or not reasonably discoverable). For example, it could be + // in another basic block or it could require searching a complicated + // expression. + if (VT.isInteger() && + (CondVT == MVT::i1 || (CondVT.isInteger() && + TLI.getBooleanContents(false, true) == + TargetLowering::ZeroOrOneBooleanContent && + TLI.getBooleanContents(false, false) == + TargetLowering::ZeroOrOneBooleanContent)) && + isNullConstant(N1) && isOneConstant(N2)) { + SDValue NotCond = DAG.getNode(ISD::XOR, DL, CondVT, Cond, + DAG.getConstant(1, DL, CondVT)); + if (VT.bitsEq(CondVT)) + return NotCond; + return DAG.getZExtOrTrunc(NotCond, DL, VT); + } + + return SDValue(); +} + SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -5080,39 +5361,14 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // fold (select false, X, Y) -> Y return !N0C->isNullValue() ? N1 : N2; } - // fold (select C, 1, X) -> (or C, X) - if (VT == MVT::i1 && isOneConstant(N1)) + // fold (select X, X, Y) -> (or X, Y) + // fold (select X, 1, Y) -> (or C, Y) + if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1))) return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); - // fold (select C, 0, 1) -> (xor C, 1) - // We can't do this reliably if integer based booleans have different contents - // to floating point based booleans. This is because we can't tell whether we - // have an integer-based boolean or a floating-point-based boolean unless we - // can find the SETCC that produced it and inspect its operands. This is - // fairly easy if C is the SETCC node, but it can potentially be - // undiscoverable (or not reasonably discoverable). For example, it could be - // in another basic block or it could require searching a complicated - // expression. - if (VT.isInteger() && - (VT0 == MVT::i1 || (VT0.isInteger() && - TLI.getBooleanContents(false, false) == - TLI.getBooleanContents(false, true) && - TLI.getBooleanContents(false, false) == - TargetLowering::ZeroOrOneBooleanContent)) && - isNullConstant(N1) && isOneConstant(N2)) { - SDValue XORNode; - if (VT == VT0) { - SDLoc DL(N); - return DAG.getNode(ISD::XOR, DL, VT0, - N0, DAG.getConstant(1, DL, VT0)); - } - SDLoc DL0(N0); - XORNode = DAG.getNode(ISD::XOR, DL0, VT0, - N0, DAG.getConstant(1, DL0, VT0)); - AddToWorklist(XORNode.getNode()); - if (VT.bitsGT(VT0)) - return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode); - return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode); - } + + if (SDValue V = foldSelectOfConstants(N)) + return V; + // fold (select C, 0, X) -> (and (not C), X) if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) { SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); @@ -5125,16 +5381,9 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { AddToWorklist(NOTNode.getNode()); return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1); } - // fold (select C, X, 0) -> (and C, X) - if (VT == MVT::i1 && isNullConstant(N2)) - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); - // fold (select X, X, Y) -> (or X, Y) - // fold (select X, 1, Y) -> (or X, Y) - if (VT == MVT::i1 && (N0 == N1 || isOneConstant(N1))) - return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); // fold (select X, Y, X) -> (and X, Y) // fold (select X, Y, 0) -> (and X, Y) - if (VT == MVT::i1 && (N0 == N2 || isNullConstant(N2))) + if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2))) return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); // If we can fold this based on the true/false value, do so. @@ -5145,7 +5394,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // The code in this block deals with the following 2 equivalences: // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y)) // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y) - // The target can specify its prefered form with the + // The target can specify its preferred form with the // shouldNormalizeToSelectSequence() callback. However we always transform // to the right anyway if we find the inner select exists in the DAG anyway // and we always transform to the left side if we know that we can further @@ -5214,6 +5463,18 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { } } + // select (xor Cond, 1), X, Y -> select Cond, Y, X + if (VT0 == MVT::i1) { + if (N0->getOpcode() == ISD::XOR) { + if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) { + SDValue Cond0 = N0->getOperand(0); + if (C->isOne()) + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), + Cond0, N2, N1); + } + } + } + // fold selects based on a setcc into other things, such as min/max/abs if (N0.getOpcode() == ISD::SETCC) { // select x, y (fcmp lt x, y) -> fminnum x, y @@ -5269,7 +5530,7 @@ std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) { // This function assumes all the vselect's arguments are CONCAT_VECTOR // nodes and that the condition is a BV of ConstantSDNodes (or undefs). static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { - SDLoc dl(N); + SDLoc DL(N); SDValue Cond = N->getOperand(0); SDValue LHS = N->getOperand(1); SDValue RHS = N->getOperand(2); @@ -5316,7 +5577,7 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { "One half of the selector was all UNDEFs and the other was all the " "same value. This should have been addressed before this function."); return DAG.getNode( - ISD::CONCAT_VECTORS, dl, VT, + ISD::CONCAT_VECTORS, DL, VT, BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0), TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1)); } @@ -5390,6 +5651,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N); SDValue Mask = MST->getMask(); SDValue Data = MST->getValue(); + EVT VT = Data.getValueType(); SDLoc DL(N); // If the MSTORE data type requires splitting and the mask is provided by a @@ -5399,16 +5661,13 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { if (Mask.getOpcode() == ISD::SETCC) { // Check if any splitting is required. - if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) != + if (TLI.getTypeAction(*DAG.getContext(), VT) != TargetLowering::TypeSplitVector) return SDValue(); SDValue MaskLo, MaskHi, Lo, Hi; std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MST->getValueType(0)); - SDValue Chain = MST->getChain(); SDValue Ptr = MST->getBasePtr(); @@ -5418,8 +5677,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { // if Alignment is equal to the vector size, // take the half of it for the second part unsigned SecondHalfAlignment = - (Alignment == Data->getValueType(0).getSizeInBits()/8) ? - Alignment/2 : Alignment; + (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment; EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); @@ -5433,11 +5691,11 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { Alignment, MST->getAAInfo(), MST->getRanges()); Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, - MST->isTruncatingStore()); + MST->isTruncatingStore(), + MST->isCompressingStore()); - unsigned IncrementSize = LoMemVT.getSizeInBits()/8; - Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); + Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, + MST->isCompressingStore()); MMO = DAG.getMachineFunction(). getMachineMemOperand(MST->getPointerInfo(), @@ -5446,7 +5704,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { MST->getRanges()); Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, - MST->isTruncatingStore()); + MST->isTruncatingStore(), + MST->isCompressingStore()); AddToWorklist(Lo.getNode()); AddToWorklist(Hi.getNode()); @@ -5585,11 +5844,10 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { Alignment, MLD->getAAInfo(), MLD->getRanges()); Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO, - ISD::NON_EXTLOAD); + ISD::NON_EXTLOAD, MLD->isExpandingLoad()); - unsigned IncrementSize = LoMemVT.getSizeInBits()/8; - Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); + Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, + MLD->isExpandingLoad()); MMO = DAG.getMachineFunction(). getMachineMemOperand(MLD->getPointerInfo(), @@ -5597,7 +5855,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO, - ISD::NON_EXTLOAD); + ISD::NON_EXTLOAD, MLD->isExpandingLoad()); AddToWorklist(Lo.getNode()); AddToWorklist(Hi.getNode()); @@ -5625,6 +5883,10 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { SDValue N2 = N->getOperand(2); SDLoc DL(N); + // fold (vselect C, X, X) -> X + if (N1 == N2) + return N1; + // Canonicalize integer abs. // vselect (setg[te] X, 0), X, -X -> // vselect (setgt X, -1), X, -X -> @@ -5648,7 +5910,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { EVT VT = LHS.getValueType(); SDValue Shift = DAG.getNode( ISD::SRA, DL, VT, LHS, - DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, DL, VT)); + DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT)); SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); AddToWorklist(Shift.getNode()); AddToWorklist(Add.getNode()); @@ -5803,7 +6065,7 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, // We can fold this node into a build_vector. unsigned VTBits = SVT.getSizeInBits(); - unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits(); + unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits(); SmallVector<SDValue, 8> Elts; unsigned NumElts = VT.getVectorNumElements(); SDLoc DL(N); @@ -6026,7 +6288,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // fold (sext (truncate (load x))) -> (sext (smaller load x)) // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n))) if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { - SDNode* oye = N0.getNode()->getOperand(0).getNode(); + SDNode *oye = N0.getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. @@ -6038,9 +6300,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // See if the value being truncated is already sign extended. If so, just // eliminate the trunc/sext pair. SDValue Op = N0.getOperand(0); - unsigned OpBits = Op.getValueType().getScalarType().getSizeInBits(); - unsigned MidBits = N0.getValueType().getScalarType().getSizeInBits(); - unsigned DestBits = VT.getScalarType().getSizeInBits(); + unsigned OpBits = Op.getScalarValueSizeInBits(); + unsigned MidBits = N0.getScalarValueSizeInBits(); + unsigned DestBits = VT.getScalarSizeInBits(); unsigned NumSignBits = DAG.ComputeNumSignBits(Op); if (OpBits == DestBits) { @@ -6201,7 +6463,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0) // Here, T can be 1 or -1, depending on the type of the setcc and // getBooleanContents(). - unsigned SetCCWidth = N0.getValueType().getScalarSizeInBits(); + unsigned SetCCWidth = N0.getScalarValueSizeInBits(); SDLoc DL(N); // To determine the "true" side of the select, we need to know the high bit @@ -6323,7 +6585,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n))) if (N0.getOpcode() == ISD::TRUNCATE) { if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { - SDNode* oye = N0.getNode()->getOperand(0).getNode(); + SDNode *oye = N0.getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. @@ -6338,7 +6600,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (truncate (load x))) -> (zext (smaller load x)) // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n))) if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { - SDNode *oye = N0.getNode()->getOperand(0).getNode(); + SDNode *oye = N0.getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. @@ -6528,7 +6790,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // elements we can use a matching integer vector type and then // truncate/sign extend. EVT MatchingElementType = EVT::getIntegerVT( - *DAG.getContext(), N00VT.getScalarType().getSizeInBits()); + *DAG.getContext(), N00VT.getScalarSizeInBits()); EVT MatchingVectorType = EVT::getVectorVT( *DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements()); SDValue VsetCC = @@ -6558,8 +6820,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue InnerZExt = N0.getOperand(0); // If the original shl may be shifting out bits, do not perform this // transformation. - unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() - - InnerZExt.getOperand(0).getValueType().getSizeInBits(); + unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() - + InnerZExt.getOperand(0).getValueSizeInBits(); if (ShAmtVal > KnownZeroBits) return SDValue(); } @@ -6598,7 +6860,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n))) if (N0.getOpcode() == ISD::TRUNCATE) { if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { - SDNode* oye = N0.getNode()->getOperand(0).getNode(); + SDNode *oye = N0.getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. @@ -6625,15 +6887,15 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { N0.getOperand(1).getOpcode() == ISD::Constant && !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), N0.getValueType())) { + SDLoc DL(N); SDValue X = N0.getOperand(0).getOperand(0); if (X.getValueType().bitsLT(VT)) { - X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X); + X = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X); } else if (X.getValueType().bitsGT(VT)) { - X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X); + X = DAG.getNode(ISD::TRUNCATE, DL, VT, X); } APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); - SDLoc DL(N); return DAG.getNode(ISD::AND, DL, VT, X, DAG.getConstant(Mask, DL, VT)); } @@ -6820,7 +7082,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { if ((ShAmt & (EVTBits-1)) == 0) { N0 = N0.getOperand(0); // Is the load width a multiple of size of VT? - if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0) + if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0) return SDValue(); } @@ -6952,8 +7214,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); EVT EVT = cast<VTSDNode>(N1)->getVT(); - unsigned VTBits = VT.getScalarType().getSizeInBits(); - unsigned EVTBits = EVT.getScalarType().getSizeInBits(); + unsigned VTBits = VT.getScalarSizeInBits(); + unsigned EVTBits = EVT.getScalarSizeInBits(); if (N0.isUndef()) return DAG.getUNDEF(VT); @@ -6977,14 +7239,23 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // if x is small enough. if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N00 = N0.getOperand(0); - if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits && + if (N00.getScalarValueSizeInBits() <= EVTBits && + (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) + return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1); + } + + // fold (sext_in_reg (zext x)) -> (sext x) + // iff we are extending the source sign bit. + if (N0.getOpcode() == ISD::ZERO_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getScalarValueSizeInBits() == EVTBits && (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1); } // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero. if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits))) - return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT); + return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType()); // fold operands of sext_in_reg based on knowledge that the top bits are not // demanded. @@ -7111,6 +7382,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { return N0.getOperand(0); } + // If this is anyext(trunc), don't fold it, allow ourselves to be folded. + if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND)) + return SDValue(); + // Fold extract-and-trunc into a narrow extract. For example: // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1) // i32 y = TRUNCATE(i64 x) @@ -7148,7 +7423,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { } // trunc (select c, a, b) -> select c, (trunc a), (trunc b) - if (N0.getOpcode() == ISD::SELECT) { + if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) { EVT SrcVT = N0.getValueType(); if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) && TLI.isTruncateFree(SrcVT, VT)) { @@ -7160,15 +7435,15 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { } } - // trunc (shl x, K) -> shl (trunc x), K => K < vt.size / 2 + // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits() if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) && TLI.isTypeDesirableForOp(ISD::SHL, VT)) { if (const ConstantSDNode *CAmt = isConstOrConstSplat(N0.getOperand(1))) { uint64_t Amt = CAmt->getZExtValue(); - unsigned Size = VT.getSizeInBits(); + unsigned Size = VT.getScalarSizeInBits(); - if (Amt < Size / 2) { + if (Amt < Size) { SDLoc SL(N); EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); @@ -7525,7 +7800,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() && !VT.isVector()) { - unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); + unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits(); EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); if (isTypeLegal(IntXVT)) { SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1)); @@ -7848,10 +8123,14 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { } // More folding opportunities when target permits. - if ((AllowFusion || HasFMAD) && Aggressive) { + if (Aggressive) { // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) - if (N0.getOpcode() == PreferredFusedOpcode && - N0.getOperand(2).getOpcode() == ISD::FMUL) { + // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF + // are currently only supported on binary nodes. + if (Options.UnsafeFPMath && + N0.getOpcode() == PreferredFusedOpcode && + N0.getOperand(2).getOpcode() == ISD::FMUL && + N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) { return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1), DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -7861,8 +8140,12 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { } // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) - if (N1->getOpcode() == PreferredFusedOpcode && - N1.getOperand(2).getOpcode() == ISD::FMUL) { + // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF + // are currently only supported on binary nodes. + if (Options.UnsafeFPMath && + N1->getOpcode() == PreferredFusedOpcode && + N1.getOperand(2).getOpcode() == ISD::FMUL && + N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) { return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0), N1.getOperand(1), DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -8090,11 +8373,15 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { } // More folding opportunities when target permits. - if ((AllowFusion || HasFMAD) && Aggressive) { + if (Aggressive) { // fold (fsub (fma x, y, (fmul u, v)), z) // -> (fma x, y (fma u, v, (fneg z))) - if (N0.getOpcode() == PreferredFusedOpcode && - N0.getOperand(2).getOpcode() == ISD::FMUL) { + // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF + // are currently only supported on binary nodes. + if (Options.UnsafeFPMath && + N0.getOpcode() == PreferredFusedOpcode && + N0.getOperand(2).getOpcode() == ISD::FMUL && + N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) { return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1), DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -8106,7 +8393,10 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // fold (fsub x, (fma y, z, (fmul u, v))) // -> (fma (fneg y), z, (fma (fneg u), v, x)) - if (N1.getOpcode() == PreferredFusedOpcode && + // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF + // are currently only supported on binary nodes. + if (Options.UnsafeFPMath && + N1.getOpcode() == PreferredFusedOpcode && N1.getOperand(2).getOpcode() == ISD::FMUL) { SDValue N20 = N1.getOperand(2).getOperand(0); SDValue N21 = N1.getOperand(2).getOperand(1); @@ -8221,8 +8511,10 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { return SDValue(); } -/// Try to perform FMA combining on a given FMUL node. -SDValue DAGCombiner::visitFMULForFMACombine(SDNode *N) { +/// Try to perform FMA combining on a given FMUL node based on the distributive +/// law x * (y + 1) = x * y + x and variants thereof (commuted versions, +/// subtraction instead of addition). +SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); @@ -8231,17 +8523,23 @@ SDValue DAGCombiner::visitFMULForFMACombine(SDNode *N) { assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation"); const TargetOptions &Options = DAG.getTarget().Options; - bool AllowFusion = - (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath); - // Floating-point multiply-add with intermediate rounding. - bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); + // The transforms below are incorrect when x == 0 and y == inf, because the + // intermediate multiplication produces a nan. + if (!Options.NoInfsFPMath) + return SDValue(); // Floating-point multiply-add without intermediate rounding. bool HasFMA = - AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) && + (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && + TLI.isFMAFasterThanFMulAndFAdd(VT) && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); + // Floating-point multiply-add with intermediate rounding. This can result + // in a less precise result due to the changed rounding order. + bool HasFMAD = Options.UnsafeFPMath && + (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); + // No valid opcode, do not combine. if (!HasFMAD && !HasFMA) return SDValue(); @@ -8338,17 +8636,20 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { return DAG.getNode(ISD::FSUB, DL, VT, N1, GetNegatedExpression(N0, DAG, LegalOperations), Flags); + // FIXME: Auto-upgrade the target/function-level option. + if (Options.UnsafeFPMath || N->getFlags()->hasNoSignedZeros()) { + // fold (fadd A, 0) -> A + if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1)) + if (N1C->isZero()) + return N0; + } + // If 'unsafe math' is enabled, fold lots of things. if (Options.UnsafeFPMath) { // No FP constant should be created after legalization as Instruction // Selection pass has a hard time dealing with FP constants. bool AllowNewConst = (Level < AfterLegalizeDAG); - // fold (fadd A, 0) -> A - if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1)) - if (N1C->isZero()) - return N0; - // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) @@ -8457,7 +8758,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); EVT VT = N->getValueType(0); - SDLoc dl(N); + SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags; @@ -8468,30 +8769,33 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // fold (fsub c1, c2) -> c1-c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FSUB, dl, VT, N0, N1, Flags); + return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags); // fold (fsub A, (fneg B)) -> (fadd A, B) if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) - return DAG.getNode(ISD::FADD, dl, VT, N0, + return DAG.getNode(ISD::FADD, DL, VT, N0, GetNegatedExpression(N1, DAG, LegalOperations), Flags); - // If 'unsafe math' is enabled, fold lots of things. - if (Options.UnsafeFPMath) { - // (fsub A, 0) -> A - if (N1CFP && N1CFP->isZero()) - return N0; - + // FIXME: Auto-upgrade the target/function-level option. + if (Options.UnsafeFPMath || N->getFlags()->hasNoSignedZeros()) { // (fsub 0, B) -> -B if (N0CFP && N0CFP->isZero()) { if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) return GetNegatedExpression(N1, DAG, LegalOperations); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, dl, VT, N1); + return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags); } + } + + // If 'unsafe math' is enabled, fold lots of things. + if (Options.UnsafeFPMath) { + // (fsub A, 0) -> A + if (N1CFP && N1CFP->isZero()) + return N0; // (fsub x, x) -> 0.0 if (N0 == N1) - return DAG.getConstantFP(0.0f, dl, VT); + return DAG.getConstantFP(0.0f, DL, VT); // (fsub x, (fadd x, y)) -> (fneg y) // (fsub x, (fadd y, x)) -> (fneg y) @@ -8611,7 +8915,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { } // FMUL -> FMA combines: - if (SDValue Fused = visitFMULForFMACombine(N)) { + if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) { AddToWorklist(Fused.getNode()); return Fused; } @@ -8626,14 +8930,14 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); - SDLoc dl(N); + SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; // Constant fold FMA. if (isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1) && isa<ConstantFPSDNode>(N2)) { - return DAG.getNode(ISD::FMA, dl, VT, N0, N1, N2); + return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2); } if (Options.UnsafeFPMath) { @@ -8663,8 +8967,8 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) && isConstantFPBuildVectorOrConstantFP(N1) && isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) { - return DAG.getNode(ISD::FMUL, dl, VT, N0, - DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1), + return DAG.getNode(ISD::FMUL, DL, VT, N0, + DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1), &Flags), &Flags); } @@ -8672,9 +8976,9 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { if (N0.getOpcode() == ISD::FMUL && isConstantFPBuildVectorOrConstantFP(N1) && isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) { - return DAG.getNode(ISD::FMA, dl, VT, + return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0), - DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1), + DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1), &Flags), N2); } @@ -8685,32 +8989,32 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { if (N1CFP) { if (N1CFP->isExactlyValue(1.0)) // TODO: The FMA node should have flags that propagate to this node. - return DAG.getNode(ISD::FADD, dl, VT, N0, N2); + return DAG.getNode(ISD::FADD, DL, VT, N0, N2); if (N1CFP->isExactlyValue(-1.0) && (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) { - SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0); + SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0); AddToWorklist(RHSNeg.getNode()); // TODO: The FMA node should have flags that propagate to this node. - return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg); + return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg); } } if (Options.UnsafeFPMath) { // (fma x, c, x) -> (fmul x, (c+1)) if (N1CFP && N0 == N2) { - return DAG.getNode(ISD::FMUL, dl, VT, N0, - DAG.getNode(ISD::FADD, dl, VT, - N1, DAG.getConstantFP(1.0, dl, VT), - &Flags), &Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0, + DAG.getNode(ISD::FADD, DL, VT, N1, + DAG.getConstantFP(1.0, DL, VT), &Flags), + &Flags); } // (fma x, c, (fneg x)) -> (fmul x, (c-1)) if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { - return DAG.getNode(ISD::FMUL, dl, VT, N0, - DAG.getNode(ISD::FADD, dl, VT, - N1, DAG.getConstantFP(-1.0, dl, VT), - &Flags), &Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0, + DAG.getNode(ISD::FADD, DL, VT, N1, + DAG.getConstantFP(-1.0, DL, VT), &Flags), + &Flags); } } @@ -8720,7 +9024,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { // Combine multiple FDIVs with the same divisor into multiple FMULs by the // reciprocal. // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) -// Notice that this is not always beneficial. One reason is different target +// Notice that this is not always beneficial. One reason is different targets // may have different costs for FDIV and FMUL, so sometimes the cost of two // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL". @@ -8907,14 +9211,18 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { } SDValue DAGCombiner::visitFSQRT(SDNode *N) { - if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap()) + if (!DAG.getTarget().Options.UnsafeFPMath) + return SDValue(); + + SDValue N0 = N->getOperand(0); + if (TLI.isFsqrtCheap(N0, DAG)) return SDValue(); // TODO: FSQRT nodes should have flags that propagate to the created nodes. // For now, create a Flags object for use with all unsafe math transforms. SDNodeFlags Flags; Flags.setUnsafeAlgebra(true); - return buildSqrtEstimate(N->getOperand(0), &Flags); + return buildSqrtEstimate(N0, &Flags); } /// copysign(x, fp_extend(y)) -> copysign(x, y) @@ -8941,11 +9249,11 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); - if (N0CFP && N1CFP) // Constant fold + if (N0CFP && N1CFP) // Constant fold return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1); if (N1CFP) { - const APFloat& V = N1CFP->getValueAPF(); + const APFloat &V = N1CFP->getValueAPF(); // copysign(x, c1) -> fabs(x) iff ispos(c1) // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1) if (!V.isNegative()) { @@ -8963,8 +9271,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { // copysign(copysign(x,z), y) -> copysign(x, y) if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) - return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, - N0.getOperand(0), N1); + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1); // copysign(x, abs(y)) -> abs(x) if (N1.getOpcode() == ISD::FABS) @@ -8972,14 +9279,12 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { // copysign(x, copysign(y,z)) -> copysign(x, z) if (N1.getOpcode() == ISD::FCOPYSIGN) - return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, - N0, N1.getOperand(1)); + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1)); // copysign(x, fp_extend(y)) -> copysign(x, y) // copysign(x, fp_round(y)) -> copysign(x, y) if (CanCombineFCOPYSIGN_EXTEND_ROUND(N)) - return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, - N0, N1.getOperand(0)); + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0)); return SDValue(); } @@ -9159,7 +9464,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { // fold (fp_round (fp_round x)) -> (fp_round x) if (N0.getOpcode() == ISD::FP_ROUND) { const bool NIsTrunc = N->getConstantOperandVal(1) == 1; - const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1; + const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1; // Skip this folding if it results in an fp_round from f80 to f16. // @@ -9232,7 +9537,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the // value of X. if (N0.getOpcode() == ISD::FP_ROUND - && N0.getNode()->getConstantOperandVal(1) == 1) { + && N0.getConstantOperandVal(1) == 1) { SDValue In = N0.getOperand(0); if (In.getValueType() == VT) return In; if (VT.bitsLT(In.getValueType())) @@ -9319,7 +9624,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { if (N0.getValueType().isVector()) { // For a vector, get a mask such as 0x80... per scalar element // and splat it. - SignMask = APInt::getSignBit(N0.getValueType().getScalarSizeInBits()); + SignMask = APInt::getSignBit(N0.getScalarValueSizeInBits()); SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); } else { // For a scalar, just generate 0x80... @@ -9424,7 +9729,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { if (N0.getValueType().isVector()) { // For a vector, get a mask such as 0x7f... per scalar element // and splat it. - SignMask = ~APInt::getSignBit(N0.getValueType().getScalarSizeInBits()); + SignMask = ~APInt::getSignBit(N0.getScalarValueSizeInBits()); SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); } else { // For a scalar, just generate 0x7f... @@ -10103,7 +10408,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // value. // TODO: Handle store large -> read small portion. // TODO: Handle TRUNCSTORE/LOADEXT - if (ISD::isNormalLoad(N) && !LD->isVolatile()) { + if (OptLevel != CodeGenOpt::None && + ISD::isNormalLoad(N) && !LD->isVolatile()) { if (ISD::isNON_TRUNCStore(Chain.getNode())) { StoreSDNode *PrevST = cast<StoreSDNode>(Chain); if (PrevST->getBasePtr() == Ptr && @@ -10405,7 +10711,7 @@ struct LoadedSlice { assert(Inst && Origin && "Unable to replace a non-existing slice."); const SDValue &OldBaseAddr = Origin->getBasePtr(); SDValue BaseAddr = OldBaseAddr; - // Get the offset in that chunk of bytes w.r.t. the endianess. + // Get the offset in that chunk of bytes w.r.t. the endianness. int64_t Offset = static_cast<int64_t>(getOffsetFromBase()); assert(Offset >= 0 && "Offset too big to fit in int64_t!"); if (Offset) { @@ -10705,7 +11011,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) { LSIt != LSItEnd; ++LSIt) { SDValue SliceInst = LSIt->loadSlice(); CombineTo(LSIt->Inst, SliceInst, true); - if (SliceInst.getNode()->getOpcode() != ISD::LOAD) + if (SliceInst.getOpcode() != ISD::LOAD) SliceInst = SliceInst.getOperand(0); assert(SliceInst->getOpcode() == ISD::LOAD && "It takes more than a zext to get to the loaded slice!!"); @@ -11033,110 +11339,6 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { return SDValue(); } -namespace { -/// Helper struct to parse and store a memory address as base + index + offset. -/// We ignore sign extensions when it is safe to do so. -/// The following two expressions are not equivalent. To differentiate we need -/// to store whether there was a sign extension involved in the index -/// computation. -/// (load (i64 add (i64 copyfromreg %c) -/// (i64 signextend (add (i8 load %index) -/// (i8 1)))) -/// vs -/// -/// (load (i64 add (i64 copyfromreg %c) -/// (i64 signextend (i32 add (i32 signextend (i8 load %index)) -/// (i32 1))))) -struct BaseIndexOffset { - SDValue Base; - SDValue Index; - int64_t Offset; - bool IsIndexSignExt; - - BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {} - - BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset, - bool IsIndexSignExt) : - Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {} - - bool equalBaseIndex(const BaseIndexOffset &Other) { - return Other.Base == Base && Other.Index == Index && - Other.IsIndexSignExt == IsIndexSignExt; - } - - /// Parses tree in Ptr for base, index, offset addresses. - static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG) { - bool IsIndexSignExt = false; - - // Split up a folded GlobalAddress+Offset into its component parts. - if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ptr)) - if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) { - return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(), - SDLoc(GA), - GA->getValueType(0), - /*Offset=*/0, - /*isTargetGA=*/false, - GA->getTargetFlags()), - SDValue(), - GA->getOffset(), - IsIndexSignExt); - } - - // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD - // instruction, then it could be just the BASE or everything else we don't - // know how to handle. Just use Ptr as BASE and give up. - if (Ptr->getOpcode() != ISD::ADD) - return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); - - // We know that we have at least an ADD instruction. Try to pattern match - // the simple case of BASE + OFFSET. - if (isa<ConstantSDNode>(Ptr->getOperand(1))) { - int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue(); - return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset, - IsIndexSignExt); - } - - // Inside a loop the current BASE pointer is calculated using an ADD and a - // MUL instruction. In this case Ptr is the actual BASE pointer. - // (i64 add (i64 %array_ptr) - // (i64 mul (i64 %induction_var) - // (i64 %element_size))) - if (Ptr->getOperand(1)->getOpcode() == ISD::MUL) - return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); - - // Look at Base + Index + Offset cases. - SDValue Base = Ptr->getOperand(0); - SDValue IndexOffset = Ptr->getOperand(1); - - // Skip signextends. - if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) { - IndexOffset = IndexOffset->getOperand(0); - IsIndexSignExt = true; - } - - // Either the case of Base + Index (no offset) or something else. - if (IndexOffset->getOpcode() != ISD::ADD) - return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt); - - // Now we have the case of Base + Index + offset. - SDValue Index = IndexOffset->getOperand(0); - SDValue Offset = IndexOffset->getOperand(1); - - if (!isa<ConstantSDNode>(Offset)) - return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); - - // Ignore signextends. - if (Index->getOpcode() == ISD::SIGN_EXTEND) { - Index = Index->getOperand(0); - IsIndexSignExt = true; - } else IsIndexSignExt = false; - - int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue(); - return BaseIndexOffset(Base, Index, Off, IsIndexSignExt); - } -}; -} // namespace - // This is a helper function for visitMUL to check the profitability // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). // MulNode is the original multiply, AddNode is (add x, c1), @@ -11351,6 +11553,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( } } + StoreNodes.erase(StoreNodes.begin() + NumStores, StoreNodes.end()); return true; } @@ -11493,7 +11696,8 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies( return true; } -bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { +bool DAGCombiner::MergeConsecutiveStores( + StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes) { if (OptLevel == CodeGenOpt::None) return false; @@ -11537,16 +11741,13 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // any of the store nodes. SmallVector<LSBaseSDNode*, 8> AliasLoadNodes; - // Save the StoreSDNodes that we find in the chain. - SmallVector<MemOpLink, 8> StoreNodes; - getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes); // Check if there is anything to merge. if (StoreNodes.size() < 2) return false; - // only do dep endence check in AA case + // only do dependence check in AA case bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : DAG.getSubtarget().useAA(); if (UseAA && !checkMergeStoreCandidatesForDependencies(StoreNodes)) @@ -11582,10 +11783,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // Check if this store interferes with any of the loads that we found. // If we find a load that alias with this store. Stop the sequence. - if (std::any_of(AliasLoadNodes.begin(), AliasLoadNodes.end(), - [&](LSBaseSDNode* Ldn) { - return isAlias(Ldn, StoreNodes[i].MemNode); - })) + if (any_of(AliasLoadNodes, [&](LSBaseSDNode *Ldn) { + return isAlias(Ldn, StoreNodes[i].MemNode); + })) break; // Mark this node as useful. @@ -11899,6 +12099,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { } } + StoreNodes.erase(StoreNodes.begin() + NumElem, StoreNodes.end()); return true; } @@ -12088,11 +12289,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // See if we can simplify the input to this truncstore with knowledge that // only the low bits are being used. For example: // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" - SDValue Shorter = - GetDemandedBits(Value, - APInt::getLowBitsSet( - Value.getValueType().getScalarType().getSizeInBits(), - ST->getMemoryVT().getScalarType().getSizeInBits())); + SDValue Shorter = GetDemandedBits( + Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(), + ST->getMemoryVT().getScalarSizeInBits())); AddToWorklist(Value.getNode()); if (Shorter.getNode()) return DAG.getTruncStore(Chain, SDLoc(N), Shorter, @@ -12100,10 +12299,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Otherwise, see if we can simplify the operation with // SimplifyDemandedBits, which only works if the value has a single use. - if (SimplifyDemandedBits(Value, - APInt::getLowBitsSet( - Value.getValueType().getScalarType().getSizeInBits(), - ST->getMemoryVT().getScalarType().getSizeInBits()))) + if (SimplifyDemandedBits( + Value, + APInt::getLowBitsSet(Value.getScalarValueSizeInBits(), + ST->getMemoryVT().getScalarSizeInBits()))) return SDValue(N, 0); } @@ -12144,19 +12343,20 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Only perform this optimization before the types are legal, because we // don't want to perform this optimization on every DAGCombine invocation. if (!LegalTypes) { - bool EverChanged = false; - - do { + for (;;) { // There can be multiple store sequences on the same chain. // Keep trying to merge store sequences until we are unable to do so // or until we merge the last store on the chain. - bool Changed = MergeConsecutiveStores(ST); - EverChanged |= Changed; + SmallVector<MemOpLink, 8> StoreNodes; + bool Changed = MergeConsecutiveStores(ST, StoreNodes); if (!Changed) break; - } while (ST->getOpcode() != ISD::DELETED_NODE); - if (EverChanged) - return SDValue(N, 0); + if (any_of(StoreNodes, + [ST](const MemOpLink &Link) { return Link.MemNode == ST; })) { + // ST has been merged and no longer exists. + return SDValue(N, 0); + } + } } // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' @@ -12169,14 +12369,123 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return NewSt; } + if (SDValue NewSt = splitMergedValStore(ST)) + return NewSt; + return ReduceLoadOpStoreWidth(N); } +/// For the instruction sequence of store below, F and I values +/// are bundled together as an i64 value before being stored into memory. +/// Sometimes it is more efficent to generate separate stores for F and I, +/// which can remove the bitwise instructions or sink them to colder places. +/// +/// (store (or (zext (bitcast F to i32) to i64), +/// (shl (zext I to i64), 32)), addr) --> +/// (store F, addr) and (store I, addr+4) +/// +/// Similarly, splitting for other merged store can also be beneficial, like: +/// For pair of {i32, i32}, i64 store --> two i32 stores. +/// For pair of {i32, i16}, i64 store --> two i32 stores. +/// For pair of {i16, i16}, i32 store --> two i16 stores. +/// For pair of {i16, i8}, i32 store --> two i16 stores. +/// For pair of {i8, i8}, i16 store --> two i8 stores. +/// +/// We allow each target to determine specifically which kind of splitting is +/// supported. +/// +/// The store patterns are commonly seen from the simple code snippet below +/// if only std::make_pair(...) is sroa transformed before inlined into hoo. +/// void goo(const std::pair<int, float> &); +/// hoo() { +/// ... +/// goo(std::make_pair(tmp, ftmp)); +/// ... +/// } +/// +SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { + if (OptLevel == CodeGenOpt::None) + return SDValue(); + + SDValue Val = ST->getValue(); + SDLoc DL(ST); + + // Match OR operand. + if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR) + return SDValue(); + + // Match SHL operand and get Lower and Higher parts of Val. + SDValue Op1 = Val.getOperand(0); + SDValue Op2 = Val.getOperand(1); + SDValue Lo, Hi; + if (Op1.getOpcode() != ISD::SHL) { + std::swap(Op1, Op2); + if (Op1.getOpcode() != ISD::SHL) + return SDValue(); + } + Lo = Op2; + Hi = Op1.getOperand(0); + if (!Op1.hasOneUse()) + return SDValue(); + + // Match shift amount to HalfValBitSize. + unsigned HalfValBitSize = Val.getValueSizeInBits() / 2; + ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1)); + if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize) + return SDValue(); + + // Lo and Hi are zero-extended from int with size less equal than 32 + // to i64. + if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() || + !Lo.getOperand(0).getValueType().isScalarInteger() || + Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize || + Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() || + !Hi.getOperand(0).getValueType().isScalarInteger() || + Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize) + return SDValue(); + + // Use the EVT of low and high parts before bitcast as the input + // of target query. + EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST) + ? Lo.getOperand(0).getValueType() + : Lo.getValueType(); + EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST) + ? Hi.getOperand(0).getValueType() + : Hi.getValueType(); + if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy)) + return SDValue(); + + // Start to split store. + unsigned Alignment = ST->getAlignment(); + MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); + AAMDNodes AAInfo = ST->getAAInfo(); + + // Change the sizes of Lo and Hi's value types to HalfValBitSize. + EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize); + Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0)); + Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0)); + + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + // Lower value store. + SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(), + ST->getAlignment(), MMOFlags, AAInfo); + Ptr = + DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType())); + // Higher value store. + SDValue St1 = + DAG.getStore(St0, DL, Hi, Ptr, + ST->getPointerInfo().getWithOffset(HalfValBitSize / 8), + Alignment / 2, MMOFlags, AAInfo); + return St1; +} + SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDValue InVec = N->getOperand(0); SDValue InVal = N->getOperand(1); SDValue EltNo = N->getOperand(2); - SDLoc dl(N); + SDLoc DL(N); // If the inserted element is an UNDEF, just use the input vector. if (InVal.isUndef()) @@ -12206,7 +12515,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue(); if (Elt < OtherElt) { // Swap nodes. - SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT, + SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, InVec.getOperand(0), InVal, EltNo); AddToWorklist(NewOp.getNode()); return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()), @@ -12237,13 +12546,13 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { EVT OpVT = Ops[0].getValueType(); if (InVal.getValueType() != OpVT) InVal = OpVT.bitsGT(InVal.getValueType()) ? - DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) : - DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal); + DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) : + DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal); Ops[Elt] = InVal; } // Return the new vector - return DAG.getBuildVector(VT, dl, Ops); + return DAG.getBuildVector(VT, DL, Ops); } SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( @@ -12544,7 +12853,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { return SDValue(); unsigned NumInScalars = N->getNumOperands(); - SDLoc dl(N); + SDLoc DL(N); EVT VT = N->getValueType(0); // Check to see if this is a BUILD_VECTOR of a bunch of values @@ -12603,7 +12912,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); assert(ElemRatio > 1 && "Invalid element size ratio"); SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType): - DAG.getConstant(0, SDLoc(N), SourceType); + DAG.getConstant(0, DL, SourceType); unsigned NewBVElems = ElemRatio * VT.getVectorNumElements(); SmallVector<SDValue, 8> Ops(NewBVElems, Filler); @@ -12634,7 +12943,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { if (!isTypeLegal(VecVT)) return SDValue(); // Make the new BUILD_VECTOR. - SDValue BV = DAG.getBuildVector(VecVT, dl, Ops); + SDValue BV = DAG.getBuildVector(VecVT, DL, Ops); // The new BUILD_VECTOR node has the potential to be further optimized. AddToWorklist(BV.getNode()); @@ -12646,7 +12955,7 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { EVT VT = N->getValueType(0); unsigned NumInScalars = N->getNumOperands(); - SDLoc dl(N); + SDLoc DL(N); EVT SrcVT = MVT::Other; unsigned Opcode = ISD::DELETED_NODE; @@ -12707,30 +13016,126 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { else Opnds.push_back(In.getOperand(0)); } - SDValue BV = DAG.getBuildVector(NVT, dl, Opnds); + SDValue BV = DAG.getBuildVector(NVT, DL, Opnds); AddToWorklist(BV.getNode()); - return DAG.getNode(Opcode, dl, VT, BV); + return DAG.getNode(Opcode, DL, VT, BV); } -SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { - unsigned NumInScalars = N->getNumOperands(); - SDLoc dl(N); +SDValue DAGCombiner::createBuildVecShuffle(SDLoc DL, SDNode *N, + ArrayRef<int> VectorMask, + SDValue VecIn1, SDValue VecIn2, + unsigned LeftIdx) { + MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); + SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy); + EVT VT = N->getValueType(0); + EVT InVT1 = VecIn1.getValueType(); + EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1; + + unsigned Vec2Offset = InVT1.getVectorNumElements(); + unsigned NumElems = VT.getVectorNumElements(); + unsigned ShuffleNumElems = NumElems; + + // We can't generate a shuffle node with mismatched input and output types. + // Try to make the types match the type of the output. + if (InVT1 != VT || InVT2 != VT) { + if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) { + // If the output vector length is a multiple of both input lengths, + // we can concatenate them and pad the rest with undefs. + unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits(); + assert(NumConcats >= 2 && "Concat needs at least two inputs!"); + SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1)); + ConcatOps[0] = VecIn1; + ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1); + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); + VecIn2 = SDValue(); + } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) { + if (!TLI.isExtractSubvectorCheap(VT, NumElems)) + return SDValue(); - // A vector built entirely of undefs is undef. - if (ISD::allOperandsUndef(N)) - return DAG.getUNDEF(VT); + if (!VecIn2.getNode()) { + // If we only have one input vector, and it's twice the size of the + // output, split it in two. + VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, + DAG.getConstant(NumElems, DL, IdxTy)); + VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx); + // Since we now have shorter input vectors, adjust the offset of the + // second vector's start. + Vec2Offset = NumElems; + } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) { + // VecIn1 is wider than the output, and we have another, possibly + // smaller input. Pad the smaller input with undefs, shuffle at the + // input vector width, and extract the output. + // The shuffle type is different than VT, so check legality again. + if (LegalOperations && + !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1)) + return SDValue(); - if (SDValue V = reduceBuildVecExtToExtBuildVec(N)) - return V; + // Legalizing INSERT_SUBVECTOR is tricky - you basically have to + // lower it back into a BUILD_VECTOR. So if the inserted type is + // illegal, don't even try. + if (InVT1 != InVT2) { + if (!TLI.isTypeLegal(InVT2)) + return SDValue(); + VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1, + DAG.getUNDEF(InVT1), VecIn2, ZeroIdx); + } + ShuffleNumElems = NumElems * 2; + } else { + // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider + // than VecIn1. We can't handle this for now - this case will disappear + // when we start sorting the vectors by type. + return SDValue(); + } + } else { + // TODO: Support cases where the length mismatch isn't exactly by a + // factor of 2. + // TODO: Move this check upwards, so that if we have bad type + // mismatches, we don't create any DAG nodes. + return SDValue(); + } + } - if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N)) - return V; + // Initialize mask to undef. + SmallVector<int, 8> Mask(ShuffleNumElems, -1); + + // Only need to run up to the number of elements actually used, not the + // total number of elements in the shuffle - if we are shuffling a wider + // vector, the high lanes should be set to undef. + for (unsigned i = 0; i != NumElems; ++i) { + if (VectorMask[i] <= 0) + continue; + + unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1); + if (VectorMask[i] == (int)LeftIdx) { + Mask[i] = ExtIndex; + } else if (VectorMask[i] == (int)LeftIdx + 1) { + Mask[i] = Vec2Offset + ExtIndex; + } + } + + // The type the input vectors may have changed above. + InVT1 = VecIn1.getValueType(); + + // If we already have a VecIn2, it should have the same type as VecIn1. + // If we don't, get an undef/zero vector of the appropriate type. + VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1); + assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type."); + + SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask); + if (ShuffleNumElems > NumElems) + Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx); - // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT - // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from - // at most two distinct vectors, turn this into a shuffle node. + return Shuffle; +} + +// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT +// operations. If the types of the vectors we're extracting from allow it, +// turn this into a vector_shuffle node. +SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { + SDLoc DL(N); + EVT VT = N->getValueType(0); // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes. if (!isTypeLegal(VT)) @@ -12740,149 +13145,169 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT)) return SDValue(); - SDValue VecIn1, VecIn2; bool UsesZeroVector = false; - for (unsigned i = 0; i != NumInScalars; ++i) { + unsigned NumElems = N->getNumOperands(); + + // Record, for each element of the newly built vector, which input vector + // that element comes from. -1 stands for undef, 0 for the zero vector, + // and positive values for the input vectors. + // VectorMask maps each element to its vector number, and VecIn maps vector + // numbers to their initial SDValues. + + SmallVector<int, 8> VectorMask(NumElems, -1); + SmallVector<SDValue, 8> VecIn; + VecIn.push_back(SDValue()); + + for (unsigned i = 0; i != NumElems; ++i) { SDValue Op = N->getOperand(i); - // Ignore undef inputs. - if (Op.isUndef()) continue; - // See if we can combine this build_vector into a blend with a zero vector. - if (!VecIn2.getNode() && (isNullConstant(Op) || isNullFPConstant(Op))) { + if (Op.isUndef()) + continue; + + // See if we can use a blend with a zero vector. + // TODO: Should we generalize this to a blend with an arbitrary constant + // vector? + if (isNullConstant(Op) || isNullFPConstant(Op)) { UsesZeroVector = true; + VectorMask[i] = 0; continue; } - // If this input is something other than a EXTRACT_VECTOR_ELT with a - // constant index, bail out. + // Not an undef or zero. If the input is something other than an + // EXTRACT_VECTOR_ELT with a constant index, bail out. if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || - !isa<ConstantSDNode>(Op.getOperand(1))) { - VecIn1 = VecIn2 = SDValue(nullptr, 0); - break; - } + !isa<ConstantSDNode>(Op.getOperand(1))) + return SDValue(); - // We allow up to two distinct input vectors. SDValue ExtractedFromVec = Op.getOperand(0); - if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2) - continue; - if (!VecIn1.getNode()) { - VecIn1 = ExtractedFromVec; - } else if (!VecIn2.getNode() && !UsesZeroVector) { - VecIn2 = ExtractedFromVec; - } else { - // Too many inputs. - VecIn1 = VecIn2 = SDValue(nullptr, 0); - break; - } - } - - // If everything is good, we can make a shuffle operation. - if (VecIn1.getNode()) { - unsigned InNumElements = VecIn1.getValueType().getVectorNumElements(); - SmallVector<int, 8> Mask; - for (unsigned i = 0; i != NumInScalars; ++i) { - unsigned Opcode = N->getOperand(i).getOpcode(); - if (Opcode == ISD::UNDEF) { - Mask.push_back(-1); - continue; - } + // All inputs must have the same element type as the output. + if (VT.getVectorElementType() != + ExtractedFromVec.getValueType().getVectorElementType()) + return SDValue(); - // Operands can also be zero. - if (Opcode != ISD::EXTRACT_VECTOR_ELT) { - assert(UsesZeroVector && - (Opcode == ISD::Constant || Opcode == ISD::ConstantFP) && - "Unexpected node found!"); - Mask.push_back(NumInScalars+i); - continue; - } + // Have we seen this input vector before? + // The vectors are expected to be tiny (usually 1 or 2 elements), so using + // a map back from SDValues to numbers isn't worth it. + unsigned Idx = std::distance( + VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec)); + if (Idx == VecIn.size()) + VecIn.push_back(ExtractedFromVec); - // If extracting from the first vector, just use the index directly. - SDValue Extract = N->getOperand(i); - SDValue ExtVal = Extract.getOperand(1); - unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue(); - if (Extract.getOperand(0) == VecIn1) { - Mask.push_back(ExtIndex); - continue; - } + VectorMask[i] = Idx; + } - // Otherwise, use InIdx + InputVecSize - Mask.push_back(InNumElements + ExtIndex); - } + // If we didn't find at least one input vector, bail out. + if (VecIn.size() < 2) + return SDValue(); - // Avoid introducing illegal shuffles with zero. - if (UsesZeroVector && !TLI.isVectorClearMaskLegal(Mask, VT)) + // TODO: We want to sort the vectors by descending length, so that adjacent + // pairs have similar length, and the longer vector is always first in the + // pair. + + // TODO: Should this fire if some of the input vectors has illegal type (like + // it does now), or should we let legalization run its course first? + + // Shuffle phase: + // Take pairs of vectors, and shuffle them so that the result has elements + // from these vectors in the correct places. + // For example, given: + // t10: i32 = extract_vector_elt t1, Constant:i64<0> + // t11: i32 = extract_vector_elt t2, Constant:i64<0> + // t12: i32 = extract_vector_elt t3, Constant:i64<0> + // t13: i32 = extract_vector_elt t1, Constant:i64<1> + // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13 + // We will generate: + // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2 + // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef + SmallVector<SDValue, 4> Shuffles; + for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) { + unsigned LeftIdx = 2 * In + 1; + SDValue VecLeft = VecIn[LeftIdx]; + SDValue VecRight = + (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue(); + + if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft, + VecRight, LeftIdx)) + Shuffles.push_back(Shuffle); + else return SDValue(); + } - // We can't generate a shuffle node with mismatched input and output types. - // Attempt to transform a single input vector to the correct type. - if ((VT != VecIn1.getValueType())) { - // If the input vector type has a different base type to the output - // vector type, bail out. - EVT VTElemType = VT.getVectorElementType(); - if ((VecIn1.getValueType().getVectorElementType() != VTElemType) || - (VecIn2.getNode() && - (VecIn2.getValueType().getVectorElementType() != VTElemType))) - return SDValue(); + // If we need the zero vector as an "ingredient" in the blend tree, add it + // to the list of shuffles. + if (UsesZeroVector) + Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT) + : DAG.getConstantFP(0.0, DL, VT)); - // If the input vector is too small, widen it. - // We only support widening of vectors which are half the size of the - // output registers. For example XMM->YMM widening on X86 with AVX. - EVT VecInT = VecIn1.getValueType(); - if (VecInT.getSizeInBits() * 2 == VT.getSizeInBits()) { - // If we only have one small input, widen it by adding undef values. - if (!VecIn2.getNode()) - VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, - DAG.getUNDEF(VecIn1.getValueType())); - else if (VecIn1.getValueType() == VecIn2.getValueType()) { - // If we have two small inputs of the same type, try to concat them. - VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, VecIn2); - VecIn2 = SDValue(nullptr, 0); - } else - return SDValue(); - } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) { - // If the input vector is too large, try to split it. - // We don't support having two input vectors that are too large. - // If the zero vector was used, we can not split the vector, - // since we'd need 3 inputs. - if (UsesZeroVector || VecIn2.getNode()) - return SDValue(); + // If we only have one shuffle, we're done. + if (Shuffles.size() == 1) + return Shuffles[0]; - if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements())) - return SDValue(); + // Update the vector mask to point to the post-shuffle vectors. + for (int &Vec : VectorMask) + if (Vec == 0) + Vec = Shuffles.size() - 1; + else + Vec = (Vec - 1) / 2; + + // More than one shuffle. Generate a binary tree of blends, e.g. if from + // the previous step we got the set of shuffles t10, t11, t12, t13, we will + // generate: + // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2 + // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4 + // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6 + // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8 + // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11 + // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13 + // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21 + + // Make sure the initial size of the shuffle list is even. + if (Shuffles.size() % 2) + Shuffles.push_back(DAG.getUNDEF(VT)); + + for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) { + if (CurSize % 2) { + Shuffles[CurSize] = DAG.getUNDEF(VT); + CurSize++; + } + for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) { + int Left = 2 * In; + int Right = 2 * In + 1; + SmallVector<int, 8> Mask(NumElems, -1); + for (unsigned i = 0; i != NumElems; ++i) { + if (VectorMask[i] == Left) { + Mask[i] = i; + VectorMask[i] = In; + } else if (VectorMask[i] == Right) { + Mask[i] = i + NumElems; + VectorMask[i] = In; + } + } - // Try to replace VecIn1 with two extract_subvectors - // No need to update the masks, they should still be correct. - VecIn2 = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, - DAG.getConstant(VT.getVectorNumElements(), dl, - TLI.getVectorIdxTy(DAG.getDataLayout()))); - VecIn1 = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, - DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - } else - return SDValue(); + Shuffles[In] = + DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask); } + } - if (UsesZeroVector) - VecIn2 = VT.isInteger() ? DAG.getConstant(0, dl, VT) : - DAG.getConstantFP(0.0, dl, VT); - else - // If VecIn2 is unused then change it to undef. - VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); + return Shuffles[0]; +} - // Check that we were able to transform all incoming values to the same - // type. - if (VecIn2.getValueType() != VecIn1.getValueType() || - VecIn1.getValueType() != VT) - return SDValue(); +SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { + EVT VT = N->getValueType(0); - // Return the new VECTOR_SHUFFLE node. - SDValue Ops[2]; - Ops[0] = VecIn1; - Ops[1] = VecIn2; - return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], Mask); - } + // A vector built entirely of undefs is undef. + if (ISD::allOperandsUndef(N)) + return DAG.getUNDEF(VT); + + if (SDValue V = reduceBuildVecExtToExtBuildVec(N)) + return V; + + if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N)) + return V; + + if (SDValue V = reduceBuildVecToShuffle(N)) + return V; return SDValue(); } @@ -13071,8 +13496,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType())) return SDValue(); - SDLoc dl = SDLoc(N); - SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar); + SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar); return DAG.getBitcast(VT, Res); } } @@ -13208,7 +13632,6 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { V = V.getOperand(0); if (V->getOpcode() == ISD::INSERT_SUBVECTOR) { - SDLoc dl(N); // Handle only simple case where vector being inserted and vector // being extracted are of same type, and are half size of larger vectors. EVT BigVT = V->getOperand(0).getValueType(); @@ -13228,11 +13651,11 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { // Into: // indices are equal or bit offsets are equal => V1 // otherwise => (extract_subvec V1, ExtIdx) - if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() == - ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits()) + if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() == + ExtIdx->getZExtValue() * NVT.getScalarSizeInBits()) return DAG.getBitcast(NVT, V->getOperand(1)); return DAG.getNode( - ISD::EXTRACT_SUBVECTOR, dl, NVT, + ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)), N->getOperand(1)); } @@ -13391,6 +13814,84 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops); } +// Attempt to combine a shuffle of 2 inputs of 'scalar sources' - +// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. +// +// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always +// a simplification in some sense, but it isn't appropriate in general: some +// BUILD_VECTORs are substantially cheaper than others. The general case +// of a BUILD_VECTOR requires inserting each element individually (or +// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of +// all constants is a single constant pool load. A BUILD_VECTOR where each +// element is identical is a splat. A BUILD_VECTOR where most of the operands +// are undef lowers to a small number of element insertions. +// +// To deal with this, we currently use a bunch of mostly arbitrary heuristics. +// We don't fold shuffles where one side is a non-zero constant, and we don't +// fold shuffles if the resulting BUILD_VECTOR would have duplicate +// non-constant operands. This seems to work out reasonably well in practice. +static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, + SelectionDAG &DAG, + const TargetLowering &TLI) { + EVT VT = SVN->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); + SDValue N0 = SVN->getOperand(0); + SDValue N1 = SVN->getOperand(1); + + if (!N0->hasOneUse() || !N1->hasOneUse()) + return SDValue(); + // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as + // discussed above. + if (!N1.isUndef()) { + bool N0AnyConst = isAnyConstantBuildVector(N0.getNode()); + bool N1AnyConst = isAnyConstantBuildVector(N1.getNode()); + if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode())) + return SDValue(); + if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode())) + return SDValue(); + } + + SmallVector<SDValue, 8> Ops; + SmallSet<SDValue, 16> DuplicateOps; + for (int M : SVN->getMask()) { + SDValue Op = DAG.getUNDEF(VT.getScalarType()); + if (M >= 0) { + int Idx = M < (int)NumElts ? M : M - NumElts; + SDValue &S = (M < (int)NumElts ? N0 : N1); + if (S.getOpcode() == ISD::BUILD_VECTOR) { + Op = S.getOperand(Idx); + } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) { + if (Idx == 0) + Op = S.getOperand(0); + } else { + // Operand can't be combined - bail out. + return SDValue(); + } + } + + // Don't duplicate a non-constant BUILD_VECTOR operand; semantically, this is + // fine, but it's likely to generate low-quality code if the target can't + // reconstruct an appropriate shuffle. + if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op)) + if (!DuplicateOps.insert(Op).second) + return SDValue(); + + Ops.push_back(Op); + } + // BUILD_VECTOR requires all inputs to be of the same type, find the + // maximum type and extend them all. + EVT SVT = VT.getScalarType(); + if (SVT.isInteger()) + for (SDValue &Op : Ops) + SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT); + if (SVT != VT.getScalarType()) + for (SDValue &Op : Ops) + Op = TLI.isZExtFree(Op.getValueType(), SVT) + ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT) + : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT); + return DAG.getBuildVector(VT, SDLoc(SVN), Ops); +} + SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { EVT VT = N->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); @@ -13506,40 +14007,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. - if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) { - SmallVector<SDValue, 8> Ops; - for (int M : SVN->getMask()) { - SDValue Op = DAG.getUNDEF(VT.getScalarType()); - if (M >= 0) { - int Idx = M % NumElts; - SDValue &S = (M < (int)NumElts ? N0 : N1); - if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) { - Op = S.getOperand(Idx); - } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) { - if (Idx == 0) - Op = S.getOperand(0); - } else { - // Operand can't be combined - bail out. - break; - } - } - Ops.push_back(Op); - } - if (Ops.size() == VT.getVectorNumElements()) { - // BUILD_VECTOR requires all inputs to be of the same type, find the - // maximum type and extend them all. - EVT SVT = VT.getScalarType(); - if (SVT.isInteger()) - for (SDValue &Op : Ops) - SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT); - if (SVT != VT.getScalarType()) - for (SDValue &Op : Ops) - Op = TLI.isZExtFree(Op.getValueType(), SVT) - ? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT) - : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT); - return DAG.getBuildVector(VT, SDLoc(N), Ops); - } - } + if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) + if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI)) + return Res; // If this shuffle only has a single input that is a bitcasted shuffle, // attempt to merge the 2 shuffles and suitably bitcast the inputs/output @@ -13647,6 +14117,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) { ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); + // Don't try to fold splats; they're likely to simplify somehow, or they + // might be free. + if (OtherSV->isSplat()) + return SDValue(); + // The incoming shuffle must be of the same type as the result of the // current shuffle. assert(OtherSV->getOperand(0).getValueType() == VT && @@ -13773,10 +14248,20 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { } SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { + EVT VT = N->getValueType(0); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); + // Combine INSERT_SUBVECTORs where we are inserting to the same index. + // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx ) + // --> INSERT_SUBVECTOR( Vec, SubNew, Idx ) + if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && + N0.getOperand(1).getValueType() == N1.getValueType() && + N0.getOperand(2) == N2) + return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0), + N1, N2); + if (N0.getValueType() != N1.getValueType()) return SDValue(); @@ -13785,7 +14270,6 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0->getNumOperands() == 2 && N2.getOpcode() == ISD::Constant) { APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue(); - EVT VT = N->getValueType(0); // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) -> // (concat_vectors Z, Y) @@ -13836,7 +14320,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { EVT VT = N->getValueType(0); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); - SDLoc dl(N); + SDLoc DL(N); // Make sure we're not running after operation legalization where it // may have custom lowered the vector shuffles. @@ -13904,8 +14388,8 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { if (!TLI.isVectorClearMaskLegal(Indices, ClearVT)) return SDValue(); - SDValue Zero = DAG.getConstant(0, dl, ClearVT); - return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, dl, + SDValue Zero = DAG.getConstant(0, DL, ClearVT); + return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL, DAG.getBitcast(ClearVT, LHS), Zero, Indices)); }; @@ -14119,6 +14603,8 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags(); if (!RLD->isInvariant()) MMOFlags &= ~MachineMemOperand::MOInvariant; + if (!RLD->isDereferenceable()) + MMOFlags &= ~MachineMemOperand::MODereferenceable; if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { // FIXME: Discards pointer and AA info. Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect), @@ -14146,6 +14632,73 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, return false; } +/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and +/// bitwise 'and'. +SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, + SDValue N1, SDValue N2, SDValue N3, + ISD::CondCode CC) { + // If this is a select where the false operand is zero and the compare is a + // check of the sign bit, see if we can perform the "gzip trick": + // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A + // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A + EVT XType = N0.getValueType(); + EVT AType = N2.getValueType(); + if (!isNullConstant(N3) || !XType.bitsGE(AType)) + return SDValue(); + + // If the comparison is testing for a positive value, we have to invert + // the sign bit mask, so only do that transform if the target has a bitwise + // 'and not' instruction (the invert is free). + if (CC == ISD::SETGT && TLI.hasAndNot(N2)) { + // (X > -1) ? A : 0 + // (X > 0) ? X : 0 <-- This is canonical signed max. + if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2))) + return SDValue(); + } else if (CC == ISD::SETLT) { + // (X < 0) ? A : 0 + // (X < 1) ? X : 0 <-- This is un-canonicalized signed min. + if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2))) + return SDValue(); + } else { + return SDValue(); + } + + // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit + // constant. + EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType()); + auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); + if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) { + unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1; + SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy); + SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt); + AddToWorklist(Shift.getNode()); + + if (XType.bitsGT(AType)) { + Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); + AddToWorklist(Shift.getNode()); + } + + if (CC == ISD::SETGT) + Shift = DAG.getNOT(DL, Shift, AType); + + return DAG.getNode(ISD::AND, DL, AType, Shift, N2); + } + + SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy); + SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt); + AddToWorklist(Shift.getNode()); + + if (XType.bitsGT(AType)) { + Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); + AddToWorklist(Shift.getNode()); + } + + if (CC == ISD::SETGT) + Shift = DAG.getNOT(DL, Shift, AType); + + return DAG.getNode(ISD::AND, DL, AType, Shift, N2); +} + /// Simplify an expression of the form (N0 cond N1) ? N2 : N3 /// where 'cond' is the comparison specified by CC. SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, @@ -14242,48 +14795,8 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, } } - // Check to see if we can perform the "gzip trick", transforming - // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A) - if (isNullConstant(N3) && CC == ISD::SETLT && - (isNullConstant(N1) || // (a < 0) ? b : 0 - (isOneConstant(N1) && N0 == N2))) { // (a < 1) ? a : 0 - EVT XType = N0.getValueType(); - EVT AType = N2.getValueType(); - if (XType.bitsGE(AType)) { - // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a - // single-bit constant. - if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) { - unsigned ShCtV = N2C->getAPIntValue().logBase2(); - ShCtV = XType.getSizeInBits() - ShCtV - 1; - SDValue ShCt = DAG.getConstant(ShCtV, SDLoc(N0), - getShiftAmountTy(N0.getValueType())); - SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), - XType, N0, ShCt); - AddToWorklist(Shift.getNode()); - - if (XType.bitsGT(AType)) { - Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); - AddToWorklist(Shift.getNode()); - } - - return DAG.getNode(ISD::AND, DL, AType, Shift, N2); - } - - SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), - XType, N0, - DAG.getConstant(XType.getSizeInBits() - 1, - SDLoc(N0), - getShiftAmountTy(N0.getValueType()))); - AddToWorklist(Shift.getNode()); - - if (XType.bitsGT(AType)) { - Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); - AddToWorklist(Shift.getNode()); - } - - return DAG.getNode(ISD::AND, DL, AType, Shift, N2); - } - } + if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC)) + return V; // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A) // where y is has a single bit set. @@ -14511,30 +15024,51 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { return S; } +/// Determines the LogBase2 value for a non-null input value using the +/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V). +SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) { + EVT VT = V.getValueType(); + unsigned EltBits = VT.getScalarSizeInBits(); + SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V); + SDValue Base = DAG.getConstant(EltBits - 1, DL, VT); + SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz); + return LogBase2; +} + +/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) +/// For the reciprocal, we need to find the zero of the function: +/// F(X) = A X - 1 [which has a zero at X = 1/A] +/// => +/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form +/// does not require additional intermediate precision] SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) { if (Level >= AfterLegalizeDAG) return SDValue(); - // Expose the DAG combiner to the target combiner implementations. - TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this); + // TODO: Handle half and/or extended types? + EVT VT = Op.getValueType(); + if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64) + return SDValue(); + + // If estimates are explicitly disabled for this function, we're done. + MachineFunction &MF = DAG.getMachineFunction(); + int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF); + if (Enabled == TLI.ReciprocalEstimate::Disabled) + return SDValue(); + + // Estimates may be explicitly enabled for this type with a custom number of + // refinement steps. + int Iterations = TLI.getDivRefinementSteps(VT, MF); + if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) { + AddToWorklist(Est.getNode()); - unsigned Iterations = 0; - if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) { if (Iterations) { - // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) - // For the reciprocal, we need to find the zero of the function: - // F(X) = A X - 1 [which has a zero at X = 1/A] - // => - // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form - // does not require additional intermediate precision] EVT VT = Op.getValueType(); SDLoc DL(Op); SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); - AddToWorklist(Est.getNode()); - // Newton iterations: Est = Est + Est (1 - Arg * Est) - for (unsigned i = 0; i < Iterations; ++i) { + for (int i = 0; i < Iterations; ++i) { SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags); AddToWorklist(NewEst.getNode()); @@ -14656,16 +15190,47 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, if (Level >= AfterLegalizeDAG) return SDValue(); - // Expose the DAG combiner to the target combiner implementations. - TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this); - unsigned Iterations = 0; + // TODO: Handle half and/or extended types? + EVT VT = Op.getValueType(); + if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64) + return SDValue(); + + // If estimates are explicitly disabled for this function, we're done. + MachineFunction &MF = DAG.getMachineFunction(); + int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF); + if (Enabled == TLI.ReciprocalEstimate::Disabled) + return SDValue(); + + // Estimates may be explicitly enabled for this type with a custom number of + // refinement steps. + int Iterations = TLI.getSqrtRefinementSteps(VT, MF); + bool UseOneConstNR = false; - if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) { + if (SDValue Est = + TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR, + Reciprocal)) { AddToWorklist(Est.getNode()); + if (Iterations) { Est = UseOneConstNR - ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal) - : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal); + ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal) + : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal); + + if (!Reciprocal) { + // Unfortunately, Est is now NaN if the input was exactly 0.0. + // Select out this case and force the answer to 0.0. + EVT VT = Op.getValueType(); + SDLoc DL(Op); + + SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); + EVT CCVT = getSetCCResultType(VT); + SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); + AddToWorklist(ZeroCmp.getNode()); + + Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, + ZeroCmp, FPZero, Est); + AddToWorklist(Est.getNode()); + } } return Est; } @@ -14678,23 +15243,7 @@ SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) { } SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags) { - SDValue Est = buildSqrtEstimateImpl(Op, Flags, false); - if (!Est) - return SDValue(); - - // Unfortunately, Est is now NaN if the input was exactly 0. - // Select out this case and force the answer to 0. - EVT VT = Est.getValueType(); - SDLoc DL(Op); - SDValue Zero = DAG.getConstantFP(0.0, DL, VT); - EVT CCVT = getSetCCResultType(VT); - SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, Zero, ISD::SETEQ); - AddToWorklist(ZeroCmp.getNode()); - - Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, ZeroCmp, - Zero, Est); - AddToWorklist(Est.getNode()); - return Est; + return buildSqrtEstimateImpl(Op, Flags, false); } /// Return true if base is a frame index, which is known not to alias with @@ -14771,9 +15320,9 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { // To catch this case, look up the actual index of frame indices to compute // the real alias relationship. if (isFrameIndex1 && isFrameIndex2) { - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); - Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex()); - Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex()); + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + Offset1 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex()); + Offset2 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex()); return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 || (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1); } |