diff options
Diffstat (limited to 'contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp')
-rw-r--r-- | contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 1703 |
1 files changed, 964 insertions, 739 deletions
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index c741982..5ecc6da 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -24,6 +24,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" @@ -112,7 +113,7 @@ namespace { /// /// This is used to allow us to reliably add any operands of a DAG node /// which have not yet been combined to the worklist. - SmallPtrSet<SDNode *, 64> CombinedNodes; + SmallPtrSet<SDNode *, 32> CombinedNodes; // AA - Used for DAG load/store alias analysis. AliasAnalysis &AA; @@ -211,8 +212,8 @@ namespace { SDValue PromoteExtend(SDValue Op); bool PromoteLoad(SDValue Op); - void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, - SDValue Trunc, SDValue ExtLoad, SDLoc DL, + void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc, + SDValue ExtLoad, const SDLoc &DL, ISD::NodeType ExtType); /// Call the node-specific routine that knows how to fold each @@ -258,6 +259,7 @@ namespace { SDValue visitSRL(SDNode *N); SDValue visitRotate(SDNode *N); SDValue visitBSWAP(SDNode *N); + SDValue visitBITREVERSE(SDNode *N); SDValue visitCTLZ(SDNode *N); SDValue visitCTLZ_ZERO_UNDEF(SDNode *N); SDValue visitCTTZ(SDNode *N); @@ -273,6 +275,7 @@ namespace { SDValue visitANY_EXTEND(SDNode *N); SDValue visitSIGN_EXTEND_INREG(SDNode *N); SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N); + SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N); SDValue visitTRUNCATE(SDNode *N); SDValue visitBITCAST(SDNode *N); SDValue visitBUILD_PAIR(SDNode *N); @@ -326,18 +329,19 @@ namespace { SDValue visitFMULForFMACombine(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); - SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS); + SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS, + SDValue RHS); SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt); bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); - SDValue SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2); - SDValue SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2, - SDValue N3, ISD::CondCode CC, + SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2); + SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, + SDValue N2, SDValue N3, ISD::CondCode CC, bool NotExtCompare = false); SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, - SDLoc DL, bool foldBooleans = true); + const SDLoc &DL, bool foldBooleans = true); bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, SDValue &CC) const; @@ -353,19 +357,21 @@ namespace { SDValue BuildSDIVPow2(SDNode *N); SDValue BuildUDIV(SDNode *N); SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags); - SDValue BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags); - SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations, - SDNodeFlags *Flags); - SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations, - SDNodeFlags *Flags); + SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags); + SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags); + SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, bool Recip); + SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations, + SDNodeFlags *Flags, bool Reciprocal); + SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations, + SDNodeFlags *Flags, bool Reciprocal); SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, SDValue InnerPos, SDValue InnerNeg, unsigned PosOpcode, unsigned NegOpcode, - SDLoc DL); - SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL); + const SDLoc &DL); + SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); SDValue ReduceLoadWidth(SDNode *N); SDValue ReduceLoadOpStoreWidth(SDNode *N); SDValue TransformFPLoadStorePair(SDNode *N); @@ -386,10 +392,17 @@ namespace { /// chain (aliasing node.) SDValue FindBetterChain(SDNode *N, SDValue Chain); - /// Do FindBetterChain for a store and any possibly adjacent stores on - /// consecutive chains. + /// Try to replace a store and any possibly adjacent stores on + /// consecutive chains with better chains. Return true only if St is + /// replaced. + /// + /// Notice that other chains may still be replaced even if the function + /// returns false. bool findBetterNeighborChains(StoreSDNode *St); + /// Match "(X shl/srl V1) & V2" where V2 may not be present. + bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask); + /// Holds a pointer to an LSBaseSDNode as well as information on where it /// is located in a sequence of memory operations connected by a chain. struct MemOpLink { @@ -414,8 +427,7 @@ namespace { /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a /// constant build_vector of the stored constant values in Stores. - SDValue getMergedConstantVectorStore(SelectionDAG &DAG, - SDLoc SL, + SDValue getMergedConstantVectorStore(SelectionDAG &DAG, const SDLoc &SL, ArrayRef<MemOpLink> Stores, SmallVectorImpl<SDValue> &Chains, EVT Ty) const; @@ -444,6 +456,12 @@ namespace { StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes, SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes); + /// Helper function for MergeConsecutiveStores. Checks if + /// Candidate stores have indirect dependency through their + /// operands. \return True if safe to merge + bool checkMergeStoreCandidatesForDependencies( + SmallVectorImpl<MemOpLink> &StoreNodes); + /// Merge consecutive store operations into a wide store. /// This optimization uses wide integers or vectors when possible. /// \return True if some memory operations were changed. @@ -747,32 +765,6 @@ bool DAGCombiner::isOneUseSetCC(SDValue N) const { return false; } -/// Returns true if N is a BUILD_VECTOR node whose -/// elements are all the same constant or undefined. -static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { - BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N); - if (!C) - return false; - - APInt SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - EVT EltVT = N->getValueType(0).getVectorElementType(); - return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, - HasAnyUndefs) && - EltVT.getSizeInBits() >= SplatBitSize); -} - -// \brief Returns the SDNode if it is a constant integer BuildVector -// or constant integer. -static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) { - if (isa<ConstantSDNode>(N)) - return N.getNode(); - if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) - return N.getNode(); - return nullptr; -} - // \brief Returns the SDNode if it is a constant float BuildVector // or constant float. static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { @@ -821,12 +813,12 @@ static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) { return nullptr; } -SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, - SDValue N0, SDValue N1) { +SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, + SDValue N1) { EVT VT = N0.getValueType(); if (N0.getOpcode() == Opc) { - if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { - if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1)) { + if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { + if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) { // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R)) return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); @@ -845,17 +837,17 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, } if (N1.getOpcode() == Opc) { - if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) { - if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0)) { + if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) { + if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) { // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L)) return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); return SDValue(); } if (N1.hasOneUse()) { - // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one + // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one // use - SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0); + SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0)); if (!OpNode.getNode()) return SDValue(); AddToWorklist(OpNode.getNode()); @@ -962,7 +954,8 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { Replace = false; SDLoc dl(Op); - if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { + if (ISD::isUNINDEXEDLoad(Op.getNode())) { + LoadSDNode *LD = cast<LoadSDNode>(Op); EVT MemVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD @@ -1166,6 +1159,9 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { if (!LegalOperations) return false; + if (!ISD::isUNINDEXEDLoad(Op.getNode())) + return false; + EVT VT = Op.getValueType(); if (VT.isVector() || !VT.isInteger()) return false; @@ -1259,8 +1255,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // changes of the root. HandleSDNode Dummy(DAG.getRoot()); - // while the worklist isn't empty, find a node and - // try and combine it. + // While the worklist isn't empty, find a node and try to combine it. while (!WorklistMap.empty()) { SDNode *N; // The Worklist holds the SDNodes in order, but it may contain null entries. @@ -1326,8 +1321,6 @@ void DAGCombiner::Run(CombineLevel AtLevel) { DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG)); - // Transfer debug value. - DAG.TransferDbgValues(SDValue(N, 0), RV); if (N->getNumValues() == RV.getNode()->getNumValues()) DAG.ReplaceAllUsesWith(N, RV.getNode()); else { @@ -1388,6 +1381,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::ROTR: case ISD::ROTL: return visitRotate(N); case ISD::BSWAP: return visitBSWAP(N); + case ISD::BITREVERSE: return visitBITREVERSE(N); case ISD::CTLZ: return visitCTLZ(N); case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N); case ISD::CTTZ: return visitCTTZ(N); @@ -1403,6 +1397,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::ANY_EXTEND: return visitANY_EXTEND(N); case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N); + case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N); case ISD::TRUNCATE: return visitTRUNCATE(N); case ISD::BITCAST: return visitBITCAST(N); case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); @@ -1628,8 +1623,8 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } -/// If \p N is a ContantSDNode with isOpaque() == false return it casted to a -/// ContantSDNode pointer else nullptr. +/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a +/// ConstantSDNode pointer else nullptr. static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) { ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N); return Const != nullptr && !Const->isOpaque() ? Const : nullptr; @@ -1653,38 +1648,32 @@ SDValue DAGCombiner::visitADD(SDNode *N) { } // fold (add x, undef) -> undef - if (N0.getOpcode() == ISD::UNDEF) + if (N0.isUndef()) return N0; - if (N1.getOpcode() == ISD::UNDEF) + if (N1.isUndef()) return N1; - // fold (add c1, c2) -> c1+c2 - ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); - ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, N0C, N1C); - // canonicalize constant to RHS - if (isConstantIntBuildVectorOrConstantInt(N0) && - !isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0); + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) { + // canonicalize constant to RHS + if (!DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0); + // fold (add c1, c2) -> c1+c2 + return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, + N0.getNode(), N1.getNode()); + } // fold (add x, 0) -> x if (isNullConstant(N1)) return N0; - // fold (add Sym, c) -> Sym+c - if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) - if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C && - GA->getOpcode() == ISD::GlobalAddress) - return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT, - GA->getOffset() + - (uint64_t)N1C->getSExtValue()); // fold ((c1-A)+c2) -> (c1+c2)-A - if (N1C && N0.getOpcode() == ISD::SUB) - if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) { - SDLoc DL(N); - return DAG.getNode(ISD::SUB, DL, VT, - DAG.getConstant(N1C->getAPIntValue()+ - N0C->getAPIntValue(), DL, VT), - N0.getOperand(1)); - } + if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1)) { + if (N0.getOpcode() == ISD::SUB) + if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) { + SDLoc DL(N); + return DAG.getNode(ISD::SUB, DL, VT, + DAG.getConstant(N1C->getAPIntValue()+ + N0C->getAPIntValue(), DL, VT), + N0.getOperand(1)); + } + } // reassociate add if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1)) return RADD; @@ -1850,9 +1839,9 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { // Since it may not be valid to emit a fold to zero for vector initializers // check if we can before folding. -static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT, - SelectionDAG &DAG, - bool LegalOperations, bool LegalTypes) { +static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, + SelectionDAG &DAG, bool LegalOperations, + bool LegalTypes) { if (!VT.isVector()) return DAG.getConstant(0, DL, VT); if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) @@ -1879,11 +1868,14 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // FIXME: Refactor this and xor and other similar operations together. if (N0 == N1) return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); - // fold (sub c1, c2) -> c1-c2 + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + DAG.isConstantIntBuildVectorOrConstantInt(N1)) { + // fold (sub c1, c2) -> c1-c2 + return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, + N0.getNode(), N1.getNode()); + } ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, N0C, N1C); // fold (sub x, c) -> (add x, -c) if (N1C) { SDLoc DL(N); @@ -1933,9 +1925,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { N0.getOperand(0), N0.getOperand(1).getOperand(0)); // If either operand of a sub is undef, the result is undef - if (N0.getOpcode() == ISD::UNDEF) + if (N0.isUndef()) return N0; - if (N1.getOpcode() == ISD::UNDEF) + if (N1.isUndef()) return N1; // If the relocation model supports it, consider symbol offsets. @@ -2013,7 +2005,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { EVT VT = N0.getValueType(); // fold (mul x, undef) -> 0 - if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + if (N0.isUndef() || N1.isUndef()) return DAG.getConstant(0, SDLoc(N), VT); bool N0IsConst = false; @@ -2026,8 +2018,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; - N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0); - N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1); + N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0); + N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1); } else { N0IsConst = isa<ConstantSDNode>(N0); if (N0IsConst) { @@ -2047,8 +2039,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { N0.getNode(), N1.getNode()); // canonicalize constant to RHS (vector doesn't have to splat) - if (isConstantIntBuildVectorOrConstantInt(N0) && - !isConstantIntBuildVectorOrConstantInt(N1)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); // fold (mul x, 0) -> 0 if (N1IsConst && ConstValue1 == 0) @@ -2091,23 +2083,21 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { APInt Val; // (mul (shl X, c1), c2) -> (mul X, c2 << c1) if (N1IsConst && N0.getOpcode() == ISD::SHL && - (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || - isa<ConstantSDNode>(N0.getOperand(1)))) { - SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, - N1, N0.getOperand(1)); + (ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val) || + isa<ConstantSDNode>(N0.getOperand(1)))) { + SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1)); AddToWorklist(C3.getNode()); - return DAG.getNode(ISD::MUL, SDLoc(N), VT, - N0.getOperand(0), C3); + return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3); } // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one // use. { - SDValue Sh(nullptr,0), Y(nullptr,0); + SDValue Sh(nullptr, 0), Y(nullptr, 0); // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). if (N0.getOpcode() == ISD::SHL && - (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || - isa<ConstantSDNode>(N0.getOperand(1))) && + (ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val) || + isa<ConstantSDNode>(N0.getOperand(1))) && N0.getNode()->hasOneUse()) { Sh = N0; Y = N1; } else if (N1.getOpcode() == ISD::SHL && @@ -2117,17 +2107,15 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { } if (Sh.getNode()) { - SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, - Sh.getOperand(0), Y); - return DAG.getNode(ISD::SHL, SDLoc(N), VT, - Mul, Sh.getOperand(1)); + SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y); + return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1)); } } // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) - if (isConstantIntBuildVectorOrConstantInt(N1) && + if (DAG.isConstantIntBuildVectorOrConstantInt(N1) && N0.getOpcode() == ISD::ADD && - isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) && + DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) && isMulAddWithConstProfitable(N, N0, N1)) return DAG.getNode(ISD::ADD, SDLoc(N), VT, DAG.getNode(ISD::MUL, SDLoc(N0), VT, @@ -2146,7 +2134,10 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI) { RTLIB::Libcall LC; - switch (Node->getSimpleValueType(0).SimpleTy) { + EVT NodeType = Node->getValueType(0); + if (!NodeType.isSimple()) + return false; + switch (NodeType.getSimpleVT().SimpleTy) { default: return false; // No libcall for vector types. case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; @@ -2163,14 +2154,18 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) { if (Node->use_empty()) return SDValue(); // This is a dead node, leave it alone. + unsigned Opcode = Node->getOpcode(); + bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM); + unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; + + // DivMod lib calls can still work on non-legal types if using lib-calls. EVT VT = Node->getValueType(0); - if (!TLI.isTypeLegal(VT)) + if (VT.isVector() || !VT.isInteger()) return SDValue(); - unsigned Opcode = Node->getOpcode(); - bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM); + if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT)) + return SDValue(); - unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; // If DIVREM is going to get expanded into a libcall, // but there is no libcall available, then don't combine. if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) && @@ -2314,10 +2309,10 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return DivRem; // undef / X -> 0 - if (N0.getOpcode() == ISD::UNDEF) + if (N0.isUndef()) return DAG.getConstant(0, DL, VT); // X / undef -> undef - if (N1.getOpcode() == ISD::UNDEF) + if (N1.isUndef()) return N1; return SDValue(); @@ -2378,10 +2373,10 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { return DivRem; // undef / X -> 0 - if (N0.getOpcode() == ISD::UNDEF) + if (N0.isUndef()) return DAG.getConstant(0, DL, VT); // X / undef -> undef - if (N1.getOpcode() == ISD::UNDEF) + if (N1.isUndef()) return N1; return SDValue(); @@ -2419,15 +2414,13 @@ SDValue DAGCombiner::visitREM(SDNode *N) { } // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) if (N1.getOpcode() == ISD::SHL) { - if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { - if (SHC->getAPIntValue().isPowerOf2()) { - SDValue Add = - DAG.getNode(ISD::ADD, DL, VT, N1, - DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, - VT)); - AddToWorklist(Add.getNode()); - return DAG.getNode(ISD::AND, DL, VT, N0, Add); - } + ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0)); + if (SHC && SHC->getAPIntValue().isPowerOf2()) { + APInt NegOne = APInt::getAllOnesValue(VT.getSizeInBits()); + SDValue Add = + DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT)); + AddToWorklist(Add.getNode()); + return DAG.getNode(ISD::AND, DL, VT, N0, Add); } } } @@ -2462,10 +2455,10 @@ SDValue DAGCombiner::visitREM(SDNode *N) { return DivRem.getValue(1); // undef % X -> 0 - if (N0.getOpcode() == ISD::UNDEF) + if (N0.isUndef()) return DAG.getConstant(0, DL, VT); // X % undef -> undef - if (N1.getOpcode() == ISD::UNDEF) + if (N1.isUndef()) return N1; return SDValue(); @@ -2489,7 +2482,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { getShiftAmountTy(N0.getValueType()))); } // fold (mulhs x, undef) -> 0 - if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + if (N0.isUndef() || N1.isUndef()) return DAG.getConstant(0, SDLoc(N), VT); // If the type twice as wide is legal, transform the mulhs to a wider multiply @@ -2525,7 +2518,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { if (isOneConstant(N1)) return DAG.getConstant(0, DL, N0.getValueType()); // fold (mulhu x, undef) -> 0 - if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + if (N0.isUndef() || N1.isUndef()) return DAG.getConstant(0, DL, VT); // If the type twice as wide is legal, transform the mulhu to a wider multiply @@ -2698,8 +2691,8 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if (isConstantIntBuildVectorOrConstantInt(N0) && - !isConstantIntBuildVectorOrConstantInt(N1)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); return SDValue(); @@ -2761,7 +2754,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { } // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) - // Only perform this optimization after type legalization and before + // Only perform this optimization up until type legalization, before // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and // we don't want to undo this promotion. @@ -2769,7 +2762,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // on scalars. if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR) && - Level == AfterLegalizeTypes) { + Level <= AfterLegalizeTypes) { SDValue In0 = N0.getOperand(0); SDValue In1 = N1.getOperand(0); EVT In0Ty = In0.getValueType(); @@ -2814,7 +2807,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // Don't try to fold this node if it requires introducing a // build vector of all zeros that might be illegal at this stage. - if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { + if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) { if (!LegalTypes) ShOp = DAG.getConstant(0, SDLoc(N), VT); else @@ -2829,13 +2822,13 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { N0->getOperand(0), N1->getOperand(0)); AddToWorklist(NewNode.getNode()); return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp, - &SVN0->getMask()[0]); + SVN0->getMask()); } // Don't try to fold this node if it requires introducing a // build vector of all zeros that might be illegal at this stage. ShOp = N0->getOperand(0); - if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { + if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) { if (!LegalTypes) ShOp = DAG.getConstant(0, SDLoc(N), VT); else @@ -2850,7 +2843,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { N0->getOperand(1), N1->getOperand(1)); AddToWorklist(NewNode.getNode()); return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode, - &SVN0->getMask()[0]); + SVN0->getMask()); } } } @@ -2867,7 +2860,7 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, EVT VT = N1.getValueType(); // fold (and x, undef) -> 0 - if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + if (N0.isUndef() || N1.isUndef()) return DAG.getConstant(0, SDLoc(LocReference), VT); // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) SDValue LL, LR, RL, RR, CC0, CC1; @@ -2965,6 +2958,50 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, } } + // Reduce bit extract of low half of an integer to the narrower type. + // (and (srl i64:x, K), KMask) -> + // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask) + if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { + if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) { + if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + unsigned Size = VT.getSizeInBits(); + const APInt &AndMask = CAnd->getAPIntValue(); + unsigned ShiftBits = CShift->getZExtValue(); + unsigned MaskBits = AndMask.countTrailingOnes(); + EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2); + + if (APIntOps::isMask(AndMask) && + // Required bits must not span the two halves of the integer and + // must fit in the half size type. + (ShiftBits + MaskBits <= Size / 2) && + TLI.isNarrowingProfitable(VT, HalfVT) && + TLI.isTypeDesirableForOp(ISD::AND, HalfVT) && + TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) && + TLI.isTruncateFree(VT, HalfVT) && + TLI.isZExtFree(HalfVT, VT)) { + // The isNarrowingProfitable is to avoid regressions on PPC and + // AArch64 which match a few 64-bit bit insert / bit extract patterns + // on downstream users of this. Those patterns could probably be + // extended to handle extensions mixed in. + + SDValue SL(N0); + assert(ShiftBits != 0 && MaskBits <= Size); + + // Extracting the highest bit of the low half. + EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout()); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT, + N0.getOperand(0)); + + SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT); + SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT); + SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK); + SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask); + return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And); + } + } + } + } + return SDValue(); } @@ -3045,8 +3082,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if (isConstantIntBuildVectorOrConstantInt(N0) && - !isConstantIntBuildVectorOrConstantInt(N1)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); // fold (and x, -1) -> x if (isAllOnesConstant(N1)) @@ -3090,8 +3127,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // the 'X' node here can either be nothing or an extract_vector_elt to catch // more cases. if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && - N0.getOperand(0).getOpcode() == ISD::LOAD) || - N0.getOpcode() == ISD::LOAD) { + N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() && + N0.getOperand(0).getOpcode() == ISD::LOAD && + N0.getOperand(0).getResNo() == 0) || + (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) { LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0) ); @@ -3234,12 +3273,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) { AddToWorklist(NewPtr.getNode()); - SDValue Load = - DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, - LN0->getChain(), NewPtr, - LN0->getPointerInfo(), - ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), Alignment, LN0->getAAInfo()); + SDValue Load = DAG.getExtLoad( + ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr, + LN0->getPointerInfo(), ExtVT, Alignment, + LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); AddToWorklist(N); CombineTo(LN0, Load, Load.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -3303,9 +3340,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const) if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) { - SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), - N0.getOperand(1), false); - if (BSwap.getNode()) + if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), + N0.getOperand(1), false)) return BSwap; } @@ -3576,7 +3612,7 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) { EVT VT = N1.getValueType(); // fold (or x, undef) -> -1 if (!LegalOperations && - (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) { + (N0.isUndef() || N1.isUndef())) { EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), SDLoc(LocReference), VT); @@ -3697,59 +3733,70 @@ SDValue DAGCombiner::visitOR(SDNode *N) { N1.getValueType().getScalarType().getSizeInBits()), SDLoc(N), N1.getValueType()); - // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1) - // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2) + // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask) // Do this only if the resulting shuffle is legal. if (isa<ShuffleVectorSDNode>(N0) && isa<ShuffleVectorSDNode>(N1) && // Avoid folding a node with illegal type. - TLI.isTypeLegal(VT) && - N0->getOperand(1) == N1->getOperand(1) && - ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) { - bool CanFold = true; - unsigned NumElts = VT.getVectorNumElements(); - const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0); - const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1); - // We construct two shuffle masks: - // - Mask1 is a shuffle mask for a shuffle with N0 as the first operand - // and N1 as the second operand. - // - Mask2 is a shuffle mask for a shuffle with N1 as the first operand - // and N0 as the second operand. - // We do this because OR is commutable and therefore there might be - // two ways to fold this node into a shuffle. - SmallVector<int,4> Mask1; - SmallVector<int,4> Mask2; - - for (unsigned i = 0; i != NumElts && CanFold; ++i) { - int M0 = SV0->getMaskElt(i); - int M1 = SV1->getMaskElt(i); - - // Both shuffle indexes are undef. Propagate Undef. - if (M0 < 0 && M1 < 0) { - Mask1.push_back(M0); - Mask2.push_back(M0); - continue; - } + TLI.isTypeLegal(VT)) { + bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode()); + bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode()); + bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); + bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode()); + // Ensure both shuffles have a zero input. + if ((ZeroN00 || ZeroN01) && (ZeroN10 || ZeroN11)) { + assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!"); + assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!"); + const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0); + const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1); + bool CanFold = true; + int NumElts = VT.getVectorNumElements(); + SmallVector<int, 4> Mask(NumElts); + + for (int i = 0; i != NumElts; ++i) { + int M0 = SV0->getMaskElt(i); + int M1 = SV1->getMaskElt(i); + + // Determine if either index is pointing to a zero vector. + bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts)); + bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts)); + + // If one element is zero and the otherside is undef, keep undef. + // This also handles the case that both are undef. + if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) { + Mask[i] = -1; + continue; + } - if (M0 < 0 || M1 < 0 || - (M0 < (int)NumElts && M1 < (int)NumElts) || - (M0 >= (int)NumElts && M1 >= (int)NumElts)) { - CanFold = false; - break; + // Make sure only one of the elements is zero. + if (M0Zero == M1Zero) { + CanFold = false; + break; + } + + assert((M0 >= 0 || M1 >= 0) && "Undef index!"); + + // We have a zero and non-zero element. If the non-zero came from + // SV0 make the index a LHS index. If it came from SV1, make it + // a RHS index. We need to mod by NumElts because we don't care + // which operand it came from in the original shuffles. + Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts; } - Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts); - Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts); - } + if (CanFold) { + SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0); + SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0); + + bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT); + if (!LegalMask) { + std::swap(NewLHS, NewRHS); + ShuffleVectorSDNode::commuteMask(Mask); + LegalMask = TLI.isShuffleMaskLegal(Mask, VT); + } - if (CanFold) { - // Fold this sequence only if the resulting shuffle is 'legal'. - if (TLI.isShuffleMaskLegal(Mask1, VT)) - return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), - N1->getOperand(0), &Mask1[0]); - if (TLI.isShuffleMaskLegal(Mask2, VT)) - return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0), - N0->getOperand(0), &Mask2[0]); + if (LegalMask) + return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask); + } } } } @@ -3760,8 +3807,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if (isConstantIntBuildVectorOrConstantInt(N0) && - !isConstantIntBuildVectorOrConstantInt(N1)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); // fold (or x, 0) -> x if (isNullConstant(N1)) @@ -3817,9 +3864,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } /// Match "(X shl/srl V1) & V2" where V2 may not be present. -static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { +bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { if (Op.getOpcode() == ISD::AND) { - if (isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) { + if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) { Mask = Op.getOperand(1); Op = Op.getOperand(0); } else { @@ -3946,7 +3993,7 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) { SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, SDValue InnerPos, SDValue InnerNeg, unsigned PosOpcode, - unsigned NegOpcode, SDLoc DL) { + unsigned NegOpcode, const SDLoc &DL) { // fold (or (shl x, (*ext y)), // (srl x, (*ext (sub 32, y)))) -> // (rotl x, y) or (rotr x, (sub 32, y)) @@ -3967,7 +4014,7 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, // MatchRotate - Handle an 'or' of two operands. If this is one of the many // idioms for rotate, and if the target supports rotation instructions, generate // a rot[lr]. -SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { +SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { // Must be a legal type. Expanded 'n promoted things won't work with rotates. EVT VT = LHS.getValueType(); if (!TLI.isTypeLegal(VT)) return nullptr; @@ -4093,12 +4140,12 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } // fold (xor undef, undef) -> 0. This is a common idiom (misuse). - if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) + if (N0.isUndef() && N1.isUndef()) return DAG.getConstant(0, SDLoc(N), VT); // fold (xor x, undef) -> undef - if (N0.getOpcode() == ISD::UNDEF) + if (N0.isUndef()) return N0; - if (N1.getOpcode() == ISD::UNDEF) + if (N1.isUndef()) return N1; // fold (xor c1, c2) -> c1^c2 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); @@ -4106,8 +4153,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if (isConstantIntBuildVectorOrConstantInt(N0) && - !isConstantIntBuildVectorOrConstantInt(N1)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); // fold (xor x, 0) -> x if (isNullConstant(N1)) @@ -4342,8 +4389,8 @@ SDValue DAGCombiner::visitRotate(SDNode *N) { // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))). if (N->getOperand(1).getOpcode() == ISD::TRUNCATE && N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) { - SDValue NewOp1 = distributeTruncateThroughAnd(N->getOperand(1).getNode()); - if (NewOp1.getNode()) + if (SDValue NewOp1 = + distributeTruncateThroughAnd(N->getOperand(1).getNode())) return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), N->getOperand(0), NewOp1); } @@ -4398,7 +4445,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (N1C && N1C->isNullValue()) return N0; // fold (shl undef, x) -> 0 - if (N0.getOpcode() == ISD::UNDEF) + if (N0.isUndef()) return DAG.getConstant(0, SDLoc(N), VT); // if (shl x, c) is known to be zero, return 0 if (DAG.MaskedValueIsZero(SDValue(N, 0), @@ -4407,8 +4454,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { - SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); - if (NewOp1.getNode()) + if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1); } @@ -4541,7 +4587,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { APInt Val; if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && (isa<ConstantSDNode>(N0.getOperand(1)) || - isConstantSplatVector(N0.getOperand(1).getNode(), Val))) { + ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val))) { SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1); SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1); return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1); @@ -4637,7 +4683,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements()); // Determine the residual right-shift amount. - signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); + int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); // If the shift is not a no-op (in which case this should be just a sign // extend already), the truncated to type is legal, sign_extend is legal @@ -4664,8 +4710,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))). if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { - SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); - if (NewOp1.getNode()) + if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1); } @@ -4916,7 +4961,7 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) { EVT VT = N->getValueType(0); // fold (bswap c1) -> c2 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0); // fold (bswap (bswap x)) -> x if (N0.getOpcode() == ISD::BSWAP) @@ -4924,12 +4969,21 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitBITREVERSE(SDNode *N) { + SDValue N0 = N->getOperand(0); + + // fold (bitreverse (bitreverse x)) -> x + if (N0.getOpcode() == ISD::BITREVERSE) + return N0.getOperand(0); + return SDValue(); +} + SDValue DAGCombiner::visitCTLZ(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // fold (ctlz c1) -> c2 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0); return SDValue(); } @@ -4939,7 +4993,7 @@ SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { EVT VT = N->getValueType(0); // fold (ctlz_zero_undef c1) -> c2 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0); return SDValue(); } @@ -4949,7 +5003,7 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) { EVT VT = N->getValueType(0); // fold (cttz c1) -> c2 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0); return SDValue(); } @@ -4959,7 +5013,7 @@ SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { EVT VT = N->getValueType(0); // fold (cttz_zero_undef c1) -> c2 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); return SDValue(); } @@ -4969,15 +5023,15 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) { EVT VT = N->getValueType(0); // fold (ctpop c1) -> c2 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0); return SDValue(); } /// \brief Generate Min/Max node -static SDValue combineMinNumMaxNum(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS, - SDValue True, SDValue False, +static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, + SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, SelectionDAG &DAG) { if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) @@ -5237,7 +5291,7 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { // length of the BV and see if all the non-undef nodes are the same. ConstantSDNode *BottomHalf = nullptr; for (int i = 0; i < NumElems / 2; ++i) { - if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF) + if (Cond->getOperand(i)->isUndef()) continue; if (BottomHalf == nullptr) @@ -5249,7 +5303,7 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { // Do the same for the second half of the BuildVector ConstantSDNode *TopHalf = nullptr; for (int i = NumElems / 2; i < NumElems; ++i) { - if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF) + if (Cond->getOperand(i)->isUndef()) continue; if (TopHalf == nullptr) @@ -5666,9 +5720,8 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { return N2; // Determine if the condition we're dealing with is constant - SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), - N0, N1, CC, SDLoc(N), false); - if (SCC.getNode()) { + if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1, + CC, SDLoc(N), false)) { AddToWorklist(SCC.getNode()); if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) { @@ -5676,7 +5729,7 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { return N2; // cond always true -> true val else return N3; // cond always false -> false val - } else if (SCC->getOpcode() == ISD::UNDEF) { + } else if (SCC->isUndef()) { // When the condition is UNDEF, just return the first operand. This is // coherent the DAG creation, no setcc node is created in this case return N2; @@ -5729,7 +5782,8 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, EVT VT = N->getValueType(0); assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND || - Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG) + Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG || + Opcode == ISD::ZERO_EXTEND_VECTOR_INREG) && "Expected EXTEND dag node in input!"); // fold (sext c1) -> c1 @@ -5756,7 +5810,7 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, for (unsigned i=0; i != NumElts; ++i) { SDValue Op = N0->getOperand(i); - if (Op->getOpcode() == ISD::UNDEF) { + if (Op->isUndef()) { Elts.push_back(DAG.getUNDEF(SVT)); continue; } @@ -5771,7 +5825,7 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT)); } - return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode(); + return DAG.getBuildVector(VT, DL, Elts).getNode(); } // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: @@ -5839,8 +5893,8 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, } void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, - SDValue Trunc, SDValue ExtLoad, SDLoc DL, - ISD::NodeType ExtType) { + SDValue Trunc, SDValue ExtLoad, + const SDLoc &DL, ISD::NodeType ExtType) { // Extend SetCC uses if necessary. for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { SDNode *SetCC = SetCCs[i]; @@ -5929,9 +5983,8 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { SDValue SplitLoad = DAG.getExtLoad( ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr, - LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, - LN0->isVolatile(), LN0->isNonTemporal(), LN0->isInvariant(), - Align, LN0->getAAInfo()); + LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align, + LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, DAG.getConstant(Stride, DL, BasePtr.getValueType())); @@ -6145,16 +6198,30 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } } - // sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0) - unsigned ElementWidth = VT.getScalarType().getSizeInBits(); + // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0) + // Here, T can be 1 or -1, depending on the type of the setcc and + // getBooleanContents(). + unsigned SetCCWidth = N0.getValueType().getScalarSizeInBits(); + SDLoc DL(N); - SDValue NegOne = - DAG.getConstant(APInt::getAllOnesValue(ElementWidth), DL, VT); - SDValue SCC = - SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), - NegOne, DAG.getConstant(0, DL, VT), - cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); - if (SCC.getNode()) return SCC; + // To determine the "true" side of the select, we need to know the high bit + // of the value returned by the setcc if it evaluates to true. + // If the type of the setcc is i1, then the true case of the select is just + // sext(i1 1), that is, -1. + // If the type of the setcc is larger (say, i8) then the value of the high + // bit depends on getBooleanContents(). So, ask TLI for a real "true" value + // of the appropriate width. + SDValue ExtTrueVal = + (SetCCWidth == 1) + ? DAG.getConstant(APInt::getAllOnesValue(VT.getScalarSizeInBits()), + DL, VT) + : TLI.getConstTrueVal(DAG, VT, DL); + + if (SDValue SCC = SimplifySelectCC( + DL, N0.getOperand(0), N0.getOperand(1), ExtTrueVal, + DAG.getConstant(0, DL, VT), + cast<CondCodeSDNode>(N0.getOperand(2))->get(), true)) + return SCC; if (!VT.isVector()) { EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType()); @@ -6162,10 +6229,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { TLI.isOperationLegal(ISD::SETCC, N0.getOperand(0).getValueType())) { SDLoc DL(N); ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); - SDValue SetCC = DAG.getSetCC(DL, SetCCVT, - N0.getOperand(0), N0.getOperand(1), CC); - return DAG.getSelect(DL, VT, SetCC, - NegOne, DAG.getConstant(0, DL, VT)); + SDValue SetCC = + DAG.getSetCC(DL, SetCCVT, N0.getOperand(0), N0.getOperand(1), CC); + return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, + DAG.getConstant(0, DL, VT)); } } } @@ -6436,56 +6503,48 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } if (N0.getOpcode() == ISD::SETCC) { + // Only do this before legalize for now. if (!LegalOperations && VT.isVector() && N0.getValueType().getVectorElementType() == MVT::i1) { - EVT N0VT = N0.getOperand(0).getValueType(); - if (getSetCCResultType(N0VT) == N0.getValueType()) + EVT N00VT = N0.getOperand(0).getValueType(); + if (getSetCCResultType(N00VT) == N0.getValueType()) return SDValue(); - // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors. - // Only do this before legalize for now. - EVT EltVT = VT.getVectorElementType(); + // We know that the # elements of the results is the same as the # + // elements of the compare (and the # elements of the compare result for + // that matter). Check to see that they are the same size. If so, we know + // that the element size of the sext'd result matches the element size of + // the compare operands. SDLoc DL(N); - SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(), - DAG.getConstant(1, DL, EltVT)); - if (VT.getSizeInBits() == N0VT.getSizeInBits()) - // We know that the # elements of the results is the same as the - // # elements of the compare (and the # elements of the compare result - // for that matter). Check to see that they are the same size. If so, - // we know that the element size of the sext'd result matches the - // element size of the compare operands. - return DAG.getNode(ISD::AND, DL, VT, - DAG.getSetCC(DL, VT, N0.getOperand(0), - N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()), - DAG.getNode(ISD::BUILD_VECTOR, DL, VT, - OneOps)); + SDValue VecOnes = DAG.getConstant(1, DL, VT); + if (VT.getSizeInBits() == N00VT.getSizeInBits()) { + // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors. + SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0), + N0.getOperand(1), N0.getOperand(2)); + return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes); + } // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then - // truncate/sign extend - EVT MatchingElementType = - EVT::getIntegerVT(*DAG.getContext(), - N0VT.getScalarType().getSizeInBits()); - EVT MatchingVectorType = - EVT::getVectorVT(*DAG.getContext(), MatchingElementType, - N0VT.getVectorNumElements()); + // truncate/sign extend. + EVT MatchingElementType = EVT::getIntegerVT( + *DAG.getContext(), N00VT.getScalarType().getSizeInBits()); + EVT MatchingVectorType = EVT::getVectorVT( + *DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements()); SDValue VsetCC = - DAG.getSetCC(DL, MatchingVectorType, N0.getOperand(0), - N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getNode(ISD::AND, DL, VT, - DAG.getSExtOrTrunc(VsetCC, DL, VT), - DAG.getNode(ISD::BUILD_VECTOR, DL, VT, OneOps)); + DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0), + N0.getOperand(1), N0.getOperand(2)); + return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT), + VecOnes); } // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc SDLoc DL(N); - SDValue SCC = - SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), - DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT), - cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); - if (SCC.getNode()) return SCC; + if (SDValue SCC = SimplifySelectCC( + DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT), + DAG.getConstant(0, DL, VT), + cast<CondCodeSDNode>(N0.getOperand(2))->get(), true)) + return SCC; } // (zext (shl (zext x), cst)) -> (shl (zext x), cst) @@ -6660,11 +6719,10 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc SDLoc DL(N); - SDValue SCC = - SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), - DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT), - cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); - if (SCC.getNode()) + if (SDValue SCC = SimplifySelectCC( + DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT), + DAG.getConstant(0, DL, VT), + cast<CondCodeSDNode>(N0.getOperand(2))->get(), true)) return SCC; } @@ -6854,15 +6912,14 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { SDValue Load; if (ExtType == ISD::NON_EXTLOAD) - Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr, - LN0->getPointerInfo().getWithOffset(PtrOff), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), NewAlign, LN0->getAAInfo()); + Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr, + LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign, + LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); else - Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr, - LN0->getPointerInfo().getWithOffset(PtrOff), - ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), NewAlign, LN0->getAAInfo()); + Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr, + LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT, + NewAlign, LN0->getMemOperand()->getFlags(), + LN0->getAAInfo()); // Replace the old load's chain with the new load's chain. WorklistRemover DeadNodes(*this); @@ -6902,7 +6959,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { return DAG.getUNDEF(VT); // fold (sext_in_reg c1) -> c1 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1); // If the input is already sign extended, just drop the extension. @@ -6988,9 +7045,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16)) if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) { - SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), - N0.getOperand(1), false); - if (BSwap.getNode()) + if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), + N0.getOperand(1), false)) return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1); } @@ -7002,7 +7058,21 @@ SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - if (N0.getOpcode() == ISD::UNDEF) + if (N0.isUndef()) + return DAG.getUNDEF(VT); + + if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, + LegalOperations)) + return SDValue(Res, 0); + + return SDValue(); +} + +SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + if (N0.isUndef()) return DAG.getUNDEF(VT); if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, @@ -7021,7 +7091,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (N0.getValueType() == N->getValueType(0)) return N0; // fold (truncate c1) -> c1 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0); // fold (truncate (truncate x)) -> (truncate x) if (N0.getOpcode() == ISD::TRUNCATE) @@ -7030,12 +7100,11 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { + // if the source is smaller than the dest, we still need an extend. if (N0.getOperand(0).getValueType().bitsLT(VT)) - // if the source is smaller than the dest, we still need an extend - return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, - N0.getOperand(0)); + return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0)); + // if the source is larger than the dest, than we just need the truncate. if (N0.getOperand(0).getValueType().bitsGT(VT)) - // if the source is larger than the dest, than we just need the truncate return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); // if the source and dest are the same type, we can drop both the extend // and the truncate. @@ -7071,12 +7140,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); - SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N), - NVT, N0.getOperand(0)); - SDLoc DL(N); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - DL, TrTy, V, + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy, + DAG.getBitcast(NVT, N0.getOperand(0)), DAG.getConstant(Index, DL, IndexTy)); } } @@ -7094,6 +7160,25 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { } } + // trunc (shl x, K) -> shl (trunc x), K => K < vt.size / 2 + if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) && + TLI.isTypeDesirableForOp(ISD::SHL, VT)) { + if (const ConstantSDNode *CAmt = isConstOrConstSplat(N0.getOperand(1))) { + uint64_t Amt = CAmt->getZExtValue(); + unsigned Size = VT.getSizeInBits(); + + if (Amt < Size / 2) { + SDLoc SL(N); + EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0)); + return DAG.getNode(ISD::SHL, SL, VT, Trunc, + DAG.getConstant(Amt, SL, AmtVT)); + } + } + } + // Fold a series of buildvector, bitcast, and truncate if possible. // For example fold // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to @@ -7121,7 +7206,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset) Opnds.push_back(BuildVect.getOperand(i)); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); + return DAG.getBuildVector(VT, SDLoc(N), Opnds); } } @@ -7131,10 +7216,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // Currently we only perform this optimization on scalars because vectors // may have different active low bits. if (!VT.isVector()) { - SDValue Shorter = - GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(), - VT.getSizeInBits())); - if (Shorter.getNode()) + if (SDValue Shorter = + GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(), + VT.getSizeInBits()))) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter); } // fold (truncate (load x)) -> (smaller load x) @@ -7168,7 +7252,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) { SDValue X = N0.getOperand(i); - if (X.getOpcode() != ISD::UNDEF) { + if (!X.isUndef()) { V = X; Idx = i; NumDefs++; @@ -7200,6 +7284,24 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { } } + // Fold truncate of a bitcast of a vector to an extract of the low vector + // element. + // + // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, 0 + if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) { + SDValue VecSrc = N0.getOperand(0); + EVT SrcVT = VecSrc.getValueType(); + if (SrcVT.isVector() && SrcVT.getScalarType() == VT && + (!LegalOperations || + TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) { + SDLoc SL(N); + + EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, + VecSrc, DAG.getConstant(0, SL, IdxVT)); + } + } + // Simplify the operands using demanded-bits information. if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) @@ -7226,23 +7328,17 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { LD1->getAddressSpace() != LD2->getAddressSpace()) return SDValue(); EVT LD1VT = LD1->getValueType(0); - - if (ISD::isNON_EXTLoad(LD2) && - LD2->hasOneUse() && - // If both are volatile this would reduce the number of volatile loads. - // If one is volatile it might be ok, but play conservative and bail out. - !LD1->isVolatile() && - !LD2->isVolatile() && - DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) { + unsigned LD1Bytes = LD1VT.getSizeInBits() / 8; + if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() && + DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) { unsigned Align = LD1->getAlignment(); unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( VT.getTypeForEVT(*DAG.getContext())); if (NewAlign <= Align && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) - return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), - LD1->getBasePtr(), LD1->getPointerInfo(), - false, false, false, Align); + return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(), + LD1->getPointerInfo(), Align); } return SDValue(); @@ -7254,6 +7350,49 @@ static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) { return DAG.getDataLayout().isBigEndian() ? 1 : 0; } +static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG, + const TargetLowering &TLI) { + // If this is not a bitcast to an FP type or if the target doesn't have + // IEEE754-compliant FP logic, we're done. + EVT VT = N->getValueType(0); + if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT)) + return SDValue(); + + // TODO: Use splat values for the constant-checking below and remove this + // restriction. + SDValue N0 = N->getOperand(0); + EVT SourceVT = N0.getValueType(); + if (SourceVT.isVector()) + return SDValue(); + + unsigned FPOpcode; + APInt SignMask; + switch (N0.getOpcode()) { + case ISD::AND: + FPOpcode = ISD::FABS; + SignMask = ~APInt::getSignBit(SourceVT.getSizeInBits()); + break; + case ISD::XOR: + FPOpcode = ISD::FNEG; + SignMask = APInt::getSignBit(SourceVT.getSizeInBits()); + break; + // TODO: ISD::OR --> ISD::FNABS? + default: + return SDValue(); + } + + // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X + // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X + SDValue LogicOp0 = N0.getOperand(0); + ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask && + LogicOp0.getOpcode() == ISD::BITCAST && + LogicOp0->getOperand(0).getValueType() == VT) + return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0)); + + return SDValue(); +} + SDValue DAGCombiner::visitBITCAST(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -7284,13 +7423,12 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { TLI.isOperationLegal(ISD::ConstantFP, VT)) || (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() && TLI.isOperationLegal(ISD::Constant, VT))) - return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0); + return DAG.getBitcast(VT, N0); } // (conv (conv x, t1), t2) -> (conv x, t2) if (N0.getOpcode() == ISD::BITCAST) - return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, - N0.getOperand(0)); + return DAG.getBitcast(VT, N0.getOperand(0)); // fold (conv (load x)) -> (load (conv*)x) // If the resultant load doesn't need a higher alignment than the original! @@ -7303,21 +7441,24 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - unsigned Align = DAG.getDataLayout().getABITypeAlignment( - VT.getTypeForEVT(*DAG.getContext())); unsigned OrigAlign = LN0->getAlignment(); - if (Align <= OrigAlign) { - SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), OrigAlign, - LN0->getAAInfo()); + bool Fast = false; + if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, + LN0->getAddressSpace(), OrigAlign, &Fast) && + Fast) { + SDValue Load = + DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(), + LN0->getPointerInfo(), OrigAlign, + LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); return Load; } } + if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI)) + return V; + // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) // @@ -7334,8 +7475,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) && N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector() && !N0.getValueType().isVector()) { - SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT, - N0.getOperand(0)); + SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0)); AddToWorklist(NewConv.getNode()); SDLoc DL(N); @@ -7388,8 +7528,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); if (isTypeLegal(IntXVT)) { - SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0), - IntXVT, N0.getOperand(1)); + SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1)); AddToWorklist(X.getNode()); // If X has a different width than the result/lhs, sext it or truncate it. @@ -7412,11 +7551,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2); - SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(0)), VT, - N0.getOperand(0)); + SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0)); AddToWorklist(Cst.getNode()); - SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(1)), VT, - N0.getOperand(1)); + SDValue X = DAG.getBitcast(VT, N0.getOperand(1)); AddToWorklist(X.getNode()); SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X); AddToWorklist(XorResult.getNode()); @@ -7439,8 +7576,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { X, DAG.getConstant(SignBit, SDLoc(X), VT)); AddToWorklist(X.getNode()); - SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0), - VT, N0.getOperand(0)); + SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0)); Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT, Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT)); AddToWorklist(Cst.getNode()); @@ -7472,7 +7608,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { return SDValue(Op.getOperand(0)); if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) - return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); + return DAG.getBitcast(VT, Op); return SDValue(); }; @@ -7529,8 +7665,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // we can end up with a scalar-to-vector node here. if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR) return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT, - DAG.getNode(ISD::BITCAST, SDLoc(BV), - DstEltVT, BV->getOperand(0))); + DAG.getBitcast(DstEltVT, BV->getOperand(0))); SmallVector<SDValue, 8> Ops; for (SDValue Op : BV->op_values()) { @@ -7538,11 +7673,10 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // are promoted and implicitly truncated. Make that explicit here. if (Op.getValueType() != SrcEltVT) Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op); - Ops.push_back(DAG.getNode(ISD::BITCAST, SDLoc(BV), - DstEltVT, Op)); + Ops.push_back(DAG.getBitcast(DstEltVT, Op)); AddToWorklist(Ops.back().getNode()); } - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops); + return DAG.getBuildVector(VT, SDLoc(BV), Ops); } // Otherwise, we're growing or shrinking the elements. To avoid having to @@ -7584,7 +7718,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // Shift the previously computed bits over. NewBits <<= SrcBitSize; SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j)); - if (Op.getOpcode() == ISD::UNDEF) continue; + if (Op.isUndef()) continue; EltIsUndef = false; NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue(). @@ -7598,7 +7732,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { } EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); - return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops); + return DAG.getBuildVector(VT, DL, Ops); } // Finally, this must be the case where we are shrinking elements: each input @@ -7609,7 +7743,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { SmallVector<SDValue, 8> Ops; for (const SDValue &Op : BV->op_values()) { - if (Op.getOpcode() == ISD::UNDEF) { + if (Op.isUndef()) { Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT)); continue; } @@ -7628,7 +7762,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); } - return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops); + return DAG.getBuildVector(VT, DL, Ops); } /// Try to perform FMA combining on a given FADD node. @@ -7654,6 +7788,11 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { if (!HasFMAD && !HasFMA) return SDValue(); + const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo(); + ; + if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel)) + return SDValue(); + // Always prefer FMAD to FMA for precision. unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); @@ -7837,6 +7976,10 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (!HasFMAD && !HasFMA) return SDValue(); + const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo(); + if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel)) + return SDValue(); + // Always prefer FMAD to FMA for precision. unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); @@ -8305,7 +8448,6 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { AddToWorklist(Fused.getNode()); return Fused; } - return SDValue(); } @@ -8662,7 +8804,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. if (N1CFP) { // Compute the reciprocal 1.0 / c2. - APFloat N1APF = N1CFP->getValueAPF(); + const APFloat &N1APF = N1CFP->getValueAPF(); APFloat Recip(N1APF.getSemantics(), 1); // 1.0 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); // Only do the transform if the reciprocal is a legal fp immediate that @@ -8681,12 +8823,12 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // If this FDIV is part of a reciprocal square root, it may be folded // into a target-specific square root estimate instruction. if (N1.getOpcode() == ISD::FSQRT) { - if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0), Flags)) { + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) { return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } else if (N1.getOpcode() == ISD::FP_EXTEND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0), + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) { RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV); AddToWorklist(RV.getNode()); @@ -8694,7 +8836,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { } } else if (N1.getOpcode() == ISD::FP_ROUND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0), + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) { RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1)); AddToWorklist(RV.getNode()); @@ -8715,7 +8857,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (SqrtOp.getNode()) { // We found a FSQRT, so try to make this fold: // x / (y * sqrt(z)) -> x * (rsqrt(z) / y) - if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) { + if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) { RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags); AddToWorklist(RV.getNode()); return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); @@ -8772,27 +8914,7 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { // For now, create a Flags object for use with all unsafe math transforms. SDNodeFlags Flags; Flags.setUnsafeAlgebra(true); - - // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5) - SDValue RV = BuildRsqrtEstimate(N->getOperand(0), &Flags); - if (!RV) - return SDValue(); - - EVT VT = RV.getValueType(); - SDLoc DL(N); - RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV, &Flags); - AddToWorklist(RV.getNode()); - - // Unfortunately, RV is now NaN if the input was exactly 0. - // Select out this case and force the answer to 0. - SDValue Zero = DAG.getConstantFP(0.0, DL, VT); - EVT CCVT = getSetCCResultType(VT); - SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, N->getOperand(0), Zero, ISD::SETEQ); - AddToWorklist(ZeroCmp.getNode()); - AddToWorklist(RV.getNode()); - - return DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, - ZeroCmp, Zero, RV); + return buildSqrtEstimate(N->getOperand(0), &Flags); } /// copysign(x, fp_extend(y)) -> copysign(x, y) @@ -8868,7 +8990,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { EVT OpVT = N0.getValueType(); // fold (sint_to_fp c1) -> c1fp - if (isConstantIntBuildVectorOrConstantInt(N0) && + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) @@ -8922,7 +9044,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { EVT OpVT = N0.getValueType(); // fold (uint_to_fp c1) -> c1fp - if (isConstantIntBuildVectorOrConstantInt(N0) && + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) @@ -8993,9 +9115,7 @@ static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) { } if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits()) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src); - if (SrcVT == VT) - return Src; - return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Src); + return DAG.getBitcast(VT, Src); } return SDValue(); } @@ -9040,6 +9160,17 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { if (N0.getOpcode() == ISD::FP_ROUND) { const bool NIsTrunc = N->getConstantOperandVal(1) == 1; const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1; + + // Skip this folding if it results in an fp_round from f80 to f16. + // + // f80 to f16 always generates an expensive (and as yet, unimplemented) + // libcall to __truncxfhf2 instead of selecting native f16 conversion + // instructions from f32 or f64. Moreover, the first (value-preserving) + // fp_round from f80 to either f32 or f64 may become a NOP in platforms like + // x86. + if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16) + return SDValue(); + // If the first fp_round isn't a value preserving truncation, it might // introduce a tie in the second fp_round, that wouldn't occur in the // single-step fp_round we want to fold to. @@ -9198,7 +9329,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int, DAG.getConstant(SignMask, DL0, IntVT)); AddToWorklist(Int.getNode()); - return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int); + return DAG.getBitcast(VT, Int); } } @@ -9303,7 +9434,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { Int = DAG.getNode(ISD::AND, DL, IntVT, Int, DAG.getConstant(SignMask, DL, IntVT)); AddToWorklist(Int.getNode()); - return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int); + return DAG.getBitcast(N->getValueType(0), Int); } } @@ -9607,6 +9738,11 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { return false; } + // Caches for hasPredecessorHelper. + SmallPtrSet<const SDNode *, 32> Visited; + SmallVector<const SDNode *, 16> Worklist; + Worklist.push_back(N); + // If the offset is a constant, there may be other adds of constants that // can be folded with this one. We should do this to avoid having to keep // a copy of the original base pointer. @@ -9621,7 +9757,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { if (Use.getUser() == Ptr.getNode() || Use != BasePtr) continue; - if (Use.getUser()->isPredecessorOf(N)) + if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist)) continue; if (Use.getUser()->getOpcode() != ISD::ADD && @@ -9651,14 +9787,10 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // Now check for #3 and #4. bool RealUse = false; - // Caches for hasPredecessorHelper - SmallPtrSet<const SDNode *, 32> Visited; - SmallVector<const SDNode *, 16> Worklist; - for (SDNode *Use : Ptr.getNode()->uses()) { if (Use == N) continue; - if (N->hasPredecessorHelper(Use, Visited, Worklist)) + if (SDNode::hasPredecessorHelper(Use, Visited, Worklist)) return false; // If Ptr may be folded in addressing mode of other use, then it's @@ -9720,7 +9852,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { ConstantSDNode *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx)); int X0, X1, Y0, Y1; - APInt Offset0 = CN->getAPIntValue(); + const APInt &Offset0 = CN->getAPIntValue(); APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue(); X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1; @@ -9984,13 +10116,10 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > LD->getMemOperand()->getBaseAlignment()) { - SDValue NewLoad = - DAG.getExtLoad(LD->getExtensionType(), SDLoc(N), - LD->getValueType(0), - Chain, Ptr, LD->getPointerInfo(), - LD->getMemoryVT(), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), Align, LD->getAAInfo()); + SDValue NewLoad = DAG.getExtLoad( + LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr, + LD->getPointerInfo(), LD->getMemoryVT(), Align, + LD->getMemOperand()->getFlags(), LD->getAAInfo()); if (NewLoad.getNode() != N) return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); } @@ -10208,7 +10337,7 @@ struct LoadedSlice { return false; // Offsets are for indexed load only, we do not handle that. - if (Origin->getOffset().getOpcode() != ISD::UNDEF) + if (!Origin->getOffset().isUndef()) return false; const TargetLowering &TLI = DAG->getTargetLoweringInfo(); @@ -10291,10 +10420,10 @@ struct LoadedSlice { EVT SliceType = getLoadedType(); // Create the load for the slice. - SDValue LastInst = DAG->getLoad( - SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr, - Origin->getPointerInfo().getWithOffset(Offset), Origin->isVolatile(), - Origin->isNonTemporal(), Origin->isInvariant(), getAlignment()); + SDValue LastInst = + DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr, + Origin->getPointerInfo().getWithOffset(Offset), + getAlignment(), Origin->getMemOperand()->getFlags()); // If the final type is not the same as the loaded type, this means that // we have to pad with zero. Create a zero extend for that. EVT FinalType = Inst->getValueType(0); @@ -10718,9 +10847,10 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal); ++OpsNarrowed; - return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr, - St->getPointerInfo().getWithOffset(StOffset), - false, false, NewAlign).getNode(); + return DAG + .getStore(St->getChain(), SDLoc(St), IVal, Ptr, + St->getPointerInfo().getWithOffset(StOffset), NewAlign) + .getNode(); } @@ -10826,19 +10956,16 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { Ptr.getValueType(), Ptr, DAG.getConstant(PtrOff, SDLoc(LD), Ptr.getValueType())); - SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0), - LD->getChain(), NewPtr, - LD->getPointerInfo().getWithOffset(PtrOff), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), NewAlign, - LD->getAAInfo()); + SDValue NewLD = + DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr, + LD->getPointerInfo().getWithOffset(PtrOff), NewAlign, + LD->getMemOperand()->getFlags(), LD->getAAInfo()); SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD, DAG.getConstant(NewImm, SDLoc(Value), NewVT)); - SDValue NewST = DAG.getStore(Chain, SDLoc(N), - NewVal, NewPtr, - ST->getPointerInfo().getWithOffset(PtrOff), - false, false, NewAlign); + SDValue NewST = + DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr, + ST->getPointerInfo().getWithOffset(PtrOff), NewAlign); AddToWorklist(NewPtr.getNode()); AddToWorklist(NewLD.getNode()); @@ -10887,15 +11014,13 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { if (LDAlign < ABIAlign || STAlign < ABIAlign) return SDValue(); - SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), - LD->getChain(), LD->getBasePtr(), - LD->getPointerInfo(), - false, false, false, LDAlign); + SDValue NewLD = + DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(), + LD->getPointerInfo(), LDAlign); - SDValue NewST = DAG.getStore(NewLD.getValue(1), SDLoc(N), - NewLD, ST->getBasePtr(), - ST->getPointerInfo(), - false, false, STAlign); + SDValue NewST = + DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(), + ST->getPointerInfo(), STAlign); AddToWorklist(NewLD.getNode()); AddToWorklist(NewST.getNode()); @@ -10940,9 +11065,23 @@ struct BaseIndexOffset { } /// Parses tree in Ptr for base, index, offset addresses. - static BaseIndexOffset match(SDValue Ptr) { + static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG) { bool IsIndexSignExt = false; + // Split up a folded GlobalAddress+Offset into its component parts. + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ptr)) + if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) { + return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(), + SDLoc(GA), + GA->getValueType(0), + /*Offset=*/0, + /*isTargetGA=*/false, + GA->getTargetFlags()), + SDValue(), + GA->getOffset(), + IsIndexSignExt); + } + // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD // instruction, then it could be just the BASE or everything else we don't // know how to handle. Just use Ptr as BASE and give up. @@ -11063,7 +11202,7 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, // multiply (CONST * A) after we also do the same transformation // to the "t2" instruction. if (OtherOp->getOpcode() == ISD::ADD && - isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) && + DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) && OtherOp->getOperand(0).getNode() == MulVar) return true; } @@ -11073,11 +11212,9 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, return false; } -SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG, - SDLoc SL, - ArrayRef<MemOpLink> Stores, - SmallVectorImpl<SDValue> &Chains, - EVT Ty) const { +SDValue DAGCombiner::getMergedConstantVectorStore( + SelectionDAG &DAG, const SDLoc &SL, ArrayRef<MemOpLink> Stores, + SmallVectorImpl<SDValue> &Chains, EVT Ty) const { SmallVector<SDValue, 8> BuildVector; for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) { @@ -11086,7 +11223,7 @@ SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG, BuildVector.push_back(St->getValue()); } - return DAG.getNode(ISD::BUILD_VECTOR, SL, Ty, BuildVector); + return DAG.getBuildVector(Ty, SL, BuildVector); } bool DAGCombiner::MergeStoresOfConstantsOrVecElts( @@ -11182,29 +11319,36 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), - false, false, FirstInChain->getAlignment()); - // Replace the last store with the new store - CombineTo(LatestOp, NewStore); - // Erase all other stores. - for (unsigned i = 0; i < NumStores; ++i) { - if (StoreNodes[i].MemNode == LatestOp) - continue; - StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); - // ReplaceAllUsesWith will replace all uses that existed when it was - // called, but graph optimizations may cause new ones to appear. For - // example, the case in pr14333 looks like - // - // St's chain -> St -> another store -> X - // - // And the only difference from St to the other store is the chain. - // When we change it's chain to be St's chain they become identical, - // get CSEed and the net result is that X is now a use of St. - // Since we know that St is redundant, just iterate. - while (!St->use_empty()) - DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); - deleteAndRecombine(St); + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA + : DAG.getSubtarget().useAA(); + if (UseAA) { + // Replace all merged stores with the new store. + for (unsigned i = 0; i < NumStores; ++i) + CombineTo(StoreNodes[i].MemNode, NewStore); + } else { + // Replace the last store with the new store. + CombineTo(LatestOp, NewStore); + // Erase all other stores. + for (unsigned i = 0; i < NumStores; ++i) { + if (StoreNodes[i].MemNode == LatestOp) + continue; + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + // ReplaceAllUsesWith will replace all uses that existed when it was + // called, but graph optimizations may cause new ones to appear. For + // example, the case in pr14333 looks like + // + // St's chain -> St -> another store -> X + // + // And the only difference from St to the other store is the chain. + // When we change it's chain to be St's chain they become identical, + // get CSEed and the net result is that X is now a use of St. + // Since we know that St is redundant, just iterate. + while (!St->use_empty()) + DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); + deleteAndRecombine(St); + } } return true; @@ -11215,14 +11359,14 @@ void DAGCombiner::getStoreMergeAndAliasCandidates( SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. - BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); + BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); // We must have a base and an offset. if (!BasePtr.Base.getNode()) return; // Do not handle stores to undef base pointers. - if (BasePtr.Base.getOpcode() == ISD::UNDEF) + if (BasePtr.Base.isUndef()) return; // Walk up the chain and look for nodes with offsets from the same @@ -11253,7 +11397,7 @@ void DAGCombiner::getStoreMergeAndAliasCandidates( if (OtherST->getMemoryVT() != MemVT) continue; - BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr()); + BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr(), DAG); if (Ptr.equalBaseIndex(BasePtr)) StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++)); @@ -11269,7 +11413,7 @@ void DAGCombiner::getStoreMergeAndAliasCandidates( break; // Find the base pointer and offset for this memory node. - BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr()); + BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG); // Check that the base pointer is the same as the original one. if (!Ptr.equalBaseIndex(BasePtr)) @@ -11280,9 +11424,8 @@ void DAGCombiner::getStoreMergeAndAliasCandidates( break; // No truncation. - if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index)) - if (St->isTruncatingStore()) - break; + if (Index->isTruncatingStore()) + break; // The stored memory type must be the same. if (Index->getMemoryVT() != MemVT) @@ -11326,6 +11469,30 @@ void DAGCombiner::getStoreMergeAndAliasCandidates( } } +// We need to check that merging these stores does not cause a loop +// in the DAG. Any store candidate may depend on another candidate +// indirectly through its operand (we already consider dependencies +// through the chain). Check in parallel by searching up from +// non-chain operands of candidates. +bool DAGCombiner::checkMergeStoreCandidatesForDependencies( + SmallVectorImpl<MemOpLink> &StoreNodes) { + SmallPtrSet<const SDNode *, 16> Visited; + SmallVector<const SDNode *, 8> Worklist; + // search ops of store candidates + for (unsigned i = 0; i < StoreNodes.size(); ++i) { + SDNode *n = StoreNodes[i].MemNode; + // Potential loops may happen only through non-chain operands + for (unsigned j = 1; j < n->getNumOperands(); ++j) + Worklist.push_back(n->getOperand(j).getNode()); + } + // search through DAG. We can stop early if we find a storenode + for (unsigned i = 0; i < StoreNodes.size(); ++i) { + if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist)) + return false; + } + return true; +} + bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (OptLevel == CodeGenOpt::None) return false; @@ -11379,6 +11546,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (StoreNodes.size() < 2) return false; + // only do dep endence check in AA case + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA + : DAG.getSubtarget().useAA(); + if (UseAA && !checkMergeStoreCandidatesForDependencies(StoreNodes)) + return false; + // Sort the memory operands according to their distance from the // base pointer. As a secondary criteria: make sure stores coming // later in the code come first in the list. This is important for @@ -11557,7 +11730,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (Ld->getMemoryVT() != MemVT) break; - BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr()); + BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG); // If this is not the first ptr that we check. if (LdBasePtr.Base.getNode()) { // The base ptr must be the same. @@ -11690,16 +11863,16 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // The merged loads are required to have the same incoming chain, so // using the first's chain is acceptable. - SDValue NewLoad = DAG.getLoad( - JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(), - FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign); + SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(), + FirstLoad->getBasePtr(), + FirstLoad->getPointerInfo(), FirstLoadAlign); SDValue NewStoreChain = DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains); - SDValue NewStore = DAG.getStore( - NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), false, false, FirstStoreAlign); + SDValue NewStore = + DAG.getStore(NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), FirstStoreAlign); // Transfer chain users from old loads to the new load. for (unsigned i = 0; i < NumElem; ++i) { @@ -11708,16 +11881,22 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { SDValue(NewLoad.getNode(), 1)); } - // Replace the last store with the new store. - CombineTo(LatestOp, NewStore); - // Erase all other stores. - for (unsigned i = 0; i < NumElem ; ++i) { - // Remove all Store nodes. - if (StoreNodes[i].MemNode == LatestOp) - continue; - StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); - DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain()); - deleteAndRecombine(St); + if (UseAA) { + // Replace the all stores with the new store. + for (unsigned i = 0; i < NumElem; ++i) + CombineTo(StoreNodes[i].MemNode, NewStore); + } else { + // Replace the last store with the new store. + CombineTo(LatestOp, NewStore); + // Erase all other stores. + for (unsigned i = 0; i < NumElem; ++i) { + // Remove all Store nodes. + if (StoreNodes[i].MemNode == LatestOp) + continue; + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain()); + deleteAndRecombine(St); + } } return true; @@ -11808,21 +11987,17 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { std::swap(Lo, Hi); unsigned Alignment = ST->getAlignment(); - bool isVolatile = ST->isVolatile(); - bool isNonTemporal = ST->isNonTemporal(); + MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); - SDValue St0 = DAG.getStore(Chain, DL, Lo, - Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, - ST->getAlignment(), AAInfo); + SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(), + ST->getAlignment(), MMOFlags, AAInfo); Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, DAG.getConstant(4, DL, Ptr.getValueType())); Alignment = MinAlign(Alignment, 4U); - SDValue St1 = DAG.getStore(Chain, DL, Hi, - Ptr, ST->getPointerInfo().getWithOffset(4), - isVolatile, isNonTemporal, - Alignment, AAInfo); + SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr, + ST->getPointerInfo().getWithOffset(4), + Alignment, MMOFlags, AAInfo); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, St0, St1); } @@ -11841,21 +12016,24 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // resultant store does not need a higher alignment than the original. if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() && ST->isUnindexed()) { - unsigned OrigAlign = ST->getAlignment(); EVT SVT = Value.getOperand(0).getValueType(); - unsigned Align = DAG.getDataLayout().getABITypeAlignment( - SVT.getTypeForEVT(*DAG.getContext())); - if (Align <= OrigAlign && - ((!LegalOperations && !ST->isVolatile()) || - TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) - return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), - Ptr, ST->getPointerInfo(), ST->isVolatile(), - ST->isNonTemporal(), OrigAlign, - ST->getAAInfo()); + if (((!LegalOperations && !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) && + TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) { + unsigned OrigAlign = ST->getAlignment(); + bool Fast = false; + if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT, + ST->getAddressSpace(), OrigAlign, &Fast) && + Fast) { + return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, + ST->getPointerInfo(), OrigAlign, + ST->getMemOperand()->getFlags(), ST->getAAInfo()); + } + } } // Turn 'store undef, Ptr' -> nothing. - if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed()) + if (Value.isUndef() && ST->isUnindexed()) return Chain; // Try to infer better alignment information than the store already has. @@ -11863,10 +12041,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > ST->getAlignment()) { SDValue NewStore = - DAG.getTruncStore(Chain, SDLoc(N), Value, - Ptr, ST->getPointerInfo(), ST->getMemoryVT(), - ST->isVolatile(), ST->isNonTemporal(), Align, - ST->getAAInfo()); + DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(), + ST->getMemoryVT(), Align, + ST->getMemOperand()->getFlags(), ST->getAAInfo()); if (NewStore.getNode() != N) return CombineTo(ST, NewStore, true); } @@ -11898,6 +12075,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // manipulation. Return the original node to not do anything else. return SDValue(ST, 0); } + Chain = ST->getChain(); } // Try transforming N to an indexed store. @@ -12001,7 +12179,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDLoc dl(N); // If the inserted element is an UNDEF, just use the input vector. - if (InVal.getOpcode() == ISD::UNDEF) + if (InVal.isUndef()) return InVec; EVT VT = InVec.getValueType(); @@ -12045,7 +12223,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) { Ops.append(InVec.getNode()->op_begin(), InVec.getNode()->op_end()); - } else if (InVec.getOpcode() == ISD::UNDEF) { + } else if (InVec.isUndef()) { unsigned NElts = VT.getVectorNumElements(); Ops.append(NElts, DAG.getUNDEF(InVal.getValueType())); } else { @@ -12065,11 +12243,13 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { } // Return the new vector - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); + return DAG.getBuildVector(VT, dl, Ops); } SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) { + assert(!OriginalLoad->isVolatile()); + EVT ResultVT = EVE->getValueType(0); EVT VecEltVT = InVecVT.getVectorElementType(); unsigned Align = OriginalLoad->getAlignment(); @@ -12115,21 +12295,20 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( VecEltVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD; - Load = DAG.getExtLoad( - ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(), NewPtr, MPI, - VecEltVT, OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(), - OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo()); + Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT, + OriginalLoad->getChain(), NewPtr, MPI, VecEltVT, + Align, OriginalLoad->getMemOperand()->getFlags(), + OriginalLoad->getAAInfo()); Chain = Load.getValue(1); } else { - Load = DAG.getLoad( - VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, - OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(), - OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo()); + Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, + MPI, Align, OriginalLoad->getMemOperand()->getFlags(), + OriginalLoad->getAAInfo()); Chain = Load.getValue(1); if (ResultVT.bitsLT(VecEltVT)) Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load); else - Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load); + Load = DAG.getBitcast(ResultVT, Load); } WorklistRemover DeadNodes(*this); SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) }; @@ -12183,6 +12362,24 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // converts. } + // extract_vector_elt (v2i32 (bitcast i64:x)), 0 -> i32 (trunc i64:x) + if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() && + ConstEltNo->isNullValue() && VT.isInteger()) { + SDValue BCSrc = InVec.getOperand(0); + if (BCSrc.getValueType().isScalarInteger()) + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc); + } + + // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val + // + // This only really matters if the index is non-constant since other combines + // on the constant elements already work. + if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && + EltNo == InVec.getOperand(2)) { + SDValue Elt = InVec.getOperand(1); + return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt; + } + // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT. // We only perform this optimization before the op legalization phase because // we may introduce new vector instructions which are not backed by TD @@ -12256,9 +12453,12 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { ISD::isNormalLoad(InVec.getNode()) && !N->getOperand(1)->hasPredecessor(InVec.getNode())) { SDValue Index = N->getOperand(1); - if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) - return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index, - OrigLoad); + if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) { + if (!OrigLoad->isVolatile()) { + return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index, + OrigLoad); + } + } } // Perform only after legalization to ensure build_vector / vector_shuffle @@ -12358,7 +12558,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { for (unsigned i = 0; i != NumInScalars; ++i) { SDValue In = N->getOperand(i); // Ignore undef inputs. - if (In.getOpcode() == ISD::UNDEF) continue; + if (In.isUndef()) continue; bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND; bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND; @@ -12413,9 +12613,9 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { SDValue Cast = N->getOperand(i); assert((Cast.getOpcode() == ISD::ANY_EXTEND || Cast.getOpcode() == ISD::ZERO_EXTEND || - Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode"); + Cast.isUndef()) && "Invalid cast opcode"); SDValue In; - if (Cast.getOpcode() == ISD::UNDEF) + if (Cast.isUndef()) In = DAG.getUNDEF(SourceType); else In = Cast->getOperand(0); @@ -12434,12 +12634,12 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { if (!isTypeLegal(VecVT)) return SDValue(); // Make the new BUILD_VECTOR. - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops); + SDValue BV = DAG.getBuildVector(VecVT, dl, Ops); // The new BUILD_VECTOR node has the potential to be further optimized. AddToWorklist(BV.getNode()); // Bitcast to the desired type. - return DAG.getNode(ISD::BITCAST, dl, VT, BV); + return DAG.getBitcast(VT, BV); } SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { @@ -12502,12 +12702,12 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { for (unsigned i = 0; i != NumInScalars; ++i) { SDValue In = N->getOperand(i); - if (In.getOpcode() == ISD::UNDEF) + if (In.isUndef()) Opnds.push_back(DAG.getUNDEF(SrcVT)); else Opnds.push_back(In.getOperand(0)); } - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Opnds); + SDValue BV = DAG.getBuildVector(NVT, dl, Opnds); AddToWorklist(BV.getNode()); return DAG.getNode(Opcode, dl, VT, BV); @@ -12545,7 +12745,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { for (unsigned i = 0; i != NumInScalars; ++i) { SDValue Op = N->getOperand(i); // Ignore undef inputs. - if (Op.getOpcode() == ISD::UNDEF) continue; + if (Op.isUndef()) continue; // See if we can combine this build_vector into a blend with a zero vector. if (!VecIn2.getNode() && (isNullConstant(Op) || isNullFPConstant(Op))) { @@ -12681,7 +12881,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { SDValue Ops[2]; Ops[0] = VecIn1; Ops[1] = VecIn2; - return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], &Mask[0]); + return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], Mask); } return SDValue(); @@ -12735,18 +12935,17 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { for (SDValue &Op : Ops) { if (Op.getValueType() == SVT) continue; - if (Op.getOpcode() == ISD::UNDEF) + if (Op.isUndef()) Op = ScalarUndef; else - Op = DAG.getNode(ISD::BITCAST, DL, SVT, Op); + Op = DAG.getBitcast(SVT, Op); } } } EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT, VT.getSizeInBits() / SVT.getSizeInBits()); - return DAG.getNode(ISD::BITCAST, DL, VT, - DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops)); + return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops)); } // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR @@ -12768,7 +12967,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { Op = Op.getOperand(0); // UNDEF nodes convert to UNDEF shuffle mask values. - if (Op.getOpcode() == ISD::UNDEF) { + if (Op.isUndef()) { Mask.append((unsigned)NumOpElts, -1); continue; } @@ -12788,7 +12987,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { ExtVec = ExtVec.getOperand(0); // UNDEF nodes convert to UNDEF shuffle mask values. - if (ExtVec.getOpcode() == ISD::UNDEF) { + if (ExtVec.isUndef()) { Mask.append((unsigned)NumOpElts, -1); continue; } @@ -12812,11 +13011,11 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { return SDValue(); // At most we can reference 2 inputs in the final shuffle. - if (SV0.getOpcode() == ISD::UNDEF || SV0 == ExtVec) { + if (SV0.isUndef() || SV0 == ExtVec) { SV0 = ExtVec; for (int i = 0; i != NumOpElts; ++i) Mask.push_back(i + ExtIdx); - } else if (SV1.getOpcode() == ISD::UNDEF || SV1 == ExtVec) { + } else if (SV1.isUndef() || SV1 == ExtVec) { SV1 = ExtVec; for (int i = 0; i != NumOpElts; ++i) Mask.push_back(i + ExtIdx + NumElts); @@ -12844,7 +13043,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // Optimize concat_vectors where all but the first of the vectors are undef. if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) { - return Op.getOpcode() == ISD::UNDEF; + return Op.isUndef(); })) { SDValue In = N->getOperand(0); assert(In.getValueType().isVector() && "Must concat vectors"); @@ -12874,7 +13073,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { SDLoc dl = SDLoc(N); SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar); - return DAG.getNode(ISD::BITCAST, dl, VT, Res); + return DAG.getBitcast(VT, Res); } } @@ -12885,9 +13084,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { auto IsBuildVectorOrUndef = [](const SDValue &Op) { return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode(); }; - bool AllBuildVectorsOrUndefs = - std::all_of(N->op_begin(), N->op_end(), IsBuildVectorOrUndef); - if (AllBuildVectorsOrUndefs) { + if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) { SmallVector<SDValue, 8> Opnds; EVT SVT = VT.getScalarType(); @@ -12926,7 +13123,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { assert(VT.getVectorNumElements() == Opnds.size() && "Concat vector type mismatch"); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); + return DAG.getBuildVector(VT, SDLoc(N), Opnds); } // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR. @@ -12948,7 +13145,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { SDValue Op = N->getOperand(i); - if (Op.getOpcode() == ISD::UNDEF) + if (Op.isUndef()) continue; // Check if this is the identity extract: @@ -13033,11 +13230,11 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { // otherwise => (extract_subvec V1, ExtIdx) if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() == ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits()) - return DAG.getNode(ISD::BITCAST, dl, NVT, V->getOperand(1)); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, - DAG.getNode(ISD::BITCAST, dl, - N->getOperand(0).getValueType(), - V->getOperand(0)), N->getOperand(1)); + return DAG.getBitcast(NVT, V->getOperand(1)); + return DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, NVT, + DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)), + N->getOperand(1)); } } @@ -13148,7 +13345,7 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { // Special case: shuffle(concat(A,B)) can be more efficiently represented // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high // half vector elements. - if (NumElemsPerConcat * 2 == NumElts && N1.getOpcode() == ISD::UNDEF && + if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() && std::all_of(SVN->getMask().begin() + NumElemsPerConcat, SVN->getMask().end(), [](int i) { return i == -1; })) { N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1), @@ -13204,7 +13401,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG"); // Canonicalize shuffle undef, undef -> undef - if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) + if (N0.isUndef() && N1.isUndef()) return DAG.getUNDEF(VT); ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); @@ -13217,29 +13414,15 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (Idx >= (int)NumElts) Idx -= NumElts; NewMask.push_back(Idx); } - return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), - &NewMask[0]); + return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask); } // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. - if (N0.getOpcode() == ISD::UNDEF) { - SmallVector<int, 8> NewMask; - for (unsigned i = 0; i != NumElts; ++i) { - int Idx = SVN->getMaskElt(i); - if (Idx >= 0) { - if (Idx >= (int)NumElts) - Idx -= NumElts; - else - Idx = -1; // remove reference to lhs - } - NewMask.push_back(Idx); - } - return DAG.getVectorShuffle(VT, SDLoc(N), N1, DAG.getUNDEF(VT), - &NewMask[0]); - } + if (N0.isUndef()) + return DAG.getCommutedVectorShuffle(*SVN); // Remove references to rhs if it is undef - if (N1.getOpcode() == ISD::UNDEF) { + if (N1.isUndef()) { bool Changed = false; SmallVector<int, 8> NewMask; for (unsigned i = 0; i != NumElts; ++i) { @@ -13251,7 +13434,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { NewMask.push_back(Idx); } if (Changed) - return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, &NewMask[0]); + return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask); } // If it is a splat, check if the argument vector is another splat or a @@ -13275,7 +13458,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { SDValue Base; bool AllSame = true; for (unsigned i = 0; i != NumElts; ++i) { - if (V->getOperand(i).getOpcode() != ISD::UNDEF) { + if (!V->getOperand(i).isUndef()) { Base = V->getOperand(i); break; } @@ -13296,13 +13479,12 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // Canonicalize any other splat as a build_vector. const SDValue &Splatted = V->getOperand(SVN->getSplatIndex()); SmallVector<SDValue, 8> Ops(NumElts, Splatted); - SDValue NewBV = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), - V->getValueType(0), Ops); + SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops); // We may have jumped through bitcasts, so the type of the // BUILD_VECTOR may not match the type of the shuffle. if (V->getValueType(0) != VT) - NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV); + NewBV = DAG.getBitcast(VT, NewBV); return NewBV; } } @@ -13315,12 +13497,10 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (N0.getOpcode() == ISD::CONCAT_VECTORS && Level < AfterLegalizeVectorOps && - (N1.getOpcode() == ISD::UNDEF || + (N1.isUndef() || (N1.getOpcode() == ISD::CONCAT_VECTORS && N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) { - SDValue V = partitionShuffleOfConcats(N, DAG); - - if (V.getNode()) + if (SDValue V = partitionShuffleOfConcats(N, DAG)) return V; } @@ -13357,7 +13537,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { Op = TLI.isZExtFree(Op.getValueType(), SVT) ? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT) : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Ops); + return DAG.getBuildVector(VT, SDLoc(N), Ops); } } @@ -13365,7 +13545,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // attempt to merge the 2 shuffles and suitably bitcast the inputs/output // back to their original types. if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && - N1.getOpcode() == ISD::UNDEF && Level < AfterLegalizeVectorOps && + N1.isUndef() && Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) { // Peek through the bitcast only if there is one user. @@ -13426,11 +13606,10 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } if (LegalMask) { - SV0 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV0); - SV1 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV1); - return DAG.getNode( - ISD::BITCAST, SDLoc(N), VT, - DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask)); + SV0 = DAG.getBitcast(ScaleVT, SV0); + SV1 = DAG.getBitcast(ScaleVT, SV1); + return DAG.getBitcast( + VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask)); } } } @@ -13451,7 +13630,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { SDValue SV0 = N1->getOperand(0); SDValue SV1 = N1->getOperand(1); bool HasSameOp0 = N0 == SV0; - bool IsSV1Undef = SV1.getOpcode() == ISD::UNDEF; + bool IsSV1Undef = SV1.isUndef(); if (HasSameOp0 || IsSV1Undef || N0 == SV1) // Commute the operands of this shuffle so that next rule // will trigger. @@ -13504,7 +13683,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } // Simple case where 'CurrentVec' is UNDEF. - if (CurrentVec.getOpcode() == ISD::UNDEF) { + if (CurrentVec.isUndef()) { Mask.push_back(-1); continue; } @@ -13559,7 +13738,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) - return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]); + return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask); } return SDValue(); @@ -13595,26 +13774,30 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); + if (N0.getValueType() != N1.getValueType()) + return SDValue(); + // If the input vector is a concatenation, and the insert replaces // one of the halves, we can optimize into a single concat_vectors. - if (N0.getOpcode() == ISD::CONCAT_VECTORS && - N0->getNumOperands() == 2 && N2.getOpcode() == ISD::Constant) { + if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0->getNumOperands() == 2 && + N2.getOpcode() == ISD::Constant) { APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue(); EVT VT = N->getValueType(0); // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) -> // (concat_vectors Z, Y) if (InsIdx == 0) - return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, - N->getOperand(1), N0.getOperand(1)); + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N1, + N0.getOperand(1)); // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) -> // (concat_vectors X, Z) - if (InsIdx == VT.getVectorNumElements()/2) - return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, - N0.getOperand(0), N->getOperand(1)); + if (InsIdx == VT.getVectorNumElements() / 2) + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0.getOperand(0), + N1); } return SDValue(); @@ -13684,7 +13867,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { int EltIdx = i / Split; int SubIdx = i % Split; SDValue Elt = RHS.getOperand(EltIdx); - if (Elt.getOpcode() == ISD::UNDEF) { + if (Elt.isUndef()) { Indices.push_back(-1); continue; } @@ -13724,7 +13907,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { SDValue Zero = DAG.getConstant(0, dl, ClearVT); return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, dl, DAG.getBitcast(ClearVT, LHS), - Zero, &Indices[0])); + Zero, Indices)); }; // Determine maximum split level (byte level masking). @@ -13763,8 +13946,8 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { // -> (shuffle (VBinOp (A, B)), Undef, Mask). if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) && isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() && - LHS.getOperand(1).getOpcode() == ISD::UNDEF && - RHS.getOperand(1).getOpcode() == ISD::UNDEF) { + LHS.getOperand(1).isUndef() && + RHS.getOperand(1).isUndef()) { ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS); ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS); @@ -13776,15 +13959,15 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { N->getFlags()); AddUsersToWorklist(N); return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector, - &SVN0->getMask()[0]); + SVN0->getMask()); } } return SDValue(); } -SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, - SDValue N1, SDValue N2){ +SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, + SDValue N2) { assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!"); SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2, @@ -13819,33 +14002,33 @@ SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, SDValue RHS) { - // fold (select (setcc x, -0.0, *lt), NaN, (fsqrt x)) - // The select + setcc is redundant, because fsqrt returns NaN for X < -0. + // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x)) + // The select + setcc is redundant, because fsqrt returns NaN for X < 0. if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) { if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) { // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?)) SDValue Sqrt = RHS; ISD::CondCode CC; SDValue CmpLHS; - const ConstantFPSDNode *NegZero = nullptr; + const ConstantFPSDNode *Zero = nullptr; if (TheSelect->getOpcode() == ISD::SELECT_CC) { CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get(); CmpLHS = TheSelect->getOperand(0); - NegZero = isConstOrConstSplatFP(TheSelect->getOperand(1)); + Zero = isConstOrConstSplatFP(TheSelect->getOperand(1)); } else { // SELECT or VSELECT SDValue Cmp = TheSelect->getOperand(0); if (Cmp.getOpcode() == ISD::SETCC) { CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get(); CmpLHS = Cmp.getOperand(0); - NegZero = isConstOrConstSplatFP(Cmp.getOperand(1)); + Zero = isConstOrConstSplatFP(Cmp.getOperand(1)); } } - if (NegZero && NegZero->isNegative() && NegZero->isZero() && + if (Zero && Zero->isZero() && Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT || CC == ISD::SETULT || CC == ISD::SETLT)) { - // We have: (select (setcc x, -0.0, *lt), NaN, (fsqrt x)) + // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x)) CombineTo(TheSelect, Sqrt); return true; } @@ -13932,24 +14115,22 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, // It is safe to replace the two loads if they have different alignments, // but the new load must be the minimum (most restrictive) alignment of the // inputs. - bool isInvariant = LLD->isInvariant() & RLD->isInvariant(); unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment()); + MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags(); + if (!RLD->isInvariant()) + MMOFlags &= ~MachineMemOperand::MOInvariant; if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { - Load = DAG.getLoad(TheSelect->getValueType(0), - SDLoc(TheSelect), - // FIXME: Discards pointer and AA info. - LLD->getChain(), Addr, MachinePointerInfo(), - LLD->isVolatile(), LLD->isNonTemporal(), - isInvariant, Alignment); + // FIXME: Discards pointer and AA info. + Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect), + LLD->getChain(), Addr, MachinePointerInfo(), Alignment, + MMOFlags); } else { - Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ? - RLD->getExtensionType() : LLD->getExtensionType(), - SDLoc(TheSelect), - TheSelect->getValueType(0), - // FIXME: Discards pointer and AA info. - LLD->getChain(), Addr, MachinePointerInfo(), - LLD->getMemoryVT(), LLD->isVolatile(), - LLD->isNonTemporal(), isInvariant, Alignment); + // FIXME: Discards pointer and AA info. + Load = DAG.getExtLoad( + LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType() + : LLD->getExtensionType(), + SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr, + MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags); } // Users of the select now use the result of the load. @@ -13967,9 +14148,9 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, /// Simplify an expression of the form (N0 cond N1) ? N2 : N3 /// where 'cond' is the comparison specified by CC. -SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, - SDValue N2, SDValue N3, - ISD::CondCode CC, bool NotExtCompare) { +SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, + SDValue N2, SDValue N3, ISD::CondCode CC, + bool NotExtCompare) { // (x ? y : y) -> y. if (N2 == N3) return N2; @@ -14057,7 +14238,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, return DAG.getLoad( TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), - false, false, false, Alignment); + Alignment); } } @@ -14116,7 +14297,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1)); if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) { // Shift the tested bit over the sign bit. - APInt AndMask = ConstAndRHS->getAPIntValue(); + const APInt &AndMask = ConstAndRHS->getAPIntValue(); SDValue ShlAmt = DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS), getShiftAmountTy(AndLHS.getValueType())); @@ -14210,13 +14391,48 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, } } + // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X) + // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X) + // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X) + // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X) + // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X) + // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X) + // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X) + // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X) + if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { + SDValue ValueOnZero = N2; + SDValue Count = N3; + // If the condition is NE instead of E, swap the operands. + if (CC == ISD::SETNE) + std::swap(ValueOnZero, Count); + // Check if the value on zero is a constant equal to the bits in the type. + if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) { + if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) { + // If the other operand is cttz/cttz_zero_undef of N0, and cttz is + // legal, combine to just cttz. + if ((Count.getOpcode() == ISD::CTTZ || + Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) && + N0 == Count.getOperand(0) && + (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT))) + return DAG.getNode(ISD::CTTZ, DL, VT, N0); + // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is + // legal, combine to just ctlz. + if ((Count.getOpcode() == ISD::CTLZ || + Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) && + N0 == Count.getOperand(0) && + (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT))) + return DAG.getNode(ISD::CTLZ, DL, VT, N0); + } + } + } + return SDValue(); } /// This is a stub for TargetLowering::SimplifySetCC. -SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, - SDValue N1, ISD::CondCode Cond, - SDLoc DL, bool foldBooleans) { +SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, + ISD::CondCode Cond, const SDLoc &DL, + bool foldBooleans) { TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, Level, false, this); return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); @@ -14227,6 +14443,11 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, /// by a magic number. /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". SDValue DAGCombiner::BuildSDIV(SDNode *N) { + // when optimising for minimum size, we don't want to expand a div to a mul + // and a shift. + if (DAG.getMachineFunction().getFunction()->optForMinSize()) + return SDValue(); + ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); if (!C) return SDValue(); @@ -14268,6 +14489,11 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) { /// number. /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". SDValue DAGCombiner::BuildUDIV(SDNode *N) { + // when optimising for minimum size, we don't want to expand a div to a mul + // and a shift. + if (DAG.getMachineFunction().getFunction()->optForMinSize()) + return SDValue(); + ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); if (!C) return SDValue(); @@ -14334,9 +14560,9 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) { /// => /// X_{i+1} = X_i (1.5 - A X_i^2 / 2) /// As a result, we precompute A/2 prior to the iteration loop. -SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est, - unsigned Iterations, - SDNodeFlags *Flags) { +SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, + unsigned Iterations, + SDNodeFlags *Flags, bool Reciprocal) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT); @@ -14363,6 +14589,13 @@ SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est, Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); AddToWorklist(Est.getNode()); } + + // If non-reciprocal square root is requested, multiply the result by Arg. + if (!Reciprocal) { + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); + AddToWorklist(Est.getNode()); + } + return Est; } @@ -14371,35 +14604,55 @@ SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est, /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] /// => /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) -SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est, - unsigned Iterations, - SDNodeFlags *Flags) { +SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, + unsigned Iterations, + SDNodeFlags *Flags, bool Reciprocal) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT); SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT); - // Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est) - for (unsigned i = 0; i < Iterations; ++i) { - SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags); - AddToWorklist(HalfEst.getNode()); - - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); - AddToWorklist(Est.getNode()); + // This routine must enter the loop below to work correctly + // when (Reciprocal == false). + assert(Iterations > 0); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); - AddToWorklist(Est.getNode()); - - Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree, Flags); - AddToWorklist(Est.getNode()); + // Newton iterations for reciprocal square root: + // E = (E * -0.5) * ((A * E) * E + -3.0) + for (unsigned i = 0; i < Iterations; ++i) { + SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags); + AddToWorklist(AE.getNode()); + + SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags); + AddToWorklist(AEE.getNode()); + + SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags); + AddToWorklist(RHS.getNode()); + + // When calculating a square root at the last iteration build: + // S = ((A * E) * -0.5) * ((A * E) * E + -3.0) + // (notice a common subexpression) + SDValue LHS; + if (Reciprocal || (i + 1) < Iterations) { + // RSQRT: LHS = (E * -0.5) + LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags); + } else { + // SQRT: LHS = (A * E) * -0.5 + LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags); + } + AddToWorklist(LHS.getNode()); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst, Flags); + Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags); AddToWorklist(Est.getNode()); } + return Est; } -SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) { +/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case +/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if +/// Op can be zero. +SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, + bool Reciprocal) { if (Level >= AfterLegalizeDAG) return SDValue(); @@ -14410,9 +14663,9 @@ SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) { if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) { AddToWorklist(Est.getNode()); if (Iterations) { - Est = UseOneConstNR ? - BuildRsqrtNROneConst(Op, Est, Iterations, Flags) : - BuildRsqrtNRTwoConst(Op, Est, Iterations, Flags); + Est = UseOneConstNR + ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal) + : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal); } return Est; } @@ -14420,6 +14673,30 @@ SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) { return SDValue(); } +SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) { + return buildSqrtEstimateImpl(Op, Flags, true); +} + +SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags) { + SDValue Est = buildSqrtEstimateImpl(Op, Flags, false); + if (!Est) + return SDValue(); + + // Unfortunately, Est is now NaN if the input was exactly 0. + // Select out this case and force the answer to 0. + EVT VT = Est.getValueType(); + SDLoc DL(Op); + SDValue Zero = DAG.getConstantFP(0.0, DL, VT); + EVT CCVT = getSetCCResultType(VT); + SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, Zero, ISD::SETEQ); + AddToWorklist(ZeroCmp.getNode()); + + Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, ZeroCmp, + Zero, Est); + AddToWorklist(Est.getNode()); + return Est; +} + /// Return true if base is a frame index, which is known not to alias with /// anything but itself. Provides base object and offset as results. static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, @@ -14514,7 +14791,7 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) && (Op0->getMemoryVT().getSizeInBits() >> 3 == Op1->getMemoryVT().getSizeInBits() >> 3) && - (Op0->getOriginalAlignment() > Op0->getMemoryVT().getSizeInBits()) >> 3) { + (Op0->getOriginalAlignment() > (Op0->getMemoryVT().getSizeInBits() >> 3))) { int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment(); int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment(); @@ -14634,63 +14911,6 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, break; } } - - // We need to be careful here to also search for aliases through the - // value operand of a store, etc. Consider the following situation: - // Token1 = ... - // L1 = load Token1, %52 - // S1 = store Token1, L1, %51 - // L2 = load Token1, %52+8 - // S2 = store Token1, L2, %51+8 - // Token2 = Token(S1, S2) - // L3 = load Token2, %53 - // S3 = store Token2, L3, %52 - // L4 = load Token2, %53+8 - // S4 = store Token2, L4, %52+8 - // If we search for aliases of S3 (which loads address %52), and we look - // only through the chain, then we'll miss the trivial dependence on L1 - // (which also loads from %52). We then might change all loads and - // stores to use Token1 as their chain operand, which could result in - // copying %53 into %52 before copying %52 into %51 (which should - // happen first). - // - // The problem is, however, that searching for such data dependencies - // can become expensive, and the cost is not directly related to the - // chain depth. Instead, we'll rule out such configurations here by - // insisting that we've visited all chain users (except for users - // of the original chain, which is not necessary). When doing this, - // we need to look through nodes we don't care about (otherwise, things - // like register copies will interfere with trivial cases). - - SmallVector<const SDNode *, 16> Worklist; - for (const SDNode *N : Visited) - if (N != OriginalChain.getNode()) - Worklist.push_back(N); - - while (!Worklist.empty()) { - const SDNode *M = Worklist.pop_back_val(); - - // We have already visited M, and want to make sure we've visited any uses - // of M that we care about. For uses that we've not visisted, and don't - // care about, queue them to the worklist. - - for (SDNode::use_iterator UI = M->use_begin(), - UIE = M->use_end(); UI != UIE; ++UI) - if (UI.getUse().getValueType() == MVT::Other && - Visited.insert(*UI).second) { - if (isa<MemSDNode>(*UI)) { - // We've not visited this use, and we care about it (it could have an - // ordering dependency with the original node). - Aliases.clear(); - Aliases.push_back(OriginalChain); - return; - } - - // We've not visited this use, but we don't care about it. Mark it as - // visited and enqueue it to the worklist. - Worklist.push_back(*UI); - } - } } /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain @@ -14713,17 +14933,17 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases); } -bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) { +bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. - BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); + BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); // We must have a base and an offset. if (!BasePtr.Base.getNode()) return false; // Do not handle stores to undef base pointers. - if (BasePtr.Base.getOpcode() == ISD::UNDEF) + if (BasePtr.Base.isUndef()) return false; SmallVector<StoreSDNode *, 8> ChainedStores; @@ -14742,7 +14962,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) { break; // Find the base pointer and offset for this memory node. - BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr()); + BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG); // Check that the base pointer is the same as the original one. if (!Ptr.equalBaseIndex(BasePtr)) @@ -14756,6 +14976,10 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) { while (true) { if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) { // We found a store node. Use it for the next iteration. + if (STn->isVolatile() || STn->isIndexed()) { + Index = nullptr; + break; + } ChainedStores.push_back(STn); Index = STn; break; @@ -14769,7 +14993,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) { } } - bool MadeChange = false; + bool MadeChangeToSt = false; SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains; for (StoreSDNode *ChainedStore : ChainedStores) { @@ -14777,7 +15001,8 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) { SDValue BetterChain = FindBetterChain(ChainedStore, Chain); if (Chain != BetterChain) { - MadeChange = true; + if (ChainedStore == St) + MadeChangeToSt = true; BetterChains.push_back(std::make_pair(ChainedStore, BetterChain)); } } @@ -14787,7 +15012,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) { for (auto Replacement : BetterChains) replaceStoreChain(Replacement.first, Replacement.second); - return MadeChange; + return MadeChangeToSt; } /// This is the entry point for the file. |