summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp')
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp2333
1 files changed, 1441 insertions, 892 deletions
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5ecc6da..2c7bffe 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16,14 +16,15 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -181,7 +182,7 @@ namespace {
/// if things it uses can be simplified by bit propagation.
/// If so, return true.
bool SimplifyDemandedBits(SDValue Op) {
- unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+ unsigned BitWidth = Op.getScalarValueSizeInBits();
APInt Demanded = APInt::getAllOnesValue(BitWidth);
return SimplifyDemandedBits(Op, Demanded);
}
@@ -326,7 +327,7 @@ namespace {
SDValue visitFADDForFMACombine(SDNode *N);
SDValue visitFSUBForFMACombine(SDNode *N);
- SDValue visitFMULForFMACombine(SDNode *N);
+ SDValue visitFMULForFMADistributiveCombine(SDNode *N);
SDValue XformToShuffleWithZero(SDNode *N);
SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
@@ -334,12 +335,15 @@ namespace {
SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
+ SDValue foldSelectOfConstants(SDNode *N);
bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
SDValue N2, SDValue N3, ISD::CondCode CC,
bool NotExtCompare = false);
+ SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
+ SDValue N2, SDValue N3, ISD::CondCode CC);
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
const SDLoc &DL, bool foldBooleans = true);
@@ -356,6 +360,7 @@ namespace {
SDValue BuildSDIV(SDNode *N);
SDValue BuildSDIVPow2(SDNode *N);
SDValue BuildUDIV(SDNode *N);
+ SDValue BuildLogBase2(SDValue Op, const SDLoc &DL);
SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags);
SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags);
SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags);
@@ -374,9 +379,14 @@ namespace {
SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
+ SDValue splitMergedValStore(StoreSDNode *ST);
SDValue TransformFPLoadStorePair(SDNode *N);
SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
+ SDValue reduceBuildVecToShuffle(SDNode *N);
+ SDValue createBuildVecShuffle(SDLoc DL, SDNode *N, ArrayRef<int> VectorMask,
+ SDValue VecIn1, SDValue VecIn2,
+ unsigned LeftIdx);
SDValue GetDemandedBits(SDValue V, const APInt &Mask);
@@ -444,10 +454,11 @@ namespace {
/// This is a helper function for MergeConsecutiveStores. When the source
/// elements of the consecutive stores are all constants or all extracted
/// vector elements, try to merge them into one larger store.
- /// \return True if a merged store was created.
- bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
- EVT MemVT, unsigned NumStores,
- bool IsConstantSrc, bool UseVector);
+ /// \return number of stores that were merged into a merged store (always
+ /// a prefix of \p StoreNode).
+ bool MergeStoresOfConstantsOrVecElts(
+ SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
+ bool IsConstantSrc, bool UseVector);
/// This is a helper function for MergeConsecutiveStores.
/// Stores that may be merged are placed in StoreNodes.
@@ -464,8 +475,10 @@ namespace {
/// Merge consecutive store operations into a wide store.
/// This optimization uses wide integers or vectors when possible.
- /// \return True if some memory operations were changed.
- bool MergeConsecutiveStores(StoreSDNode *N);
+ /// \return number of stores that were merged into a merged store (the
+ /// affected nodes are stored as a prefix in \p StoreNodes).
+ bool MergeConsecutiveStores(StoreSDNode *N,
+ SmallVectorImpl<MemOpLink> &StoreNodes);
/// \brief Try to transform a truncation where C is a constant:
/// (trunc (and X, C)) -> (and (trunc X), (trunc C))
@@ -536,10 +549,6 @@ void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
((DAGCombiner*)DC)->AddToWorklist(N);
}
-void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
- ((DAGCombiner*)DC)->removeFromWorklist(N);
-}
-
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
@@ -620,7 +629,8 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
Depth + 1);
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
- if (!Options->UnsafeFPMath) return 0;
+ if (!Options->UnsafeFPMath && !Op.getNode()->getFlags()->hasNoSignedZeros())
+ return 0;
// fold (fneg (fsub A, B)) -> (fsub B, A)
return 1;
@@ -683,9 +693,6 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
LegalOperations, Depth+1),
Op.getOperand(0), Flags);
case ISD::FSUB:
- // We can't turn -(A-B) into B-A when we honor signed zeros.
- assert(Options.UnsafeFPMath);
-
// fold (fneg (fsub 0, B)) -> B
if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
if (N0CFP->isZero())
@@ -726,6 +733,15 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
}
}
+// APInts must be the same size for most operations, this helper
+// function zero extends the shorter of the pair so that they match.
+// We provide an Offset so that we can create bitwidths that won't overflow.
+static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
+ unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
+ LHS = LHS.zextOrSelf(Bits);
+ RHS = RHS.zextOrSelf(Bits);
+}
+
// Return true if this node is a setcc, or is a select_cc
// that selects between the target values used for true and false, making it
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
@@ -775,42 +791,61 @@ static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
return nullptr;
}
-// \brief Returns the SDNode if it is a constant splat BuildVector or constant
-// int.
-static ConstantSDNode *isConstOrConstSplat(SDValue N) {
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
- return CN;
-
- if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
- BitVector UndefElements;
- ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
-
- // BuildVectors can truncate their operands. Ignore that case here.
- // FIXME: We blindly ignore splats which include undef which is overly
- // pessimistic.
- if (CN && UndefElements.none() &&
- CN->getValueType(0) == N.getValueType().getScalarType())
- return CN;
+// Determines if it is a constant integer or a build vector of constant
+// integers (and undefs).
+// Do not permit build vector implicit truncation.
+static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
+ if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
+ return !(Const->isOpaque() && NoOpaques);
+ if (N.getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ unsigned BitWidth = N.getScalarValueSizeInBits();
+ for (const SDValue &Op : N->op_values()) {
+ if (Op.isUndef())
+ continue;
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
+ if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
+ (Const->isOpaque() && NoOpaques))
+ return false;
}
-
- return nullptr;
+ return true;
}
-// \brief Returns the SDNode if it is a constant splat BuildVector or constant
-// float.
-static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) {
- if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
- return CN;
+// Determines if it is a constant null integer or a splatted vector of a
+// constant null integer (with no undefs).
+// Build vector implicit truncation is not an issue for null values.
+static bool isNullConstantOrNullSplatConstant(SDValue N) {
+ if (ConstantSDNode *Splat = isConstOrConstSplat(N))
+ return Splat->isNullValue();
+ return false;
+}
- if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
- BitVector UndefElements;
- ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
+// Determines if it is a constant integer of one or a splatted vector of a
+// constant integer of one (with no undefs).
+// Do not permit build vector implicit truncation.
+static bool isOneConstantOrOneSplatConstant(SDValue N) {
+ unsigned BitWidth = N.getScalarValueSizeInBits();
+ if (ConstantSDNode *Splat = isConstOrConstSplat(N))
+ return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
+ return false;
+}
- if (CN && UndefElements.none())
- return CN;
- }
+// Determines if it is a constant integer of all ones or a splatted vector of a
+// constant integer of all ones (with no undefs).
+// Do not permit build vector implicit truncation.
+static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
+ unsigned BitWidth = N.getScalarValueSizeInBits();
+ if (ConstantSDNode *Splat = isConstOrConstSplat(N))
+ return Splat->isAllOnesValue() &&
+ Splat->getAPIntValue().getBitWidth() == BitWidth;
+ return false;
+}
- return nullptr;
+// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
+// undef's.
+static bool isAnyConstantBuildVector(const SDNode *N) {
+ return ISD::isBuildVectorOfConstantSDNodes(N) ||
+ ISD::isBuildVectorOfConstantFPSDNodes(N);
}
SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
@@ -935,9 +970,9 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
}
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
- SDLoc dl(Load);
+ SDLoc DL(Load);
EVT VT = Load->getValueType(0);
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
DEBUG(dbgs() << "\nReplacing.9 ";
Load->dump(&DAG);
@@ -953,7 +988,7 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
Replace = false;
- SDLoc dl(Op);
+ SDLoc DL(Op);
if (ISD::isUNINDEXEDLoad(Op.getNode())) {
LoadSDNode *LD = cast<LoadSDNode>(Op);
EVT MemVT = LD->getMemoryVT();
@@ -962,7 +997,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
: ISD::EXTLOAD)
: LD->getExtensionType();
Replace = true;
- return DAG.getExtLoad(ExtType, dl, PVT,
+ return DAG.getExtLoad(ExtType, DL, PVT,
LD->getChain(), LD->getBasePtr(),
MemVT, LD->getMemOperand());
}
@@ -971,30 +1006,30 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
switch (Opc) {
default: break;
case ISD::AssertSext:
- return DAG.getNode(ISD::AssertSext, dl, PVT,
+ return DAG.getNode(ISD::AssertSext, DL, PVT,
SExtPromoteOperand(Op.getOperand(0), PVT),
Op.getOperand(1));
case ISD::AssertZext:
- return DAG.getNode(ISD::AssertZext, dl, PVT,
+ return DAG.getNode(ISD::AssertZext, DL, PVT,
ZExtPromoteOperand(Op.getOperand(0), PVT),
Op.getOperand(1));
case ISD::Constant: {
unsigned ExtOpc =
Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
- return DAG.getNode(ExtOpc, dl, PVT, Op);
+ return DAG.getNode(ExtOpc, DL, PVT, Op);
}
}
if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
return SDValue();
- return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op);
+ return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}
SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
return SDValue();
EVT OldVT = Op.getValueType();
- SDLoc dl(Op);
+ SDLoc DL(Op);
bool Replace = false;
SDValue NewOp = PromoteOperand(Op, PVT, Replace);
if (!NewOp.getNode())
@@ -1003,13 +1038,13 @@ SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
- return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp,
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
DAG.getValueType(OldVT));
}
SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
EVT OldVT = Op.getValueType();
- SDLoc dl(Op);
+ SDLoc DL(Op);
bool Replace = false;
SDValue NewOp = PromoteOperand(Op, PVT, Replace);
if (!NewOp.getNode())
@@ -1018,7 +1053,7 @@ SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
- return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
+ return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
}
/// Promote the specified integer binary operation if the target indicates it is
@@ -1072,9 +1107,9 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
DEBUG(dbgs() << "\nPromoting ";
Op.getNode()->dump(&DAG));
- SDLoc dl(Op);
- return DAG.getNode(ISD::TRUNCATE, dl, VT,
- DAG.getNode(Opc, dl, PVT, NN0, NN1));
+ SDLoc DL(Op);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT,
+ DAG.getNode(Opc, DL, PVT, NN0, NN1));
}
return SDValue();
}
@@ -1119,9 +1154,9 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
DEBUG(dbgs() << "\nPromoting ";
Op.getNode()->dump(&DAG));
- SDLoc dl(Op);
- return DAG.getNode(ISD::TRUNCATE, dl, VT,
- DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1)));
+ SDLoc DL(Op);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT,
+ DAG.getNode(Opc, DL, PVT, N0, Op.getOperand(1)));
}
return SDValue();
}
@@ -1178,7 +1213,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
- SDLoc dl(Op);
+ SDLoc DL(Op);
SDNode *N = Op.getNode();
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT MemVT = LD->getMemoryVT();
@@ -1186,10 +1221,10 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
: ISD::EXTLOAD)
: LD->getExtensionType();
- SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
+ SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
LD->getChain(), LD->getBasePtr(),
MemVT, LD->getMemOperand());
- SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);
+ SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
DEBUG(dbgs() << "\nPromoting ";
N->dump(&DAG);
@@ -1315,7 +1350,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
continue;
assert(N->getOpcode() != ISD::DELETED_NODE &&
- RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
+ RV.getOpcode() != ISD::DELETED_NODE &&
"Node was deleted but visit returned new node!");
DEBUG(dbgs() << " ... into: ";
@@ -1562,8 +1597,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
break;
case ISD::TokenFactor:
- if (Op.hasOneUse() &&
- std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
+ if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
// Queue up for processing.
TFs.push_back(Op.getNode());
// Clean up in case the token factor is removed.
@@ -1571,7 +1605,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
Changed = true;
break;
}
- // Fall thru
+ LLVM_FALLTHROUGH;
default:
// Only add if it isn't already in the list.
@@ -1634,6 +1668,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
+ SDLoc DL(N);
// fold vector ops
if (VT.isVector()) {
@@ -1650,61 +1685,73 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
// fold (add x, undef) -> undef
if (N0.isUndef())
return N0;
+
if (N1.isUndef())
return N1;
+
if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
// canonicalize constant to RHS
if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0);
+ return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
// fold (add c1, c2) -> c1+c2
- return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT,
- N0.getNode(), N1.getNode());
+ return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
+ N1.getNode());
}
+
// fold (add x, 0) -> x
if (isNullConstant(N1))
return N0;
+
// fold ((c1-A)+c2) -> (c1+c2)-A
- if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1)) {
+ if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
if (N0.getOpcode() == ISD::SUB)
- if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) {
- SDLoc DL(N);
+ if (isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
return DAG.getNode(ISD::SUB, DL, VT,
- DAG.getConstant(N1C->getAPIntValue()+
- N0C->getAPIntValue(), DL, VT),
+ DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
N0.getOperand(1));
}
}
+
// reassociate add
- if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1))
+ if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
return RADD;
+
// fold ((0-A) + B) -> B-A
- if (N0.getOpcode() == ISD::SUB && isNullConstant(N0.getOperand(0)))
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1));
+ if (N0.getOpcode() == ISD::SUB &&
+ isNullConstantOrNullSplatConstant(N0.getOperand(0)))
+ return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
+
// fold (A + (0-B)) -> A-B
- if (N1.getOpcode() == ISD::SUB && isNullConstant(N1.getOperand(0)))
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1));
+ if (N1.getOpcode() == ISD::SUB &&
+ isNullConstantOrNullSplatConstant(N1.getOperand(0)))
+ return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
+
// fold (A+(B-A)) -> B
if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
return N1.getOperand(0);
+
// fold ((B-A)+A) -> B
if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
return N0.getOperand(0);
+
// fold (A+(B-(A+C))) to (B-C)
if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
N0 == N1.getOperand(1).getOperand(0))
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
+ return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
N1.getOperand(1).getOperand(1));
+
// fold (A+(B-(C+A))) to (B-C)
if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
N0 == N1.getOperand(1).getOperand(1))
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
+ return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
N1.getOperand(1).getOperand(0));
+
// fold (A+((B-A)+or-C)) to (B+or-C)
if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
N1.getOperand(0).getOpcode() == ISD::SUB &&
N0 == N1.getOperand(0).getOperand(1))
- return DAG.getNode(N1.getOpcode(), SDLoc(N), VT,
- N1.getOperand(0).getOperand(0), N1.getOperand(1));
+ return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
+ N1.getOperand(1));
// fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
@@ -1713,52 +1760,50 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N10 = N1.getOperand(0);
SDValue N11 = N1.getOperand(1);
- if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
- return DAG.getNode(ISD::SUB, SDLoc(N), VT,
+ if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
+ return DAG.getNode(ISD::SUB, DL, VT,
DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
}
- if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
+ if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// fold (a+b) -> (a|b) iff a and b share no bits.
if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
- VT.isInteger() && !VT.isVector() && DAG.haveNoCommonBitsSet(N0, N1))
- return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
+ VT.isInteger() && DAG.haveNoCommonBitsSet(N0, N1))
+ return DAG.getNode(ISD::OR, DL, VT, N0, N1);
// fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
- isNullConstant(N1.getOperand(0).getOperand(0)))
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
- DAG.getNode(ISD::SHL, SDLoc(N), VT,
+ isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
+ return DAG.getNode(ISD::SUB, DL, VT, N0,
+ DAG.getNode(ISD::SHL, DL, VT,
N1.getOperand(0).getOperand(1),
N1.getOperand(1)));
if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB &&
- isNullConstant(N0.getOperand(0).getOperand(0)))
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
- DAG.getNode(ISD::SHL, SDLoc(N), VT,
+ isNullConstantOrNullSplatConstant(N0.getOperand(0).getOperand(0)))
+ return DAG.getNode(ISD::SUB, DL, VT, N1,
+ DAG.getNode(ISD::SHL, DL, VT,
N0.getOperand(0).getOperand(1),
N0.getOperand(1)));
if (N1.getOpcode() == ISD::AND) {
SDValue AndOp0 = N1.getOperand(0);
unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
- unsigned DestBits = VT.getScalarType().getSizeInBits();
+ unsigned DestBits = VT.getScalarSizeInBits();
// (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
// and similar xforms where the inner op is either ~0 or 0.
- if (NumSignBits == DestBits && isOneConstant(N1->getOperand(1))) {
- SDLoc DL(N);
+ if (NumSignBits == DestBits &&
+ isOneConstantOrOneSplatConstant(N1->getOperand(1)))
return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
- }
}
// add (sext i1), X -> sub X, (zext i1)
if (N0.getOpcode() == ISD::SIGN_EXTEND &&
N0.getOperand(0).getValueType() == MVT::i1 &&
!TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
- SDLoc DL(N);
SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
}
@@ -1767,7 +1812,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
if (TN->getVT() == MVT::i1) {
- SDLoc DL(N);
SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
DAG.getConstant(1, DL, VT));
return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
@@ -1853,6 +1897,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
+ SDLoc DL(N);
// fold vector ops
if (VT.isVector()) {
@@ -1867,62 +1912,97 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// fold (sub x, x) -> 0
// FIXME: Refactor this and xor and other similar operations together.
if (N0 == N1)
- return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
+ return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
// fold (sub c1, c2) -> c1-c2
- return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT,
- N0.getNode(), N1.getNode());
+ return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
+ N1.getNode());
}
- ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+
ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
+
// fold (sub x, c) -> (add x, -c)
if (N1C) {
- SDLoc DL(N);
return DAG.getNode(ISD::ADD, DL, VT, N0,
DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
}
+
+ if (isNullConstantOrNullSplatConstant(N0)) {
+ unsigned BitWidth = VT.getScalarSizeInBits();
+ // Right-shifting everything out but the sign bit followed by negation is
+ // the same as flipping arithmetic/logical shift type without the negation:
+ // -(X >>u 31) -> (X >>s 31)
+ // -(X >>s 31) -> (X >>u 31)
+ if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
+ ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
+ if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
+ auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
+ if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
+ return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
+ }
+ }
+
+ // 0 - X --> 0 if the sub is NUW.
+ if (N->getFlags()->hasNoUnsignedWrap())
+ return N0;
+
+ if (DAG.MaskedValueIsZero(N1, ~APInt::getSignBit(BitWidth))) {
+ // N1 is either 0 or the minimum signed value. If the sub is NSW, then
+ // N1 must be 0 because negating the minimum signed value is undefined.
+ if (N->getFlags()->hasNoSignedWrap())
+ return N0;
+
+ // 0 - X --> X if X is 0 or the minimum signed value.
+ return N1;
+ }
+ }
+
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
- if (isAllOnesConstant(N0))
- return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
+ if (isAllOnesConstantOrAllOnesSplatConstant(N0))
+ return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
+
// fold A-(A-B) -> B
if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
return N1.getOperand(1);
+
// fold (A+B)-A -> B
if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
return N0.getOperand(1);
+
// fold (A+B)-B -> A
if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
return N0.getOperand(0);
+
// fold C2-(A+C1) -> (C2-C1)-A
- ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
- dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
- if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
- SDLoc DL(N);
- SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
- DL, VT);
- return DAG.getNode(ISD::SUB, DL, VT, NewC,
- N1.getOperand(0));
+ if (N1.getOpcode() == ISD::ADD) {
+ SDValue N11 = N1.getOperand(1);
+ if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
+ isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
+ SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
+ return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
+ }
}
+
// fold ((A+(B+or-C))-B) -> A+or-C
if (N0.getOpcode() == ISD::ADD &&
(N0.getOperand(1).getOpcode() == ISD::SUB ||
N0.getOperand(1).getOpcode() == ISD::ADD) &&
N0.getOperand(1).getOperand(0) == N1)
- return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1).getOperand(1));
+ return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
+ N0.getOperand(1).getOperand(1));
+
// fold ((A+(C+B))-B) -> A+C
- if (N0.getOpcode() == ISD::ADD &&
- N0.getOperand(1).getOpcode() == ISD::ADD &&
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
N0.getOperand(1).getOperand(1) == N1)
- return DAG.getNode(ISD::ADD, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1).getOperand(0));
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
+ N0.getOperand(1).getOperand(0));
+
// fold ((A-(B-C))-C) -> A-B
- if (N0.getOpcode() == ISD::SUB &&
- N0.getOperand(1).getOpcode() == ISD::SUB &&
+ if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
N0.getOperand(1).getOperand(1) == N1)
- return DAG.getNode(ISD::SUB, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1).getOperand(0));
+ return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
+ N0.getOperand(1).getOperand(0));
// If either operand of a sub is undef, the result is undef
if (N0.isUndef())
@@ -1937,19 +2017,18 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (N1C && GA->getOpcode() == ISD::GlobalAddress)
return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
GA->getOffset() -
- (uint64_t)N1C->getSExtValue());
+ (uint64_t)N1C->getSExtValue());
// fold (sub Sym+c1, Sym+c2) -> c1-c2
if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
if (GA->getGlobal() == GB->getGlobal())
return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
- SDLoc(N), VT);
+ DL, VT);
}
// sub X, (sextinreg Y i1) -> add X, (and Y 1)
if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
if (TN->getVT() == MVT::i1) {
- SDLoc DL(N);
SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
DAG.getConstant(1, DL, VT));
return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
@@ -2048,7 +2127,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// We require a splat of the entire scalar bit width for non-contiguous
// bit patterns.
bool IsFullSplat =
- ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits();
+ ConstValue1.getBitWidth() == VT.getScalarSizeInBits();
// fold (mul x, 1) -> x
if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
return N0;
@@ -2080,28 +2159,27 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
getShiftAmountTy(N0.getValueType()))));
}
- APInt Val;
// (mul (shl X, c1), c2) -> (mul X, c2 << c1)
- if (N1IsConst && N0.getOpcode() == ISD::SHL &&
- (ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
- isa<ConstantSDNode>(N0.getOperand(1)))) {
+ if (N0.getOpcode() == ISD::SHL &&
+ isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
+ isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
- AddToWorklist(C3.getNode());
- return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
+ if (isConstantOrConstantVector(C3))
+ return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
}
// Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
// use.
{
SDValue Sh(nullptr, 0), Y(nullptr, 0);
+
// Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
if (N0.getOpcode() == ISD::SHL &&
- (ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
- isa<ConstantSDNode>(N0.getOperand(1))) &&
+ isConstantOrConstantVector(N0.getOperand(1)) &&
N0.getNode()->hasOneUse()) {
Sh = N0; Y = N1;
} else if (N1.getOpcode() == ISD::SHL &&
- isa<ConstantSDNode>(N1.getOperand(1)) &&
+ isConstantOrConstantVector(N1.getOperand(1)) &&
N1.getNode()->hasOneUse()) {
Sh = N1; Y = N0;
}
@@ -2188,8 +2266,8 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) {
SDValue Op1 = Node->getOperand(1);
SDValue combined;
for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
- UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
- SDNode *User = *UI;
+ UE = Op0.getNode()->use_end(); UI != UE;) {
+ SDNode *User = *UI++;
if (User == Node || User->use_empty())
continue;
// Convert the other matching node(s), too;
@@ -2246,10 +2324,8 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
// If we know the sign bits of both operands are zero, strength reduce to a
// udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
- if (!VT.isVector()) {
- if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
- return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
- }
+ if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
// fold (sdiv X, pow2) -> simple ops after legalize
// FIXME: We check for the exact bit here because the generic lowering gives
@@ -2302,8 +2378,8 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
return Op;
// sdiv, srem -> sdivrem
- // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true.
- // Otherwise, we break the simplification logic in visitREM().
+ // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
+ // true. Otherwise, we break the simplification logic in visitREM().
if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue DivRem = useDivRem(N))
return DivRem;
@@ -2337,25 +2413,33 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
N0C, N1C))
return Folded;
+
// fold (udiv x, (1 << c)) -> x >>u c
- if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2())
- return DAG.getNode(ISD::SRL, DL, VT, N0,
- DAG.getConstant(N1C->getAPIntValue().logBase2(), DL,
- getShiftAmountTy(N0.getValueType())));
+ if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
+ DAG.isKnownToBeAPowerOfTwo(N1)) {
+ SDValue LogBase2 = BuildLogBase2(N1, DL);
+ AddToWorklist(LogBase2.getNode());
+
+ EVT ShiftVT = getShiftAmountTy(N0.getValueType());
+ SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
+ AddToWorklist(Trunc.getNode());
+ return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
+ }
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
if (N1.getOpcode() == ISD::SHL) {
- if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
- if (SHC->getAPIntValue().isPowerOf2()) {
- EVT ADDVT = N1.getOperand(1).getValueType();
- SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT,
- N1.getOperand(1),
- DAG.getConstant(SHC->getAPIntValue()
- .logBase2(),
- DL, ADDVT));
- AddToWorklist(Add.getNode());
- return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
- }
+ SDValue N10 = N1.getOperand(0);
+ if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
+ DAG.isKnownToBeAPowerOfTwo(N10)) {
+ SDValue LogBase2 = BuildLogBase2(N10, DL);
+ AddToWorklist(LogBase2.getNode());
+
+ EVT ADDVT = N1.getOperand(1).getValueType();
+ SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
+ AddToWorklist(Trunc.getNode());
+ SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
+ AddToWorklist(Add.getNode());
+ return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
}
}
@@ -2366,8 +2450,8 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
return Op;
// sdiv, srem -> sdivrem
- // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true.
- // Otherwise, we break the simplification logic in visitREM().
+ // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
+ // true. Otherwise, we break the simplification logic in visitREM().
if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue DivRem = useDivRem(N))
return DivRem;
@@ -2401,27 +2485,25 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
if (isSigned) {
// If we know the sign bits of both operands are zero, strength reduce to a
// urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
- if (!VT.isVector()) {
- if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
- return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
- }
+ if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
} else {
// fold (urem x, pow2) -> (and x, pow2-1)
- if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
- N1C->getAPIntValue().isPowerOf2()) {
- return DAG.getNode(ISD::AND, DL, VT, N0,
- DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT));
+ if (DAG.isKnownToBeAPowerOfTwo(N1)) {
+ APInt NegOne = APInt::getAllOnesValue(VT.getScalarSizeInBits());
+ SDValue Add =
+ DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT));
+ AddToWorklist(Add.getNode());
+ return DAG.getNode(ISD::AND, DL, VT, N0, Add);
}
// fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
- if (N1.getOpcode() == ISD::SHL) {
- ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0));
- if (SHC && SHC->getAPIntValue().isPowerOf2()) {
- APInt NegOne = APInt::getAllOnesValue(VT.getSizeInBits());
- SDValue Add =
- DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT));
- AddToWorklist(Add.getNode());
- return DAG.getNode(ISD::AND, DL, VT, N0, Add);
- }
+ if (N1.getOpcode() == ISD::SHL &&
+ DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
+ APInt NegOne = APInt::getAllOnesValue(VT.getScalarSizeInBits());
+ SDValue Add =
+ DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT));
+ AddToWorklist(Add.getNode());
+ return DAG.getNode(ISD::AND, DL, VT, N0, Add);
}
}
@@ -2477,8 +2559,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
if (isOneConstant(N1)) {
SDLoc DL(N);
return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
- DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
- DL,
+ DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
getShiftAmountTy(N0.getValueType())));
}
// fold (mulhs x, undef) -> 0
@@ -2706,7 +2787,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
// Bail early if none of these transforms apply.
- if (N0.getNode()->getNumOperands() == 0) return SDValue();
+ if (N0.getNumOperands() == 0) return SDValue();
// For each of OP in AND/OR/XOR:
// fold (OP (zext x), (zext y)) -> (zext (OP x, y))
@@ -2872,25 +2953,34 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
LL.getValueType().isInteger()) {
// fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
if (isNullConstant(LR) && Op1 == ISD::SETEQ) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ EVT CCVT = getSetCCResultType(LR.getValueType());
+ if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
+ SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ORNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ }
}
if (isAllOnesConstant(LR)) {
// fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
if (Op1 == ISD::SETEQ) {
- SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ANDNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
+ EVT CCVT = getSetCCResultType(LR.getValueType());
+ if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ANDNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
+ }
}
// fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
if (Op1 == ISD::SETGT) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ EVT CCVT = getSetCCResultType(LR.getValueType());
+ if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
+ SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ORNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ }
}
}
}
@@ -2899,14 +2989,17 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
Op0 == Op1 && LL.getValueType().isInteger() &&
Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
(isAllOnesConstant(LR) && isNullConstant(RR)))) {
- SDLoc DL(N0);
- SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(),
- LL, DAG.getConstant(1, DL,
- LL.getValueType()));
- AddToWorklist(ADDNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode,
- DAG.getConstant(2, DL, LL.getValueType()),
- ISD::SETUGE);
+ EVT CCVT = getSetCCResultType(LL.getValueType());
+ if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
+ SDLoc DL(N0);
+ SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(),
+ LL, DAG.getConstant(1, DL,
+ LL.getValueType()));
+ AddToWorklist(ADDNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode,
+ DAG.getConstant(2, DL, LL.getValueType()),
+ ISD::SETUGE);
+ }
}
// canonicalize equivalent to ll == rl
if (LL == RR && LR == RL) {
@@ -2967,6 +3060,11 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
unsigned Size = VT.getSizeInBits();
const APInt &AndMask = CAnd->getAPIntValue();
unsigned ShiftBits = CShift->getZExtValue();
+
+ // Bail out, this node will probably disappear anyway.
+ if (ShiftBits == 0)
+ return SDValue();
+
unsigned MaskBits = AndMask.countTrailingOnes();
EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
@@ -2985,7 +3083,7 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
// extended to handle extensions mixed in.
SDValue SL(N0);
- assert(ShiftBits != 0 && MaskBits <= Size);
+ assert(MaskBits <= Size);
// Extracting the highest bit of the low half.
EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
@@ -3050,6 +3148,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N1 = N->getOperand(1);
EVT VT = N1.getValueType();
+ // x & x --> x
+ if (N0 == N1)
+ return N0;
+
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
@@ -3058,16 +3160,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// fold (and x, 0) -> 0, vector edition
if (ISD::isBuildVectorAllZeros(N0.getNode()))
// do not return N0, because undef node may exist in N0
- return DAG.getConstant(
- APInt::getNullValue(
- N0.getValueType().getScalarType().getSizeInBits()),
- SDLoc(N), N0.getValueType());
+ return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
+ SDLoc(N), N0.getValueType());
if (ISD::isBuildVectorAllZeros(N1.getNode()))
// do not return N1, because undef node may exist in N1
- return DAG.getConstant(
- APInt::getNullValue(
- N1.getValueType().getScalarType().getSizeInBits()),
- SDLoc(N), N1.getValueType());
+ return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
+ SDLoc(N), N1.getValueType());
// fold (and x, -1) -> x, vector edition
if (ISD::isBuildVectorAllOnes(N0.getNode()))
@@ -3078,7 +3176,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// fold (and c1, c2) -> c1&c2
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
// canonicalize constant to RHS
@@ -3089,7 +3187,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (isAllOnesConstant(N1))
return N0;
// if (and x, c) is known to be zero, return 0
- unsigned BitWidth = VT.getScalarType().getSizeInBits();
+ unsigned BitWidth = VT.getScalarSizeInBits();
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(BitWidth)))
return DAG.getConstant(0, SDLoc(N), VT);
@@ -3098,14 +3196,14 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return RAND;
// fold (and (or x, C), D) -> D if (C & D) == D
if (N1C && N0.getOpcode() == ISD::OR)
- if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+ if (ConstantSDNode *ORI = isConstOrConstSplat(N0.getOperand(1)))
if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
return N1;
// fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N0Op0 = N0.getOperand(0);
APInt Mask = ~N1C->getAPIntValue();
- Mask = Mask.trunc(N0Op0.getValueSizeInBits());
+ Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
N0.getValueType(), N0Op0);
@@ -3156,7 +3254,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// that will apply equally to all members of the vector, so AND all the
// lanes of the constant together.
EVT VT = Vector->getValueType(0);
- unsigned BitWidth = VT.getVectorElementType().getSizeInBits();
+ unsigned BitWidth = VT.getScalarSizeInBits();
// If the splat value has been compressed to a bitlength lower
// than the size of the vector lane, we need to re-expand it to
@@ -3187,8 +3285,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// Resize the constant to the same size as the original memory access before
// extension. If it is still the AllOnesValue then this AND is completely
// unneeded.
- Constant =
- Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits());
+ Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
bool B;
switch (Load->getExtensionType()) {
@@ -3230,9 +3327,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// fold (and (load x), 255) -> (zextload x, i8)
// fold (and (extload x, i16), 255) -> (zextload x, i8)
// fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
- if (N1C && (N0.getOpcode() == ISD::LOAD ||
- (N0.getOpcode() == ISD::ANY_EXTEND &&
- N0.getOperand(0).getOpcode() == ISD::LOAD))) {
+ if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
+ (N0.getOpcode() == ISD::ANY_EXTEND &&
+ N0.getOperand(0).getOpcode() == ISD::LOAD))) {
bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
LoadSDNode *LN0 = HasAnyExt
? cast<LoadSDNode>(N0.getOperand(0))
@@ -3293,10 +3390,29 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
return Tmp;
+ // Masking the negated extension of a boolean is just the zero-extended
+ // boolean:
+ // and (sub 0, zext(bool X)), 1 --> zext(bool X)
+ // and (sub 0, sext(bool X)), 1 --> zext(bool X)
+ //
+ // Note: the SimplifyDemandedBits fold below can make an information-losing
+ // transform, and then we have no way to find this better fold.
+ if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
+ ConstantSDNode *SubLHS = isConstOrConstSplat(N0.getOperand(0));
+ SDValue SubRHS = N0.getOperand(1);
+ if (SubLHS && SubLHS->isNullValue()) {
+ if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
+ SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
+ return SubRHS;
+ if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
+ SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
+ }
+ }
+
// fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
// fold (and (sra)) -> (and (srl)) when possible.
- if (!VT.isVector() &&
- SimplifyDemandedBits(SDValue(N, 0)))
+ if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// fold (zext_inreg (extload x)) -> (zextload x)
@@ -3305,9 +3421,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
EVT MemVT = LN0->getMemoryVT();
// If we zero all the possible extended bits, then we can turn this into
// a zextload if we are running before legalize or the operation is legal.
- unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
+ unsigned BitWidth = N1.getScalarValueSizeInBits();
if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
- BitWidth - MemVT.getScalarType().getSizeInBits())) &&
+ BitWidth - MemVT.getScalarSizeInBits())) &&
((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
@@ -3325,9 +3441,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
EVT MemVT = LN0->getMemoryVT();
// If we zero all the possible extended bits, then we can turn this into
// a zextload if we are running before legalize or the operation is legal.
- unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
+ unsigned BitWidth = N1.getScalarValueSizeInBits();
if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
- BitWidth - MemVT.getScalarType().getSizeInBits())) &&
+ BitWidth - MemVT.getScalarSizeInBits())) &&
((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
@@ -3391,8 +3507,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
std::swap(N0, N1);
if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
return SDValue();
- if (!N0.getNode()->hasOneUse() ||
- !N1.getNode()->hasOneUse())
+ if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
return SDValue();
ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
@@ -3627,18 +3742,24 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
// fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
// fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ EVT CCVT = getSetCCResultType(LR.getValueType());
+ if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
+ SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ORNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ }
}
// fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
// fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1)
if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
- SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
- LR.getValueType(), LL, RL);
- AddToWorklist(ANDNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
+ EVT CCVT = getSetCCResultType(LR.getValueType());
+ if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ANDNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
+ }
}
}
// canonicalize equivalent to ll == rl
@@ -3708,6 +3829,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
SDValue N1 = N->getOperand(1);
EVT VT = N1.getValueType();
+ // x | x --> x
+ if (N0 == N1)
+ return N0;
+
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
@@ -3723,15 +3848,13 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (ISD::isBuildVectorAllOnes(N0.getNode()))
// do not return N0, because undef node may exist in N0
return DAG.getConstant(
- APInt::getAllOnesValue(
- N0.getValueType().getScalarType().getSizeInBits()),
- SDLoc(N), N0.getValueType());
+ APInt::getAllOnesValue(N0.getScalarValueSizeInBits()), SDLoc(N),
+ N0.getValueType());
if (ISD::isBuildVectorAllOnes(N1.getNode()))
// do not return N1, because undef node may exist in N1
return DAG.getConstant(
- APInt::getAllOnesValue(
- N1.getValueType().getScalarType().getSizeInBits()),
- SDLoc(N), N1.getValueType());
+ APInt::getAllOnesValue(N1.getScalarValueSizeInBits()), SDLoc(N),
+ N1.getValueType());
// fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
// Do this only if the resulting shuffle is legal.
@@ -4122,6 +4245,110 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
return nullptr;
}
+namespace {
+/// Helper struct to parse and store a memory address as base + index + offset.
+/// We ignore sign extensions when it is safe to do so.
+/// The following two expressions are not equivalent. To differentiate we need
+/// to store whether there was a sign extension involved in the index
+/// computation.
+/// (load (i64 add (i64 copyfromreg %c)
+/// (i64 signextend (add (i8 load %index)
+/// (i8 1))))
+/// vs
+///
+/// (load (i64 add (i64 copyfromreg %c)
+/// (i64 signextend (i32 add (i32 signextend (i8 load %index))
+/// (i32 1)))))
+struct BaseIndexOffset {
+ SDValue Base;
+ SDValue Index;
+ int64_t Offset;
+ bool IsIndexSignExt;
+
+ BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}
+
+ BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
+ bool IsIndexSignExt) :
+ Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}
+
+ bool equalBaseIndex(const BaseIndexOffset &Other) {
+ return Other.Base == Base && Other.Index == Index &&
+ Other.IsIndexSignExt == IsIndexSignExt;
+ }
+
+ /// Parses tree in Ptr for base, index, offset addresses.
+ static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG,
+ int64_t PartialOffset = 0) {
+ bool IsIndexSignExt = false;
+
+ // Split up a folded GlobalAddress+Offset into its component parts.
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ptr))
+ if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) {
+ return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(),
+ SDLoc(GA),
+ GA->getValueType(0),
+ /*Offset=*/PartialOffset,
+ /*isTargetGA=*/false,
+ GA->getTargetFlags()),
+ SDValue(),
+ GA->getOffset(),
+ IsIndexSignExt);
+ }
+
+ // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
+ // instruction, then it could be just the BASE or everything else we don't
+ // know how to handle. Just use Ptr as BASE and give up.
+ if (Ptr->getOpcode() != ISD::ADD)
+ return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
+
+ // We know that we have at least an ADD instruction. Try to pattern match
+ // the simple case of BASE + OFFSET.
+ if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
+ int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
+ return match(Ptr->getOperand(0), DAG, Offset + PartialOffset);
+ }
+
+ // Inside a loop the current BASE pointer is calculated using an ADD and a
+ // MUL instruction. In this case Ptr is the actual BASE pointer.
+ // (i64 add (i64 %array_ptr)
+ // (i64 mul (i64 %induction_var)
+ // (i64 %element_size)))
+ if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
+ return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
+
+ // Look at Base + Index + Offset cases.
+ SDValue Base = Ptr->getOperand(0);
+ SDValue IndexOffset = Ptr->getOperand(1);
+
+ // Skip signextends.
+ if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
+ IndexOffset = IndexOffset->getOperand(0);
+ IsIndexSignExt = true;
+ }
+
+ // Either the case of Base + Index (no offset) or something else.
+ if (IndexOffset->getOpcode() != ISD::ADD)
+ return BaseIndexOffset(Base, IndexOffset, PartialOffset, IsIndexSignExt);
+
+ // Now we have the case of Base + Index + offset.
+ SDValue Index = IndexOffset->getOperand(0);
+ SDValue Offset = IndexOffset->getOperand(1);
+
+ if (!isa<ConstantSDNode>(Offset))
+ return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
+
+ // Ignore signextends.
+ if (Index->getOpcode() == ISD::SIGN_EXTEND) {
+ Index = Index->getOperand(0);
+ IsIndexSignExt = true;
+ } else IsIndexSignExt = false;
+
+ int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
+ return BaseIndexOffset(Base, Index, Off + PartialOffset, IsIndexSignExt);
+ }
+};
+} // namespace
+
SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -4317,16 +4544,20 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
if (!BinOpCst) return SDValue();
- // FIXME: disable this unless the input to the binop is a shift by a constant.
- // If it is not a shift, it pessimizes some common cases like:
- //
- // void foo(int *X, int i) { X[i & 1235] = 1; }
- // int bar(int *X, int i) { return X[i & 255]; }
+ // FIXME: disable this unless the input to the binop is a shift by a constant
+ // or is copy/select.Enable this in other cases when figure out it's exactly profitable.
SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
- if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
- BinOpLHSVal->getOpcode() != ISD::SRA &&
- BinOpLHSVal->getOpcode() != ISD::SRL) ||
- !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
+ bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
+ BinOpLHSVal->getOpcode() == ISD::SRA ||
+ BinOpLHSVal->getOpcode() == ISD::SRL;
+ bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
+ BinOpLHSVal->getOpcode() == ISD::SELECT;
+
+ if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
+ !isCopyOrSelect)
+ return SDValue();
+
+ if (isCopyOrSelect && N->hasOneUse())
return SDValue();
EVT VT = N->getValueType(0);
@@ -4366,19 +4597,15 @@ SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
// (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
SDValue N01 = N->getOperand(0).getOperand(1);
-
- if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) {
- if (!N01C->isOpaque()) {
- EVT TruncVT = N->getValueType(0);
- SDValue N00 = N->getOperand(0).getOperand(0);
- APInt TruncC = N01C->getAPIntValue();
- TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());
- SDLoc DL(N);
-
- return DAG.getNode(ISD::AND, DL, TruncVT,
- DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00),
- DAG.getConstant(TruncC, DL, TruncVT));
- }
+ if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
+ SDLoc DL(N);
+ EVT TruncVT = N->getValueType(0);
+ SDValue N00 = N->getOperand(0).getOperand(0);
+ SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
+ SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
+ AddToWorklist(Trunc00.getNode());
+ AddToWorklist(Trunc01.getNode());
+ return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
}
}
@@ -4404,7 +4631,6 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
unsigned OpSizeInBits = VT.getScalarSizeInBits();
// fold vector ops
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
@@ -4425,12 +4651,12 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
N01CV, N1CV))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
}
- } else {
- N1C = isConstOrConstSplat(N1);
}
}
}
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+
// fold (shl c1, c2) -> c1<<c2
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
if (N0C && N1C && !N1C->isOpaque())
@@ -4464,13 +4690,18 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
if (N1C && N0.getOpcode() == ISD::SHL) {
if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
- uint64_t c1 = N0C1->getZExtValue();
- uint64_t c2 = N1C->getZExtValue();
SDLoc DL(N);
- if (c1 + c2 >= OpSizeInBits)
+ APInt c1 = N0C1->getAPIntValue();
+ APInt c2 = N1C->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+
+ APInt Sum = c1 + c2;
+ if (Sum.uge(OpSizeInBits))
return DAG.getConstant(0, DL, VT);
- return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
- DAG.getConstant(c1 + c2, DL, N1.getValueType()));
+
+ return DAG.getNode(
+ ISD::SHL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
}
}
@@ -4485,18 +4716,22 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
N0.getOperand(0).getOpcode() == ISD::SHL) {
SDValue N0Op0 = N0.getOperand(0);
if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
- uint64_t c1 = N0Op0C1->getZExtValue();
- uint64_t c2 = N1C->getZExtValue();
+ APInt c1 = N0Op0C1->getAPIntValue();
+ APInt c2 = N1C->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+
EVT InnerShiftVT = N0Op0.getValueType();
uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
- if (c2 >= OpSizeInBits - InnerShiftSize) {
+ if (c2.uge(OpSizeInBits - InnerShiftSize)) {
SDLoc DL(N0);
- if (c1 + c2 >= OpSizeInBits)
+ APInt Sum = c1 + c2;
+ if (Sum.uge(OpSizeInBits))
return DAG.getConstant(0, DL, VT);
- return DAG.getNode(ISD::SHL, DL, VT,
- DAG.getNode(N0.getOpcode(), DL, VT,
- N0Op0->getOperand(0)),
- DAG.getConstant(c1 + c2, DL, N1.getValueType()));
+
+ return DAG.getNode(
+ ISD::SHL, DL, VT,
+ DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
+ DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
}
}
}
@@ -4508,8 +4743,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
N0.getOperand(0).getOpcode() == ISD::SRL) {
SDValue N0Op0 = N0.getOperand(0);
if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
- uint64_t c1 = N0Op0C1->getZExtValue();
- if (c1 < VT.getScalarSizeInBits()) {
+ if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
+ uint64_t c1 = N0Op0C1->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
if (c1 == c2) {
SDValue NewOp0 = N0.getOperand(0);
@@ -4569,37 +4804,37 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
}
}
}
+
// fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
- if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
+ if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
+ isConstantOrConstantVector(N1, /* No Opaques */ true)) {
unsigned BitSize = VT.getScalarSizeInBits();
SDLoc DL(N);
- SDValue HiBitsMask =
- DAG.getConstant(APInt::getHighBitsSet(BitSize,
- BitSize - N1C->getZExtValue()),
- DL, VT);
- return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0),
- HiBitsMask);
+ SDValue AllBits = DAG.getConstant(APInt::getAllOnesValue(BitSize), DL, VT);
+ SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
+ return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
}
// fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
// Variant of version done on multiply, except mul by a power of 2 is turned
// into a shift.
- APInt Val;
- if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
- (isa<ConstantSDNode>(N0.getOperand(1)) ||
- ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val))) {
+ if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
+ isConstantOrConstantVector(N1, /* No Opaques */ true) &&
+ isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
+ AddToWorklist(Shl0.getNode());
+ AddToWorklist(Shl1.getNode());
return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
}
// fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
- if (N1C && N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse()) {
- if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
- if (SDValue Folded =
- DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, N0C1, N1C))
- return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Folded);
- }
+ if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
+ isConstantOrConstantVector(N1, /* No Opaques */ true) &&
+ isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
+ SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
+ if (isConstantOrConstantVector(Shl))
+ return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
}
if (N1C && !N1C->isOpaque())
@@ -4613,16 +4848,18 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
- unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+ unsigned OpSizeInBits = VT.getScalarSizeInBits();
+
+ // Arithmetic shifting an all-sign-bit value is a no-op.
+ if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
+ return N0;
// fold vector ops
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- if (VT.isVector()) {
+ if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
- N1C = isConstOrConstSplat(N1);
- }
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold (sra c1, c2) -> (sra c1, c2)
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
@@ -4634,8 +4871,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// fold (sra -1, x) -> -1
if (isAllOnesConstant(N0))
return N0;
- // fold (sra x, (setge c, size(x))) -> undef
- if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+ // fold (sra x, c >= size(x)) -> undef
+ if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
return DAG.getUNDEF(VT);
// fold (sra x, 0) -> x
if (N1C && N1C->isNullValue())
@@ -4656,13 +4893,19 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
if (N1C && N0.getOpcode() == ISD::SRA) {
- if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) {
- unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
- if (Sum >= OpSizeInBits)
- Sum = OpSizeInBits - 1;
+ if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
SDLoc DL(N);
- return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
- DAG.getConstant(Sum, DL, N1.getValueType()));
+ APInt c1 = N0C1->getAPIntValue();
+ APInt c2 = N1C->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+
+ APInt Sum = c1 + c2;
+ if (Sum.uge(OpSizeInBits))
+ Sum = APInt(OpSizeInBits, OpSizeInBits - 1);
+
+ return DAG.getNode(
+ ISD::SRA, DL, VT, N0.getOperand(0),
+ DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
}
}
@@ -4759,16 +5002,14 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
- unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+ unsigned OpSizeInBits = VT.getScalarSizeInBits();
// fold vector ops
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- if (VT.isVector()) {
+ if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
- N1C = isConstOrConstSplat(N1);
- }
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold (srl c1, c2) -> c1 >>u c2
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
@@ -4778,7 +5019,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (isNullConstant(N0))
return N0;
// fold (srl x, c >= size(x)) -> undef
- if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+ if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
return DAG.getUNDEF(VT);
// fold (srl x, 0) -> x
if (N1C && N1C->isNullValue())
@@ -4790,14 +5031,19 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
if (N1C && N0.getOpcode() == ISD::SRL) {
- if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) {
- uint64_t c1 = N01C->getZExtValue();
- uint64_t c2 = N1C->getZExtValue();
+ if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
SDLoc DL(N);
- if (c1 + c2 >= OpSizeInBits)
+ APInt c1 = N0C1->getAPIntValue();
+ APInt c2 = N1C->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+
+ APInt Sum = c1 + c2;
+ if (Sum.uge(OpSizeInBits))
return DAG.getConstant(0, DL, VT);
- return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
- DAG.getConstant(c1 + c2, DL, N1.getValueType()));
+
+ return DAG.getNode(
+ ISD::SRL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
}
}
@@ -4810,7 +5056,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
uint64_t c2 = N1C->getZExtValue();
EVT InnerShiftVT = N0.getOperand(0).getValueType();
EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
- uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
+ uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
// This is only valid if the OpSizeInBits + c1 = size of inner shift.
if (c1 + OpSizeInBits == InnerShiftSize) {
SDLoc DL(N0);
@@ -4825,14 +5071,14 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
}
// fold (srl (shl x, c), c) -> (and x, cst2)
- if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) {
- unsigned BitSize = N0.getScalarValueSizeInBits();
- if (BitSize <= 64) {
- uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize;
- SDLoc DL(N);
- return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0),
- DAG.getConstant(~0ULL >> ShAmt, DL, VT));
- }
+ if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
+ isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
+ SDLoc DL(N);
+ APInt AllBits = APInt::getAllOnesValue(N0.getScalarValueSizeInBits());
+ SDValue Mask =
+ DAG.getNode(ISD::SRL, DL, VT, DAG.getConstant(AllBits, DL, VT), N1);
+ AddToWorklist(Mask.getNode());
+ return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
}
// fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
@@ -5065,6 +5311,41 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
}
}
+// TODO: We should handle other cases of selecting between {-1,0,1} here.
+SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
+ SDValue Cond = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ EVT CondVT = Cond.getValueType();
+ SDLoc DL(N);
+
+ // fold (select Cond, 0, 1) -> (xor Cond, 1)
+ // We can't do this reliably if integer based booleans have different contents
+ // to floating point based booleans. This is because we can't tell whether we
+ // have an integer-based boolean or a floating-point-based boolean unless we
+ // can find the SETCC that produced it and inspect its operands. This is
+ // fairly easy if C is the SETCC node, but it can potentially be
+ // undiscoverable (or not reasonably discoverable). For example, it could be
+ // in another basic block or it could require searching a complicated
+ // expression.
+ if (VT.isInteger() &&
+ (CondVT == MVT::i1 || (CondVT.isInteger() &&
+ TLI.getBooleanContents(false, true) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ TLI.getBooleanContents(false, false) ==
+ TargetLowering::ZeroOrOneBooleanContent)) &&
+ isNullConstant(N1) && isOneConstant(N2)) {
+ SDValue NotCond = DAG.getNode(ISD::XOR, DL, CondVT, Cond,
+ DAG.getConstant(1, DL, CondVT));
+ if (VT.bitsEq(CondVT))
+ return NotCond;
+ return DAG.getZExtOrTrunc(NotCond, DL, VT);
+ }
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -5080,39 +5361,14 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// fold (select false, X, Y) -> Y
return !N0C->isNullValue() ? N1 : N2;
}
- // fold (select C, 1, X) -> (or C, X)
- if (VT == MVT::i1 && isOneConstant(N1))
+ // fold (select X, X, Y) -> (or X, Y)
+ // fold (select X, 1, Y) -> (or C, Y)
+ if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
- // fold (select C, 0, 1) -> (xor C, 1)
- // We can't do this reliably if integer based booleans have different contents
- // to floating point based booleans. This is because we can't tell whether we
- // have an integer-based boolean or a floating-point-based boolean unless we
- // can find the SETCC that produced it and inspect its operands. This is
- // fairly easy if C is the SETCC node, but it can potentially be
- // undiscoverable (or not reasonably discoverable). For example, it could be
- // in another basic block or it could require searching a complicated
- // expression.
- if (VT.isInteger() &&
- (VT0 == MVT::i1 || (VT0.isInteger() &&
- TLI.getBooleanContents(false, false) ==
- TLI.getBooleanContents(false, true) &&
- TLI.getBooleanContents(false, false) ==
- TargetLowering::ZeroOrOneBooleanContent)) &&
- isNullConstant(N1) && isOneConstant(N2)) {
- SDValue XORNode;
- if (VT == VT0) {
- SDLoc DL(N);
- return DAG.getNode(ISD::XOR, DL, VT0,
- N0, DAG.getConstant(1, DL, VT0));
- }
- SDLoc DL0(N0);
- XORNode = DAG.getNode(ISD::XOR, DL0, VT0,
- N0, DAG.getConstant(1, DL0, VT0));
- AddToWorklist(XORNode.getNode());
- if (VT.bitsGT(VT0))
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode);
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode);
- }
+
+ if (SDValue V = foldSelectOfConstants(N))
+ return V;
+
// fold (select C, 0, X) -> (and (not C), X)
if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
@@ -5125,16 +5381,9 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
AddToWorklist(NOTNode.getNode());
return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
}
- // fold (select C, X, 0) -> (and C, X)
- if (VT == MVT::i1 && isNullConstant(N2))
- return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
- // fold (select X, X, Y) -> (or X, Y)
- // fold (select X, 1, Y) -> (or X, Y)
- if (VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
- return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
// fold (select X, Y, X) -> (and X, Y)
// fold (select X, Y, 0) -> (and X, Y)
- if (VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
+ if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
// If we can fold this based on the true/false value, do so.
@@ -5145,7 +5394,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// The code in this block deals with the following 2 equivalences:
// select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
// select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
- // The target can specify its prefered form with the
+ // The target can specify its preferred form with the
// shouldNormalizeToSelectSequence() callback. However we always transform
// to the right anyway if we find the inner select exists in the DAG anyway
// and we always transform to the left side if we know that we can further
@@ -5214,6 +5463,18 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
}
}
+ // select (xor Cond, 1), X, Y -> select Cond, Y, X
+ if (VT0 == MVT::i1) {
+ if (N0->getOpcode() == ISD::XOR) {
+ if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) {
+ SDValue Cond0 = N0->getOperand(0);
+ if (C->isOne())
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(),
+ Cond0, N2, N1);
+ }
+ }
+ }
+
// fold selects based on a setcc into other things, such as min/max/abs
if (N0.getOpcode() == ISD::SETCC) {
// select x, y (fcmp lt x, y) -> fminnum x, y
@@ -5269,7 +5530,7 @@ std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
// This function assumes all the vselect's arguments are CONCAT_VECTOR
// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
- SDLoc dl(N);
+ SDLoc DL(N);
SDValue Cond = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
@@ -5316,7 +5577,7 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
"One half of the selector was all UNDEFs and the other was all the "
"same value. This should have been addressed before this function.");
return DAG.getNode(
- ISD::CONCAT_VECTORS, dl, VT,
+ ISD::CONCAT_VECTORS, DL, VT,
BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
}
@@ -5390,6 +5651,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
SDValue Mask = MST->getMask();
SDValue Data = MST->getValue();
+ EVT VT = Data.getValueType();
SDLoc DL(N);
// If the MSTORE data type requires splitting and the mask is provided by a
@@ -5399,16 +5661,13 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
if (Mask.getOpcode() == ISD::SETCC) {
// Check if any splitting is required.
- if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
+ if (TLI.getTypeAction(*DAG.getContext(), VT) !=
TargetLowering::TypeSplitVector)
return SDValue();
SDValue MaskLo, MaskHi, Lo, Hi;
std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MST->getValueType(0));
-
SDValue Chain = MST->getChain();
SDValue Ptr = MST->getBasePtr();
@@ -5418,8 +5677,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
// if Alignment is equal to the vector size,
// take the half of it for the second part
unsigned SecondHalfAlignment =
- (Alignment == Data->getValueType(0).getSizeInBits()/8) ?
- Alignment/2 : Alignment;
+ (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
@@ -5433,11 +5691,11 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
Alignment, MST->getAAInfo(), MST->getRanges());
Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
- MST->isTruncatingStore());
+ MST->isTruncatingStore(),
+ MST->isCompressingStore());
- unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
- Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
+ Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
+ MST->isCompressingStore());
MMO = DAG.getMachineFunction().
getMachineMemOperand(MST->getPointerInfo(),
@@ -5446,7 +5704,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
MST->getRanges());
Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
- MST->isTruncatingStore());
+ MST->isTruncatingStore(),
+ MST->isCompressingStore());
AddToWorklist(Lo.getNode());
AddToWorklist(Hi.getNode());
@@ -5585,11 +5844,10 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
Alignment, MLD->getAAInfo(), MLD->getRanges());
Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
- ISD::NON_EXTLOAD);
+ ISD::NON_EXTLOAD, MLD->isExpandingLoad());
- unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
- Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
+ Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
+ MLD->isExpandingLoad());
MMO = DAG.getMachineFunction().
getMachineMemOperand(MLD->getPointerInfo(),
@@ -5597,7 +5855,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
- ISD::NON_EXTLOAD);
+ ISD::NON_EXTLOAD, MLD->isExpandingLoad());
AddToWorklist(Lo.getNode());
AddToWorklist(Hi.getNode());
@@ -5625,6 +5883,10 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
SDValue N2 = N->getOperand(2);
SDLoc DL(N);
+ // fold (vselect C, X, X) -> X
+ if (N1 == N2)
+ return N1;
+
// Canonicalize integer abs.
// vselect (setg[te] X, 0), X, -X ->
// vselect (setgt X, -1), X, -X ->
@@ -5648,7 +5910,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
EVT VT = LHS.getValueType();
SDValue Shift = DAG.getNode(
ISD::SRA, DL, VT, LHS,
- DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, DL, VT));
+ DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
AddToWorklist(Shift.getNode());
AddToWorklist(Add.getNode());
@@ -5803,7 +6065,7 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
// We can fold this node into a build_vector.
unsigned VTBits = SVT.getSizeInBits();
- unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits();
+ unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
SmallVector<SDValue, 8> Elts;
unsigned NumElts = VT.getVectorNumElements();
SDLoc DL(N);
@@ -6026,7 +6288,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// fold (sext (truncate (load x))) -> (sext (smaller load x))
// fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
- SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ SDNode *oye = N0.getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
@@ -6038,9 +6300,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// See if the value being truncated is already sign extended. If so, just
// eliminate the trunc/sext pair.
SDValue Op = N0.getOperand(0);
- unsigned OpBits = Op.getValueType().getScalarType().getSizeInBits();
- unsigned MidBits = N0.getValueType().getScalarType().getSizeInBits();
- unsigned DestBits = VT.getScalarType().getSizeInBits();
+ unsigned OpBits = Op.getScalarValueSizeInBits();
+ unsigned MidBits = N0.getScalarValueSizeInBits();
+ unsigned DestBits = VT.getScalarSizeInBits();
unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
if (OpBits == DestBits) {
@@ -6201,7 +6463,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
// Here, T can be 1 or -1, depending on the type of the setcc and
// getBooleanContents().
- unsigned SetCCWidth = N0.getValueType().getScalarSizeInBits();
+ unsigned SetCCWidth = N0.getScalarValueSizeInBits();
SDLoc DL(N);
// To determine the "true" side of the select, we need to know the high bit
@@ -6323,7 +6585,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
if (N0.getOpcode() == ISD::TRUNCATE) {
if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
- SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ SDNode *oye = N0.getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
@@ -6338,7 +6600,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// fold (zext (truncate (load x))) -> (zext (smaller load x))
// fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
- SDNode *oye = N0.getNode()->getOperand(0).getNode();
+ SDNode *oye = N0.getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
@@ -6528,7 +6790,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// elements we can use a matching integer vector type and then
// truncate/sign extend.
EVT MatchingElementType = EVT::getIntegerVT(
- *DAG.getContext(), N00VT.getScalarType().getSizeInBits());
+ *DAG.getContext(), N00VT.getScalarSizeInBits());
EVT MatchingVectorType = EVT::getVectorVT(
*DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements());
SDValue VsetCC =
@@ -6558,8 +6820,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue InnerZExt = N0.getOperand(0);
// If the original shl may be shifting out bits, do not perform this
// transformation.
- unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
- InnerZExt.getOperand(0).getValueType().getSizeInBits();
+ unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
+ InnerZExt.getOperand(0).getValueSizeInBits();
if (ShAmtVal > KnownZeroBits)
return SDValue();
}
@@ -6598,7 +6860,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
// fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
if (N0.getOpcode() == ISD::TRUNCATE) {
if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
- SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ SDNode *oye = N0.getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
@@ -6625,15 +6887,15 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
N0.getOperand(1).getOpcode() == ISD::Constant &&
!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
N0.getValueType())) {
+ SDLoc DL(N);
SDValue X = N0.getOperand(0).getOperand(0);
if (X.getValueType().bitsLT(VT)) {
- X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X);
+ X = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X);
} else if (X.getValueType().bitsGT(VT)) {
- X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
+ X = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
}
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.zext(VT.getSizeInBits());
- SDLoc DL(N);
return DAG.getNode(ISD::AND, DL, VT,
X, DAG.getConstant(Mask, DL, VT));
}
@@ -6820,7 +7082,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
if ((ShAmt & (EVTBits-1)) == 0) {
N0 = N0.getOperand(0);
// Is the load width a multiple of size of VT?
- if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
+ if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
return SDValue();
}
@@ -6952,8 +7214,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT EVT = cast<VTSDNode>(N1)->getVT();
- unsigned VTBits = VT.getScalarType().getSizeInBits();
- unsigned EVTBits = EVT.getScalarType().getSizeInBits();
+ unsigned VTBits = VT.getScalarSizeInBits();
+ unsigned EVTBits = EVT.getScalarSizeInBits();
if (N0.isUndef())
return DAG.getUNDEF(VT);
@@ -6977,14 +7239,23 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
// if x is small enough.
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N00 = N0.getOperand(0);
- if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
+ if (N00.getScalarValueSizeInBits() <= EVTBits &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
+ return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
+ }
+
+ // fold (sext_in_reg (zext x)) -> (sext x)
+ // iff we are extending the source sign bit.
+ if (N0.getOpcode() == ISD::ZERO_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getScalarValueSizeInBits() == EVTBits &&
(!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
}
// fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
- return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT);
+ return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
// fold operands of sext_in_reg based on knowledge that the top bits are not
// demanded.
@@ -7111,6 +7382,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
return N0.getOperand(0);
}
+ // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
+ if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
+ return SDValue();
+
// Fold extract-and-trunc into a narrow extract. For example:
// i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
// i32 y = TRUNCATE(i64 x)
@@ -7148,7 +7423,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
// trunc (select c, a, b) -> select c, (trunc a), (trunc b)
- if (N0.getOpcode() == ISD::SELECT) {
+ if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
EVT SrcVT = N0.getValueType();
if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
TLI.isTruncateFree(SrcVT, VT)) {
@@ -7160,15 +7435,15 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
}
- // trunc (shl x, K) -> shl (trunc x), K => K < vt.size / 2
+ // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
if (const ConstantSDNode *CAmt = isConstOrConstSplat(N0.getOperand(1))) {
uint64_t Amt = CAmt->getZExtValue();
- unsigned Size = VT.getSizeInBits();
+ unsigned Size = VT.getScalarSizeInBits();
- if (Amt < Size / 2) {
+ if (Amt < Size) {
SDLoc SL(N);
EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
@@ -7525,7 +7800,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
isa<ConstantFPSDNode>(N0.getOperand(0)) &&
VT.isInteger() && !VT.isVector()) {
- unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
+ unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
if (isTypeLegal(IntXVT)) {
SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
@@ -7848,10 +8123,14 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
}
// More folding opportunities when target permits.
- if ((AllowFusion || HasFMAD) && Aggressive) {
+ if (Aggressive) {
// fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
- if (N0.getOpcode() == PreferredFusedOpcode &&
- N0.getOperand(2).getOpcode() == ISD::FMUL) {
+ // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
+ // are currently only supported on binary nodes.
+ if (Options.UnsafeFPMath &&
+ N0.getOpcode() == PreferredFusedOpcode &&
+ N0.getOperand(2).getOpcode() == ISD::FMUL &&
+ N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -7861,8 +8140,12 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
}
// fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
- if (N1->getOpcode() == PreferredFusedOpcode &&
- N1.getOperand(2).getOpcode() == ISD::FMUL) {
+ // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
+ // are currently only supported on binary nodes.
+ if (Options.UnsafeFPMath &&
+ N1->getOpcode() == PreferredFusedOpcode &&
+ N1.getOperand(2).getOpcode() == ISD::FMUL &&
+ N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N1.getOperand(0), N1.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -8090,11 +8373,15 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
}
// More folding opportunities when target permits.
- if ((AllowFusion || HasFMAD) && Aggressive) {
+ if (Aggressive) {
// fold (fsub (fma x, y, (fmul u, v)), z)
// -> (fma x, y (fma u, v, (fneg z)))
- if (N0.getOpcode() == PreferredFusedOpcode &&
- N0.getOperand(2).getOpcode() == ISD::FMUL) {
+ // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
+ // are currently only supported on binary nodes.
+ if (Options.UnsafeFPMath &&
+ N0.getOpcode() == PreferredFusedOpcode &&
+ N0.getOperand(2).getOpcode() == ISD::FMUL &&
+ N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -8106,7 +8393,10 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub x, (fma y, z, (fmul u, v)))
// -> (fma (fneg y), z, (fma (fneg u), v, x))
- if (N1.getOpcode() == PreferredFusedOpcode &&
+ // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
+ // are currently only supported on binary nodes.
+ if (Options.UnsafeFPMath &&
+ N1.getOpcode() == PreferredFusedOpcode &&
N1.getOperand(2).getOpcode() == ISD::FMUL) {
SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);
@@ -8221,8 +8511,10 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
return SDValue();
}
-/// Try to perform FMA combining on a given FMUL node.
-SDValue DAGCombiner::visitFMULForFMACombine(SDNode *N) {
+/// Try to perform FMA combining on a given FMUL node based on the distributive
+/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
+/// subtraction instead of addition).
+SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
@@ -8231,17 +8523,23 @@ SDValue DAGCombiner::visitFMULForFMACombine(SDNode *N) {
assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
const TargetOptions &Options = DAG.getTarget().Options;
- bool AllowFusion =
- (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
- // Floating-point multiply-add with intermediate rounding.
- bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
+ // The transforms below are incorrect when x == 0 and y == inf, because the
+ // intermediate multiplication produces a nan.
+ if (!Options.NoInfsFPMath)
+ return SDValue();
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
- AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
+ TLI.isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
+ // Floating-point multiply-add with intermediate rounding. This can result
+ // in a less precise result due to the changed rounding order.
+ bool HasFMAD = Options.UnsafeFPMath &&
+ (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
+
// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)
return SDValue();
@@ -8338,17 +8636,20 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
return DAG.getNode(ISD::FSUB, DL, VT, N1,
GetNegatedExpression(N0, DAG, LegalOperations), Flags);
+ // FIXME: Auto-upgrade the target/function-level option.
+ if (Options.UnsafeFPMath || N->getFlags()->hasNoSignedZeros()) {
+ // fold (fadd A, 0) -> A
+ if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
+ if (N1C->isZero())
+ return N0;
+ }
+
// If 'unsafe math' is enabled, fold lots of things.
if (Options.UnsafeFPMath) {
// No FP constant should be created after legalization as Instruction
// Selection pass has a hard time dealing with FP constants.
bool AllowNewConst = (Level < AfterLegalizeDAG);
- // fold (fadd A, 0) -> A
- if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
- if (N1C->isZero())
- return N0;
-
// fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
@@ -8457,7 +8758,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
EVT VT = N->getValueType(0);
- SDLoc dl(N);
+ SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
@@ -8468,30 +8769,33 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// fold (fsub c1, c2) -> c1-c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FSUB, dl, VT, N0, N1, Flags);
+ return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
// fold (fsub A, (fneg B)) -> (fadd A, B)
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
- return DAG.getNode(ISD::FADD, dl, VT, N0,
+ return DAG.getNode(ISD::FADD, DL, VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations), Flags);
- // If 'unsafe math' is enabled, fold lots of things.
- if (Options.UnsafeFPMath) {
- // (fsub A, 0) -> A
- if (N1CFP && N1CFP->isZero())
- return N0;
-
+ // FIXME: Auto-upgrade the target/function-level option.
+ if (Options.UnsafeFPMath || N->getFlags()->hasNoSignedZeros()) {
// (fsub 0, B) -> -B
if (N0CFP && N0CFP->isZero()) {
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
return GetNegatedExpression(N1, DAG, LegalOperations);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
- return DAG.getNode(ISD::FNEG, dl, VT, N1);
+ return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
}
+ }
+
+ // If 'unsafe math' is enabled, fold lots of things.
+ if (Options.UnsafeFPMath) {
+ // (fsub A, 0) -> A
+ if (N1CFP && N1CFP->isZero())
+ return N0;
// (fsub x, x) -> 0.0
if (N0 == N1)
- return DAG.getConstantFP(0.0f, dl, VT);
+ return DAG.getConstantFP(0.0f, DL, VT);
// (fsub x, (fadd x, y)) -> (fneg y)
// (fsub x, (fadd y, x)) -> (fneg y)
@@ -8611,7 +8915,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
}
// FMUL -> FMA combines:
- if (SDValue Fused = visitFMULForFMACombine(N)) {
+ if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
AddToWorklist(Fused.getNode());
return Fused;
}
@@ -8626,14 +8930,14 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
- SDLoc dl(N);
+ SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
// Constant fold FMA.
if (isa<ConstantFPSDNode>(N0) &&
isa<ConstantFPSDNode>(N1) &&
isa<ConstantFPSDNode>(N2)) {
- return DAG.getNode(ISD::FMA, dl, VT, N0, N1, N2);
+ return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
}
if (Options.UnsafeFPMath) {
@@ -8663,8 +8967,8 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
isConstantFPBuildVectorOrConstantFP(N1) &&
isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
- return DAG.getNode(ISD::FMUL, dl, VT, N0,
- DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1),
+ return DAG.getNode(ISD::FMUL, DL, VT, N0,
+ DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
&Flags), &Flags);
}
@@ -8672,9 +8976,9 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (N0.getOpcode() == ISD::FMUL &&
isConstantFPBuildVectorOrConstantFP(N1) &&
isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
- return DAG.getNode(ISD::FMA, dl, VT,
+ return DAG.getNode(ISD::FMA, DL, VT,
N0.getOperand(0),
- DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1),
+ DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
&Flags),
N2);
}
@@ -8685,32 +8989,32 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (N1CFP) {
if (N1CFP->isExactlyValue(1.0))
// TODO: The FMA node should have flags that propagate to this node.
- return DAG.getNode(ISD::FADD, dl, VT, N0, N2);
+ return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
if (N1CFP->isExactlyValue(-1.0) &&
(!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
- SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
+ SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
AddToWorklist(RHSNeg.getNode());
// TODO: The FMA node should have flags that propagate to this node.
- return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
+ return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
}
}
if (Options.UnsafeFPMath) {
// (fma x, c, x) -> (fmul x, (c+1))
if (N1CFP && N0 == N2) {
- return DAG.getNode(ISD::FMUL, dl, VT, N0,
- DAG.getNode(ISD::FADD, dl, VT,
- N1, DAG.getConstantFP(1.0, dl, VT),
- &Flags), &Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0,
+ DAG.getNode(ISD::FADD, DL, VT, N1,
+ DAG.getConstantFP(1.0, DL, VT), &Flags),
+ &Flags);
}
// (fma x, c, (fneg x)) -> (fmul x, (c-1))
if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
- return DAG.getNode(ISD::FMUL, dl, VT, N0,
- DAG.getNode(ISD::FADD, dl, VT,
- N1, DAG.getConstantFP(-1.0, dl, VT),
- &Flags), &Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0,
+ DAG.getNode(ISD::FADD, DL, VT, N1,
+ DAG.getConstantFP(-1.0, DL, VT), &Flags),
+ &Flags);
}
}
@@ -8720,7 +9024,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal.
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
-// Notice that this is not always beneficial. One reason is different target
+// Notice that this is not always beneficial. One reason is different targets
// may have different costs for FDIV and FMUL, so sometimes the cost of two
// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
@@ -8907,14 +9211,18 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
}
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
- if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap())
+ if (!DAG.getTarget().Options.UnsafeFPMath)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ if (TLI.isFsqrtCheap(N0, DAG))
return SDValue();
// TODO: FSQRT nodes should have flags that propagate to the created nodes.
// For now, create a Flags object for use with all unsafe math transforms.
SDNodeFlags Flags;
Flags.setUnsafeAlgebra(true);
- return buildSqrtEstimate(N->getOperand(0), &Flags);
+ return buildSqrtEstimate(N0, &Flags);
}
/// copysign(x, fp_extend(y)) -> copysign(x, y)
@@ -8941,11 +9249,11 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
- if (N0CFP && N1CFP) // Constant fold
+ if (N0CFP && N1CFP) // Constant fold
return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
if (N1CFP) {
- const APFloat& V = N1CFP->getValueAPF();
+ const APFloat &V = N1CFP->getValueAPF();
// copysign(x, c1) -> fabs(x) iff ispos(c1)
// copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
if (!V.isNegative()) {
@@ -8963,8 +9271,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
// copysign(copysign(x,z), y) -> copysign(x, y)
if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
N0.getOpcode() == ISD::FCOPYSIGN)
- return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
- N0.getOperand(0), N1);
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
// copysign(x, abs(y)) -> abs(x)
if (N1.getOpcode() == ISD::FABS)
@@ -8972,14 +9279,12 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
// copysign(x, copysign(y,z)) -> copysign(x, z)
if (N1.getOpcode() == ISD::FCOPYSIGN)
- return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
- N0, N1.getOperand(1));
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
// copysign(x, fp_extend(y)) -> copysign(x, y)
// copysign(x, fp_round(y)) -> copysign(x, y)
if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
- return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
- N0, N1.getOperand(0));
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
return SDValue();
}
@@ -9159,7 +9464,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
// fold (fp_round (fp_round x)) -> (fp_round x)
if (N0.getOpcode() == ISD::FP_ROUND) {
const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
- const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1;
+ const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
// Skip this folding if it results in an fp_round from f80 to f16.
//
@@ -9232,7 +9537,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
// Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
// value of X.
if (N0.getOpcode() == ISD::FP_ROUND
- && N0.getNode()->getConstantOperandVal(1) == 1) {
+ && N0.getConstantOperandVal(1) == 1) {
SDValue In = N0.getOperand(0);
if (In.getValueType() == VT) return In;
if (VT.bitsLT(In.getValueType()))
@@ -9319,7 +9624,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
if (N0.getValueType().isVector()) {
// For a vector, get a mask such as 0x80... per scalar element
// and splat it.
- SignMask = APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
+ SignMask = APInt::getSignBit(N0.getScalarValueSizeInBits());
SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
} else {
// For a scalar, just generate 0x80...
@@ -9424,7 +9729,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
if (N0.getValueType().isVector()) {
// For a vector, get a mask such as 0x7f... per scalar element
// and splat it.
- SignMask = ~APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
+ SignMask = ~APInt::getSignBit(N0.getScalarValueSizeInBits());
SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
} else {
// For a scalar, just generate 0x7f...
@@ -10103,7 +10408,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// value.
// TODO: Handle store large -> read small portion.
// TODO: Handle TRUNCSTORE/LOADEXT
- if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
+ if (OptLevel != CodeGenOpt::None &&
+ ISD::isNormalLoad(N) && !LD->isVolatile()) {
if (ISD::isNON_TRUNCStore(Chain.getNode())) {
StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
if (PrevST->getBasePtr() == Ptr &&
@@ -10405,7 +10711,7 @@ struct LoadedSlice {
assert(Inst && Origin && "Unable to replace a non-existing slice.");
const SDValue &OldBaseAddr = Origin->getBasePtr();
SDValue BaseAddr = OldBaseAddr;
- // Get the offset in that chunk of bytes w.r.t. the endianess.
+ // Get the offset in that chunk of bytes w.r.t. the endianness.
int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
assert(Offset >= 0 && "Offset too big to fit in int64_t!");
if (Offset) {
@@ -10705,7 +11011,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) {
LSIt != LSItEnd; ++LSIt) {
SDValue SliceInst = LSIt->loadSlice();
CombineTo(LSIt->Inst, SliceInst, true);
- if (SliceInst.getNode()->getOpcode() != ISD::LOAD)
+ if (SliceInst.getOpcode() != ISD::LOAD)
SliceInst = SliceInst.getOperand(0);
assert(SliceInst->getOpcode() == ISD::LOAD &&
"It takes more than a zext to get to the loaded slice!!");
@@ -11033,110 +11339,6 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
return SDValue();
}
-namespace {
-/// Helper struct to parse and store a memory address as base + index + offset.
-/// We ignore sign extensions when it is safe to do so.
-/// The following two expressions are not equivalent. To differentiate we need
-/// to store whether there was a sign extension involved in the index
-/// computation.
-/// (load (i64 add (i64 copyfromreg %c)
-/// (i64 signextend (add (i8 load %index)
-/// (i8 1))))
-/// vs
-///
-/// (load (i64 add (i64 copyfromreg %c)
-/// (i64 signextend (i32 add (i32 signextend (i8 load %index))
-/// (i32 1)))))
-struct BaseIndexOffset {
- SDValue Base;
- SDValue Index;
- int64_t Offset;
- bool IsIndexSignExt;
-
- BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}
-
- BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
- bool IsIndexSignExt) :
- Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}
-
- bool equalBaseIndex(const BaseIndexOffset &Other) {
- return Other.Base == Base && Other.Index == Index &&
- Other.IsIndexSignExt == IsIndexSignExt;
- }
-
- /// Parses tree in Ptr for base, index, offset addresses.
- static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG) {
- bool IsIndexSignExt = false;
-
- // Split up a folded GlobalAddress+Offset into its component parts.
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ptr))
- if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) {
- return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(),
- SDLoc(GA),
- GA->getValueType(0),
- /*Offset=*/0,
- /*isTargetGA=*/false,
- GA->getTargetFlags()),
- SDValue(),
- GA->getOffset(),
- IsIndexSignExt);
- }
-
- // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
- // instruction, then it could be just the BASE or everything else we don't
- // know how to handle. Just use Ptr as BASE and give up.
- if (Ptr->getOpcode() != ISD::ADD)
- return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
-
- // We know that we have at least an ADD instruction. Try to pattern match
- // the simple case of BASE + OFFSET.
- if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
- int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
- return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
- IsIndexSignExt);
- }
-
- // Inside a loop the current BASE pointer is calculated using an ADD and a
- // MUL instruction. In this case Ptr is the actual BASE pointer.
- // (i64 add (i64 %array_ptr)
- // (i64 mul (i64 %induction_var)
- // (i64 %element_size)))
- if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
- return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
-
- // Look at Base + Index + Offset cases.
- SDValue Base = Ptr->getOperand(0);
- SDValue IndexOffset = Ptr->getOperand(1);
-
- // Skip signextends.
- if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
- IndexOffset = IndexOffset->getOperand(0);
- IsIndexSignExt = true;
- }
-
- // Either the case of Base + Index (no offset) or something else.
- if (IndexOffset->getOpcode() != ISD::ADD)
- return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt);
-
- // Now we have the case of Base + Index + offset.
- SDValue Index = IndexOffset->getOperand(0);
- SDValue Offset = IndexOffset->getOperand(1);
-
- if (!isa<ConstantSDNode>(Offset))
- return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
-
- // Ignore signextends.
- if (Index->getOpcode() == ISD::SIGN_EXTEND) {
- Index = Index->getOperand(0);
- IsIndexSignExt = true;
- } else IsIndexSignExt = false;
-
- int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
- return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
- }
-};
-} // namespace
-
// This is a helper function for visitMUL to check the profitability
// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
// MulNode is the original multiply, AddNode is (add x, c1),
@@ -11351,6 +11553,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
}
}
+ StoreNodes.erase(StoreNodes.begin() + NumStores, StoreNodes.end());
return true;
}
@@ -11493,7 +11696,8 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
return true;
}
-bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
+bool DAGCombiner::MergeConsecutiveStores(
+ StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes) {
if (OptLevel == CodeGenOpt::None)
return false;
@@ -11537,16 +11741,13 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// any of the store nodes.
SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
- // Save the StoreSDNodes that we find in the chain.
- SmallVector<MemOpLink, 8> StoreNodes;
-
getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes);
// Check if there is anything to merge.
if (StoreNodes.size() < 2)
return false;
- // only do dep endence check in AA case
+ // only do dependence check in AA case
bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
: DAG.getSubtarget().useAA();
if (UseAA && !checkMergeStoreCandidatesForDependencies(StoreNodes))
@@ -11582,10 +11783,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// Check if this store interferes with any of the loads that we found.
// If we find a load that alias with this store. Stop the sequence.
- if (std::any_of(AliasLoadNodes.begin(), AliasLoadNodes.end(),
- [&](LSBaseSDNode* Ldn) {
- return isAlias(Ldn, StoreNodes[i].MemNode);
- }))
+ if (any_of(AliasLoadNodes, [&](LSBaseSDNode *Ldn) {
+ return isAlias(Ldn, StoreNodes[i].MemNode);
+ }))
break;
// Mark this node as useful.
@@ -11899,6 +12099,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
}
}
+ StoreNodes.erase(StoreNodes.begin() + NumElem, StoreNodes.end());
return true;
}
@@ -12088,11 +12289,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// See if we can simplify the input to this truncstore with knowledge that
// only the low bits are being used. For example:
// "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
- SDValue Shorter =
- GetDemandedBits(Value,
- APInt::getLowBitsSet(
- Value.getValueType().getScalarType().getSizeInBits(),
- ST->getMemoryVT().getScalarType().getSizeInBits()));
+ SDValue Shorter = GetDemandedBits(
+ Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
+ ST->getMemoryVT().getScalarSizeInBits()));
AddToWorklist(Value.getNode());
if (Shorter.getNode())
return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
@@ -12100,10 +12299,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Otherwise, see if we can simplify the operation with
// SimplifyDemandedBits, which only works if the value has a single use.
- if (SimplifyDemandedBits(Value,
- APInt::getLowBitsSet(
- Value.getValueType().getScalarType().getSizeInBits(),
- ST->getMemoryVT().getScalarType().getSizeInBits())))
+ if (SimplifyDemandedBits(
+ Value,
+ APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
+ ST->getMemoryVT().getScalarSizeInBits())))
return SDValue(N, 0);
}
@@ -12144,19 +12343,20 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Only perform this optimization before the types are legal, because we
// don't want to perform this optimization on every DAGCombine invocation.
if (!LegalTypes) {
- bool EverChanged = false;
-
- do {
+ for (;;) {
// There can be multiple store sequences on the same chain.
// Keep trying to merge store sequences until we are unable to do so
// or until we merge the last store on the chain.
- bool Changed = MergeConsecutiveStores(ST);
- EverChanged |= Changed;
+ SmallVector<MemOpLink, 8> StoreNodes;
+ bool Changed = MergeConsecutiveStores(ST, StoreNodes);
if (!Changed) break;
- } while (ST->getOpcode() != ISD::DELETED_NODE);
- if (EverChanged)
- return SDValue(N, 0);
+ if (any_of(StoreNodes,
+ [ST](const MemOpLink &Link) { return Link.MemNode == ST; })) {
+ // ST has been merged and no longer exists.
+ return SDValue(N, 0);
+ }
+ }
}
// Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
@@ -12169,14 +12369,123 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
return NewSt;
}
+ if (SDValue NewSt = splitMergedValStore(ST))
+ return NewSt;
+
return ReduceLoadOpStoreWidth(N);
}
+/// For the instruction sequence of store below, F and I values
+/// are bundled together as an i64 value before being stored into memory.
+/// Sometimes it is more efficent to generate separate stores for F and I,
+/// which can remove the bitwise instructions or sink them to colder places.
+///
+/// (store (or (zext (bitcast F to i32) to i64),
+/// (shl (zext I to i64), 32)), addr) -->
+/// (store F, addr) and (store I, addr+4)
+///
+/// Similarly, splitting for other merged store can also be beneficial, like:
+/// For pair of {i32, i32}, i64 store --> two i32 stores.
+/// For pair of {i32, i16}, i64 store --> two i32 stores.
+/// For pair of {i16, i16}, i32 store --> two i16 stores.
+/// For pair of {i16, i8}, i32 store --> two i16 stores.
+/// For pair of {i8, i8}, i16 store --> two i8 stores.
+///
+/// We allow each target to determine specifically which kind of splitting is
+/// supported.
+///
+/// The store patterns are commonly seen from the simple code snippet below
+/// if only std::make_pair(...) is sroa transformed before inlined into hoo.
+/// void goo(const std::pair<int, float> &);
+/// hoo() {
+/// ...
+/// goo(std::make_pair(tmp, ftmp));
+/// ...
+/// }
+///
+SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
+ if (OptLevel == CodeGenOpt::None)
+ return SDValue();
+
+ SDValue Val = ST->getValue();
+ SDLoc DL(ST);
+
+ // Match OR operand.
+ if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
+ return SDValue();
+
+ // Match SHL operand and get Lower and Higher parts of Val.
+ SDValue Op1 = Val.getOperand(0);
+ SDValue Op2 = Val.getOperand(1);
+ SDValue Lo, Hi;
+ if (Op1.getOpcode() != ISD::SHL) {
+ std::swap(Op1, Op2);
+ if (Op1.getOpcode() != ISD::SHL)
+ return SDValue();
+ }
+ Lo = Op2;
+ Hi = Op1.getOperand(0);
+ if (!Op1.hasOneUse())
+ return SDValue();
+
+ // Match shift amount to HalfValBitSize.
+ unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
+ ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
+ if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
+ return SDValue();
+
+ // Lo and Hi are zero-extended from int with size less equal than 32
+ // to i64.
+ if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
+ !Lo.getOperand(0).getValueType().isScalarInteger() ||
+ Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
+ Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
+ !Hi.getOperand(0).getValueType().isScalarInteger() ||
+ Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
+ return SDValue();
+
+ // Use the EVT of low and high parts before bitcast as the input
+ // of target query.
+ EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
+ ? Lo.getOperand(0).getValueType()
+ : Lo.getValueType();
+ EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
+ ? Hi.getOperand(0).getValueType()
+ : Hi.getValueType();
+ if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
+ return SDValue();
+
+ // Start to split store.
+ unsigned Alignment = ST->getAlignment();
+ MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = ST->getAAInfo();
+
+ // Change the sizes of Lo and Hi's value types to HalfValBitSize.
+ EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
+ Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
+
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ // Lower value store.
+ SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
+ ST->getAlignment(), MMOFlags, AAInfo);
+ Ptr =
+ DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
+ // Higher value store.
+ SDValue St1 =
+ DAG.getStore(St0, DL, Hi, Ptr,
+ ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
+ Alignment / 2, MMOFlags, AAInfo);
+ return St1;
+}
+
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
SDValue InVec = N->getOperand(0);
SDValue InVal = N->getOperand(1);
SDValue EltNo = N->getOperand(2);
- SDLoc dl(N);
+ SDLoc DL(N);
// If the inserted element is an UNDEF, just use the input vector.
if (InVal.isUndef())
@@ -12206,7 +12515,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue();
if (Elt < OtherElt) {
// Swap nodes.
- SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT,
+ SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
InVec.getOperand(0), InVal, EltNo);
AddToWorklist(NewOp.getNode());
return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
@@ -12237,13 +12546,13 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
EVT OpVT = Ops[0].getValueType();
if (InVal.getValueType() != OpVT)
InVal = OpVT.bitsGT(InVal.getValueType()) ?
- DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
- DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
+ DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
+ DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
Ops[Elt] = InVal;
}
// Return the new vector
- return DAG.getBuildVector(VT, dl, Ops);
+ return DAG.getBuildVector(VT, DL, Ops);
}
SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
@@ -12544,7 +12853,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
return SDValue();
unsigned NumInScalars = N->getNumOperands();
- SDLoc dl(N);
+ SDLoc DL(N);
EVT VT = N->getValueType(0);
// Check to see if this is a BUILD_VECTOR of a bunch of values
@@ -12603,7 +12912,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
assert(ElemRatio > 1 && "Invalid element size ratio");
SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
- DAG.getConstant(0, SDLoc(N), SourceType);
+ DAG.getConstant(0, DL, SourceType);
unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
@@ -12634,7 +12943,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
if (!isTypeLegal(VecVT)) return SDValue();
// Make the new BUILD_VECTOR.
- SDValue BV = DAG.getBuildVector(VecVT, dl, Ops);
+ SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
// The new BUILD_VECTOR node has the potential to be further optimized.
AddToWorklist(BV.getNode());
@@ -12646,7 +12955,7 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
EVT VT = N->getValueType(0);
unsigned NumInScalars = N->getNumOperands();
- SDLoc dl(N);
+ SDLoc DL(N);
EVT SrcVT = MVT::Other;
unsigned Opcode = ISD::DELETED_NODE;
@@ -12707,30 +13016,126 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
else
Opnds.push_back(In.getOperand(0));
}
- SDValue BV = DAG.getBuildVector(NVT, dl, Opnds);
+ SDValue BV = DAG.getBuildVector(NVT, DL, Opnds);
AddToWorklist(BV.getNode());
- return DAG.getNode(Opcode, dl, VT, BV);
+ return DAG.getNode(Opcode, DL, VT, BV);
}
-SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
- unsigned NumInScalars = N->getNumOperands();
- SDLoc dl(N);
+SDValue DAGCombiner::createBuildVecShuffle(SDLoc DL, SDNode *N,
+ ArrayRef<int> VectorMask,
+ SDValue VecIn1, SDValue VecIn2,
+ unsigned LeftIdx) {
+ MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
+
EVT VT = N->getValueType(0);
+ EVT InVT1 = VecIn1.getValueType();
+ EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
+
+ unsigned Vec2Offset = InVT1.getVectorNumElements();
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned ShuffleNumElems = NumElems;
+
+ // We can't generate a shuffle node with mismatched input and output types.
+ // Try to make the types match the type of the output.
+ if (InVT1 != VT || InVT2 != VT) {
+ if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
+ // If the output vector length is a multiple of both input lengths,
+ // we can concatenate them and pad the rest with undefs.
+ unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
+ assert(NumConcats >= 2 && "Concat needs at least two inputs!");
+ SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
+ ConcatOps[0] = VecIn1;
+ ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
+ VecIn2 = SDValue();
+ } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
+ if (!TLI.isExtractSubvectorCheap(VT, NumElems))
+ return SDValue();
- // A vector built entirely of undefs is undef.
- if (ISD::allOperandsUndef(N))
- return DAG.getUNDEF(VT);
+ if (!VecIn2.getNode()) {
+ // If we only have one input vector, and it's twice the size of the
+ // output, split it in two.
+ VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
+ DAG.getConstant(NumElems, DL, IdxTy));
+ VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
+ // Since we now have shorter input vectors, adjust the offset of the
+ // second vector's start.
+ Vec2Offset = NumElems;
+ } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
+ // VecIn1 is wider than the output, and we have another, possibly
+ // smaller input. Pad the smaller input with undefs, shuffle at the
+ // input vector width, and extract the output.
+ // The shuffle type is different than VT, so check legality again.
+ if (LegalOperations &&
+ !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
+ return SDValue();
- if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
- return V;
+ // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
+ // lower it back into a BUILD_VECTOR. So if the inserted type is
+ // illegal, don't even try.
+ if (InVT1 != InVT2) {
+ if (!TLI.isTypeLegal(InVT2))
+ return SDValue();
+ VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
+ DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
+ }
+ ShuffleNumElems = NumElems * 2;
+ } else {
+ // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
+ // than VecIn1. We can't handle this for now - this case will disappear
+ // when we start sorting the vectors by type.
+ return SDValue();
+ }
+ } else {
+ // TODO: Support cases where the length mismatch isn't exactly by a
+ // factor of 2.
+ // TODO: Move this check upwards, so that if we have bad type
+ // mismatches, we don't create any DAG nodes.
+ return SDValue();
+ }
+ }
- if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
- return V;
+ // Initialize mask to undef.
+ SmallVector<int, 8> Mask(ShuffleNumElems, -1);
+
+ // Only need to run up to the number of elements actually used, not the
+ // total number of elements in the shuffle - if we are shuffling a wider
+ // vector, the high lanes should be set to undef.
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (VectorMask[i] <= 0)
+ continue;
+
+ unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
+ if (VectorMask[i] == (int)LeftIdx) {
+ Mask[i] = ExtIndex;
+ } else if (VectorMask[i] == (int)LeftIdx + 1) {
+ Mask[i] = Vec2Offset + ExtIndex;
+ }
+ }
+
+ // The type the input vectors may have changed above.
+ InVT1 = VecIn1.getValueType();
+
+ // If we already have a VecIn2, it should have the same type as VecIn1.
+ // If we don't, get an undef/zero vector of the appropriate type.
+ VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
+ assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
+
+ SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
+ if (ShuffleNumElems > NumElems)
+ Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
- // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
- // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
- // at most two distinct vectors, turn this into a shuffle node.
+ return Shuffle;
+}
+
+// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
+// operations. If the types of the vectors we're extracting from allow it,
+// turn this into a vector_shuffle node.
+SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
// Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
if (!isTypeLegal(VT))
@@ -12740,149 +13145,169 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
return SDValue();
- SDValue VecIn1, VecIn2;
bool UsesZeroVector = false;
- for (unsigned i = 0; i != NumInScalars; ++i) {
+ unsigned NumElems = N->getNumOperands();
+
+ // Record, for each element of the newly built vector, which input vector
+ // that element comes from. -1 stands for undef, 0 for the zero vector,
+ // and positive values for the input vectors.
+ // VectorMask maps each element to its vector number, and VecIn maps vector
+ // numbers to their initial SDValues.
+
+ SmallVector<int, 8> VectorMask(NumElems, -1);
+ SmallVector<SDValue, 8> VecIn;
+ VecIn.push_back(SDValue());
+
+ for (unsigned i = 0; i != NumElems; ++i) {
SDValue Op = N->getOperand(i);
- // Ignore undef inputs.
- if (Op.isUndef()) continue;
- // See if we can combine this build_vector into a blend with a zero vector.
- if (!VecIn2.getNode() && (isNullConstant(Op) || isNullFPConstant(Op))) {
+ if (Op.isUndef())
+ continue;
+
+ // See if we can use a blend with a zero vector.
+ // TODO: Should we generalize this to a blend with an arbitrary constant
+ // vector?
+ if (isNullConstant(Op) || isNullFPConstant(Op)) {
UsesZeroVector = true;
+ VectorMask[i] = 0;
continue;
}
- // If this input is something other than a EXTRACT_VECTOR_ELT with a
- // constant index, bail out.
+ // Not an undef or zero. If the input is something other than an
+ // EXTRACT_VECTOR_ELT with a constant index, bail out.
if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
- !isa<ConstantSDNode>(Op.getOperand(1))) {
- VecIn1 = VecIn2 = SDValue(nullptr, 0);
- break;
- }
+ !isa<ConstantSDNode>(Op.getOperand(1)))
+ return SDValue();
- // We allow up to two distinct input vectors.
SDValue ExtractedFromVec = Op.getOperand(0);
- if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
- continue;
- if (!VecIn1.getNode()) {
- VecIn1 = ExtractedFromVec;
- } else if (!VecIn2.getNode() && !UsesZeroVector) {
- VecIn2 = ExtractedFromVec;
- } else {
- // Too many inputs.
- VecIn1 = VecIn2 = SDValue(nullptr, 0);
- break;
- }
- }
-
- // If everything is good, we can make a shuffle operation.
- if (VecIn1.getNode()) {
- unsigned InNumElements = VecIn1.getValueType().getVectorNumElements();
- SmallVector<int, 8> Mask;
- for (unsigned i = 0; i != NumInScalars; ++i) {
- unsigned Opcode = N->getOperand(i).getOpcode();
- if (Opcode == ISD::UNDEF) {
- Mask.push_back(-1);
- continue;
- }
+ // All inputs must have the same element type as the output.
+ if (VT.getVectorElementType() !=
+ ExtractedFromVec.getValueType().getVectorElementType())
+ return SDValue();
- // Operands can also be zero.
- if (Opcode != ISD::EXTRACT_VECTOR_ELT) {
- assert(UsesZeroVector &&
- (Opcode == ISD::Constant || Opcode == ISD::ConstantFP) &&
- "Unexpected node found!");
- Mask.push_back(NumInScalars+i);
- continue;
- }
+ // Have we seen this input vector before?
+ // The vectors are expected to be tiny (usually 1 or 2 elements), so using
+ // a map back from SDValues to numbers isn't worth it.
+ unsigned Idx = std::distance(
+ VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
+ if (Idx == VecIn.size())
+ VecIn.push_back(ExtractedFromVec);
- // If extracting from the first vector, just use the index directly.
- SDValue Extract = N->getOperand(i);
- SDValue ExtVal = Extract.getOperand(1);
- unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
- if (Extract.getOperand(0) == VecIn1) {
- Mask.push_back(ExtIndex);
- continue;
- }
+ VectorMask[i] = Idx;
+ }
- // Otherwise, use InIdx + InputVecSize
- Mask.push_back(InNumElements + ExtIndex);
- }
+ // If we didn't find at least one input vector, bail out.
+ if (VecIn.size() < 2)
+ return SDValue();
- // Avoid introducing illegal shuffles with zero.
- if (UsesZeroVector && !TLI.isVectorClearMaskLegal(Mask, VT))
+ // TODO: We want to sort the vectors by descending length, so that adjacent
+ // pairs have similar length, and the longer vector is always first in the
+ // pair.
+
+ // TODO: Should this fire if some of the input vectors has illegal type (like
+ // it does now), or should we let legalization run its course first?
+
+ // Shuffle phase:
+ // Take pairs of vectors, and shuffle them so that the result has elements
+ // from these vectors in the correct places.
+ // For example, given:
+ // t10: i32 = extract_vector_elt t1, Constant:i64<0>
+ // t11: i32 = extract_vector_elt t2, Constant:i64<0>
+ // t12: i32 = extract_vector_elt t3, Constant:i64<0>
+ // t13: i32 = extract_vector_elt t1, Constant:i64<1>
+ // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
+ // We will generate:
+ // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
+ // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
+ SmallVector<SDValue, 4> Shuffles;
+ for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
+ unsigned LeftIdx = 2 * In + 1;
+ SDValue VecLeft = VecIn[LeftIdx];
+ SDValue VecRight =
+ (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
+
+ if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
+ VecRight, LeftIdx))
+ Shuffles.push_back(Shuffle);
+ else
return SDValue();
+ }
- // We can't generate a shuffle node with mismatched input and output types.
- // Attempt to transform a single input vector to the correct type.
- if ((VT != VecIn1.getValueType())) {
- // If the input vector type has a different base type to the output
- // vector type, bail out.
- EVT VTElemType = VT.getVectorElementType();
- if ((VecIn1.getValueType().getVectorElementType() != VTElemType) ||
- (VecIn2.getNode() &&
- (VecIn2.getValueType().getVectorElementType() != VTElemType)))
- return SDValue();
+ // If we need the zero vector as an "ingredient" in the blend tree, add it
+ // to the list of shuffles.
+ if (UsesZeroVector)
+ Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
+ : DAG.getConstantFP(0.0, DL, VT));
- // If the input vector is too small, widen it.
- // We only support widening of vectors which are half the size of the
- // output registers. For example XMM->YMM widening on X86 with AVX.
- EVT VecInT = VecIn1.getValueType();
- if (VecInT.getSizeInBits() * 2 == VT.getSizeInBits()) {
- // If we only have one small input, widen it by adding undef values.
- if (!VecIn2.getNode())
- VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1,
- DAG.getUNDEF(VecIn1.getValueType()));
- else if (VecIn1.getValueType() == VecIn2.getValueType()) {
- // If we have two small inputs of the same type, try to concat them.
- VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, VecIn2);
- VecIn2 = SDValue(nullptr, 0);
- } else
- return SDValue();
- } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) {
- // If the input vector is too large, try to split it.
- // We don't support having two input vectors that are too large.
- // If the zero vector was used, we can not split the vector,
- // since we'd need 3 inputs.
- if (UsesZeroVector || VecIn2.getNode())
- return SDValue();
+ // If we only have one shuffle, we're done.
+ if (Shuffles.size() == 1)
+ return Shuffles[0];
- if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements()))
- return SDValue();
+ // Update the vector mask to point to the post-shuffle vectors.
+ for (int &Vec : VectorMask)
+ if (Vec == 0)
+ Vec = Shuffles.size() - 1;
+ else
+ Vec = (Vec - 1) / 2;
+
+ // More than one shuffle. Generate a binary tree of blends, e.g. if from
+ // the previous step we got the set of shuffles t10, t11, t12, t13, we will
+ // generate:
+ // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
+ // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
+ // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
+ // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
+ // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
+ // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
+ // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
+
+ // Make sure the initial size of the shuffle list is even.
+ if (Shuffles.size() % 2)
+ Shuffles.push_back(DAG.getUNDEF(VT));
+
+ for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
+ if (CurSize % 2) {
+ Shuffles[CurSize] = DAG.getUNDEF(VT);
+ CurSize++;
+ }
+ for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
+ int Left = 2 * In;
+ int Right = 2 * In + 1;
+ SmallVector<int, 8> Mask(NumElems, -1);
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (VectorMask[i] == Left) {
+ Mask[i] = i;
+ VectorMask[i] = In;
+ } else if (VectorMask[i] == Right) {
+ Mask[i] = i + NumElems;
+ VectorMask[i] = In;
+ }
+ }
- // Try to replace VecIn1 with two extract_subvectors
- // No need to update the masks, they should still be correct.
- VecIn2 = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
- DAG.getConstant(VT.getVectorNumElements(), dl,
- TLI.getVectorIdxTy(DAG.getDataLayout())));
- VecIn1 = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
- DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- } else
- return SDValue();
+ Shuffles[In] =
+ DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
}
+ }
- if (UsesZeroVector)
- VecIn2 = VT.isInteger() ? DAG.getConstant(0, dl, VT) :
- DAG.getConstantFP(0.0, dl, VT);
- else
- // If VecIn2 is unused then change it to undef.
- VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+ return Shuffles[0];
+}
- // Check that we were able to transform all incoming values to the same
- // type.
- if (VecIn2.getValueType() != VecIn1.getValueType() ||
- VecIn1.getValueType() != VT)
- return SDValue();
+SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+ EVT VT = N->getValueType(0);
- // Return the new VECTOR_SHUFFLE node.
- SDValue Ops[2];
- Ops[0] = VecIn1;
- Ops[1] = VecIn2;
- return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], Mask);
- }
+ // A vector built entirely of undefs is undef.
+ if (ISD::allOperandsUndef(N))
+ return DAG.getUNDEF(VT);
+
+ if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
+ return V;
+
+ if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
+ return V;
+
+ if (SDValue V = reduceBuildVecToShuffle(N))
+ return V;
return SDValue();
}
@@ -13071,8 +13496,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
return SDValue();
- SDLoc dl = SDLoc(N);
- SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar);
+ SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
return DAG.getBitcast(VT, Res);
}
}
@@ -13208,7 +13632,6 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
V = V.getOperand(0);
if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
- SDLoc dl(N);
// Handle only simple case where vector being inserted and vector
// being extracted are of same type, and are half size of larger vectors.
EVT BigVT = V->getOperand(0).getValueType();
@@ -13228,11 +13651,11 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
// Into:
// indices are equal or bit offsets are equal => V1
// otherwise => (extract_subvec V1, ExtIdx)
- if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() ==
- ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits())
+ if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
+ ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
return DAG.getBitcast(NVT, V->getOperand(1));
return DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, dl, NVT,
+ ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
N->getOperand(1));
}
@@ -13391,6 +13814,84 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
+// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
+// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
+//
+// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
+// a simplification in some sense, but it isn't appropriate in general: some
+// BUILD_VECTORs are substantially cheaper than others. The general case
+// of a BUILD_VECTOR requires inserting each element individually (or
+// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
+// all constants is a single constant pool load. A BUILD_VECTOR where each
+// element is identical is a splat. A BUILD_VECTOR where most of the operands
+// are undef lowers to a small number of element insertions.
+//
+// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
+// We don't fold shuffles where one side is a non-zero constant, and we don't
+// fold shuffles if the resulting BUILD_VECTOR would have duplicate
+// non-constant operands. This seems to work out reasonably well in practice.
+static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ EVT VT = SVN->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+ SDValue N0 = SVN->getOperand(0);
+ SDValue N1 = SVN->getOperand(1);
+
+ if (!N0->hasOneUse() || !N1->hasOneUse())
+ return SDValue();
+ // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
+ // discussed above.
+ if (!N1.isUndef()) {
+ bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
+ bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
+ if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
+ return SDValue();
+ if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
+ return SDValue();
+ }
+
+ SmallVector<SDValue, 8> Ops;
+ SmallSet<SDValue, 16> DuplicateOps;
+ for (int M : SVN->getMask()) {
+ SDValue Op = DAG.getUNDEF(VT.getScalarType());
+ if (M >= 0) {
+ int Idx = M < (int)NumElts ? M : M - NumElts;
+ SDValue &S = (M < (int)NumElts ? N0 : N1);
+ if (S.getOpcode() == ISD::BUILD_VECTOR) {
+ Op = S.getOperand(Idx);
+ } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ if (Idx == 0)
+ Op = S.getOperand(0);
+ } else {
+ // Operand can't be combined - bail out.
+ return SDValue();
+ }
+ }
+
+ // Don't duplicate a non-constant BUILD_VECTOR operand; semantically, this is
+ // fine, but it's likely to generate low-quality code if the target can't
+ // reconstruct an appropriate shuffle.
+ if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
+ if (!DuplicateOps.insert(Op).second)
+ return SDValue();
+
+ Ops.push_back(Op);
+ }
+ // BUILD_VECTOR requires all inputs to be of the same type, find the
+ // maximum type and extend them all.
+ EVT SVT = VT.getScalarType();
+ if (SVT.isInteger())
+ for (SDValue &Op : Ops)
+ SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
+ if (SVT != VT.getScalarType())
+ for (SDValue &Op : Ops)
+ Op = TLI.isZExtFree(Op.getValueType(), SVT)
+ ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
+ : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
+ return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
+}
+
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
@@ -13506,40 +14007,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
- if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
- SmallVector<SDValue, 8> Ops;
- for (int M : SVN->getMask()) {
- SDValue Op = DAG.getUNDEF(VT.getScalarType());
- if (M >= 0) {
- int Idx = M % NumElts;
- SDValue &S = (M < (int)NumElts ? N0 : N1);
- if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) {
- Op = S.getOperand(Idx);
- } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) {
- if (Idx == 0)
- Op = S.getOperand(0);
- } else {
- // Operand can't be combined - bail out.
- break;
- }
- }
- Ops.push_back(Op);
- }
- if (Ops.size() == VT.getVectorNumElements()) {
- // BUILD_VECTOR requires all inputs to be of the same type, find the
- // maximum type and extend them all.
- EVT SVT = VT.getScalarType();
- if (SVT.isInteger())
- for (SDValue &Op : Ops)
- SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
- if (SVT != VT.getScalarType())
- for (SDValue &Op : Ops)
- Op = TLI.isZExtFree(Op.getValueType(), SVT)
- ? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT)
- : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT);
- return DAG.getBuildVector(VT, SDLoc(N), Ops);
- }
- }
+ if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
+ if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
+ return Res;
// If this shuffle only has a single input that is a bitcasted shuffle,
// attempt to merge the 2 shuffles and suitably bitcast the inputs/output
@@ -13647,6 +14117,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
+ // Don't try to fold splats; they're likely to simplify somehow, or they
+ // might be free.
+ if (OtherSV->isSplat())
+ return SDValue();
+
// The incoming shuffle must be of the same type as the result of the
// current shuffle.
assert(OtherSV->getOperand(0).getValueType() == VT &&
@@ -13773,10 +14248,20 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
}
SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
+ EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
+ // Combine INSERT_SUBVECTORs where we are inserting to the same index.
+ // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
+ // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
+ if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ N0.getOperand(1).getValueType() == N1.getValueType() &&
+ N0.getOperand(2) == N2)
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
+ N1, N2);
+
if (N0.getValueType() != N1.getValueType())
return SDValue();
@@ -13785,7 +14270,6 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0->getNumOperands() == 2 &&
N2.getOpcode() == ISD::Constant) {
APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue();
- EVT VT = N->getValueType(0);
// Lower half: fold (insert_subvector (concat_vectors X, Y), Z) ->
// (concat_vectors Z, Y)
@@ -13836,7 +14320,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
- SDLoc dl(N);
+ SDLoc DL(N);
// Make sure we're not running after operation legalization where it
// may have custom lowered the vector shuffles.
@@ -13904,8 +14388,8 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
return SDValue();
- SDValue Zero = DAG.getConstant(0, dl, ClearVT);
- return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, dl,
+ SDValue Zero = DAG.getConstant(0, DL, ClearVT);
+ return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
DAG.getBitcast(ClearVT, LHS),
Zero, Indices));
};
@@ -14119,6 +14603,8 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
if (!RLD->isInvariant())
MMOFlags &= ~MachineMemOperand::MOInvariant;
+ if (!RLD->isDereferenceable())
+ MMOFlags &= ~MachineMemOperand::MODereferenceable;
if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
// FIXME: Discards pointer and AA info.
Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
@@ -14146,6 +14632,73 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
return false;
}
+/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
+/// bitwise 'and'.
+SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
+ SDValue N1, SDValue N2, SDValue N3,
+ ISD::CondCode CC) {
+ // If this is a select where the false operand is zero and the compare is a
+ // check of the sign bit, see if we can perform the "gzip trick":
+ // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
+ // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
+ EVT XType = N0.getValueType();
+ EVT AType = N2.getValueType();
+ if (!isNullConstant(N3) || !XType.bitsGE(AType))
+ return SDValue();
+
+ // If the comparison is testing for a positive value, we have to invert
+ // the sign bit mask, so only do that transform if the target has a bitwise
+ // 'and not' instruction (the invert is free).
+ if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
+ // (X > -1) ? A : 0
+ // (X > 0) ? X : 0 <-- This is canonical signed max.
+ if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
+ return SDValue();
+ } else if (CC == ISD::SETLT) {
+ // (X < 0) ? A : 0
+ // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
+ if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
+ return SDValue();
+ } else {
+ return SDValue();
+ }
+
+ // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
+ // constant.
+ EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
+ auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
+ unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
+ SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
+ AddToWorklist(Shift.getNode());
+
+ if (XType.bitsGT(AType)) {
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ AddToWorklist(Shift.getNode());
+ }
+
+ if (CC == ISD::SETGT)
+ Shift = DAG.getNOT(DL, Shift, AType);
+
+ return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+ }
+
+ SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
+ SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
+ AddToWorklist(Shift.getNode());
+
+ if (XType.bitsGT(AType)) {
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ AddToWorklist(Shift.getNode());
+ }
+
+ if (CC == ISD::SETGT)
+ Shift = DAG.getNOT(DL, Shift, AType);
+
+ return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+}
+
/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
@@ -14242,48 +14795,8 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
}
}
- // Check to see if we can perform the "gzip trick", transforming
- // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A)
- if (isNullConstant(N3) && CC == ISD::SETLT &&
- (isNullConstant(N1) || // (a < 0) ? b : 0
- (isOneConstant(N1) && N0 == N2))) { // (a < 1) ? a : 0
- EVT XType = N0.getValueType();
- EVT AType = N2.getValueType();
- if (XType.bitsGE(AType)) {
- // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a
- // single-bit constant.
- if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
- unsigned ShCtV = N2C->getAPIntValue().logBase2();
- ShCtV = XType.getSizeInBits() - ShCtV - 1;
- SDValue ShCt = DAG.getConstant(ShCtV, SDLoc(N0),
- getShiftAmountTy(N0.getValueType()));
- SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0),
- XType, N0, ShCt);
- AddToWorklist(Shift.getNode());
-
- if (XType.bitsGT(AType)) {
- Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
- AddToWorklist(Shift.getNode());
- }
-
- return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
- }
-
- SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0),
- XType, N0,
- DAG.getConstant(XType.getSizeInBits() - 1,
- SDLoc(N0),
- getShiftAmountTy(N0.getValueType())));
- AddToWorklist(Shift.getNode());
-
- if (XType.bitsGT(AType)) {
- Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
- AddToWorklist(Shift.getNode());
- }
-
- return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
- }
- }
+ if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
+ return V;
// fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
// where y is has a single bit set.
@@ -14511,30 +15024,51 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
return S;
}
+/// Determines the LogBase2 value for a non-null input value using the
+/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
+SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
+ EVT VT = V.getValueType();
+ unsigned EltBits = VT.getScalarSizeInBits();
+ SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
+ SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
+ SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
+ return LogBase2;
+}
+
+/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+/// For the reciprocal, we need to find the zero of the function:
+/// F(X) = A X - 1 [which has a zero at X = 1/A]
+/// =>
+/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
+/// does not require additional intermediate precision]
SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) {
if (Level >= AfterLegalizeDAG)
return SDValue();
- // Expose the DAG combiner to the target combiner implementations.
- TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
+ // TODO: Handle half and/or extended types?
+ EVT VT = Op.getValueType();
+ if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
+ return SDValue();
+
+ // If estimates are explicitly disabled for this function, we're done.
+ MachineFunction &MF = DAG.getMachineFunction();
+ int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
+ if (Enabled == TLI.ReciprocalEstimate::Disabled)
+ return SDValue();
+
+ // Estimates may be explicitly enabled for this type with a custom number of
+ // refinement steps.
+ int Iterations = TLI.getDivRefinementSteps(VT, MF);
+ if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
+ AddToWorklist(Est.getNode());
- unsigned Iterations = 0;
- if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) {
if (Iterations) {
- // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
- // For the reciprocal, we need to find the zero of the function:
- // F(X) = A X - 1 [which has a zero at X = 1/A]
- // =>
- // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
- // does not require additional intermediate precision]
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
- AddToWorklist(Est.getNode());
-
// Newton iterations: Est = Est + Est (1 - Arg * Est)
- for (unsigned i = 0; i < Iterations; ++i) {
+ for (int i = 0; i < Iterations; ++i) {
SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
AddToWorklist(NewEst.getNode());
@@ -14656,16 +15190,47 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags,
if (Level >= AfterLegalizeDAG)
return SDValue();
- // Expose the DAG combiner to the target combiner implementations.
- TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
- unsigned Iterations = 0;
+ // TODO: Handle half and/or extended types?
+ EVT VT = Op.getValueType();
+ if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
+ return SDValue();
+
+ // If estimates are explicitly disabled for this function, we're done.
+ MachineFunction &MF = DAG.getMachineFunction();
+ int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
+ if (Enabled == TLI.ReciprocalEstimate::Disabled)
+ return SDValue();
+
+ // Estimates may be explicitly enabled for this type with a custom number of
+ // refinement steps.
+ int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
+
bool UseOneConstNR = false;
- if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) {
+ if (SDValue Est =
+ TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
+ Reciprocal)) {
AddToWorklist(Est.getNode());
+
if (Iterations) {
Est = UseOneConstNR
- ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
- : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
+ ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
+ : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
+
+ if (!Reciprocal) {
+ // Unfortunately, Est is now NaN if the input was exactly 0.0.
+ // Select out this case and force the answer to 0.0.
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+
+ SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
+ EVT CCVT = getSetCCResultType(VT);
+ SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
+ AddToWorklist(ZeroCmp.getNode());
+
+ Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
+ ZeroCmp, FPZero, Est);
+ AddToWorklist(Est.getNode());
+ }
}
return Est;
}
@@ -14678,23 +15243,7 @@ SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
}
SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
- SDValue Est = buildSqrtEstimateImpl(Op, Flags, false);
- if (!Est)
- return SDValue();
-
- // Unfortunately, Est is now NaN if the input was exactly 0.
- // Select out this case and force the answer to 0.
- EVT VT = Est.getValueType();
- SDLoc DL(Op);
- SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
- EVT CCVT = getSetCCResultType(VT);
- SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, Zero, ISD::SETEQ);
- AddToWorklist(ZeroCmp.getNode());
-
- Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, ZeroCmp,
- Zero, Est);
- AddToWorklist(Est.getNode());
- return Est;
+ return buildSqrtEstimateImpl(Op, Flags, false);
}
/// Return true if base is a frame index, which is known not to alias with
@@ -14771,9 +15320,9 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
// To catch this case, look up the actual index of frame indices to compute
// the real alias relationship.
if (isFrameIndex1 && isFrameIndex2) {
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
- Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ Offset1 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
+ Offset2 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
(Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
}
OpenPOWER on IntegriCloud