summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp')
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp4064
1 files changed, 2653 insertions, 1411 deletions
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 1df61e4..a71c676 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -246,10 +246,11 @@ namespace {
SDValue visitSDIVREM(SDNode *N);
SDValue visitUDIVREM(SDNode *N);
SDValue visitAND(SDNode *N);
+ SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
SDValue visitOR(SDNode *N);
+ SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
SDValue visitXOR(SDNode *N);
SDValue SimplifyVBinOp(SDNode *N);
- SDValue SimplifyVUnaryOp(SDNode *N);
SDValue visitSHL(SDNode *N);
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
@@ -267,6 +268,7 @@ namespace {
SDValue visitZERO_EXTEND(SDNode *N);
SDValue visitANY_EXTEND(SDNode *N);
SDValue visitSIGN_EXTEND_INREG(SDNode *N);
+ SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
SDValue visitTRUNCATE(SDNode *N);
SDValue visitBITCAST(SDNode *N);
SDValue visitBUILD_PAIR(SDNode *N);
@@ -302,9 +304,16 @@ namespace {
SDValue visitCONCAT_VECTORS(SDNode *N);
SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
SDValue visitVECTOR_SHUFFLE(SDNode *N);
+ SDValue visitSCALAR_TO_VECTOR(SDNode *N);
SDValue visitINSERT_SUBVECTOR(SDNode *N);
SDValue visitMLOAD(SDNode *N);
SDValue visitMSTORE(SDNode *N);
+ SDValue visitMGATHER(SDNode *N);
+ SDValue visitMSCATTER(SDNode *N);
+ SDValue visitFP_TO_FP16(SDNode *N);
+
+ SDValue visitFADDForFMACombine(SDNode *N);
+ SDValue visitFSUBForFMACombine(SDNode *N);
SDValue XformToShuffleWithZero(SDNode *N);
SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);
@@ -327,6 +336,7 @@ namespace {
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp);
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
+ SDValue CombineExtLoad(SDNode *N);
SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
SDValue BuildSDIV(SDNode *N);
SDValue BuildSDIVPow2(SDNode *N);
@@ -363,6 +373,28 @@ namespace {
/// chain (aliasing node.)
SDValue FindBetterChain(SDNode *N, SDValue Chain);
+ /// Holds a pointer to an LSBaseSDNode as well as information on where it
+ /// is located in a sequence of memory operations connected by a chain.
+ struct MemOpLink {
+ MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
+ MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
+ // Ptr to the mem node.
+ LSBaseSDNode *MemNode;
+ // Offset from the base ptr.
+ int64_t OffsetFromBase;
+ // What is the sequence number of this mem node.
+ // Lowest mem operand in the DAG starts at zero.
+ unsigned SequenceNum;
+ };
+
+ /// This is a helper function for MergeConsecutiveStores. When the source
+ /// elements of the consecutive stores are all constants or all extracted
+ /// vector elements, try to merge them into one larger store.
+ /// \return True if a merged store was created.
+ bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
+ EVT MemVT, unsigned NumElem,
+ bool IsConstantSrc, bool UseVector);
+
/// Merge consecutive store operations into a wide store.
/// This optimization uses wide integers or vectors when possible.
/// \return True if some memory operations were changed.
@@ -380,12 +412,9 @@ namespace {
DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
: DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
- AttributeSet FnAttrs =
- DAG.getMachineFunction().getFunction()->getAttributes();
- ForCodeSize =
- FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize) ||
- FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
+ auto *F = DAG.getMachineFunction().getFunction();
+ ForCodeSize = F->hasFnAttribute(Attribute::OptimizeForSize) ||
+ F->hasFnAttribute(Attribute::MinSize);
}
/// Runs the dag combiner on all nodes in the work list
@@ -446,7 +475,7 @@ void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
}
SDValue TargetLowering::DAGCombinerInfo::
-CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) {
+CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}
@@ -566,7 +595,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
case ISD::ConstantFP: {
APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
V.changeSign();
- return DAG.getConstantFP(V, Op.getValueType());
+ return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
}
case ISD::FADD:
// FIXME: determine better conditions for this xform.
@@ -590,7 +619,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
// fold (fneg (fsub 0, B)) -> B
if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
- if (N0CFP->getValueAPF().isZero())
+ if (N0CFP->isZero())
return Op.getOperand(1);
// fold (fneg (fsub A, B)) -> (fsub B, A)
@@ -683,13 +712,23 @@ static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) {
EltVT.getSizeInBits() >= SplatBitSize);
}
-// \brief Returns the SDNode if it is a constant BuildVector or constant.
-static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) {
+// \brief Returns the SDNode if it is a constant integer BuildVector
+// or constant integer.
+static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) {
if (isa<ConstantSDNode>(N))
return N.getNode();
- BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
- if (BV && BV->isConstant())
- return BV;
+ if (ISD::isBuildVectorOfConstantSDNodes(N.getNode()))
+ return N.getNode();
+ return nullptr;
+}
+
+// \brief Returns the SDNode if it is a constant float BuildVector
+// or constant float.
+static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
+ if (isa<ConstantFPSDNode>(N))
+ return N.getNode();
+ if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
+ return N.getNode();
return nullptr;
}
@@ -735,10 +774,10 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
SDValue N0, SDValue N1) {
EVT VT = N0.getValueType();
if (N0.getOpcode() == Opc) {
- if (SDNode *L = isConstantBuildVectorOrConstantInt(N0.getOperand(1))) {
- if (SDNode *R = isConstantBuildVectorOrConstantInt(N1)) {
+ if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
+ if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1)) {
// reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
- if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R))
+ if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
return SDValue();
}
@@ -755,10 +794,10 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
}
if (N1.getOpcode() == Opc) {
- if (SDNode *R = isConstantBuildVectorOrConstantInt(N1.getOperand(1))) {
- if (SDNode *L = isConstantBuildVectorOrConstantInt(N0)) {
+ if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
+ if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0)) {
// reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
- if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L))
+ if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
return SDValue();
}
@@ -1309,6 +1348,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
+ case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
case ISD::TRUNCATE: return visitTRUNCATE(N);
case ISD::BITCAST: return visitBITCAST(N);
case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
@@ -1344,9 +1384,13 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
+ case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
+ case ISD::MGATHER: return visitMGATHER(N);
case ISD::MLOAD: return visitMLOAD(N);
+ case ISD::MSCATTER: return visitMSCATTER(N);
case ISD::MSTORE: return visitMSTORE(N);
+ case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
}
return SDValue();
}
@@ -1412,9 +1456,10 @@ SDValue DAGCombiner::combine(SDNode *N) {
SDNode *CSENode;
if (const BinaryWithFlagsSDNode *BinNode =
dyn_cast<BinaryWithFlagsSDNode>(N)) {
- CSENode = DAG.getNodeIfExists(
- N->getOpcode(), N->getVTList(), Ops, BinNode->hasNoUnsignedWrap(),
- BinNode->hasNoSignedWrap(), BinNode->isExact());
+ CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
+ BinNode->Flags.hasNoUnsignedWrap(),
+ BinNode->Flags.hasNoSignedWrap(),
+ BinNode->Flags.hasExact());
} else {
CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops);
}
@@ -1471,7 +1516,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
switch (Op.getOpcode()) {
case ISD::EntryToken:
// Entry tokens don't need to be added to the list. They are
- // rededundant.
+ // redundant.
Changed = true;
break;
@@ -1500,7 +1545,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
SDValue Result;
- // If we've change things around then replace token factor.
+ // If we've changed things around then replace token factor.
if (Changed) {
if (Ops.empty()) {
// The entry token is the only possible outcome.
@@ -1510,8 +1555,11 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
}
- // Don't add users to work list.
- return CombineTo(N, Result, false);
+ // Add users to worklist if AA is enabled, since it may introduce
+ // a lot of new chained token factors while removing memory deps.
+ bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
+ : DAG.getSubtarget().useAA();
+ return CombineTo(N, Result, UseAA /*add to worklist*/);
}
return Result;
@@ -1534,17 +1582,42 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
+static bool isNullConstant(SDValue V) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isNullValue();
+}
+
+static bool isNullFPConstant(SDValue V) {
+ ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V);
+ return Const != nullptr && Const->isZero() && !Const->isNegative();
+}
+
+static bool isAllOnesConstant(SDValue V) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isAllOnesValue();
+}
+
+static bool isOneConstant(SDValue V) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isOne();
+}
+
+/// If \p N is a ContantSDNode with isOpaque() == false return it casted to a
+/// ContantSDNode pointer else nullptr.
+static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
+ return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
+}
+
SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// fold vector ops
if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
// fold (add x, 0) -> x, vector edition
if (ISD::isBuildVectorAllZeros(N1.getNode()))
@@ -1559,13 +1632,16 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (N1.getOpcode() == ISD::UNDEF)
return N1;
// fold (add c1, c2) -> c1+c2
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
+ return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, N0C, N1C);
// canonicalize constant to RHS
- if (N0C && !N1C)
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
+ !isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0);
// fold (add x, 0) -> x
- if (N1C && N1C->isNullValue())
+ if (isNullConstant(N1))
return N0;
// fold (add Sym, c) -> Sym+c
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
@@ -1576,22 +1652,21 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
(uint64_t)N1C->getSExtValue());
// fold ((c1-A)+c2) -> (c1+c2)-A
if (N1C && N0.getOpcode() == ISD::SUB)
- if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
- return DAG.getNode(ISD::SUB, SDLoc(N), VT,
+ if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(N1C->getAPIntValue()+
- N0C->getAPIntValue(), VT),
+ N0C->getAPIntValue(), DL, VT),
N0.getOperand(1));
+ }
// reassociate add
- SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1);
- if (RADD.getNode())
+ if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1))
return RADD;
// fold ((0-A) + B) -> B-A
- if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
- cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
+ if (N0.getOpcode() == ISD::SUB && isNullConstant(N0.getOperand(0)))
return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1));
// fold (A + (0-B)) -> A-B
- if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
- cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
+ if (N1.getOpcode() == ISD::SUB && isNullConstant(N1.getOperand(0)))
return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1));
// fold (A+(B-A)) -> B
if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
@@ -1651,34 +1726,27 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
}
// fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
- if (N1.getOpcode() == ISD::SHL &&
- N1.getOperand(0).getOpcode() == ISD::SUB)
- if (ConstantSDNode *C =
- dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0)))
- if (C->getAPIntValue() == 0)
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
- DAG.getNode(ISD::SHL, SDLoc(N), VT,
- N1.getOperand(0).getOperand(1),
- N1.getOperand(1)));
- if (N0.getOpcode() == ISD::SHL &&
- N0.getOperand(0).getOpcode() == ISD::SUB)
- if (ConstantSDNode *C =
- dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0)))
- if (C->getAPIntValue() == 0)
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
- DAG.getNode(ISD::SHL, SDLoc(N), VT,
- N0.getOperand(0).getOperand(1),
- N0.getOperand(1)));
+ if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
+ isNullConstant(N1.getOperand(0).getOperand(0)))
+ return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
+ DAG.getNode(ISD::SHL, SDLoc(N), VT,
+ N1.getOperand(0).getOperand(1),
+ N1.getOperand(1)));
+ if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB &&
+ isNullConstant(N0.getOperand(0).getOperand(0)))
+ return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
+ DAG.getNode(ISD::SHL, SDLoc(N), VT,
+ N0.getOperand(0).getOperand(1),
+ N0.getOperand(1)));
if (N1.getOpcode() == ISD::AND) {
SDValue AndOp0 = N1.getOperand(0);
- ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1));
unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
unsigned DestBits = VT.getScalarType().getSizeInBits();
// (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
// and similar xforms where the inner op is either ~0 or 0.
- if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) {
+ if (NumSignBits == DestBits && isOneConstant(N1->getOperand(1))) {
SDLoc DL(N);
return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
}
@@ -1699,7 +1767,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (TN->getVT() == MVT::i1) {
SDLoc DL(N);
SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
- DAG.getConstant(1, VT));
+ DAG.getConstant(1, DL, VT));
return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
}
}
@@ -1710,8 +1778,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue DAGCombiner::visitADDC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// If the flag result is dead, turn this into an ADD.
@@ -1721,11 +1787,13 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
SDLoc(N), MVT::Glue));
// canonicalize constant to RHS.
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && !N1C)
return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);
// fold (addc x, 0) -> x + no carry out
- if (N1C && N1C->isNullValue())
+ if (isNullConstant(N1))
return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
SDLoc(N), MVT::Glue));
@@ -1752,10 +1820,10 @@ SDValue DAGCombiner::visitADDE(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CarryIn = N->getOperand(2);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
// canonicalize constant to RHS
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && !N1C)
return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
N1, N0, CarryIn);
@@ -1773,25 +1841,21 @@ static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT,
SelectionDAG &DAG,
bool LegalOperations, bool LegalTypes) {
if (!VT.isVector())
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, DL, VT);
if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, DL, VT);
return SDValue();
}
SDValue DAGCombiner::visitSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
- ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
- dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
EVT VT = N0.getValueType();
// fold vector ops
if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
// fold (sub x, 0) -> x, vector edition
if (ISD::isBuildVectorAllZeros(N1.getNode()))
@@ -1803,14 +1867,18 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (N0 == N1)
return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
// fold (sub c1, c2) -> c1-c2
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
+ return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, N0C, N1C);
// fold (sub x, c) -> (add x, -c)
- if (N1C)
- return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0,
- DAG.getConstant(-N1C->getAPIntValue(), VT));
+ if (N1C) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::ADD, DL, VT, N0,
+ DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
+ }
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
- if (N0C && N0C->isAllOnesValue())
+ if (isAllOnesConstant(N0))
return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
// fold A-(A-B) -> B
if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
@@ -1822,10 +1890,13 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
return N0.getOperand(0);
// fold C2-(A+C1) -> (C2-C1)-A
+ ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
+ dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
+ SDLoc DL(N);
SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
- VT);
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, NewC,
+ DL, VT);
+ return DAG.getNode(ISD::SUB, DL, VT, NewC,
N1.getOperand(0));
}
// fold ((A+(B+or-C))-B) -> A+or-C
@@ -1866,7 +1937,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
if (GA->getGlobal() == GB->getGlobal())
return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
- VT);
+ SDLoc(N), VT);
}
// sub X, (sextinreg Y i1) -> add X, (and Y 1)
@@ -1875,7 +1946,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (TN->getVT() == MVT::i1) {
SDLoc DL(N);
SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
- DAG.getConstant(1, VT));
+ DAG.getConstant(1, DL, VT));
return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
}
}
@@ -1886,8 +1957,6 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
SDValue DAGCombiner::visitSUBC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// If the flag result is dead, turn this into an SUB.
@@ -1897,18 +1966,20 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) {
MVT::Glue));
// fold (subc x, x) -> 0 + no borrow
- if (N0 == N1)
- return CombineTo(N, DAG.getConstant(0, VT),
- DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
+ if (N0 == N1) {
+ SDLoc DL(N);
+ return CombineTo(N, DAG.getConstant(0, DL, VT),
+ DAG.getNode(ISD::CARRY_FALSE, DL,
MVT::Glue));
+ }
// fold (subc x, 0) -> x + no borrow
- if (N1C && N1C->isNullValue())
+ if (isNullConstant(N1))
return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
MVT::Glue));
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
- if (N0C && N0C->isAllOnesValue())
+ if (isAllOnesConstant(N0))
return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0),
DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
MVT::Glue));
@@ -1935,33 +2006,41 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// fold (mul x, undef) -> 0
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
bool N0IsConst = false;
bool N1IsConst = false;
+ bool N1IsOpaqueConst = false;
+ bool N0IsOpaqueConst = false;
APInt ConstValue0, ConstValue1;
// fold vector ops
if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0);
N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
} else {
- N0IsConst = dyn_cast<ConstantSDNode>(N0) != nullptr;
- ConstValue0 = N0IsConst ? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue()
- : APInt();
- N1IsConst = dyn_cast<ConstantSDNode>(N1) != nullptr;
- ConstValue1 = N1IsConst ? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue()
- : APInt();
+ N0IsConst = isa<ConstantSDNode>(N0);
+ if (N0IsConst) {
+ ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
+ N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
+ }
+ N1IsConst = isa<ConstantSDNode>(N1);
+ if (N1IsConst) {
+ ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
+ N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
+ }
}
// fold (mul c1, c2) -> c1*c2
- if (N0IsConst && N1IsConst)
- return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0.getNode(), N1.getNode());
+ if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
+ return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
+ N0.getNode(), N1.getNode());
- // canonicalize constant to RHS
- if (N0IsConst && !N1IsConst)
+ // canonicalize constant to RHS (vector doesn't have to splat)
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
+ !isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
// fold (mul x, 0) -> 0
if (N1IsConst && ConstValue1 == 0)
@@ -1974,23 +2053,30 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
return N0;
// fold (mul x, -1) -> 0-x
- if (N1IsConst && ConstValue1.isAllOnesValue())
- return DAG.getNode(ISD::SUB, SDLoc(N), VT,
- DAG.getConstant(0, VT), N0);
+ if (N1IsConst && ConstValue1.isAllOnesValue()) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(0, DL, VT), N0);
+ }
// fold (mul x, (1 << c)) -> x << c
- if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat)
- return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
- DAG.getConstant(ConstValue1.logBase2(),
+ if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() &&
+ IsFullSplat) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SHL, DL, VT, N0,
+ DAG.getConstant(ConstValue1.logBase2(), DL,
getShiftAmountTy(N0.getValueType())));
+ }
// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
- if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) {
+ if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() &&
+ IsFullSplat) {
unsigned Log2Val = (-ConstValue1).logBase2();
+ SDLoc DL(N);
// FIXME: If the input is something that is easily negated (e.g. a
// single-use add), we should put the negate there.
- return DAG.getNode(ISD::SUB, SDLoc(N), VT,
- DAG.getConstant(0, VT),
- DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
- DAG.getConstant(Log2Val,
+ return DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(0, DL, VT),
+ DAG.getNode(ISD::SHL, DL, VT, N0,
+ DAG.getConstant(Log2Val, DL,
getShiftAmountTy(N0.getValueType()))));
}
@@ -2041,8 +2127,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
N0.getOperand(1), N1));
// reassociate mul
- SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1);
- if (RMUL.getNode())
+ if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
return RMUL;
return SDValue();
@@ -2051,26 +2136,27 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
EVT VT = N->getValueType(0);
// fold vector ops
- if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
- }
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
// fold (sdiv c1, c2) -> c1/c2
- if (N0C && N1C && !N1C->isNullValue())
- return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+ if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
+ return DAG.FoldConstantArithmetic(ISD::SDIV, SDLoc(N), VT, N0C, N1C);
// fold (sdiv X, 1) -> X
- if (N1C && N1C->getAPIntValue() == 1LL)
+ if (N1C && N1C->isOne())
return N0;
// fold (sdiv X, -1) -> 0-X
- if (N1C && N1C->isAllOnesValue())
- return DAG.getNode(ISD::SUB, SDLoc(N), VT,
- DAG.getConstant(0, VT), N0);
+ if (N1C && N1C->isAllOnesValue()) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(0, DL, VT), N0);
+ }
// If we know the sign bits of both operands are zero, strength reduce to a
// udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
if (!VT.isVector()) {
@@ -2080,8 +2166,9 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
}
// fold (sdiv X, pow2) -> simple ops after legalize
- if (N1C && !N1C->isNullValue() && (N1C->getAPIntValue().isPowerOf2() ||
- (-N1C->getAPIntValue()).isPowerOf2())) {
+ if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
+ (N1C->getAPIntValue().isPowerOf2() ||
+ (-N1C->getAPIntValue()).isPowerOf2())) {
// If dividing by powers of two is cheap, then don't perform the following
// fold.
if (TLI.isPow2SDivCheap())
@@ -2093,24 +2180,26 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
return Res;
unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
+ SDLoc DL(N);
// Splat the sign bit into the register
SDValue SGN =
- DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
- DAG.getConstant(VT.getScalarSizeInBits() - 1,
+ DAG.getNode(ISD::SRA, DL, VT, N0,
+ DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
getShiftAmountTy(N0.getValueType())));
AddToWorklist(SGN.getNode());
// Add (N0 < 0) ? abs2 - 1 : 0;
SDValue SRL =
- DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN,
- DAG.getConstant(VT.getScalarSizeInBits() - lg2,
+ DAG.getNode(ISD::SRL, DL, VT, SGN,
+ DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
getShiftAmountTy(SGN.getValueType())));
- SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL);
+ SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
AddToWorklist(SRL.getNode());
AddToWorklist(ADD.getNode()); // Divide by pow2
- SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD,
- DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType())));
+ SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
+ DAG.getConstant(lg2, DL,
+ getShiftAmountTy(ADD.getValueType())));
// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
@@ -2118,10 +2207,10 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
return SRA;
AddToWorklist(SRA.getNode());
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA);
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
}
- // if integer divide is expensive and we satisfy the requirements, emit an
+ // If integer divide is expensive and we satisfy the requirements, emit an
// alternate sequence.
if (N1C && !TLI.isIntDivCheap()) {
SDValue Op = BuildSDIV(N);
@@ -2130,7 +2219,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
// undef / X -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// X / undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
@@ -2141,36 +2230,40 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue DAGCombiner::visitUDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
EVT VT = N->getValueType(0);
// fold vector ops
- if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
- }
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
// fold (udiv c1, c2) -> c1/c2
- if (N0C && N1C && !N1C->isNullValue())
- return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+ if (N0C && N1C)
+ if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, SDLoc(N), VT,
+ N0C, N1C))
+ return Folded;
// fold (udiv x, (1 << c)) -> x >>u c
- if (N1C && N1C->getAPIntValue().isPowerOf2())
- return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0,
- DAG.getConstant(N1C->getAPIntValue().logBase2(),
+ if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2()) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SRL, DL, VT, N0,
+ DAG.getConstant(N1C->getAPIntValue().logBase2(), DL,
getShiftAmountTy(N0.getValueType())));
+ }
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
if (N1.getOpcode() == ISD::SHL) {
- if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+ if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
if (SHC->getAPIntValue().isPowerOf2()) {
EVT ADDVT = N1.getOperand(1).getValueType();
- SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N), ADDVT,
+ SDLoc DL(N);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT,
N1.getOperand(1),
DAG.getConstant(SHC->getAPIntValue()
.logBase2(),
- ADDVT));
+ DL, ADDVT));
AddToWorklist(Add.getNode());
- return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add);
+ return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
}
}
}
@@ -2182,7 +2275,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
// undef / X -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// X / undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
@@ -2193,13 +2286,15 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
SDValue DAGCombiner::visitSREM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
EVT VT = N->getValueType(0);
// fold (srem c1, c2) -> c1%c2
- if (N0C && N1C && !N1C->isNullValue())
- return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C);
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+ if (N0C && N1C)
+ if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::SREM, SDLoc(N), VT,
+ N0C, N1C))
+ return Folded;
// If we know the sign bits of both operands are zero, strength reduce to a
// urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
if (!VT.isVector()) {
@@ -2224,7 +2319,7 @@ SDValue DAGCombiner::visitSREM(SDNode *N) {
// undef % X -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// X % undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
@@ -2235,27 +2330,33 @@ SDValue DAGCombiner::visitSREM(SDNode *N) {
SDValue DAGCombiner::visitUREM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
EVT VT = N->getValueType(0);
// fold (urem c1, c2) -> c1%c2
- if (N0C && N1C && !N1C->isNullValue())
- return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+ if (N0C && N1C)
+ if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UREM, SDLoc(N), VT,
+ N0C, N1C))
+ return Folded;
// fold (urem x, pow2) -> (and x, pow2-1)
- if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2())
- return DAG.getNode(ISD::AND, SDLoc(N), VT, N0,
- DAG.getConstant(N1C->getAPIntValue()-1,VT));
+ if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
+ N1C->getAPIntValue().isPowerOf2()) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AND, DL, VT, N0,
+ DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT));
+ }
// fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
if (N1.getOpcode() == ISD::SHL) {
- if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+ if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
if (SHC->getAPIntValue().isPowerOf2()) {
+ SDLoc DL(N);
SDValue Add =
- DAG.getNode(ISD::ADD, SDLoc(N), VT, N1,
- DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
+ DAG.getNode(ISD::ADD, DL, VT, N1,
+ DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL,
VT));
AddToWorklist(Add.getNode());
- return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add);
+ return DAG.getNode(ISD::AND, DL, VT, N0, Add);
}
}
}
@@ -2277,7 +2378,7 @@ SDValue DAGCombiner::visitUREM(SDNode *N) {
// undef % X -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// X % undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
@@ -2288,21 +2389,23 @@ SDValue DAGCombiner::visitUREM(SDNode *N) {
SDValue DAGCombiner::visitMULHS(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
// fold (mulhs x, 0) -> 0
- if (N1C && N1C->isNullValue())
+ if (isNullConstant(N1))
return N1;
// fold (mulhs x, 1) -> (sra x, size(x)-1)
- if (N1C && N1C->getAPIntValue() == 1)
- return DAG.getNode(ISD::SRA, SDLoc(N), N0.getValueType(), N0,
+ if (isOneConstant(N1)) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
+ DL,
getShiftAmountTy(N0.getValueType())));
+ }
// fold (mulhs x, undef) -> 0
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// If the type twice as wide is legal, transform the mulhs to a wider multiply
// plus a shift.
@@ -2315,7 +2418,8 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
- DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
+ DAG.getConstant(SimpleSize, DL,
+ getShiftAmountTy(N1.getValueType())));
return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
}
}
@@ -2326,19 +2430,18 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
SDValue DAGCombiner::visitMULHU(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
// fold (mulhu x, 0) -> 0
- if (N1C && N1C->isNullValue())
+ if (isNullConstant(N1))
return N1;
// fold (mulhu x, 1) -> 0
- if (N1C && N1C->getAPIntValue() == 1)
- return DAG.getConstant(0, N0.getValueType());
+ if (isOneConstant(N1))
+ return DAG.getConstant(0, DL, N0.getValueType());
// fold (mulhu x, undef) -> 0
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, DL, VT);
// If the type twice as wide is legal, transform the mulhu to a wider multiply
// plus a shift.
@@ -2351,7 +2454,8 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
- DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
+ DAG.getConstant(SimpleSize, DL,
+ getShiftAmountTy(N1.getValueType())));
return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
}
}
@@ -2417,8 +2521,8 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
- // If the type twice as wide is legal, transform the mulhu to a wider multiply
- // plus a shift.
+ // If the type is twice as wide is legal, transform the mulhu to a wider
+ // multiply plus a shift.
if (VT.isSimple() && !VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
@@ -2429,7 +2533,8 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
// Compute the high part as N1.
Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
- DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
+ DAG.getConstant(SimpleSize, DL,
+ getShiftAmountTy(Lo.getValueType())));
Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
// Compute the low part as N0.
Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
@@ -2447,8 +2552,8 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
- // If the type twice as wide is legal, transform the mulhu to a wider multiply
- // plus a shift.
+ // If the type is twice as wide is legal, transform the mulhu to a wider
+ // multiply plus a shift.
if (VT.isSimple() && !VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
@@ -2459,7 +2564,8 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
// Compute the high part as N1.
Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
- DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
+ DAG.getConstant(SimpleSize, DL,
+ getShiftAmountTy(Lo.getValueType())));
Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
// Compute the low part as N0.
Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
@@ -2615,7 +2721,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
// build vector of all zeros that might be illegal at this stage.
if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
if (!LegalTypes)
- ShOp = DAG.getConstant(0, VT);
+ ShOp = DAG.getConstant(0, SDLoc(N), VT);
else
ShOp = SDValue();
}
@@ -2636,7 +2742,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
ShOp = N0->getOperand(0);
if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
if (!LegalTypes)
- ShOp = DAG.getConstant(0, VT);
+ ShOp = DAG.getConstant(0, SDLoc(N), VT);
else
ShOp = SDValue();
}
@@ -2657,19 +2763,122 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
return SDValue();
}
+/// This contains all DAGCombine rules which reduce two values combined by
+/// an And operation to a single value. This makes them reusable in the context
+/// of visitSELECT(). Rules involving constants are not included as
+/// visitSELECT() already handles those cases.
+SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
+ SDNode *LocReference) {
+ EVT VT = N1.getValueType();
+
+ // fold (and x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, SDLoc(LocReference), VT);
+ // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
+ SDValue LL, LR, RL, RR, CC0, CC1;
+ if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+ ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+
+ if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+ LL.getValueType().isInteger()) {
+ // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
+ if (isNullConstant(LR) && Op1 == ISD::SETEQ) {
+ SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ORNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ }
+ if (isAllOnesConstant(LR)) {
+ // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
+ if (Op1 == ISD::SETEQ) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ANDNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
+ }
+ // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
+ if (Op1 == ISD::SETGT) {
+ SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ORNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ }
+ }
+ }
+ // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
+ if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
+ Op0 == Op1 && LL.getValueType().isInteger() &&
+ Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
+ (isAllOnesConstant(LR) && isNullConstant(RR)))) {
+ SDLoc DL(N0);
+ SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(),
+ LL, DAG.getConstant(1, DL,
+ LL.getValueType()));
+ AddToWorklist(ADDNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode,
+ DAG.getConstant(2, DL, LL.getValueType()),
+ ISD::SETUGE);
+ }
+ // canonicalize equivalent to ll == rl
+ if (LL == RR && LR == RL) {
+ Op1 = ISD::getSetCCSwappedOperands(Op1);
+ std::swap(RL, RR);
+ }
+ if (LL == RL && LR == RR) {
+ bool isInteger = LL.getValueType().isInteger();
+ ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
+ if (Result != ISD::SETCC_INVALID &&
+ (!LegalOperations ||
+ (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
+ TLI.isOperationLegal(ISD::SETCC,
+ getSetCCResultType(N0.getSimpleValueType())))))
+ return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
+ LL, LR, Result);
+ }
+ }
+
+ if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
+ VT.getSizeInBits() <= 64) {
+ if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ APInt ADDC = ADDI->getAPIntValue();
+ if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+ // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
+ // immediate for an add, but it is legal if its top c2 bits are set,
+ // transform the ADD so the immediate doesn't need to be materialized
+ // in a register.
+ if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ SRLI->getZExtValue());
+ if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
+ ADDC |= Mask;
+ if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+ SDLoc DL(N0);
+ SDValue NewAdd =
+ DAG.getNode(ISD::ADD, DL, VT,
+ N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
+ CombineTo(N0.getNode(), NewAdd);
+ // Return N so it doesn't get rechecked!
+ return SDValue(LocReference, 0);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- SDValue LL, LR, RL, RR, CC0, CC1;
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N1.getValueType();
- unsigned BitWidth = VT.getScalarType().getSizeInBits();
// fold vector ops
if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
// fold (and x, 0) -> 0, vector edition
if (ISD::isBuildVectorAllZeros(N0.getNode()))
@@ -2677,13 +2886,13 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return DAG.getConstant(
APInt::getNullValue(
N0.getValueType().getScalarType().getSizeInBits()),
- N0.getValueType());
+ SDLoc(N), N0.getValueType());
if (ISD::isBuildVectorAllZeros(N1.getNode()))
// do not return N1, because undef node may exist in N1
return DAG.getConstant(
APInt::getNullValue(
N1.getValueType().getScalarType().getSizeInBits()),
- N1.getValueType());
+ SDLoc(N), N1.getValueType());
// fold (and x, -1) -> x, vector edition
if (ISD::isBuildVectorAllOnes(N0.getNode()))
@@ -2692,25 +2901,25 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return N0;
}
- // fold (and x, undef) -> 0
- if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
// fold (and c1, c2) -> c1&c2
- if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C);
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N0C && N1C && !N1C->isOpaque())
+ return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
// canonicalize constant to RHS
- if (N0C && !N1C)
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
+ !isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
// fold (and x, -1) -> x
- if (N1C && N1C->isAllOnesValue())
+ if (isAllOnesConstant(N1))
return N0;
// if (and x, c) is known to be zero, return 0
+ unsigned BitWidth = VT.getScalarType().getSizeInBits();
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(BitWidth)))
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// reassociate and
- SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1);
- if (RAND.getNode())
+ if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
return RAND;
// fold (and (or x, C), D) -> D if (C & D) == D
if (N1C && N0.getOpcode() == ISD::OR)
@@ -2840,117 +3049,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
- // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
- if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
- ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
- ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
-
- if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
- LL.getValueType().isInteger()) {
- // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
- if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
- }
- // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
- if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) {
- SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ANDNode.getNode());
- return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1);
- }
- // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
- if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
- }
- }
- // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
- if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
- Op0 == Op1 && LL.getValueType().isInteger() &&
- Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() &&
- cast<ConstantSDNode>(RR)->isAllOnesValue()) ||
- (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
- cast<ConstantSDNode>(RR)->isNullValue()))) {
- SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(),
- LL, DAG.getConstant(1, LL.getValueType()));
- AddToWorklist(ADDNode.getNode());
- return DAG.getSetCC(SDLoc(N), VT, ADDNode,
- DAG.getConstant(2, LL.getValueType()), ISD::SETUGE);
- }
- // canonicalize equivalent to ll == rl
- if (LL == RR && LR == RL) {
- Op1 = ISD::getSetCCSwappedOperands(Op1);
- std::swap(RL, RR);
- }
- if (LL == RL && LR == RR) {
- bool isInteger = LL.getValueType().isInteger();
- ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
- if (Result != ISD::SETCC_INVALID &&
- (!LegalOperations ||
- (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
- TLI.isOperationLegal(ISD::SETCC,
- getSetCCResultType(N0.getSimpleValueType())))))
- return DAG.getSetCC(SDLoc(N), N0.getValueType(),
- LL, LR, Result);
- }
- }
-
- // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
- if (N0.getOpcode() == N1.getOpcode()) {
- SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
- if (Tmp.getNode()) return Tmp;
- }
-
- // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
- // fold (and (sra)) -> (and (srl)) when possible.
- if (!VT.isVector() &&
- SimplifyDemandedBits(SDValue(N, 0)))
- return SDValue(N, 0);
-
- // fold (zext_inreg (extload x)) -> (zextload x)
- if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- EVT MemVT = LN0->getMemoryVT();
- // If we zero all the possible extended bits, then we can turn this into
- // a zextload if we are running before legalize or the operation is legal.
- unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
- if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
- BitWidth - MemVT.getScalarType().getSizeInBits())) &&
- ((!LegalOperations && !LN0->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
- LN0->getChain(), LN0->getBasePtr(),
- MemVT, LN0->getMemOperand());
- AddToWorklist(N);
- CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
- }
- }
- // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
- if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
- N0.hasOneUse()) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- EVT MemVT = LN0->getMemoryVT();
- // If we zero all the possible extended bits, then we can turn this into
- // a zextload if we are running before legalize or the operation is legal.
- unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
- if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
- BitWidth - MemVT.getScalarType().getSizeInBits())) &&
- ((!LegalOperations && !LN0->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
- LN0->getChain(), LN0->getBasePtr(),
- MemVT, LN0->getMemOperand());
- AddToWorklist(N);
- CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
- }
- }
// fold (and (load x), 255) -> (zextload x, i8)
// fold (and (extload x, i16), 255) -> (zextload x, i8)
@@ -3001,8 +3099,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
unsigned LVTStoreBytes = LoadedVT.getStoreSize();
unsigned EVTStoreBytes = ExtVT.getStoreSize();
unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
- NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), PtrType,
- NewPtr, DAG.getConstant(PtrOff, PtrType));
+ SDLoc DL(LN0);
+ NewPtr = DAG.getNode(ISD::ADD, DL, PtrType,
+ NewPtr, DAG.getConstant(PtrOff, DL, PtrType));
Alignment = MinAlign(Alignment, PtrOff);
}
@@ -3022,33 +3121,60 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
}
- if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
- VT.getSizeInBits() <= 64) {
- if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
- APInt ADDC = ADDI->getAPIntValue();
- if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
- // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
- // immediate for an add, but it is legal if its top c2 bits are set,
- // transform the ADD so the immediate doesn't need to be materialized
- // in a register.
- if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
- APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
- SRLI->getZExtValue());
- if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
- ADDC |= Mask;
- if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
- SDValue NewAdd =
- DAG.getNode(ISD::ADD, SDLoc(N0), VT,
- N0.getOperand(0), DAG.getConstant(ADDC, VT));
- CombineTo(N0.getNode(), NewAdd);
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
- }
- }
- }
- }
- }
+ if (SDValue Combined = visitANDLike(N0, N1, N))
+ return Combined;
+
+ // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
+ if (N0.getOpcode() == N1.getOpcode()) {
+ SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+ if (Tmp.getNode()) return Tmp;
}
+ // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
+ // fold (and (sra)) -> (and (srl)) when possible.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (zext_inreg (extload x)) -> (zextload x)
+ if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ // If we zero all the possible extended bits, then we can turn this into
+ // a zextload if we are running before legalize or the operation is legal.
+ unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
+ if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
+ BitWidth - MemVT.getScalarType().getSizeInBits())) &&
+ ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ MemVT, LN0->getMemOperand());
+ AddToWorklist(N);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
+ if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ // If we zero all the possible extended bits, then we can turn this into
+ // a zextload if we are running before legalize or the operation is legal.
+ unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
+ if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
+ BitWidth - MemVT.getScalarType().getSizeInBits())) &&
+ ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ MemVT, LN0->getMemOperand());
+ AddToWorklist(N);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
// fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
@@ -3159,9 +3285,12 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
}
SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
- if (OpSizeInBits > 16)
- Res = DAG.getNode(ISD::SRL, SDLoc(N), VT, Res,
- DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT)));
+ if (OpSizeInBits > 16) {
+ SDLoc DL(N);
+ Res = DAG.getNode(ISD::SRL, DL, VT, Res,
+ DAG.getConstant(OpSizeInBits - 16, DL,
+ getShiftAmountTy(VT)));
+ }
return Res;
}
@@ -3299,33 +3428,125 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
return SDValue();
- SDValue BSwap = DAG.getNode(ISD::BSWAP, SDLoc(N), VT,
- SDValue(Parts[0],0));
+ SDLoc DL(N);
+ SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
+ SDValue(Parts[0], 0));
// Result of the bswap should be rotated by 16. If it's not legal, then
// do (x << 16) | (x >> 16).
- SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT));
+ SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
- return DAG.getNode(ISD::ROTL, SDLoc(N), VT, BSwap, ShAmt);
+ return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
- return DAG.getNode(ISD::ROTR, SDLoc(N), VT, BSwap, ShAmt);
- return DAG.getNode(ISD::OR, SDLoc(N), VT,
- DAG.getNode(ISD::SHL, SDLoc(N), VT, BSwap, ShAmt),
- DAG.getNode(ISD::SRL, SDLoc(N), VT, BSwap, ShAmt));
+ return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
+ return DAG.getNode(ISD::OR, DL, VT,
+ DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
+ DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
+}
+
+/// This contains all DAGCombine rules which reduce two values combined by
+/// an Or operation to a single value \see visitANDLike().
+SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
+ EVT VT = N1.getValueType();
+ // fold (or x, undef) -> -1
+ if (!LegalOperations &&
+ (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
+ EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+ return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()),
+ SDLoc(LocReference), VT);
+ }
+ // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
+ SDValue LL, LR, RL, RR, CC0, CC1;
+ if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+ ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+
+ if (LR == RR && Op0 == Op1 && LL.getValueType().isInteger()) {
+ // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
+ // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
+ if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
+ SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ORNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ }
+ // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
+ // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1)
+ if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ANDNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
+ }
+ }
+ // canonicalize equivalent to ll == rl
+ if (LL == RR && LR == RL) {
+ Op1 = ISD::getSetCCSwappedOperands(Op1);
+ std::swap(RL, RR);
+ }
+ if (LL == RL && LR == RR) {
+ bool isInteger = LL.getValueType().isInteger();
+ ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
+ if (Result != ISD::SETCC_INVALID &&
+ (!LegalOperations ||
+ (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
+ TLI.isOperationLegal(ISD::SETCC,
+ getSetCCResultType(N0.getValueType())))))
+ return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
+ LL, LR, Result);
+ }
+ }
+
+ // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
+ if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
+ // Don't increase # computations.
+ (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
+ // We can only do this xform if we know that bits from X that are set in C2
+ // but not in C1 are already zero. Likewise for Y.
+ if (const ConstantSDNode *N0O1C =
+ getAsNonOpaqueConstant(N0.getOperand(1))) {
+ if (const ConstantSDNode *N1O1C =
+ getAsNonOpaqueConstant(N1.getOperand(1))) {
+ // We can only do this xform if we know that bits from X that are set in
+ // C2 but not in C1 are already zero. Likewise for Y.
+ const APInt &LHSMask = N0O1C->getAPIntValue();
+ const APInt &RHSMask = N1O1C->getAPIntValue();
+
+ if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
+ DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
+ SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
+ N0.getOperand(0), N1.getOperand(0));
+ SDLoc DL(LocReference);
+ return DAG.getNode(ISD::AND, DL, VT, X,
+ DAG.getConstant(LHSMask | RHSMask, DL, VT));
+ }
+ }
+ }
+ }
+
+ // (or (and X, M), (and X, N)) -> (and X, (or M, N))
+ if (N0.getOpcode() == ISD::AND &&
+ N1.getOpcode() == ISD::AND &&
+ N0.getOperand(0) == N1.getOperand(0) &&
+ // Don't increase # computations.
+ (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
+ SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
+ N0.getOperand(1), N1.getOperand(1));
+ return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X);
+ }
+
+ return SDValue();
}
SDValue DAGCombiner::visitOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- SDValue LL, LR, RL, RR, CC0, CC1;
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N1.getValueType();
// fold vector ops
if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
// fold (or x, 0) -> x, vector edition
if (ISD::isBuildVectorAllZeros(N0.getNode()))
@@ -3339,13 +3560,13 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return DAG.getConstant(
APInt::getAllOnesValue(
N0.getValueType().getScalarType().getSizeInBits()),
- N0.getValueType());
+ SDLoc(N), N0.getValueType());
if (ISD::isBuildVectorAllOnes(N1.getNode()))
// do not return N1, because undef node may exist in N1
return DAG.getConstant(
APInt::getAllOnesValue(
N1.getValueType().getScalarType().getSizeInBits()),
- N1.getValueType());
+ SDLoc(N), N1.getValueType());
// fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1)
// fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2)
@@ -3404,28 +3625,28 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
}
}
- // fold (or x, undef) -> -1
- if (!LegalOperations &&
- (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
- EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
- return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
- }
// fold (or c1, c2) -> c1|c2
- if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N0C && N1C && !N1C->isOpaque())
+ return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
// canonicalize constant to RHS
- if (N0C && !N1C)
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
+ !isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
// fold (or x, 0) -> x
- if (N1C && N1C->isNullValue())
+ if (isNullConstant(N1))
return N0;
// fold (or x, -1) -> -1
- if (N1C && N1C->isAllOnesValue())
+ if (isAllOnesConstant(N1))
return N1;
// fold (or x, c) -> c iff (x & ~c) == 0
if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
return N1;
+ if (SDValue Combined = visitORLike(N0, N1, N))
+ return Combined;
+
// Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
SDValue BSwap = MatchBSwapHWord(N, N0, N1);
if (BSwap.getNode())
@@ -3435,8 +3656,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return BSwap;
// reassociate or
- SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1);
- if (ROR.getNode())
+ if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
return ROR;
// Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
// iff (c1 & c2) == 0.
@@ -3444,86 +3664,20 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
isa<ConstantSDNode>(N0.getOperand(1))) {
ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) {
- if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1))
+ if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
+ N1C, C1))
return DAG.getNode(
ISD::AND, SDLoc(N), VT,
DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
return SDValue();
}
}
- // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
- if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
- ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
- ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
-
- if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
- LL.getValueType().isInteger()) {
- // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
- // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
- if (cast<ConstantSDNode>(LR)->isNullValue() &&
- (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
- }
- // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
- // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1)
- if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
- (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
- SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
- LR.getValueType(), LL, RL);
- AddToWorklist(ANDNode.getNode());
- return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1);
- }
- }
- // canonicalize equivalent to ll == rl
- if (LL == RR && LR == RL) {
- Op1 = ISD::getSetCCSwappedOperands(Op1);
- std::swap(RL, RR);
- }
- if (LL == RL && LR == RR) {
- bool isInteger = LL.getValueType().isInteger();
- ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
- if (Result != ISD::SETCC_INVALID &&
- (!LegalOperations ||
- (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
- TLI.isOperationLegal(ISD::SETCC,
- getSetCCResultType(N0.getValueType())))))
- return DAG.getSetCC(SDLoc(N), N0.getValueType(),
- LL, LR, Result);
- }
- }
-
// Simplify: (or (op x...), (op y...)) -> (op (or x, y))
if (N0.getOpcode() == N1.getOpcode()) {
SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
if (Tmp.getNode()) return Tmp;
}
- // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
- if (N0.getOpcode() == ISD::AND &&
- N1.getOpcode() == ISD::AND &&
- N0.getOperand(1).getOpcode() == ISD::Constant &&
- N1.getOperand(1).getOpcode() == ISD::Constant &&
- // Don't increase # computations.
- (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
- // We can only do this xform if we know that bits from X that are set in C2
- // but not in C1 are already zero. Likewise for Y.
- const APInt &LHSMask =
- cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
- const APInt &RHSMask =
- cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();
-
- if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
- DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
- SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
- N0.getOperand(0), N1.getOperand(0));
- return DAG.getNode(ISD::AND, SDLoc(N), VT, X,
- DAG.getConstant(LHSMask | RHSMask, VT));
- }
- }
-
// See if this is some rotate idiom.
if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
return SDValue(Rot, 0);
@@ -3751,7 +3905,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
}
- Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT));
+ Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, DL, VT));
}
return Rot.getNode();
@@ -3793,15 +3947,12 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- SDValue LHS, RHS, CC;
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// fold vector ops
if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
// fold (xor x, 0) -> x, vector edition
if (ISD::isBuildVectorAllZeros(N0.getNode()))
@@ -3812,27 +3963,30 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
// fold (xor undef, undef) -> 0. This is a common idiom (misuse).
if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// fold (xor x, undef) -> undef
if (N0.getOpcode() == ISD::UNDEF)
return N0;
if (N1.getOpcode() == ISD::UNDEF)
return N1;
// fold (xor c1, c2) -> c1^c2
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
+ return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
// canonicalize constant to RHS
- if (N0C && !N1C)
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
+ !isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
// fold (xor x, 0) -> x
- if (N1C && N1C->isNullValue())
+ if (isNullConstant(N1))
return N0;
// reassociate xor
- SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1);
- if (RXOR.getNode())
+ if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
return RXOR;
// fold !(x cc y) -> (x !cc y)
+ SDValue LHS, RHS, CC;
if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
bool isInt = LHS.getValueType().isInteger();
ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
@@ -3853,18 +4007,19 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
// fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
- if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND &&
+ if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
N0.getNode()->hasOneUse() &&
isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
SDValue V = N0.getOperand(0);
- V = DAG.getNode(ISD::XOR, SDLoc(N0), V.getValueType(), V,
- DAG.getConstant(1, V.getValueType()));
+ SDLoc DL(N0);
+ V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
+ DAG.getConstant(1, DL, V.getValueType()));
AddToWorklist(V.getNode());
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
}
// fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
- if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 &&
+ if (isOneConstant(N1) && VT == MVT::i1 &&
(N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
@@ -3876,7 +4031,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
}
// fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
- if (N1C && N1C->isAllOnesValue() &&
+ if (isAllOnesConstant(N1) &&
(N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
@@ -3897,21 +4052,48 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
// fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
if (N1C && N0.getOpcode() == ISD::XOR) {
- ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
- ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
- if (N00C)
- return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(1),
+ if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
DAG.getConstant(N1C->getAPIntValue() ^
- N00C->getAPIntValue(), VT));
- if (N01C)
- return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(0),
+ N00C->getAPIntValue(), DL, VT));
+ }
+ if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
DAG.getConstant(N1C->getAPIntValue() ^
- N01C->getAPIntValue(), VT));
+ N01C->getAPIntValue(), DL, VT));
+ }
}
// fold (xor x, x) -> 0
if (N0 == N1)
return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
+ // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
+ // Here is a concrete example of this equivalence:
+ // i16 x == 14
+ // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
+ // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
+ //
+ // =>
+ //
+ // i16 ~1 == 0b1111111111111110
+ // i16 rol(~1, 14) == 0b1011111111111111
+ //
+ // Some additional tips to help conceptualize this transform:
+ // - Try to see the operation as placing a single zero in a value of all ones.
+ // - There exists no value for x which would allow the result to contain zero.
+ // - Values of x larger than the bitwidth are undefined and do not require a
+ // consistent result.
+ // - Pushing the zero left requires shifting one bits in from the right.
+ // A rotate left of ~1 is a nice way of achieving the desired result.
+ if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
+ && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
+ N0.getOperand(1));
+ }
+
// Simplify: xor (op x...), (op y...) -> (op (xor x, y))
if (N0.getOpcode() == N1.getOpcode()) {
SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
@@ -3929,10 +4111,6 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
- // We can't and shouldn't fold opaque constants.
- if (Amt->isOpaque())
- return SDValue();
-
SDNode *LHS = N->getOperand(0).getNode();
if (!LHS->hasOneUse()) return SDValue();
@@ -3959,8 +4137,8 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
}
// We require the RHS of the binop to be a constant and not opaque as well.
- ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
- if (!BinOpCst || BinOpCst->isOpaque()) return SDValue();
+ ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
+ if (!BinOpCst) return SDValue();
// FIXME: disable this unless the input to the binop is a shift by a constant.
// If it is not a shift, it pessimizes some common cases like:
@@ -4013,14 +4191,17 @@ SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
SDValue N01 = N->getOperand(0).getOperand(1);
if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) {
- EVT TruncVT = N->getValueType(0);
- SDValue N00 = N->getOperand(0).getOperand(0);
- APInt TruncC = N01C->getAPIntValue();
- TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());
+ if (!N01C->isOpaque()) {
+ EVT TruncVT = N->getValueType(0);
+ SDValue N00 = N->getOperand(0).getOperand(0);
+ APInt TruncC = N01C->getAPIntValue();
+ TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());
+ SDLoc DL(N);
- return DAG.getNode(ISD::AND, SDLoc(N), TruncVT,
- DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, N00),
- DAG.getConstant(TruncC, TruncVT));
+ return DAG.getNode(ISD::AND, DL, TruncVT,
+ DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00),
+ DAG.getConstant(TruncC, DL, TruncVT));
+ }
}
}
@@ -4042,15 +4223,14 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
SDValue DAGCombiner::visitSHL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
// fold vector ops
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
// If setcc produces all-one true value then:
@@ -4064,7 +4244,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
TargetLowering::ZeroOrNegativeOneBooleanContent) {
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV))
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
+ N01CV, N1CV))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
}
} else {
@@ -4074,10 +4255,11 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
}
// fold (shl c1, c2) -> c1<<c2
- if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ if (N0C && N1C && !N1C->isOpaque())
+ return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
// fold (shl 0, x) -> 0
- if (N0C && N0C->isNullValue())
+ if (isNullConstant(N0))
return N0;
// fold (shl x, c >= size(x)) -> undef
if (N1C && N1C->getZExtValue() >= OpSizeInBits)
@@ -4087,11 +4269,11 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return N0;
// fold (shl undef, x) -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// if (shl x, c) is known to be zero, return 0
if (DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(OpSizeInBits)))
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
@@ -4108,10 +4290,11 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
uint64_t c1 = N0C1->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
+ SDLoc DL(N);
if (c1 + c2 >= OpSizeInBits)
- return DAG.getConstant(0, VT);
- return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(c1 + c2, N1.getValueType()));
+ return DAG.getConstant(0, DL, VT);
+ return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, DL, N1.getValueType()));
}
}
@@ -4131,12 +4314,13 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
EVT InnerShiftVT = N0Op0.getValueType();
uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
if (c2 >= OpSizeInBits - InnerShiftSize) {
+ SDLoc DL(N0);
if (c1 + c2 >= OpSizeInBits)
- return DAG.getConstant(0, VT);
- return DAG.getNode(ISD::SHL, SDLoc(N0), VT,
- DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
+ return DAG.getConstant(0, DL, VT);
+ return DAG.getNode(ISD::SHL, DL, VT,
+ DAG.getNode(N0.getOpcode(), DL, VT,
N0Op0->getOperand(0)),
- DAG.getConstant(c1 + c2, N1.getValueType()));
+ DAG.getConstant(c1 + c2, DL, N1.getValueType()));
}
}
}
@@ -4154,8 +4338,10 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if (c1 == c2) {
SDValue NewOp0 = N0.getOperand(0);
EVT CountVT = NewOp0.getOperand(1).getValueType();
- SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(),
- NewOp0, DAG.getConstant(c2, CountVT));
+ SDLoc DL(N);
+ SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
+ NewOp0,
+ DAG.getConstant(c2, DL, CountVT));
AddToWorklist(NewSHL.getNode());
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
}
@@ -4176,25 +4362,30 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
SDValue Shift;
if (c2 > c1) {
Mask = Mask.shl(c2 - c1);
- Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(c2 - c1, N1.getValueType()));
+ SDLoc DL(N);
+ Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(c2 - c1, DL, N1.getValueType()));
} else {
Mask = Mask.lshr(c1 - c2);
- Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(c1 - c2, N1.getValueType()));
+ SDLoc DL(N);
+ Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(c1 - c2, DL, N1.getValueType()));
}
- return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift,
- DAG.getConstant(Mask, VT));
+ SDLoc DL(N0);
+ return DAG.getNode(ISD::AND, DL, VT, Shift,
+ DAG.getConstant(Mask, DL, VT));
}
}
}
// fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
unsigned BitSize = VT.getScalarSizeInBits();
+ SDLoc DL(N);
SDValue HiBitsMask =
DAG.getConstant(APInt::getHighBitsSet(BitSize,
- BitSize - N1C->getZExtValue()), VT);
- return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
+ BitSize - N1C->getZExtValue()),
+ DL, VT);
+ return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0),
HiBitsMask);
}
@@ -4210,7 +4401,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
}
- if (N1C) {
+ if (N1C && !N1C->isOpaque()) {
SDValue NewSHL = visitShiftByConstant(N, N1C);
if (NewSHL.getNode())
return NewSHL;
@@ -4222,27 +4413,27 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
SDValue DAGCombiner::visitSRA(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
// fold vector ops
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
N1C = isConstOrConstSplat(N1);
}
// fold (sra c1, c2) -> (sra c1, c2)
- if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ if (N0C && N1C && !N1C->isOpaque())
+ return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
// fold (sra 0, x) -> 0
- if (N0C && N0C->isNullValue())
+ if (isNullConstant(N0))
return N0;
// fold (sra -1, x) -> -1
- if (N0C && N0C->isAllOnesValue())
+ if (isAllOnesConstant(N0))
return N0;
// fold (sra x, (setge c, size(x))) -> undef
if (N1C && N1C->getZExtValue() >= OpSizeInBits)
@@ -4270,8 +4461,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
if (Sum >= OpSizeInBits)
Sum = OpSizeInBits - 1;
- return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(Sum, N1.getValueType()));
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
+ DAG.getConstant(Sum, DL, N1.getValueType()));
}
}
@@ -4303,14 +4495,15 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
TLI.isTruncateFree(VT, TruncVT)) {
- SDValue Amt = DAG.getConstant(ShiftAmt,
- getShiftAmountTy(N0.getOperand(0).getValueType()));
- SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), VT,
- N0.getOperand(0), Amt);
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), TruncVT,
- Shift);
- return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N),
- N->getValueType(0), Trunc);
+ SDLoc DL(N);
+ SDValue Amt = DAG.getConstant(ShiftAmt, DL,
+ getShiftAmountTy(N0.getOperand(0).getValueType()));
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
+ N0.getOperand(0), Amt);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
+ Shift);
+ return DAG.getNode(ISD::SIGN_EXTEND, DL,
+ N->getValueType(0), Trunc);
}
}
}
@@ -4337,12 +4530,13 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
EVT LargeVT = N0Op0.getValueType();
if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
+ SDLoc DL(N);
SDValue Amt =
- DAG.getConstant(LargeShiftVal + N1C->getZExtValue(),
+ DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
- SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT,
+ SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
N0Op0.getOperand(0), Amt);
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
}
}
}
@@ -4356,7 +4550,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
- if (N1C) {
+ if (N1C && !N1C->isOpaque()) {
SDValue NewSRA = visitShiftByConstant(N, N1C);
if (NewSRA.getNode())
return NewSRA;
@@ -4368,24 +4562,24 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
SDValue DAGCombiner::visitSRL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
// fold vector ops
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
N1C = isConstOrConstSplat(N1);
}
// fold (srl c1, c2) -> c1 >>u c2
- if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ if (N0C && N1C && !N1C->isOpaque())
+ return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
// fold (srl 0, x) -> 0
- if (N0C && N0C->isNullValue())
+ if (isNullConstant(N0))
return N0;
// fold (srl x, c >= size(x)) -> undef
if (N1C && N1C->getZExtValue() >= OpSizeInBits)
@@ -4396,17 +4590,18 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// if (srl x, c) is known to be zero, return 0
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(OpSizeInBits)))
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
if (N1C && N0.getOpcode() == ISD::SRL) {
if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) {
uint64_t c1 = N01C->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
+ SDLoc DL(N);
if (c1 + c2 >= OpSizeInBits)
- return DAG.getConstant(0, VT);
- return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(c1 + c2, N1.getValueType()));
+ return DAG.getConstant(0, DL, VT);
+ return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, DL, N1.getValueType()));
}
}
@@ -4422,12 +4617,14 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
// This is only valid if the OpSizeInBits + c1 = size of inner shift.
if (c1 + OpSizeInBits == InnerShiftSize) {
+ SDLoc DL(N0);
if (c1 + c2 >= InnerShiftSize)
- return DAG.getConstant(0, VT);
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT,
- DAG.getNode(ISD::SRL, SDLoc(N0), InnerShiftVT,
+ return DAG.getConstant(0, DL, VT);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT,
+ DAG.getNode(ISD::SRL, DL, InnerShiftVT,
N0.getOperand(0)->getOperand(0),
- DAG.getConstant(c1 + c2, ShiftCountVT)));
+ DAG.getConstant(c1 + c2, DL,
+ ShiftCountVT)));
}
}
@@ -4436,8 +4633,9 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
unsigned BitSize = N0.getScalarValueSizeInBits();
if (BitSize <= 64) {
uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize;
- return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(~0ULL >> ShAmt, VT));
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0),
+ DAG.getConstant(~0ULL >> ShAmt, DL, VT));
}
}
@@ -4451,14 +4649,17 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
uint64_t ShiftAmt = N1C->getZExtValue();
- SDValue SmallShift = DAG.getNode(ISD::SRL, SDLoc(N0), SmallVT,
+ SDLoc DL0(N0);
+ SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
N0.getOperand(0),
- DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
+ DAG.getConstant(ShiftAmt, DL0,
+ getShiftAmountTy(SmallVT)));
AddToWorklist(SmallShift.getNode());
APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt);
- return DAG.getNode(ISD::AND, SDLoc(N), VT,
- DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift),
- DAG.getConstant(Mask, VT));
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AND, DL, VT,
+ DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
+ DAG.getConstant(Mask, DL, VT));
}
}
@@ -4477,12 +4678,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// If any of the input bits are KnownOne, then the input couldn't be all
// zeros, thus the result of the srl will always be zero.
- if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT);
+ if (KnownOne.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
// If all of the bits input the to ctlz node are known to be zero, then
// the result of the ctlz is "32" and the result of the shift is one.
APInt UnknownBits = ~KnownZero;
- if (UnknownBits == 0) return DAG.getConstant(1, VT);
+ if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
// Otherwise, check to see if there is exactly one bit input to the ctlz.
if ((UnknownBits & (UnknownBits - 1)) == 0) {
@@ -4494,13 +4695,16 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
SDValue Op = N0.getOperand(0);
if (ShAmt) {
- Op = DAG.getNode(ISD::SRL, SDLoc(N0), VT, Op,
- DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType())));
+ SDLoc DL(N0);
+ Op = DAG.getNode(ISD::SRL, DL, VT, Op,
+ DAG.getConstant(ShAmt, DL,
+ getShiftAmountTy(Op.getValueType())));
AddToWorklist(Op.getNode());
}
- return DAG.getNode(ISD::XOR, SDLoc(N), VT,
- Op, DAG.getConstant(1, VT));
+ SDLoc DL(N);
+ return DAG.getNode(ISD::XOR, DL, VT,
+ Op, DAG.getConstant(1, DL, VT));
}
}
@@ -4517,7 +4721,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
- if (N1C) {
+ if (N1C && !N1C->isOpaque()) {
SDValue NewSRL = visitShiftByConstant(N, N1C);
if (NewSRL.getNode())
return NewSRL;
@@ -4565,7 +4769,7 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (ctlz c1) -> c2
- if (isa<ConstantSDNode>(N0))
+ if (isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
return SDValue();
}
@@ -4575,7 +4779,7 @@ SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (ctlz_zero_undef c1) -> c2
- if (isa<ConstantSDNode>(N0))
+ if (isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
return SDValue();
}
@@ -4585,7 +4789,7 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (cttz c1) -> c2
- if (isa<ConstantSDNode>(N0))
+ if (isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
return SDValue();
}
@@ -4595,7 +4799,7 @@ SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (cttz_zero_undef c1) -> c2
- if (isa<ConstantSDNode>(N0))
+ if (isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
return SDValue();
}
@@ -4605,7 +4809,7 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (ctpop c1) -> c2
- if (isa<ConstantSDNode>(N0))
+ if (isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
return SDValue();
}
@@ -4651,23 +4855,19 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
EVT VT = N->getValueType(0);
EVT VT0 = N0.getValueType();
// fold (select C, X, X) -> X
if (N1 == N2)
return N1;
- // fold (select true, X, Y) -> X
- if (N0C && !N0C->isNullValue())
- return N1;
- // fold (select false, X, Y) -> Y
- if (N0C && N0C->isNullValue())
- return N2;
+ if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
+ // fold (select true, X, Y) -> X
+ // fold (select false, X, Y) -> Y
+ return !N0C->isNullValue() ? N1 : N2;
+ }
// fold (select C, 1, X) -> (or C, X)
- if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
+ if (VT == MVT::i1 && isOneConstant(N1))
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
// fold (select C, 0, 1) -> (xor C, 1)
// We can't do this reliably if integer based booleans have different contents
@@ -4684,40 +4884,43 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
TLI.getBooleanContents(false, true) &&
TLI.getBooleanContents(false, false) ==
TargetLowering::ZeroOrOneBooleanContent)) &&
- N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
+ isNullConstant(N1) && isOneConstant(N2)) {
SDValue XORNode;
- if (VT == VT0)
- return DAG.getNode(ISD::XOR, SDLoc(N), VT0,
- N0, DAG.getConstant(1, VT0));
- XORNode = DAG.getNode(ISD::XOR, SDLoc(N0), VT0,
- N0, DAG.getConstant(1, VT0));
+ if (VT == VT0) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::XOR, DL, VT0,
+ N0, DAG.getConstant(1, DL, VT0));
+ }
+ SDLoc DL0(N0);
+ XORNode = DAG.getNode(ISD::XOR, DL0, VT0,
+ N0, DAG.getConstant(1, DL0, VT0));
AddToWorklist(XORNode.getNode());
if (VT.bitsGT(VT0))
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode);
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode);
}
// fold (select C, 0, X) -> (and (not C), X)
- if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) {
+ if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
AddToWorklist(NOTNode.getNode());
return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
}
// fold (select C, X, 1) -> (or (not C), X)
- if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
+ if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
AddToWorklist(NOTNode.getNode());
return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
}
// fold (select C, X, 0) -> (and C, X)
- if (VT == MVT::i1 && N2C && N2C->isNullValue())
+ if (VT == MVT::i1 && isNullConstant(N2))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
// fold (select X, X, Y) -> (or X, Y)
// fold (select X, 1, Y) -> (or X, Y)
- if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1)))
+ if (VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
// fold (select X, Y, X) -> (and X, Y)
// fold (select X, Y, 0) -> (and X, Y)
- if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0)))
+ if (VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
// If we can fold this based on the true/false value, do so.
@@ -4757,6 +4960,69 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
return SimplifySelect(SDLoc(N), N0, N1, N2);
}
+ if (VT0 == MVT::i1) {
+ if (TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+ // select (and Cond0, Cond1), X, Y
+ // -> select Cond0, (select Cond1, X, Y), Y
+ if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
+ SDValue Cond0 = N0->getOperand(0);
+ SDValue Cond1 = N0->getOperand(1);
+ SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
+ N1.getValueType(), Cond1, N1, N2);
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0,
+ InnerSelect, N2);
+ }
+ // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
+ if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
+ SDValue Cond0 = N0->getOperand(0);
+ SDValue Cond1 = N0->getOperand(1);
+ SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
+ N1.getValueType(), Cond1, N1, N2);
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1,
+ InnerSelect);
+ }
+ }
+
+ // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
+ if (N1->getOpcode() == ISD::SELECT) {
+ SDValue N1_0 = N1->getOperand(0);
+ SDValue N1_1 = N1->getOperand(1);
+ SDValue N1_2 = N1->getOperand(2);
+ if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
+ // Create the actual and node if we can generate good code for it.
+ if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+ SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
+ N0, N1_0);
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And,
+ N1_1, N2);
+ }
+ // Otherwise see if we can optimize the "and" to a better pattern.
+ if (SDValue Combined = visitANDLike(N0, N1_0, N))
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
+ N1_1, N2);
+ }
+ }
+ // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
+ if (N2->getOpcode() == ISD::SELECT) {
+ SDValue N2_0 = N2->getOperand(0);
+ SDValue N2_1 = N2->getOperand(1);
+ SDValue N2_2 = N2->getOperand(2);
+ if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
+ // Create the actual or node if we can generate good code for it.
+ if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+ SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
+ N0, N2_0);
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or,
+ N1, N2_2);
+ }
+ // Otherwise see if we can optimize to a better pattern.
+ if (SDValue Combined = visitORLike(N0, N2_0, N))
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
+ N1, N2_2);
+ }
+ }
+ }
+
return SDValue();
}
@@ -4832,6 +5098,67 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
}
+SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
+
+ if (Level >= AfterLegalizeTypes)
+ return SDValue();
+
+ MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
+ SDValue Mask = MSC->getMask();
+ SDValue Data = MSC->getValue();
+ SDLoc DL(N);
+
+ // If the MSCATTER data type requires splitting and the mask is provided by a
+ // SETCC, then split both nodes and its operands before legalization. This
+ // prevents the type legalizer from unrolling SETCC into scalar comparisons
+ // and enables future optimizations (e.g. min/max pattern matching on X86).
+ if (Mask.getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ // Check if any splitting is required.
+ if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
+ TargetLowering::TypeSplitVector)
+ return SDValue();
+ SDValue MaskLo, MaskHi, Lo, Hi;
+ std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
+
+ SDValue Chain = MSC->getChain();
+
+ EVT MemoryVT = MSC->getMemoryVT();
+ unsigned Alignment = MSC->getOriginalAlignment();
+
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ SDValue DataLo, DataHi;
+ std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
+
+ SDValue BasePtr = MSC->getBasePtr();
+ SDValue IndexLo, IndexHi;
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MSC->getPointerInfo(),
+ MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
+ Alignment, MSC->getAAInfo(), MSC->getRanges());
+
+ SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo };
+ Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
+ DL, OpsLo, MMO);
+
+ SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi};
+ Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
+ DL, OpsHi, MMO);
+
+ AddToWorklist(Lo.getNode());
+ AddToWorklist(Hi.getNode());
+
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+}
+
SDValue DAGCombiner::visitMSTORE(SDNode *N) {
if (Level >= AfterLegalizeTypes)
@@ -4878,7 +5205,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MST->getPointerInfo(),
+ getMachineMemOperand(MST->getPointerInfo(),
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
Alignment, MST->getAAInfo(), MST->getRanges());
@@ -4887,10 +5214,10 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, Ptr.getValueType()));
+ DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
MMO = DAG.getMachineFunction().
- getMachineMemOperand(MST->getPointerInfo(),
+ getMachineMemOperand(MST->getPointerInfo(),
MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
SecondHalfAlignment, MST->getAAInfo(),
MST->getRanges());
@@ -4906,6 +5233,83 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitMGATHER(SDNode *N) {
+
+ if (Level >= AfterLegalizeTypes)
+ return SDValue();
+
+ MaskedGatherSDNode *MGT = dyn_cast<MaskedGatherSDNode>(N);
+ SDValue Mask = MGT->getMask();
+ SDLoc DL(N);
+
+ // If the MGATHER result requires splitting and the mask is provided by a
+ // SETCC, then split both nodes and its operands before legalization. This
+ // prevents the type legalizer from unrolling SETCC into scalar comparisons
+ // and enables future optimizations (e.g. min/max pattern matching on X86).
+
+ if (Mask.getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+
+ // Check if any splitting is required.
+ if (TLI.getTypeAction(*DAG.getContext(), VT) !=
+ TargetLowering::TypeSplitVector)
+ return SDValue();
+
+ SDValue MaskLo, MaskHi, Lo, Hi;
+ std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
+
+ SDValue Src0 = MGT->getValue();
+ SDValue Src0Lo, Src0Hi;
+ std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
+
+ SDValue Chain = MGT->getChain();
+ EVT MemoryVT = MGT->getMemoryVT();
+ unsigned Alignment = MGT->getOriginalAlignment();
+
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ SDValue BasePtr = MGT->getBasePtr();
+ SDValue Index = MGT->getIndex();
+ SDValue IndexLo, IndexHi;
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MGT->getPointerInfo(),
+ MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
+ Alignment, MGT->getAAInfo(), MGT->getRanges());
+
+ SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo };
+ Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
+ MMO);
+
+ SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi};
+ Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
+ MMO);
+
+ AddToWorklist(Lo.getNode());
+ AddToWorklist(Hi.getNode());
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
+
+ SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
+
+ SDValue RetOps[] = { GatherRes, Chain };
+ return DAG.getMergeValues(RetOps, DL);
+}
+
SDValue DAGCombiner::visitMLOAD(SDNode *N) {
if (Level >= AfterLegalizeTypes)
@@ -4953,7 +5357,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MLD->getPointerInfo(),
+ getMachineMemOperand(MLD->getPointerInfo(),
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MLD->getAAInfo(), MLD->getRanges());
@@ -4962,10 +5366,10 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, Ptr.getValueType()));
+ DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
MMO = DAG.getMachineFunction().
- getMachineMemOperand(MLD->getPointerInfo(),
+ getMachineMemOperand(MLD->getPointerInfo(),
MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
@@ -5021,7 +5425,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
EVT VT = LHS.getValueType();
SDValue Shift = DAG.getNode(
ISD::SRA, DL, VT, LHS,
- DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT));
+ DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, DL, VT));
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
AddToWorklist(Shift.getNode());
AddToWorklist(Add.getNode());
@@ -5029,6 +5433,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
}
}
+ if (SimplifySelectOps(N, N1, N2))
+ return SDValue(N, 0); // Don't revisit N.
+
// If the VSELECT result requires splitting and the mask is provided by a
// SETCC, then split both nodes and its operands before legalization. This
// prevents the type legalizer from unrolling SETCC into scalar comparisons
@@ -5141,7 +5548,8 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
EVT VT = N->getValueType(0);
assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
- Opcode == ISD::ANY_EXTEND) && "Expected EXTEND dag node in input!");
+ Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
+ && "Expected EXTEND dag node in input!");
// fold (sext c1) -> c1
// fold (zext c1) -> c1
@@ -5163,7 +5571,7 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits();
unsigned ShAmt = VTBits - EVTBits;
SmallVector<SDValue, 8> Elts;
- unsigned NumElts = N0->getNumOperands();
+ unsigned NumElts = VT.getVectorNumElements();
SDLoc DL(N);
for (unsigned i=0; i != NumElts; ++i) {
@@ -5173,14 +5581,15 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
continue;
}
+ SDLoc DL(Op);
ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
- if (Opcode == ISD::SIGN_EXTEND)
+ if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
- SVT));
+ DL, SVT));
else
Elts.push_back(DAG.getConstant(C.shl(ShAmt).lshr(ShAmt).getZExtValue(),
- SVT));
+ DL, SVT));
}
return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode();
@@ -5271,6 +5680,102 @@ void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
}
}
+// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
+SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT DstVT = N->getValueType(0);
+ EVT SrcVT = N0.getValueType();
+
+ assert((N->getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOpcode() == ISD::ZERO_EXTEND) &&
+ "Unexpected node type (not an extend)!");
+
+ // fold (sext (load x)) to multiple smaller sextloads; same for zext.
+ // For example, on a target with legal v4i32, but illegal v8i32, turn:
+ // (v8i32 (sext (v8i16 (load x))))
+ // into:
+ // (v8i32 (concat_vectors (v4i32 (sextload x)),
+ // (v4i32 (sextload (x + 16)))))
+ // Where uses of the original load, i.e.:
+ // (v8i16 (load x))
+ // are replaced with:
+ // (v8i16 (truncate
+ // (v8i32 (concat_vectors (v4i32 (sextload x)),
+ // (v4i32 (sextload (x + 16)))))))
+ //
+ // This combine is only applicable to illegal, but splittable, vectors.
+ // All legal types, and illegal non-vector types, are handled elsewhere.
+ // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
+ //
+ if (N0->getOpcode() != ISD::LOAD)
+ return SDValue();
+
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+
+ if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
+ !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
+ !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
+ return SDValue();
+
+ SmallVector<SDNode *, 4> SetCCs;
+ if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI))
+ return SDValue();
+
+ ISD::LoadExtType ExtType =
+ N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+
+ // Try to split the vector types to get down to legal types.
+ EVT SplitSrcVT = SrcVT;
+ EVT SplitDstVT = DstVT;
+ while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
+ SplitSrcVT.getVectorNumElements() > 1) {
+ SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
+ SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
+ }
+
+ if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
+ return SDValue();
+
+ SDLoc DL(N);
+ const unsigned NumSplits =
+ DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
+ const unsigned Stride = SplitSrcVT.getStoreSize();
+ SmallVector<SDValue, 4> Loads;
+ SmallVector<SDValue, 4> Chains;
+
+ SDValue BasePtr = LN0->getBasePtr();
+ for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
+ const unsigned Offset = Idx * Stride;
+ const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
+
+ SDValue SplitLoad = DAG.getExtLoad(
+ ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr,
+ LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT,
+ LN0->isVolatile(), LN0->isNonTemporal(), LN0->isInvariant(),
+ Align, LN0->getAAInfo());
+
+ BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
+ DAG.getConstant(Stride, DL, BasePtr.getValueType()));
+
+ Loads.push_back(SplitLoad.getValue(0));
+ Chains.push_back(SplitLoad.getValue(1));
+ }
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+ SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
+
+ CombineTo(N, NewValue);
+
+ // Replace uses of the original load (before extension)
+ // with a truncate of the concatenated sextloaded vectors.
+ SDValue Trunc =
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
+ CombineTo(N0.getNode(), Trunc, NewChain);
+ ExtendSetCCUses(SetCCs, Trunc, NewValue, DL,
+ (ISD::NodeType)N->getOpcode());
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+}
+
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -5337,17 +5842,18 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
}
// fold (sext (load x)) -> (sext (truncate (sextload x)))
- // None of the supported targets knows how to perform load and sign extend
- // on vectors in one instruction. We only perform this transformation on
- // scalars.
- if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
- ISD::isUNINDEXEDLoad(N0.getNode()) &&
- ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ // Only generate vector extloads when 1) they're legal, and 2) they are
+ // deemed desirable by the target.
+ if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ ((!LegalOperations && !VT.isVector() &&
+ !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
if (!N0.hasOneUse())
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
+ if (VT.isVector())
+ DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
@@ -5364,6 +5870,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
}
}
+ // fold (sext (load x)) to multiple smaller sextloads.
+ // Only on illegal but splittable vectors.
+ if (SDValue ExtLoad = CombineExtLoad(N))
+ return ExtLoad;
+
// fold (sext (sextload x)) -> (sext (truncate (sextload x)))
// fold (sext ( extload x)) -> (sext (truncate (sextload x)))
if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
@@ -5407,14 +5918,15 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
LN0->getMemOperand());
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.sext(VT.getSizeInBits());
- SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
- ExtLoad, DAG.getConstant(Mask, VT));
+ SDLoc DL(N);
+ SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
+ ExtLoad, DAG.getConstant(Mask, DL, VT));
SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
SDLoc(N0.getOperand(0)),
N0.getOperand(0).getValueType(), ExtLoad);
CombineTo(N, And);
CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
- ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
ISD::SIGN_EXTEND);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
@@ -5457,11 +5969,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0)
unsigned ElementWidth = VT.getScalarType().getSizeInBits();
+ SDLoc DL(N);
SDValue NegOne =
- DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT);
+ DAG.getConstant(APInt::getAllOnesValue(ElementWidth), DL, VT);
SDValue SCC =
- SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
- NegOne, DAG.getConstant(0, VT),
+ SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1),
+ NegOne, DAG.getConstant(0, DL, VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
if (SCC.getNode()) return SCC;
@@ -5473,7 +5986,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue SetCC = DAG.getSetCC(DL, SetCCVT,
N0.getOperand(0), N0.getOperand(1), CC);
return DAG.getSelect(DL, VT, SetCC,
- NegOne, DAG.getConstant(0, VT));
+ NegOne, DAG.getConstant(0, DL, VT));
}
}
}
@@ -5507,11 +6020,9 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
SDValue Op1 = N->getOperand(1);
assert(Op0.getValueType() == Op1.getValueType());
- ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0);
- ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1);
- if (COp0 && COp0->isNullValue())
+ if (isNullConstant(Op0))
Op = Op1;
- else if (COp1 && COp1->isNullValue())
+ else if (isNullConstant(Op1))
Op = Op0;
else
return false;
@@ -5622,22 +6133,24 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
}
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.zext(VT.getSizeInBits());
- return DAG.getNode(ISD::AND, SDLoc(N), VT,
- X, DAG.getConstant(Mask, VT));
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AND, DL, VT,
+ X, DAG.getConstant(Mask, DL, VT));
}
// fold (zext (load x)) -> (zext (truncate (zextload x)))
- // None of the supported targets knows how to perform load and vector_zext
- // on vectors in one instruction. We only perform this transformation on
- // scalars.
- if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
- ISD::isUNINDEXEDLoad(N0.getNode()) &&
- ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ // Only generate vector extloads when 1) they're legal, and 2) they are
+ // deemed desirable by the target.
+ if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ ((!LegalOperations && !VT.isVector() &&
+ !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
if (!N0.hasOneUse())
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
+ if (VT.isVector())
+ DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
@@ -5655,6 +6168,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
}
}
+ // fold (zext (load x)) to multiple smaller zextloads.
+ // Only on illegal but splittable vectors.
+ if (SDValue ExtLoad = CombineExtLoad(N))
+ return ExtLoad;
+
// fold (zext (and/or/xor (load x), cst)) ->
// (and/or/xor (zextload x), (zext cst))
if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
@@ -5677,14 +6195,15 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
LN0->getMemOperand());
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.zext(VT.getSizeInBits());
- SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
- ExtLoad, DAG.getConstant(Mask, VT));
+ SDLoc DL(N);
+ SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
+ ExtLoad, DAG.getConstant(Mask, DL, VT));
SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
SDLoc(N0.getOperand(0)),
N0.getOperand(0).getValueType(), ExtLoad);
CombineTo(N, And);
CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
- ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
ISD::ZERO_EXTEND);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
@@ -5722,19 +6241,20 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
// Only do this before legalize for now.
EVT EltVT = VT.getVectorElementType();
+ SDLoc DL(N);
SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
- DAG.getConstant(1, EltVT));
+ DAG.getConstant(1, DL, EltVT));
if (VT.getSizeInBits() == N0VT.getSizeInBits())
// We know that the # elements of the results is the same as the
// # elements of the compare (and the # elements of the compare result
// for that matter). Check to see that they are the same size. If so,
// we know that the element size of the sext'd result matches the
// element size of the compare operands.
- return DAG.getNode(ISD::AND, SDLoc(N), VT,
- DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
+ return DAG.getNode(ISD::AND, DL, VT,
+ DAG.getSetCC(DL, VT, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get()),
- DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, VT,
OneOps));
// If the desired elements are smaller or larger than the source
@@ -5747,18 +6267,19 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
N0VT.getVectorNumElements());
SDValue VsetCC =
- DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
+ DAG.getSetCC(DL, MatchingVectorType, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getNode(ISD::AND, SDLoc(N), VT,
- DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT),
- DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, OneOps));
+ return DAG.getNode(ISD::AND, DL, VT,
+ DAG.getSExtOrTrunc(VsetCC, DL, VT),
+ DAG.getNode(ISD::BUILD_VECTOR, DL, VT, OneOps));
}
// zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ SDLoc DL(N);
SDValue SCC =
- SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
- DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
if (SCC.getNode()) return SCC;
}
@@ -5850,8 +6371,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
}
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.zext(VT.getSizeInBits());
- return DAG.getNode(ISD::AND, SDLoc(N), VT,
- X, DAG.getConstant(Mask, VT));
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AND, DL, VT,
+ X, DAG.getConstant(Mask, DL, VT));
}
// fold (aext (load x)) -> (aext (truncate (extload x)))
@@ -5934,9 +6456,10 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
}
// aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ SDLoc DL(N);
SDValue SCC =
- SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
- DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
if (SCC.getNode())
return SCC;
@@ -5957,7 +6480,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
const APInt &CVal = CV->getAPIntValue();
APInt NewVal = CVal & Mask;
if (NewVal != CVal)
- return DAG.getConstant(NewVal, V.getValueType());
+ return DAG.getConstant(NewVal, SDLoc(V), V.getValueType());
break;
}
case ISD::OR:
@@ -5972,7 +6495,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
// Only look at single-use SRLs.
if (!V.getNode()->hasOneUse())
break;
- if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
+ if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) {
// See if we can recursively simplify the LHS.
unsigned Amt = RHSC->getZExtValue();
@@ -6117,9 +6640,10 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
uint64_t PtrOff = ShAmt / 8;
unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
- SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0),
+ SDLoc DL(LN0);
+ SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
PtrType, LN0->getBasePtr(),
- DAG.getConstant(PtrOff, PtrType));
+ DAG.getConstant(PtrOff, DL, PtrType));
AddToWorklist(NewPtr.getNode());
SDValue Load;
@@ -6148,11 +6672,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
// no larger than the source) then the useful bits of the result are
// zero; we can't simply return the shortened shift, because the result
// of that operation is undefined.
+ SDLoc DL(N0);
if (ShLeftAmt >= VT.getSizeInBits())
- Result = DAG.getConstant(0, VT);
+ Result = DAG.getConstant(0, DL, VT);
else
- Result = DAG.getNode(ISD::SHL, SDLoc(N0), VT,
- Result, DAG.getConstant(ShLeftAmt, ShImmTy));
+ Result = DAG.getNode(ISD::SHL, DL, VT,
+ Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
}
// Return the new loaded value.
@@ -6279,7 +6804,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
- Op.getValueType()));
+ SDLoc(Op), Op.getValueType()));
}
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts);
@@ -6288,6 +6813,20 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getUNDEF(VT);
+
+ if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
+ LegalOperations))
+ return SDValue(Res, 0);
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -6297,7 +6836,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (N0.getValueType() == N->getValueType(0))
return N0;
// fold (truncate c1) -> c1
- if (isa<ConstantSDNode>(N0))
+ if (isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
// fold (truncate (truncate x)) -> (truncate x)
if (N0.getOpcode() == ISD::TRUNCATE)
@@ -6350,9 +6889,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N),
NVT, N0.getOperand(0));
+ SDLoc DL(N);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
- SDLoc(N), TrTy, V,
- DAG.getConstant(Index, IndexTy));
+ DL, TrTy, V,
+ DAG.getConstant(Index, DL, IndexTy));
}
}
@@ -6598,13 +7138,14 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
N0.getOperand(0));
AddToWorklist(NewConv.getNode());
+ SDLoc DL(N);
APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
if (N0.getOpcode() == ISD::FNEG)
- return DAG.getNode(ISD::XOR, SDLoc(N), VT,
- NewConv, DAG.getConstant(SignBit, VT));
+ return DAG.getNode(ISD::XOR, DL, VT,
+ NewConv, DAG.getConstant(SignBit, DL, VT));
assert(N0.getOpcode() == ISD::FABS);
- return DAG.getNode(ISD::AND, SDLoc(N), VT,
- NewConv, DAG.getConstant(~SignBit, VT));
+ return DAG.getNode(ISD::AND, DL, VT,
+ NewConv, DAG.getConstant(~SignBit, DL, VT));
}
// fold (bitconvert (fcopysign cst, x)) ->
@@ -6629,9 +7170,11 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
} else if (OrigXWidth > VTWidth) {
// To get the sign bit in the right place, we have to shift it right
// before truncating.
- X = DAG.getNode(ISD::SRL, SDLoc(X),
+ SDLoc DL(X);
+ X = DAG.getNode(ISD::SRL, DL,
X.getValueType(), X,
- DAG.getConstant(OrigXWidth-VTWidth, X.getValueType()));
+ DAG.getConstant(OrigXWidth-VTWidth, DL,
+ X.getValueType()));
AddToWorklist(X.getNode());
X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
AddToWorklist(X.getNode());
@@ -6639,13 +7182,13 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
X = DAG.getNode(ISD::AND, SDLoc(X), VT,
- X, DAG.getConstant(SignBit, VT));
+ X, DAG.getConstant(SignBit, SDLoc(X), VT));
AddToWorklist(X.getNode());
SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0),
VT, N0.getOperand(0));
Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
- Cst, DAG.getConstant(~SignBit, VT));
+ Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
AddToWorklist(Cst.getNode());
return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
@@ -6659,6 +7202,51 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
return CombineLD;
}
+ // Remove double bitcasts from shuffles - this is often a legacy of
+ // XformToShuffleWithZero being used to combine bitmaskings (of
+ // float vectors bitcast to integer vectors) into shuffles.
+ // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
+ if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
+ N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
+ VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
+ !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
+
+ // If operands are a bitcast, peek through if it casts the original VT.
+ // If operands are a UNDEF or constant, just bitcast back to original VT.
+ auto PeekThroughBitcast = [&](SDValue Op) {
+ if (Op.getOpcode() == ISD::BITCAST &&
+ Op.getOperand(0)->getValueType(0) == VT)
+ return SDValue(Op.getOperand(0));
+ if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
+ ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
+ return SDValue();
+ };
+
+ SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
+ SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
+ if (!(SV0 && SV1))
+ return SDValue();
+
+ int MaskScale =
+ VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
+ SmallVector<int, 8> NewMask;
+ for (int M : SVN->getMask())
+ for (int i = 0; i != MaskScale; ++i)
+ NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
+
+ bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
+ if (!LegalMask) {
+ std::swap(SV0, SV1);
+ ShuffleVectorSDNode::commuteMask(NewMask);
+ LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
+ }
+
+ if (LegalMask)
+ return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
+ }
+
return SDValue();
}
@@ -6727,6 +7315,8 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
}
+ SDLoc DL(BV);
+
// Okay, we know the src/dst types are both integers of differing types.
// Handling growing first.
assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
@@ -6753,16 +7343,15 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
if (EltIsUndef)
Ops.push_back(DAG.getUNDEF(DstEltVT));
else
- Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
+ Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
}
EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
}
// Finally, this must be the case where we are shrinking elements: each input
// turns into multiple outputs.
- bool isS2V = ISD::isScalarToVector(BV);
unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
NumOutputsPerInput*BV->getNumOperands());
@@ -6770,8 +7359,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
- for (unsigned j = 0; j != NumOutputsPerInput; ++j)
- Ops.push_back(DAG.getUNDEF(DstEltVT));
+ Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
continue;
}
@@ -6780,11 +7368,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
APInt ThisVal = OpVal.trunc(DstBitSize);
- Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
- if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal)
- // Simply turn this into a SCALAR_TO_VECTOR of the new type.
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
- Ops[0]);
+ Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
OpVal = OpVal.lshr(DstBitSize);
}
@@ -6793,7 +7377,450 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
}
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
+}
+
+/// Try to perform FMA combining on a given FADD node.
+SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc SL(N);
+
+ const TargetOptions &Options = DAG.getTarget().Options;
+ bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Options.UnsafeFPMath);
+
+ // Floating-point multiply-add with intermediate rounding.
+ bool HasFMAD = (LegalOperations &&
+ TLI.isOperationLegal(ISD::FMAD, VT));
+
+ // Floating-point multiply-add without intermediate rounding.
+ bool HasFMA = ((!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
+ TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ UnsafeFPMath);
+
+ // No valid opcode, do not combine.
+ if (!HasFMAD && !HasFMA)
+ return SDValue();
+
+ // Always prefer FMAD to FMA for precision.
+ unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
+ bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
+ bool LookThroughFPExt = TLI.isFPExtFree(VT);
+
+ // fold (fadd (fmul x, y), z) -> (fma x, y, z)
+ if (N0.getOpcode() == ISD::FMUL &&
+ (Aggressive || N0->hasOneUse())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(0), N0.getOperand(1), N1);
+ }
+
+ // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
+ // Note: Commutes FADD operands.
+ if (N1.getOpcode() == ISD::FMUL &&
+ (Aggressive || N1->hasOneUse())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N1.getOperand(0), N1.getOperand(1), N0);
+ }
+
+ // Look through FP_EXTEND nodes to do more combining.
+ if (UnsafeFPMath && LookThroughFPExt) {
+ // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(1)), N1);
+ }
+
+ // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
+ // Note: Commutes FADD operands.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(1)), N0);
+ }
+ }
+
+ // More folding opportunities when target permits.
+ if ((UnsafeFPMath || HasFMAD) && Aggressive) {
+ // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
+ if (N0.getOpcode() == PreferredFusedOpcode &&
+ N0.getOperand(2).getOpcode() == ISD::FMUL) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(2).getOperand(0),
+ N0.getOperand(2).getOperand(1),
+ N1));
+ }
+
+ // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
+ if (N1->getOpcode() == PreferredFusedOpcode &&
+ N1.getOperand(2).getOpcode() == ISD::FMUL) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N1.getOperand(0), N1.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N1.getOperand(2).getOperand(0),
+ N1.getOperand(2).getOperand(1),
+ N0));
+ }
+
+ if (UnsafeFPMath && LookThroughFPExt) {
+ // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
+ // -> (fma x, y, (fma (fpext u), (fpext v), z))
+ auto FoldFAddFMAFPExtFMul = [&] (
+ SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
+ Z));
+ };
+ if (N0.getOpcode() == PreferredFusedOpcode) {
+ SDValue N02 = N0.getOperand(2);
+ if (N02.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N020 = N02.getOperand(0);
+ if (N020.getOpcode() == ISD::FMUL)
+ return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
+ N020.getOperand(0), N020.getOperand(1),
+ N1);
+ }
+ }
+
+ // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
+ // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ auto FoldFAddFPExtFMAFMul = [&] (
+ SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
+ Z));
+ };
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == PreferredFusedOpcode) {
+ SDValue N002 = N00.getOperand(2);
+ if (N002.getOpcode() == ISD::FMUL)
+ return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
+ N002.getOperand(0), N002.getOperand(1),
+ N1);
+ }
+ }
+
+ // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
+ // -> (fma y, z, (fma (fpext u), (fpext v), x))
+ if (N1.getOpcode() == PreferredFusedOpcode) {
+ SDValue N12 = N1.getOperand(2);
+ if (N12.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N120 = N12.getOperand(0);
+ if (N120.getOpcode() == ISD::FMUL)
+ return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
+ N120.getOperand(0), N120.getOperand(1),
+ N0);
+ }
+ }
+
+ // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
+ // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getOpcode() == PreferredFusedOpcode) {
+ SDValue N102 = N10.getOperand(2);
+ if (N102.getOpcode() == ISD::FMUL)
+ return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
+ N102.getOperand(0), N102.getOperand(1),
+ N0);
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+/// Try to perform FMA combining on a given FSUB node.
+SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc SL(N);
+
+ const TargetOptions &Options = DAG.getTarget().Options;
+ bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Options.UnsafeFPMath);
+
+ // Floating-point multiply-add with intermediate rounding.
+ bool HasFMAD = (LegalOperations &&
+ TLI.isOperationLegal(ISD::FMAD, VT));
+
+ // Floating-point multiply-add without intermediate rounding.
+ bool HasFMA = ((!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
+ TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ UnsafeFPMath);
+
+ // No valid opcode, do not combine.
+ if (!HasFMAD && !HasFMA)
+ return SDValue();
+
+ // Always prefer FMAD to FMA for precision.
+ unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
+ bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
+ bool LookThroughFPExt = TLI.isFPExtFree(VT);
+
+ // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
+ if (N0.getOpcode() == ISD::FMUL &&
+ (Aggressive || N0->hasOneUse())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(ISD::FNEG, SL, VT, N1));
+ }
+
+ // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
+ // Note: Commutes FSUB operands.
+ if (N1.getOpcode() == ISD::FMUL &&
+ (Aggressive || N1->hasOneUse()))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1.getOperand(0)),
+ N1.getOperand(1), N0);
+
+ // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
+ if (N0.getOpcode() == ISD::FNEG &&
+ N0.getOperand(0).getOpcode() == ISD::FMUL &&
+ (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
+ SDValue N00 = N0.getOperand(0).getOperand(0);
+ SDValue N01 = N0.getOperand(0).getOperand(1);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
+ DAG.getNode(ISD::FNEG, SL, VT, N1));
+ }
+
+ // Look through FP_EXTEND nodes to do more combining.
+ if (UnsafeFPMath && LookThroughFPExt) {
+ // fold (fsub (fpext (fmul x, y)), z)
+ // -> (fma (fpext x), (fpext y), (fneg z))
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT, N1));
+ }
+
+ // fold (fsub x, (fpext (fmul y, z)))
+ // -> (fma (fneg (fpext y)), (fpext z), x)
+ // Note: Commutes FSUB operands.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(0))),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(1)),
+ N0);
+ }
+
+ // fold (fsub (fpext (fneg (fmul, x, y))), z)
+ // -> (fneg (fma (fpext x), (fpext y), z))
+ // Note: This could be removed with appropriate canonicalization of the
+ // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
+ // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
+ // from implementing the canonicalization in visitFSUB.
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FNEG) {
+ SDValue N000 = N00.getOperand(0);
+ if (N000.getOpcode() == ISD::FMUL) {
+ return DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(1)),
+ N1));
+ }
+ }
+ }
+
+ // fold (fsub (fneg (fpext (fmul, x, y))), z)
+ // -> (fneg (fma (fpext x)), (fpext y), z)
+ // Note: This could be removed with appropriate canonicalization of the
+ // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
+ // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
+ // from implementing the canonicalization in visitFSUB.
+ if (N0.getOpcode() == ISD::FNEG) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N000 = N00.getOperand(0);
+ if (N000.getOpcode() == ISD::FMUL) {
+ return DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(1)),
+ N1));
+ }
+ }
+ }
+
+ }
+
+ // More folding opportunities when target permits.
+ if ((UnsafeFPMath || HasFMAD) && Aggressive) {
+ // fold (fsub (fma x, y, (fmul u, v)), z)
+ // -> (fma x, y (fma u, v, (fneg z)))
+ if (N0.getOpcode() == PreferredFusedOpcode &&
+ N0.getOperand(2).getOpcode() == ISD::FMUL) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(2).getOperand(0),
+ N0.getOperand(2).getOperand(1),
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1)));
+ }
+
+ // fold (fsub x, (fma y, z, (fmul u, v)))
+ // -> (fma (fneg y), z, (fma (fneg u), v, x))
+ if (N1.getOpcode() == PreferredFusedOpcode &&
+ N1.getOperand(2).getOpcode() == ISD::FMUL) {
+ SDValue N20 = N1.getOperand(2).getOperand(0);
+ SDValue N21 = N1.getOperand(2).getOperand(1);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1.getOperand(0)),
+ N1.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N20),
+
+ N21, N0));
+ }
+
+ if (UnsafeFPMath && LookThroughFPExt) {
+ // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
+ // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
+ if (N0.getOpcode() == PreferredFusedOpcode) {
+ SDValue N02 = N0.getOperand(2);
+ if (N02.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N020 = N02.getOperand(0);
+ if (N020.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N020.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N020.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1)));
+ }
+ }
+
+ // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
+ // -> (fma (fpext x), (fpext y),
+ // (fma (fpext u), (fpext v), (fneg z)))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == PreferredFusedOpcode) {
+ SDValue N002 = N00.getOperand(2);
+ if (N002.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(1)),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N002.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N002.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1)));
+ }
+ }
+
+ // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
+ // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
+ if (N1.getOpcode() == PreferredFusedOpcode &&
+ N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
+ SDValue N120 = N1.getOperand(2).getOperand(0);
+ if (N120.getOpcode() == ISD::FMUL) {
+ SDValue N1200 = N120.getOperand(0);
+ SDValue N1201 = N120.getOperand(1);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
+ N1.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL,
+ VT, N1200)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N1201),
+ N0));
+ }
+ }
+
+ // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
+ // -> (fma (fneg (fpext y)), (fpext z),
+ // (fma (fneg (fpext u)), (fpext v), x))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (N1.getOpcode() == ISD::FP_EXTEND &&
+ N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
+ SDValue N100 = N1.getOperand(0).getOperand(0);
+ SDValue N101 = N1.getOperand(0).getOperand(1);
+ SDValue N102 = N1.getOperand(0).getOperand(2);
+ if (N102.getOpcode() == ISD::FMUL) {
+ SDValue N1020 = N102.getOperand(0);
+ SDValue N1021 = N102.getOperand(1);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N100)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL,
+ VT, N1020)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N1021),
+ N0));
+ }
+ }
+ }
+ }
+
+ return SDValue();
}
SDValue DAGCombiner::visitFADD(SDNode *N) {
@@ -6802,32 +7829,32 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
// fold vector ops
- if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
- }
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
// fold (fadd c1, c2) -> c1 + c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N1);
+ return DAG.getNode(ISD::FADD, DL, VT, N0, N1);
// canonicalize constant to RHS
if (N0CFP && !N1CFP)
- return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0);
+ return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
// fold (fadd A, (fneg B)) -> (fsub A, B)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
- return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0,
+ return DAG.getNode(ISD::FSUB, DL, VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations));
// fold (fadd (fneg A), B) -> (fsub B, A)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
- return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N1,
+ return DAG.getNode(ISD::FSUB, DL, VT, N1,
GetNegatedExpression(N0, DAG, LegalOperations));
// If 'unsafe math' is enabled, fold lots of things.
@@ -6837,23 +7864,22 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
bool AllowNewConst = (Level < AfterLegalizeDAG);
// fold (fadd A, 0) -> A
- if (N1CFP && N1CFP->getValueAPF().isZero())
+ if (N1CFP && N1CFP->isZero())
return N0;
// fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
isa<ConstantFPSDNode>(N0.getOperand(1)))
- return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0),
- DAG.getNode(ISD::FADD, SDLoc(N), VT,
- N0.getOperand(1), N1));
+ return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
+ DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1));
// If allowed, fold (fadd (fneg x), x) -> 0.0
if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
- return DAG.getConstantFP(0.0, VT);
+ return DAG.getConstantFP(0.0, DL, VT);
// If allowed, fold (fadd x, (fneg x)) -> 0.0
if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
- return DAG.getConstantFP(0.0, VT);
+ return DAG.getConstantFP(0.0, DL, VT);
// We can fold chains of FADD's of the same value into multiplications.
// This transform is not safe in general because we are reducing the number
@@ -6865,21 +7891,18 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// (fadd (fmul x, c), x) -> (fmul x, c+1)
if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
- SDValue(CFP01, 0),
- DAG.getConstantFP(1.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, NewCFP);
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0),
+ DAG.getConstantFP(1.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
}
// (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
- SDValue(CFP01, 0),
- DAG.getConstantFP(2.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N0.getOperand(0), NewCFP);
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0),
+ DAG.getConstantFP(2.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
}
}
@@ -6889,20 +7912,18 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// (fadd x, (fmul x, c)) -> (fmul x, c+1)
if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
- SDValue(CFP11, 0),
- DAG.getConstantFP(1.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, NewCFP);
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0),
+ DAG.getConstantFP(1.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
}
// (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N0.getOperand(0)) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
- SDValue(CFP11, 0),
- DAG.getConstantFP(2.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1.getOperand(0), NewCFP);
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0),
+ DAG.getConstantFP(2.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
}
}
@@ -6910,18 +7931,20 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
// (fadd (fadd x, x), x) -> (fmul x, 3.0)
if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
- (N0.getOperand(0) == N1))
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N1, DAG.getConstantFP(3.0, VT));
+ (N0.getOperand(0) == N1)) {
+ return DAG.getNode(ISD::FMUL, DL, VT,
+ N1, DAG.getConstantFP(3.0, DL, VT));
+ }
}
if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
// (fadd x, (fadd x, x)) -> (fmul x, 3.0)
if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
- N1.getOperand(0) == N0)
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N0, DAG.getConstantFP(3.0, VT));
+ N1.getOperand(0) == N0) {
+ return DAG.getNode(ISD::FMUL, DL, VT,
+ N0, DAG.getConstantFP(3.0, DL, VT));
+ }
}
// (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
@@ -6929,81 +7952,18 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N1.getOperand(1) &&
- N0.getOperand(0) == N1.getOperand(0))
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N0.getOperand(0), DAG.getConstantFP(4.0, VT));
+ N0.getOperand(0) == N1.getOperand(0)) {
+ return DAG.getNode(ISD::FMUL, DL, VT,
+ N0.getOperand(0), DAG.getConstantFP(4.0, DL, VT));
+ }
}
} // enable-unsafe-fp-math
// FADD -> FMA combines:
- if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
- TLI.isFMAFasterThanFMulAndFAdd(VT) &&
- (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
-
- // fold (fadd (fmul x, y), z) -> (fma x, y, z)
- if (N0.getOpcode() == ISD::FMUL &&
- (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1), N1);
-
- // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
- // Note: Commutes FADD operands.
- if (N1.getOpcode() == ISD::FMUL &&
- (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N1.getOperand(0), N1.getOperand(1), N0);
-
- // When FP_EXTEND nodes are free on the target, and there is an opportunity
- // to combine into FMA, arrange such nodes accordingly.
- if (TLI.isFPExtFree(VT)) {
-
- // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
- if (N0.getOpcode() == ISD::FP_EXTEND) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N00.getOperand(1)), N1);
- }
-
- // fold (fadd x, (fpext (fmul y, z)), z) -> (fma (fpext y), (fpext z), x)
- // Note: Commutes FADD operands.
- if (N1.getOpcode() == ISD::FP_EXTEND) {
- SDValue N10 = N1.getOperand(0);
- if (N10.getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N10.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N10.getOperand(1)), N0);
- }
- }
-
- // More folding opportunities when target permits.
- if (TLI.enableAggressiveFMAFusion(VT)) {
-
- // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
- if (N0.getOpcode() == ISD::FMA &&
- N0.getOperand(2).getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N0.getOperand(2).getOperand(0),
- N0.getOperand(2).getOperand(1),
- N1));
-
- // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
- if (N1->getOpcode() == ISD::FMA &&
- N1.getOperand(2).getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N1.getOperand(0), N1.getOperand(1),
- DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N1.getOperand(2).getOperand(0),
- N1.getOperand(2).getOperand(1),
- N0));
- }
+ SDValue Fused = visitFADDForFMACombine(N);
+ if (Fused) {
+ AddToWorklist(Fused.getNode());
+ return Fused;
}
return SDValue();
@@ -7019,14 +7979,13 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
const TargetOptions &Options = DAG.getTarget().Options;
// fold vector ops
- if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
- }
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
// fold (fsub c1, c2) -> c1-c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, N1);
+ return DAG.getNode(ISD::FSUB, dl, VT, N0, N1);
// fold (fsub A, (fneg B)) -> (fadd A, B)
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
@@ -7036,11 +7995,11 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// If 'unsafe math' is enabled, fold lots of things.
if (Options.UnsafeFPMath) {
// (fsub A, 0) -> A
- if (N1CFP && N1CFP->getValueAPF().isZero())
+ if (N1CFP && N1CFP->isZero())
return N0;
// (fsub 0, B) -> -B
- if (N0CFP && N0CFP->getValueAPF().isZero()) {
+ if (N0CFP && N0CFP->isZero()) {
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
return GetNegatedExpression(N1, DAG, LegalOperations);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
@@ -7049,7 +8008,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// (fsub x, x) -> 0.0
if (N0 == N1)
- return DAG.getConstantFP(0.0f, VT);
+ return DAG.getConstantFP(0.0f, dl, VT);
// (fsub x, (fadd x, y)) -> (fneg y)
// (fsub x, (fadd y, x)) -> (fneg y)
@@ -7066,138 +8025,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
// FSUB -> FMA combines:
- if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
- TLI.isFMAFasterThanFMulAndFAdd(VT) &&
- (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
-
- // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
- if (N0.getOpcode() == ISD::FMUL &&
- (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
- return DAG.getNode(ISD::FMA, dl, VT,
- N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(ISD::FNEG, dl, VT, N1));
-
- // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
- // Note: Commutes FSUB operands.
- if (N1.getOpcode() == ISD::FMUL &&
- (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
- return DAG.getNode(ISD::FMA, dl, VT,
- DAG.getNode(ISD::FNEG, dl, VT,
- N1.getOperand(0)),
- N1.getOperand(1), N0);
-
- // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
- if (N0.getOpcode() == ISD::FNEG &&
- N0.getOperand(0).getOpcode() == ISD::FMUL &&
- ((N0->hasOneUse() && N0.getOperand(0).hasOneUse()) ||
- TLI.enableAggressiveFMAFusion(VT))) {
- SDValue N00 = N0.getOperand(0).getOperand(0);
- SDValue N01 = N0.getOperand(0).getOperand(1);
- return DAG.getNode(ISD::FMA, dl, VT,
- DAG.getNode(ISD::FNEG, dl, VT, N00), N01,
- DAG.getNode(ISD::FNEG, dl, VT, N1));
- }
-
- // When FP_EXTEND nodes are free on the target, and there is an opportunity
- // to combine into FMA, arrange such nodes accordingly.
- if (TLI.isFPExtFree(VT)) {
-
- // fold (fsub (fpext (fmul x, y)), z)
- // -> (fma (fpext x), (fpext y), (fneg z))
- if (N0.getOpcode() == ISD::FP_EXTEND) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N00.getOperand(1)),
- DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1));
- }
-
- // fold (fsub x, (fpext (fmul y, z)))
- // -> (fma (fneg (fpext y)), (fpext z), x)
- // Note: Commutes FSUB operands.
- if (N1.getOpcode() == ISD::FP_EXTEND) {
- SDValue N10 = N1.getOperand(0);
- if (N10.getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
- VT, N10.getOperand(0))),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N10.getOperand(1)),
- N0);
- }
-
- // fold (fsub (fpext (fneg (fmul, x, y))), z)
- // -> (fma (fneg (fpext x)), (fpext y), (fneg z))
- if (N0.getOpcode() == ISD::FP_EXTEND) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FNEG) {
- SDValue N000 = N00.getOperand(0);
- if (N000.getOpcode() == ISD::FMUL) {
- return DAG.getNode(ISD::FMA, dl, VT,
- DAG.getNode(ISD::FNEG, dl, VT,
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
- VT, N000.getOperand(0))),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N000.getOperand(1)),
- DAG.getNode(ISD::FNEG, dl, VT, N1));
- }
- }
- }
-
- // fold (fsub (fneg (fpext (fmul, x, y))), z)
- // -> (fma (fneg (fpext x)), (fpext y), (fneg z))
- if (N0.getOpcode() == ISD::FNEG) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FP_EXTEND) {
- SDValue N000 = N00.getOperand(0);
- if (N000.getOpcode() == ISD::FMUL) {
- return DAG.getNode(ISD::FMA, dl, VT,
- DAG.getNode(ISD::FNEG, dl, VT,
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
- VT, N000.getOperand(0))),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N000.getOperand(1)),
- DAG.getNode(ISD::FNEG, dl, VT, N1));
- }
- }
- }
- }
-
- // More folding opportunities when target permits.
- if (TLI.enableAggressiveFMAFusion(VT)) {
-
- // fold (fsub (fma x, y, (fmul u, v)), z)
- // -> (fma x, y (fma u, v, (fneg z)))
- if (N0.getOpcode() == ISD::FMA &&
- N0.getOperand(2).getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N0.getOperand(2).getOperand(0),
- N0.getOperand(2).getOperand(1),
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
- N1)));
-
- // fold (fsub x, (fma y, z, (fmul u, v)))
- // -> (fma (fneg y), z, (fma (fneg u), v, x))
- if (N1.getOpcode() == ISD::FMA &&
- N1.getOperand(2).getOpcode() == ISD::FMUL) {
- SDValue N20 = N1.getOperand(2).getOperand(0);
- SDValue N21 = N1.getOperand(2).getOperand(1);
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
- N1.getOperand(0)),
- N1.getOperand(1),
- DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
- N20),
- N21, N0));
- }
- }
+ SDValue Fused = visitFSUBForFMACombine(N);
+ if (Fused) {
+ AddToWorklist(Fused.getNode());
+ return Fused;
}
return SDValue();
@@ -7209,29 +8040,24 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
// fold vector ops
if (VT.isVector()) {
// This just handles C1 * C2 for vectors. Other vector folds are below.
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
- // Canonicalize vector constant to RHS.
- if (N0.getOpcode() == ISD::BUILD_VECTOR &&
- N1.getOpcode() != ISD::BUILD_VECTOR)
- if (auto *BV0 = dyn_cast<BuildVectorSDNode>(N0))
- if (BV0->isConstant())
- return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
}
// fold (fmul c1, c2) -> c1*c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, N1);
// canonicalize constant to RHS
- if (N0CFP && !N1CFP)
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0);
+ if (isConstantFPBuildVectorOrConstantFP(N0) &&
+ !isConstantFPBuildVectorOrConstantFP(N1))
+ return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
// fold (fmul A, 1.0) -> A
if (N1CFP && N1CFP->isExactlyValue(1.0))
@@ -7239,7 +8065,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (Options.UnsafeFPMath) {
// fold (fmul A, 0) -> 0
- if (N1CFP && N1CFP->getValueAPF().isZero())
+ if (N1CFP && N1CFP->isZero())
return N1;
// fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
@@ -7247,14 +8073,22 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
// Fold scalars or any vector constants (not just splats).
// This fold is done in general by InstCombine, but extra fmul insts
// may have been generated during lowering.
+ SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+ auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
- if ((N1CFP && isConstOrConstSplatFP(N01)) ||
- (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
- SDLoc SL(N);
- SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1);
- return DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(0), MulConsts);
+
+ // Check 1: Make sure that the first operand of the inner multiply is NOT
+ // a constant. Otherwise, we may induce infinite looping.
+ if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
+ // Check 2: Make sure that the second operand of the inner multiply and
+ // the second operand of the outer multiply are constants.
+ if ((N1CFP && isConstOrConstSplatFP(N01)) ||
+ (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
+ return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
+ }
}
}
@@ -7263,21 +8097,20 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
// during an early run of DAGCombiner can prevent folding with fmuls
// inserted during lowering.
if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) {
- SDLoc SL(N);
- const SDValue Two = DAG.getConstantFP(2.0, VT);
- SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, Two, N1);
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), MulConsts);
+ const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
}
}
// fold (fmul X, 2.0) -> (fadd X, X)
if (N1CFP && N1CFP->isExactlyValue(+2.0))
- return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N0);
+ return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
// fold (fmul X, -1.0) -> (fneg X)
if (N1CFP && N1CFP->isExactlyValue(-1.0))
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
- return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
+ return DAG.getNode(ISD::FNEG, DL, VT, N0);
// fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
@@ -7285,7 +8118,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
// Both can be negated for free, check to see if at least one is cheaper
// negated.
if (LHSNeg == 2 || RHSNeg == 2)
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
+ return DAG.getNode(ISD::FMUL, DL, VT,
GetNegatedExpression(N0, DAG, LegalOperations),
GetNegatedExpression(N1, DAG, LegalOperations));
}
@@ -7364,14 +8197,14 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (Options.UnsafeFPMath && N1CFP && N0 == N2)
return DAG.getNode(ISD::FMUL, dl, VT, N0,
DAG.getNode(ISD::FADD, dl, VT,
- N1, DAG.getConstantFP(1.0, VT)));
+ N1, DAG.getConstantFP(1.0, dl, VT)));
// (fma x, c, (fneg x)) -> (fmul x, (c-1))
if (Options.UnsafeFPMath && N1CFP &&
N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0)
return DAG.getNode(ISD::FMUL, dl, VT, N0,
DAG.getNode(ISD::FADD, dl, VT,
- N1, DAG.getConstantFP(-1.0, VT)));
+ N1, DAG.getConstantFP(-1.0, dl, VT)));
return SDValue();
@@ -7387,10 +8220,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
const TargetOptions &Options = DAG.getTarget().Options;
// fold vector ops
- if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
- }
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
// fold (fdiv c1, c2) -> c1/c2
if (N0CFP && N1CFP)
@@ -7412,8 +8244,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
TLI.isFPImmLegal(Recip, VT)))
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0,
- DAG.getConstantFP(Recip, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N0,
+ DAG.getConstantFP(Recip, DL, VT));
}
// If this FDIV is part of a reciprocal square root, it may be folded
@@ -7492,24 +8324,22 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
SmallVector<SDNode *, 4> Users;
// Find all FDIV users of the same divisor.
- for (SDNode::use_iterator UI = N1.getNode()->use_begin(),
- UE = N1.getNode()->use_end();
- UI != UE; ++UI) {
- SDNode *User = UI.getUse().getUser();
- if (User->getOpcode() == ISD::FDIV && User->getOperand(1) == N1)
- Users.push_back(User);
+ for (auto *U : N1->uses()) {
+ if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1)
+ Users.push_back(U);
}
if (TLI.combineRepeatedFPDivisors(Users.size())) {
- SDValue FPOne = DAG.getConstantFP(1.0, VT); // floating point 1.0
- SDValue Reciprocal = DAG.getNode(ISD::FDIV, SDLoc(N), VT, FPOne, N1);
+ SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
+ SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1);
// Dividend / Divisor -> Dividend * Reciprocal
- for (auto I = Users.begin(), E = Users.end(); I != E; ++I) {
- if ((*I)->getOperand(0) != FPOne) {
- SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(*I), VT,
- (*I)->getOperand(0), Reciprocal);
- DAG.ReplaceAllUsesWith(*I, NewNode.getNode());
+ for (auto *U : Users) {
+ SDValue Dividend = U->getOperand(0);
+ if (Dividend != FPOne) {
+ SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
+ Reciprocal);
+ DAG.ReplaceAllUsesWith(U, NewNode.getNode());
}
}
return SDValue();
@@ -7539,20 +8369,21 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
// Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) {
EVT VT = RV.getValueType();
- RV = DAG.getNode(ISD::FMUL, SDLoc(N), VT, N->getOperand(0), RV);
+ SDLoc DL(N);
+ RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV);
AddToWorklist(RV.getNode());
// Unfortunately, RV is now NaN if the input was exactly 0.
// Select out this case and force the answer to 0.
- SDValue Zero = DAG.getConstantFP(0.0, VT);
+ SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
SDValue ZeroCmp =
- DAG.getSetCC(SDLoc(N), TLI.getSetCCResultType(*DAG.getContext(), VT),
+ DAG.getSetCC(DL, TLI.getSetCCResultType(*DAG.getContext(), VT),
N->getOperand(0), Zero, ISD::SETEQ);
AddToWorklist(ZeroCmp.getNode());
AddToWorklist(RV.getNode());
RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT,
- SDLoc(N), VT, ZeroCmp, Zero, RV);
+ DL, VT, ZeroCmp, Zero, RV);
return RV;
}
}
@@ -7611,12 +8442,11 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
EVT VT = N->getValueType(0);
EVT OpVT = N0.getValueType();
// fold (sint_to_fp c1) -> c1fp
- if (N0C &&
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
@@ -7638,11 +8468,12 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
!VT.isVector() &&
(!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDLoc DL(N);
SDValue Ops[] =
{ N0.getOperand(0), N0.getOperand(1),
- DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT),
+ DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
N0.getOperand(2) };
- return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
}
// fold (sint_to_fp (zext (setcc x, y, cc))) ->
@@ -7651,11 +8482,12 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
(!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDLoc DL(N);
SDValue Ops[] =
{ N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
- DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT),
+ DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
N0.getOperand(0).getOperand(2) };
- return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
}
}
@@ -7664,12 +8496,11 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
EVT VT = N->getValueType(0);
EVT OpVT = N0.getValueType();
// fold (uint_to_fp c1) -> c1fp
- if (N0C &&
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
@@ -7691,39 +8522,82 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
(!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDLoc DL(N);
SDValue Ops[] =
{ N0.getOperand(0), N0.getOperand(1),
- DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT),
+ DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
N0.getOperand(2) };
- return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
}
}
return SDValue();
}
+// Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
+static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
+ return SDValue();
+
+ SDValue Src = N0.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
+ bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
+
+ // We can safely assume the conversion won't overflow the output range,
+ // because (for example) (uint8_t)18293.f is undefined behavior.
+
+ // Since we can assume the conversion won't overflow, our decision as to
+ // whether the input will fit in the float should depend on the minimum
+ // of the input range and output range.
+
+ // This means this is also safe for a signed input and unsigned output, since
+ // a negative input would lead to undefined behavior.
+ unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
+ unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
+ unsigned ActualSize = std::min(InputSize, OutputSize);
+ const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
+
+ // We can only fold away the float conversion if the input range can be
+ // represented exactly in the float range.
+ if (APFloat::semanticsPrecision(sem) >= ActualSize) {
+ if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
+ unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
+ : ISD::ZERO_EXTEND;
+ return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
+ }
+ if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
+ if (SrcVT == VT)
+ return Src;
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Src);
+ }
+ return SDValue();
+}
+
SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// fold (fp_to_sint c1fp) -> c1
- if (N0CFP)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
- return SDValue();
+ return FoldIntToFPToInt(N, DAG);
}
SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// fold (fp_to_uint c1fp) -> c1
- if (N0CFP)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
- return SDValue();
+ return FoldIntToFPToInt(N, DAG);
}
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
@@ -7742,11 +8616,18 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
// fold (fp_round (fp_round x)) -> (fp_round x)
if (N0.getOpcode() == ISD::FP_ROUND) {
- // This is a value preserving truncation if both round's are.
- bool IsTrunc = N->getConstantOperandVal(1) == 1 &&
- N0.getNode()->getConstantOperandVal(1) == 1;
- return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0),
- DAG.getIntPtrConstant(IsTrunc));
+ const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
+ const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1;
+ // If the first fp_round isn't a value preserving truncation, it might
+ // introduce a tie in the second fp_round, that wouldn't occur in the
+ // single-step fp_round we want to fold to.
+ // In other words, double rounding isn't the same as rounding.
+ // Also, this is a value preserving truncation iff both fp_round's are.
+ if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
+ DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
+ }
}
// fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
@@ -7769,8 +8650,9 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
// fold (fp_round_inreg c1fp) -> c1fp
if (N0CFP && isTypeLegal(EVT)) {
- SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
- return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Round);
+ SDLoc DL(N);
+ SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
+ return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
}
return SDValue();
@@ -7778,7 +8660,6 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
@@ -7787,9 +8668,14 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
return SDValue();
// fold (fp_extend c1fp) -> c1fp
- if (N0CFP)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
+ // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
+ if (N0.getOpcode() == ISD::FP16_TO_FP &&
+ TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
+ return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
+
// Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
// value of X.
if (N0.getOpcode() == ISD::FP_ROUND
@@ -7813,7 +8699,8 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(),
DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
- N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)),
+ N0.getValueType(), ExtLoad,
+ DAG.getIntPtrConstant(1, SDLoc(N0))),
ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
@@ -7823,11 +8710,10 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
SDValue DAGCombiner::visitFCEIL(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// fold (fceil c1) -> fceil(c1)
- if (N0CFP)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
return SDValue();
@@ -7835,11 +8721,10 @@ SDValue DAGCombiner::visitFCEIL(SDNode *N) {
SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// fold (ftrunc c1) -> ftrunc(c1)
- if (N0CFP)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
return SDValue();
@@ -7847,11 +8732,10 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// fold (ffloor c1) -> ffloor(c1)
- if (N0CFP)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
return SDValue();
@@ -7862,14 +8746,9 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVUnaryOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
- }
-
// Constant fold FNEG.
- if (isa<ConstantFPSDNode>(N0))
- return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N->getOperand(0));
+ if (isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
&DAG.getTarget().Options))
@@ -7893,15 +8772,17 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
// For a scalar, just generate 0x80...
SignMask = APInt::getSignBit(IntVT.getSizeInBits());
}
- Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int,
- DAG.getConstant(SignMask, IntVT));
+ SDLoc DL0(N0);
+ Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
+ DAG.getConstant(SignMask, DL0, IntVT));
AddToWorklist(Int.getNode());
return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int);
}
}
// (fneg (fmul c, x)) -> (fmul -c, x)
- if (N0.getOpcode() == ISD::FMUL) {
+ if (N0.getOpcode() == ISD::FMUL &&
+ (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
if (CFP1) {
APFloat CVal = CFP1->getValueAPF();
@@ -7927,7 +8808,7 @@ SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
if (N0CFP && N1CFP) {
const APFloat &C0 = N0CFP->getValueAPF();
const APFloat &C1 = N1CFP->getValueAPF();
- return DAG.getConstantFP(minnum(C0, C1), N->getValueType(0));
+ return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), N->getValueType(0));
}
if (N0CFP) {
@@ -7948,7 +8829,7 @@ SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
if (N0CFP && N1CFP) {
const APFloat &C0 = N0CFP->getValueAPF();
const APFloat &C1 = N1CFP->getValueAPF();
- return DAG.getConstantFP(maxnum(C0, C1), N->getValueType(0));
+ return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), N->getValueType(0));
}
if (N0CFP) {
@@ -7964,13 +8845,8 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVUnaryOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
- }
-
// fold (fabs c1) -> fabs(c1)
- if (isa<ConstantFPSDNode>(N0))
+ if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
// fold (fabs (fabs x)) -> (fabs x)
@@ -8000,8 +8876,9 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
// For a scalar, just generate 0x7f...
SignMask = ~APInt::getSignBit(IntVT.getSizeInBits());
}
- Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int,
- DAG.getConstant(SignMask, IntVT));
+ SDLoc DL(N0);
+ Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
+ DAG.getConstant(SignMask, DL, IntVT));
AddToWorklist(Int.getNode());
return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int);
}
@@ -8071,13 +8948,14 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
if (AndConst.isPowerOf2() &&
cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
+ SDLoc DL(N);
SDValue SetCC =
- DAG.getSetCC(SDLoc(N),
+ DAG.getSetCC(DL,
getSetCCResultType(Op0.getValueType()),
- Op0, DAG.getConstant(0, Op0.getValueType()),
+ Op0, DAG.getConstant(0, DL, Op0.getValueType()),
ISD::SETNE);
- SDValue NewBRCond = DAG.getNode(ISD::BRCOND, SDLoc(N),
+ SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
MVT::Other, Chain, SetCC, N2);
// Don't add the new BRCond into the worklist or else SimplifySelectCC
// will convert it back to (X & C1) >> C2.
@@ -8130,12 +9008,11 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
bool Equal = false;
- if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0))
- if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() &&
- Op0.getOpcode() == ISD::XOR) {
- TheXor = Op0.getNode();
- Equal = true;
- }
+ if (isOneConstant(Op0) && Op0.hasOneUse() &&
+ Op0.getOpcode() == ISD::XOR) {
+ TheXor = Op0.getNode();
+ Equal = true;
+ }
EVT SetCCVT = N1.getValueType();
if (LegalTypes)
@@ -8190,14 +9067,18 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
SelectionDAG &DAG,
const TargetLowering &TLI) {
EVT VT;
+ unsigned AS;
+
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
return false;
- VT = Use->getValueType(0);
+ VT = LD->getMemoryVT();
+ AS = LD->getAddressSpace();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
return false;
- VT = ST->getValue().getValueType();
+ VT = ST->getMemoryVT();
+ AS = ST->getAddressSpace();
} else
return false;
@@ -8221,7 +9102,7 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
} else
return false;
- return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext()));
+ return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext()), AS);
}
/// Try turning a load/store into a pre-indexed load/store when the base
@@ -8280,8 +9161,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
}
// Don't create a indexed load / store with zero offset.
- if (isa<ConstantSDNode>(Offset) &&
- cast<ConstantSDNode>(Offset)->isNullValue())
+ if (isNullConstant(Offset))
return false;
// Try turning it into a pre-indexed load / store except when:
@@ -8309,24 +9189,25 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
// a copy of the original base pointer.
SmallVector<SDNode *, 16> OtherUses;
if (isa<ConstantSDNode>(Offset))
- for (SDNode *Use : BasePtr.getNode()->uses()) {
- if (Use == Ptr.getNode())
+ for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
+ UE = BasePtr.getNode()->use_end();
+ UI != UE; ++UI) {
+ SDUse &Use = UI.getUse();
+ // Skip the use that is Ptr and uses of other results from BasePtr's
+ // node (important for nodes that return multiple results).
+ if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
continue;
- if (Use->isPredecessorOf(N))
+ if (Use.getUser()->isPredecessorOf(N))
continue;
- if (Use->getOpcode() != ISD::ADD && Use->getOpcode() != ISD::SUB) {
+ if (Use.getUser()->getOpcode() != ISD::ADD &&
+ Use.getUser()->getOpcode() != ISD::SUB) {
OtherUses.clear();
break;
}
- SDValue Op0 = Use->getOperand(0), Op1 = Use->getOperand(1);
- if (Op1.getNode() == BasePtr.getNode())
- std::swap(Op0, Op1);
- assert(Op0.getNode() == BasePtr.getNode() &&
- "Use of ADD/SUB but not an operand");
-
+ SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
if (!isa<ConstantSDNode>(Op1)) {
OtherUses.clear();
break;
@@ -8338,7 +9219,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
break;
}
- OtherUses.push_back(Use);
+ OtherUses.push_back(Use.getUser());
}
if (Swapped)
@@ -8431,12 +9312,14 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
else CNV = CNV - Offset1;
+ SDLoc DL(OtherUses[i]);
+
// We can now generate the new expression.
- SDValue NewOp1 = DAG.getConstant(CNV, CN->getValueType(0));
+ SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
SDValue NewUse = DAG.getNode(Opcode,
- SDLoc(OtherUses[i]),
+ DL,
OtherUses[i]->getValueType(0), NewOp1, NewOp2);
DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
deleteAndRecombine(OtherUses[i]);
@@ -8494,8 +9377,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
ISD::MemIndexedMode AM = ISD::UNINDEXED;
if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
// Don't create a indexed load / store with zero offset.
- if (isa<ConstantSDNode>(Offset) &&
- cast<ConstantSDNode>(Offset)->isNullValue())
+ if (isNullConstant(Offset))
continue;
// Try turning it into a post-indexed load / store except when
@@ -8585,7 +9467,7 @@ SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
"Cannot split out indexing using opaque target constants");
if (Inc.getOpcode() == ISD::TargetConstant) {
ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
- Inc = DAG.getConstant(*ConstInc->getConstantIntValue(),
+ Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
ConstInc->getValueType(0));
}
@@ -8686,7 +9568,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
LD->getMemoryVT(),
LD->isVolatile(), LD->isNonTemporal(),
LD->isInvariant(), Align, LD->getAAInfo());
- return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
+ if (NewLoad.getNode() != N)
+ return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
}
}
}
@@ -8851,9 +9734,6 @@ struct LoadedSlice {
unsigned Shift = 0, SelectionDAG *DAG = nullptr)
: Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
- LoadedSlice(const LoadedSlice &LS)
- : Inst(LS.Inst), Origin(LS.Origin), Shift(LS.Shift), DAG(LS.DAG) {}
-
/// \brief Get the bits used in a chunk of bits \p BitWidth large.
/// \return Result is \p BitWidth and has used bits set to 1 and
/// not used bits set to 0.
@@ -8980,8 +9860,9 @@ struct LoadedSlice {
if (Offset) {
// BaseAddr = BaseAddr + Offset.
EVT ArithType = BaseAddr.getValueType();
- BaseAddr = DAG->getNode(ISD::ADD, SDLoc(Origin), ArithType, BaseAddr,
- DAG->getConstant(Offset, ArithType));
+ SDLoc DL(Origin);
+ BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
+ DAG->getConstant(Offset, DL, ArithType));
}
// Create the type of the loaded slice according to its size.
@@ -9336,7 +10217,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
if (NotMaskLZ == 64) return Result; // All zero mask.
// See if we have a continuous run of bits. If so, we have 0*1+0*
- if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)
+ if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
return Result;
// Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
@@ -9387,10 +10268,12 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
// Okay, we can do this! Replace the 'St' store with a store of IVal that is
// shifted by ByteShift and truncated down to NumBytes.
- if (ByteShift)
- IVal = DAG.getNode(ISD::SRL, SDLoc(IVal), IVal.getValueType(), IVal,
- DAG.getConstant(ByteShift*8,
+ if (ByteShift) {
+ SDLoc DL(IVal);
+ IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
+ DAG.getConstant(ByteShift*8, DL,
DC->getShiftAmountTy(IVal.getValueType())));
+ }
// Figure out the offset for the store and the alignment of the access.
unsigned StOffset;
@@ -9403,8 +10286,9 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
SDValue Ptr = St->getBasePtr();
if (StOffset) {
- Ptr = DAG.getNode(ISD::ADD, SDLoc(IVal), Ptr.getValueType(),
- Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
+ SDLoc DL(IVal);
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
+ Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
NewAlign = MinAlign(NewAlign, StOffset);
}
@@ -9486,8 +10370,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
// The narrowing should be profitable, the load/store operation should be
// legal (or custom) and the store size should be equal to the NewVT width.
while (NewBW < BitWidth &&
- !(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
- TLI.isNarrowingProfitable(VT, NewVT))) {
+ (NewVT.getStoreSizeInBits() != NewBW ||
+ !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
+ !TLI.isNarrowingProfitable(VT, NewVT))) {
NewBW = NextPowerOf2(NewBW);
NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
}
@@ -9517,7 +10402,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
Ptr.getValueType(), Ptr,
- DAG.getConstant(PtrOff, Ptr.getValueType()));
+ DAG.getConstant(PtrOff, SDLoc(LD),
+ Ptr.getValueType()));
SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0),
LD->getChain(), NewPtr,
LD->getPointerInfo().getWithOffset(PtrOff),
@@ -9525,7 +10411,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
LD->isInvariant(), NewAlign,
LD->getAAInfo());
SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
- DAG.getConstant(NewImm, NewVT));
+ DAG.getConstant(NewImm, SDLoc(Value),
+ NewVT));
SDValue NewST = DAG.getStore(Chain, SDLoc(N),
NewVal, NewPtr,
ST->getPointerInfo().getWithOffset(PtrOff),
@@ -9599,6 +10486,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
return SDValue();
}
+namespace {
/// Helper struct to parse and store a memory address as base + index + offset.
/// We ignore sign extensions when it is safe to do so.
/// The following two expressions are not equivalent. To differentiate we need
@@ -9686,37 +10574,156 @@ struct BaseIndexOffset {
return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
}
};
+} // namespace
-/// Holds a pointer to an LSBaseSDNode as well as information on where it
-/// is located in a sequence of memory operations connected by a chain.
-struct MemOpLink {
- MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
- MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
- // Ptr to the mem node.
- LSBaseSDNode *MemNode;
- // Offset from the base ptr.
- int64_t OffsetFromBase;
- // What is the sequence number of this mem node.
- // Lowest mem operand in the DAG starts at zero.
- unsigned SequenceNum;
-};
+bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
+ SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
+ unsigned NumElem, bool IsConstantSrc, bool UseVector) {
+ // Make sure we have something to merge.
+ if (NumElem < 2)
+ return false;
+
+ int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned LatestNodeUsed = 0;
+
+ for (unsigned i=0; i < NumElem; ++i) {
+ // Find a chain for the new wide-store operand. Notice that some
+ // of the store nodes that we found may not be selected for inclusion
+ // in the wide store. The chain we use needs to be the chain of the
+ // latest store node which is *used* and replaced by the wide store.
+ if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
+ LatestNodeUsed = i;
+ }
+
+ // The latest Node in the DAG.
+ LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
+ SDLoc DL(StoreNodes[0].MemNode);
+
+ SDValue StoredVal;
+ if (UseVector) {
+ // Find a legal type for the vector store.
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
+ assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
+ if (IsConstantSrc) {
+ // A vector store with a constant source implies that the constant is
+ // zero; we only handle merging stores of constant zeros because the zero
+ // can be materialized without a load.
+ // It may be beneficial to loosen this restriction to allow non-zero
+ // store merging.
+ StoredVal = DAG.getConstant(0, DL, Ty);
+ } else {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElem ; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue Val = St->getValue();
+ // All of the operands of a BUILD_VECTOR must have the same type.
+ if (Val.getValueType() != MemVT)
+ return false;
+ Ops.push_back(Val);
+ }
+
+ // Build the extracted vector elements back into a vector.
+ StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops);
+ }
+ } else {
+ // We should always use a vector store when merging extracted vector
+ // elements, so this path implies a store of constants.
+ assert(IsConstantSrc && "Merged vector elements should use vector store");
+
+ unsigned StoreBW = NumElem * ElementSizeBytes * 8;
+ APInt StoreInt(StoreBW, 0);
+
+ // Construct a single integer constant which is made of the smaller
+ // constant inputs.
+ bool IsLE = TLI.isLittleEndian();
+ for (unsigned i = 0; i < NumElem ; ++i) {
+ unsigned Idx = IsLE ? (NumElem - 1 - i) : i;
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
+ SDValue Val = St->getValue();
+ StoreInt <<= ElementSizeBytes*8;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
+ StoreInt |= C->getAPIntValue().zext(StoreBW);
+ } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
+ StoreInt |= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
+ } else {
+ llvm_unreachable("Invalid constant element type");
+ }
+ }
+
+ // Create the new Load and Store operations.
+ EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+ StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
+ }
+
+ SDValue NewStore = DAG.getStore(LatestOp->getChain(), DL, StoredVal,
+ FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(),
+ false, false,
+ FirstInChain->getAlignment());
+
+ // Replace the last store with the new store
+ CombineTo(LatestOp, NewStore);
+ // Erase all other stores.
+ for (unsigned i = 0; i < NumElem ; ++i) {
+ if (StoreNodes[i].MemNode == LatestOp)
+ continue;
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ // ReplaceAllUsesWith will replace all uses that existed when it was
+ // called, but graph optimizations may cause new ones to appear. For
+ // example, the case in pr14333 looks like
+ //
+ // St's chain -> St -> another store -> X
+ //
+ // And the only difference from St to the other store is the chain.
+ // When we change it's chain to be St's chain they become identical,
+ // get CSEed and the net result is that X is now a use of St.
+ // Since we know that St is redundant, just iterate.
+ while (!St->use_empty())
+ DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
+ deleteAndRecombine(St);
+ }
+
+ return true;
+}
+
+static bool allowableAlignment(const SelectionDAG &DAG,
+ const TargetLowering &TLI, EVT EVTTy,
+ unsigned AS, unsigned Align) {
+ if (TLI.allowsMisalignedMemoryAccesses(EVTTy, AS, Align))
+ return true;
+
+ Type *Ty = EVTTy.getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment = TLI.getDataLayout()->getPrefTypeAlignment(Ty);
+ return (Align >= ABIAlignment);
+}
bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
+ if (OptLevel == CodeGenOpt::None)
+ return false;
+
EVT MemVT = St->getMemoryVT();
int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
- bool NoVectors = DAG.getMachineFunction().getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
+ bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
+ Attribute::NoImplicitFloat);
+
+ // This function cannot currently deal with non-byte-sized memory sizes.
+ if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
+ return false;
// Don't merge vectors into wider inputs.
if (MemVT.isVector() || !MemVT.isSimple())
return false;
// Perform an early exit check. Do not bother looking at stored values that
- // are not constants or loads.
+ // are not constants, loads, or extracted vector elements.
SDValue StoredVal = St->getValue();
bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
- if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) &&
- !IsLoadSrc)
+ bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
+ isa<ConstantFPSDNode>(StoredVal);
+ bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT);
+
+ if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
return false;
// Only look at ends of store sequences.
@@ -9761,10 +10768,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (!Ptr.equalBaseIndex(BasePtr))
break;
- // Check that the alignment is the same.
- if (Index->getAlignment() != St->getAlignment())
- break;
-
// The memory operands must not be volatile.
if (Index->isVolatile() || Index->isIndexed())
break;
@@ -9778,11 +10781,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (Index->getMemoryVT() != MemVT)
break;
- // We do not allow unaligned stores because we want to prevent overriding
- // stores.
- if (Index->getAlignment()*8 != MemVT.getSizeInBits())
- break;
-
// We found a potential memory operand to merge.
StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
@@ -9856,9 +10854,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// The node with the lowest store address.
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
// Store the constants into memory as one consecutive store.
- if (!IsLoadSrc) {
+ if (IsConstantSrc) {
unsigned LastLegalType = 0;
unsigned LastLegalVectorType = 0;
bool NonZero = false;
@@ -9878,27 +10878,41 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// Find a legal type for the constant store.
unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
- if (TLI.isTypeLegal(StoreTy))
+ if (TLI.isTypeLegal(StoreTy) &&
+ allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS,
+ FirstStoreAlign)) {
LastLegalType = i+1;
// Or check whether a truncstore is legal.
- else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
- TargetLowering::TypePromoteInteger) {
+ } else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValueTy =
TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
- if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy))
- LastLegalType = i+1;
+ if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
+ allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS,
+ FirstStoreAlign)) {
+ LastLegalType = i + 1;
+ }
}
// Find a legal type for the vector store.
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
- if (TLI.isTypeLegal(Ty))
+ if (TLI.isTypeLegal(Ty) &&
+ allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign)) {
LastLegalVectorType = i + 1;
+ }
}
- // We only use vectors if the constant is known to be zero and the
- // function is not marked with the noimplicitfloat attribute.
- if (NonZero || NoVectors)
+
+ // We only use vectors if the constant is known to be zero or the target
+ // allows it and the function is not marked with the noimplicitfloat
+ // attribute.
+ if (NoVectors) {
LastLegalVectorType = 0;
+ } else if (NonZero && !TLI.storeOfVectorConstantIsCheap(MemVT,
+ LastLegalVectorType,
+ FirstStoreAS)) {
+ LastLegalVectorType = 0;
+ }
// Check if we found a legal integer type to store.
if (LastLegalType == 0 && LastLegalVectorType == 0)
@@ -9907,85 +10921,34 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
- // Make sure we have something to merge.
- if (NumElem < 2)
- return false;
-
- unsigned EarliestNodeUsed = 0;
- for (unsigned i=0; i < NumElem; ++i) {
- // Find a chain for the new wide-store operand. Notice that some
- // of the store nodes that we found may not be selected for inclusion
- // in the wide store. The chain we use needs to be the chain of the
- // earliest store node which is *used* and replaced by the wide store.
- if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
- EarliestNodeUsed = i;
- }
+ return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
+ true, UseVector);
+ }
- // The earliest Node in the DAG.
- LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
- SDLoc DL(StoreNodes[0].MemNode);
+ // When extracting multiple vector elements, try to store them
+ // in one vector store rather than a sequence of scalar stores.
+ if (IsExtractVecEltSrc) {
+ unsigned NumElem = 0;
+ for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue StoredVal = St->getValue();
+ // This restriction could be loosened.
+ // Bail out if any stored values are not elements extracted from a vector.
+ // It should be possible to handle mixed sources, but load sources need
+ // more careful handling (see the block of code below that handles
+ // consecutive loads).
+ if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return false;
- SDValue StoredVal;
- if (UseVector) {
// Find a legal type for the vector store.
- EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
- assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
- StoredVal = DAG.getConstant(0, Ty);
- } else {
- unsigned StoreBW = NumElem * ElementSizeBytes * 8;
- APInt StoreInt(StoreBW, 0);
-
- // Construct a single integer constant which is made of the smaller
- // constant inputs.
- bool IsLE = TLI.isLittleEndian();
- for (unsigned i = 0; i < NumElem ; ++i) {
- unsigned Idx = IsLE ?(NumElem - 1 - i) : i;
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
- SDValue Val = St->getValue();
- StoreInt<<=ElementSizeBytes*8;
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
- StoreInt|=C->getAPIntValue().zext(StoreBW);
- } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
- StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
- } else {
- llvm_unreachable("Invalid constant element type");
- }
- }
-
- // Create the new Load and Store operations.
- EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
- StoredVal = DAG.getConstant(StoreInt, StoreTy);
- }
-
- SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
- FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(),
- false, false,
- FirstInChain->getAlignment());
-
- // Replace the first store with the new store
- CombineTo(EarliestOp, NewStore);
- // Erase all other stores.
- for (unsigned i = 0; i < NumElem ; ++i) {
- if (StoreNodes[i].MemNode == EarliestOp)
- continue;
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- // ReplaceAllUsesWith will replace all uses that existed when it was
- // called, but graph optimizations may cause new ones to appear. For
- // example, the case in pr14333 looks like
- //
- // St's chain -> St -> another store -> X
- //
- // And the only difference from St to the other store is the chain.
- // When we change it's chain to be St's chain they become identical,
- // get CSEed and the net result is that X is now a use of St.
- // Since we know that St is redundant, just iterate.
- while (!St->use_empty())
- DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
- deleteAndRecombine(St);
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
+ if (TLI.isTypeLegal(Ty) &&
+ allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign))
+ NumElem = i + 1;
}
- return true;
+ return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
+ false, true);
}
// Below we handle the case of multiple consecutive stores that
@@ -10007,10 +10970,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (!Ld->hasNUsesOfValue(1, 0))
break;
- // Check that the alignment is the same as the stores.
- if (Ld->getAlignment() != St->getAlignment())
- break;
-
// The memory operands must not be volatile.
if (Ld->isVolatile() || Ld->isIndexed())
break;
@@ -10048,6 +11007,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
St->getAlignment() >= RequiredAlignment)
return false;
+ LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
+ unsigned FirstLoadAS = FirstLoad->getAddressSpace();
+ unsigned FirstLoadAlign = FirstLoad->getAlignment();
+
// Scan the memory operations on the chain and find the first non-consecutive
// load memory address. These variables hold the index in the store node
// array.
@@ -10056,7 +11019,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
unsigned LastLegalVectorType = 0;
unsigned LastLegalIntegerType = 0;
StartAddress = LoadNodes[0].OffsetFromBase;
- SDValue FirstChain = LoadNodes[0].MemNode->getChain();
+ SDValue FirstChain = FirstLoad->getChain();
for (unsigned i = 1; i < LoadNodes.size(); ++i) {
// All loads much share the same chain.
if (LoadNodes[i].MemNode->getChain() != FirstChain)
@@ -10069,13 +11032,18 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// Find a legal type for the vector store.
EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
- if (TLI.isTypeLegal(StoreTy))
+ if (TLI.isTypeLegal(StoreTy) &&
+ allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) &&
+ allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign)) {
LastLegalVectorType = i + 1;
+ }
// Find a legal type for the integer store.
unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
- if (TLI.isTypeLegal(StoreTy))
+ if (TLI.isTypeLegal(StoreTy) &&
+ allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) &&
+ allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign))
LastLegalIntegerType = i + 1;
// Or check whether a truncstore and extload is legal.
else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
@@ -10085,7 +11053,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
- TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy))
+ TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
+ allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS,
+ FirstStoreAlign) &&
+ allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstLoadAS,
+ FirstLoadAlign))
LastLegalIntegerType = i+1;
}
}
@@ -10103,18 +11075,19 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (NumElem < 2)
return false;
- // The earliest Node in the DAG.
- unsigned EarliestNodeUsed = 0;
- LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
+ // The latest Node in the DAG.
+ unsigned LatestNodeUsed = 0;
for (unsigned i=1; i<NumElem; ++i) {
// Find a chain for the new wide-store operand. Notice that some
// of the store nodes that we found may not be selected for inclusion
// in the wide store. The chain we use needs to be the chain of the
- // earliest store node which is *used* and replaced by the wide store.
- if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
- EarliestNodeUsed = i;
+ // latest store node which is *used* and replaced by the wide store.
+ if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
+ LatestNodeUsed = i;
}
+ LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
+
// Find if it is better to use vectors or integers to load and store
// to memory.
EVT JointMemOpVT;
@@ -10128,18 +11101,13 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
SDLoc LoadDL(LoadNodes[0].MemNode);
SDLoc StoreDL(StoreNodes[0].MemNode);
- LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
- SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL,
- FirstLoad->getChain(),
- FirstLoad->getBasePtr(),
- FirstLoad->getPointerInfo(),
- false, false, false,
- FirstLoad->getAlignment());
-
- SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad,
- FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), false, false,
- FirstInChain->getAlignment());
+ SDValue NewLoad = DAG.getLoad(
+ JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign);
+
+ SDValue NewStore = DAG.getStore(
+ LatestOp->getChain(), StoreDL, NewLoad, FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), false, false, FirstStoreAlign);
// Replace one of the loads with the new load.
LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode);
@@ -10154,12 +11122,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain());
}
- // Replace the first store with the new store.
- CombineTo(EarliestOp, NewStore);
+ // Replace the last store with the new store.
+ CombineTo(LatestOp, NewStore);
// Erase all other stores.
for (unsigned i = 0; i < NumElem ; ++i) {
// Remove all Store nodes.
- if (StoreNodes[i].MemNode == EarliestOp)
+ if (StoreNodes[i].MemNode == LatestOp)
continue;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
@@ -10214,8 +11182,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
case MVT::f32:
if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ ;
Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
- bitcastToAPInt().getZExtValue(), MVT::i32);
+ bitcastToAPInt().getZExtValue(), SDLoc(CFP),
+ MVT::i32);
return DAG.getStore(Chain, SDLoc(N), Tmp,
Ptr, ST->getMemOperand());
}
@@ -10224,8 +11194,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
!ST->isVolatile()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
+ ;
Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
- getZExtValue(), MVT::i64);
+ getZExtValue(), SDLoc(CFP), MVT::i64);
return DAG.getStore(Chain, SDLoc(N), Tmp,
Ptr, ST->getMemOperand());
}
@@ -10236,8 +11207,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// argument passing. Since this is so common, custom legalize the
// 64-bit integer store into two 32-bit stores.
uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
- SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32);
- SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
+ SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
+ SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
if (TLI.isBigEndian()) std::swap(Lo, Hi);
unsigned Alignment = ST->getAlignment();
@@ -10245,18 +11216,20 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
bool isNonTemporal = ST->isNonTemporal();
AAMDNodes AAInfo = ST->getAAInfo();
+ SDLoc DL(N);
+
SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo,
Ptr, ST->getPointerInfo(),
isVolatile, isNonTemporal,
ST->getAlignment(), AAInfo);
- Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr,
- DAG.getConstant(4, Ptr.getValueType()));
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(4, DL, Ptr.getValueType()));
Alignment = MinAlign(Alignment, 4U);
SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi,
Ptr, ST->getPointerInfo().getWithOffset(4),
isVolatile, isNonTemporal,
Alignment, AAInfo);
- return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
St0, St1);
}
@@ -10268,11 +11241,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Try to infer better alignment information than the store already has.
if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
- if (Align > ST->getAlignment())
- return DAG.getTruncStore(Chain, SDLoc(N), Value,
+ if (Align > ST->getAlignment()) {
+ SDValue NewStore =
+ DAG.getTruncStore(Chain, SDLoc(N), Value,
Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
ST->isVolatile(), ST->isNonTemporal(), Align,
ST->getAAInfo());
+ if (NewStore.getNode() != N)
+ return CombineTo(ST, NewStore, true);
+ }
}
}
@@ -10493,24 +11470,20 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
SDValue Offset;
EVT PtrType = NewPtr.getValueType();
MachinePointerInfo MPI;
+ SDLoc DL(EVE);
if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
int Elt = ConstEltNo->getZExtValue();
unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
- if (TLI.isBigEndian())
- PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff;
- Offset = DAG.getConstant(PtrOff, PtrType);
+ Offset = DAG.getConstant(PtrOff, DL, PtrType);
MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
} else {
+ Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
Offset = DAG.getNode(
- ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo,
- DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType()));
- if (TLI.isBigEndian())
- Offset = DAG.getNode(
- ISD::SUB, SDLoc(EVE), EltNo.getValueType(),
- DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset);
+ ISD::MUL, DL, PtrType, Offset,
+ DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
MPI = OriginalLoad->getPointerInfo();
}
- NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset);
+ NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
// The replacement we need to do here is a little tricky: we need to
// replace an extractelement of a load with a load.
@@ -10620,8 +11593,8 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (!LegalOperations) {
EVT IndexTy = TLI.getVectorIdxTy();
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT,
- SVInVec, DAG.getConstant(OrigElt, IndexTy));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
+ DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
}
}
@@ -10710,7 +11683,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (ISD::isNormalLoad(InVec.getNode())) {
LN0 = cast<LoadSDNode>(InVec);
Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
- EltNo = DAG.getConstant(Elt, EltNo.getValueType());
+ EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
}
}
@@ -10800,7 +11773,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
assert(ElemRatio > 1 && "Invalid element size ratio");
SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
- DAG.getConstant(0, SourceType);
+ DAG.getConstant(0, SDLoc(N), SourceType);
unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
@@ -10890,6 +11863,11 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
return SDValue();
+ // Just because the floating-point vector type is legal does not necessarily
+ // mean that the corresponding integer vector type is.
+ if (!isTypeLegal(NVT))
+ return SDValue();
+
SmallVector<SDValue, 8> Opnds;
for (unsigned i = 0; i != NumInScalars; ++i) {
SDValue In = N->getOperand(i);
@@ -10914,12 +11892,10 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (ISD::allOperandsUndef(N))
return DAG.getUNDEF(VT);
- SDValue V = reduceBuildVecExtToExtBuildVec(N);
- if (V.getNode())
+ if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
return V;
- V = reduceBuildVecConvertToConvertBuildVec(N);
- if (V.getNode())
+ if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
return V;
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
@@ -10942,10 +11918,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (Op.getOpcode() == ISD::UNDEF) continue;
// See if we can combine this build_vector into a blend with a zero vector.
- if (!VecIn2.getNode() && ((Op.getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(Op.getNode())->isNullValue()) ||
- (Op.getOpcode() == ISD::ConstantFP &&
- cast<ConstantFPSDNode>(Op.getNode())->getValueAPF().isZero()))) {
+ if (!VecIn2.getNode() && (isNullConstant(Op) || isNullFPConstant(Op))) {
UsesZeroVector = true;
continue;
}
@@ -11047,20 +12020,20 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements()))
return SDValue();
-
+
// Try to replace VecIn1 with two extract_subvectors
// No need to update the masks, they should still be correct.
- VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
- DAG.getConstant(VT.getVectorNumElements(), TLI.getVectorIdxTy()));
+ VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
+ DAG.getConstant(VT.getVectorNumElements(), dl, TLI.getVectorIdxTy()));
VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
- DAG.getConstant(0, TLI.getVectorIdxTy()));
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy()));
} else
return SDValue();
}
if (UsesZeroVector)
- VecIn2 = VT.isInteger() ? DAG.getConstant(0, VT) :
- DAG.getConstantFP(0.0, VT);
+ VecIn2 = VT.isInteger() ? DAG.getConstant(0, dl, VT) :
+ DAG.getConstantFP(0.0, dl, VT);
else
// If VecIn2 is unused then change it to undef.
VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
@@ -11081,6 +12054,68 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
return SDValue();
}
+static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT OpVT = N->getOperand(0).getValueType();
+
+ // If the operands are legal vectors, leave them alone.
+ if (TLI.isTypeLegal(OpVT))
+ return SDValue();
+
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SmallVector<SDValue, 8> Ops;
+
+ EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
+ SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
+
+ // Keep track of what we encounter.
+ bool AnyInteger = false;
+ bool AnyFP = false;
+ for (const SDValue &Op : N->ops()) {
+ if (ISD::BITCAST == Op.getOpcode() &&
+ !Op.getOperand(0).getValueType().isVector())
+ Ops.push_back(Op.getOperand(0));
+ else if (ISD::UNDEF == Op.getOpcode())
+ Ops.push_back(ScalarUndef);
+ else
+ return SDValue();
+
+ // Note whether we encounter an integer or floating point scalar.
+ // If it's neither, bail out, it could be something weird like x86mmx.
+ EVT LastOpVT = Ops.back().getValueType();
+ if (LastOpVT.isFloatingPoint())
+ AnyFP = true;
+ else if (LastOpVT.isInteger())
+ AnyInteger = true;
+ else
+ return SDValue();
+ }
+
+ // If any of the operands is a floating point scalar bitcast to a vector,
+ // use floating point types throughout, and bitcast everything.
+ // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
+ if (AnyFP) {
+ SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
+ ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
+ if (AnyInteger) {
+ for (SDValue &Op : Ops) {
+ if (Op.getValueType() == SVT)
+ continue;
+ if (Op.getOpcode() == ISD::UNDEF)
+ Op = ScalarUndef;
+ else
+ Op = DAG.getNode(ISD::BITCAST, DL, SVT, Op);
+ }
+ }
+ }
+
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
+ VT.getSizeInBits() / SVT.getSizeInBits());
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops));
+}
+
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
// TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
// EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector
@@ -11096,9 +12131,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
if (ISD::allOperandsUndef(N))
return DAG.getUNDEF(VT);
- // Optimize concat_vectors where one of the vectors is undef.
- if (N->getNumOperands() == 2 &&
- N->getOperand(1)->getOpcode() == ISD::UNDEF) {
+ // Optimize concat_vectors where all but the first of the vectors are undef.
+ if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
+ return Op.getOpcode() == ISD::UNDEF;
+ })) {
SDValue In = N->getOperand(0);
assert(In.getValueType().isVector() && "Must concat vectors");
@@ -11106,6 +12142,15 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
if (In->getOpcode() == ISD::BITCAST &&
!In->getOperand(0)->getValueType(0).isVector()) {
SDValue Scalar = In->getOperand(0);
+
+ // If the bitcast type isn't legal, it might be a trunc of a legal type;
+ // look through the trunc so we can still do the transform:
+ // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
+ if (Scalar->getOpcode() == ISD::TRUNCATE &&
+ !TLI.isTypeLegal(Scalar.getValueType()) &&
+ TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
+ Scalar = Scalar->getOperand(0);
+
EVT SclTy = Scalar->getValueType(0);
if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
@@ -11122,39 +12167,61 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
}
}
+ // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
+ // We have already tested above for an UNDEF only concatenation.
// fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
// -> (BUILD_VECTOR A, B, ..., C, D, ...)
- if (N->getNumOperands() == 2 &&
- N->getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
- N->getOperand(1).getOpcode() == ISD::BUILD_VECTOR) {
- EVT VT = N->getValueType(0);
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
+ auto IsBuildVectorOrUndef = [](const SDValue &Op) {
+ return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
+ };
+ bool AllBuildVectorsOrUndefs =
+ std::all_of(N->op_begin(), N->op_end(), IsBuildVectorOrUndef);
+ if (AllBuildVectorsOrUndefs) {
SmallVector<SDValue, 8> Opnds;
- unsigned BuildVecNumElts = N0.getNumOperands();
-
- EVT SclTy0 = N0.getOperand(0)->getValueType(0);
- EVT SclTy1 = N1.getOperand(0)->getValueType(0);
- if (SclTy0.isFloatingPoint()) {
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(N0.getOperand(i));
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(N1.getOperand(i));
- } else {
+ EVT SVT = VT.getScalarType();
+
+ EVT MinVT = SVT;
+ if (!SVT.isFloatingPoint()) {
// If BUILD_VECTOR are from built from integer, they may have different
- // operand types. Get the smaller type and truncate all operands to it.
- EVT MinTy = SclTy0.bitsLE(SclTy1) ? SclTy0 : SclTy1;
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
- N0.getOperand(i)));
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
- N1.getOperand(i)));
+ // operand types. Get the smallest type and truncate all operands to it.
+ bool FoundMinVT = false;
+ for (const SDValue &Op : N->ops())
+ if (ISD::BUILD_VECTOR == Op.getOpcode()) {
+ EVT OpSVT = Op.getOperand(0)->getValueType(0);
+ MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
+ FoundMinVT = true;
+ }
+ assert(FoundMinVT && "Concat vector type mismatch");
}
+ for (const SDValue &Op : N->ops()) {
+ EVT OpVT = Op.getValueType();
+ unsigned NumElts = OpVT.getVectorNumElements();
+
+ if (ISD::UNDEF == Op.getOpcode())
+ Opnds.append(NumElts, DAG.getUNDEF(MinVT));
+
+ if (ISD::BUILD_VECTOR == Op.getOpcode()) {
+ if (SVT.isFloatingPoint()) {
+ assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
+ Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
+ } else {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Opnds.push_back(
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
+ }
+ }
+ }
+
+ assert(VT.getVectorNumElements() == Opnds.size() &&
+ "Concat vector type mismatch");
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
}
+ // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
+ if (SDValue V = combineConcatVectorOfScalars(N, DAG))
+ return V;
+
// Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
// nodes often generate nop CONCAT_VECTOR nodes.
// Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
@@ -11216,7 +12283,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
// type.
if (V->getOperand(0).getValueType() != NVT)
return SDValue();
- unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned Idx = N->getConstantOperandVal(1);
unsigned NumElems = NVT.getVectorNumElements();
assert((Idx % NumElems) == 0 &&
"IDX in concat is not a multiple of the result vector length.");
@@ -11347,7 +12414,8 @@ static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
}
-// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat.
+// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
+// or turn a shuffle of a single concat into simpler shuffle then concat.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
@@ -11361,6 +12429,18 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
unsigned NumConcats = NumElts / NumElemsPerConcat;
+ // Special case: shuffle(concat(A,B)) can be more efficiently represented
+ // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
+ // half vector elements.
+ if (NumElemsPerConcat * 2 == NumElts && N1.getOpcode() == ISD::UNDEF &&
+ std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
+ SVN->getMask().end(), [](int i) { return i == -1; })) {
+ N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
+ ArrayRef<int>(SVN->getMask().begin(), NumElemsPerConcat));
+ N1 = DAG.getUNDEF(ConcatVT);
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
+ }
+
// Look at every vector that's inserted. We're looking for exact
// subvector-sized copies from a concatenated vector
for (unsigned I = 0; I != NumConcats; ++I) {
@@ -11459,7 +12539,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
// If it is a splat, check if the argument vector is another splat or a
- // build_vector with all scalar elements the same.
+ // build_vector.
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
SDNode *V = N0.getNode();
@@ -11496,6 +12576,18 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// Splat of <x, x, x, x>, return <x, x, x, x>
if (AllSame)
return N0;
+
+ // Canonicalize any other splat as a build_vector.
+ const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
+ SmallVector<SDValue, 8> Ops(NumElts, Splatted);
+ SDValue NewBV = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
+ V->getValueType(0), Ops);
+
+ // We may have jumped through bitcasts, so the type of the
+ // BUILD_VECTOR may not match the type of the shuffle.
+ if (V->getValueType(0) != VT)
+ NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV);
+ return NewBV;
}
}
@@ -11516,6 +12608,118 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return V;
}
+ // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
+ // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
+ if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
+ SmallVector<SDValue, 8> Ops;
+ for (int M : SVN->getMask()) {
+ SDValue Op = DAG.getUNDEF(VT.getScalarType());
+ if (M >= 0) {
+ int Idx = M % NumElts;
+ SDValue &S = (M < (int)NumElts ? N0 : N1);
+ if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) {
+ Op = S.getOperand(Idx);
+ } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) {
+ if (Idx == 0)
+ Op = S.getOperand(0);
+ } else {
+ // Operand can't be combined - bail out.
+ break;
+ }
+ }
+ Ops.push_back(Op);
+ }
+ if (Ops.size() == VT.getVectorNumElements()) {
+ // BUILD_VECTOR requires all inputs to be of the same type, find the
+ // maximum type and extend them all.
+ EVT SVT = VT.getScalarType();
+ if (SVT.isInteger())
+ for (SDValue &Op : Ops)
+ SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
+ if (SVT != VT.getScalarType())
+ for (SDValue &Op : Ops)
+ Op = TLI.isZExtFree(Op.getValueType(), SVT)
+ ? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT)
+ : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT);
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Ops);
+ }
+ }
+
+ // If this shuffle only has a single input that is a bitcasted shuffle,
+ // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
+ // back to their original types.
+ if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
+ N1.getOpcode() == ISD::UNDEF && Level < AfterLegalizeVectorOps &&
+ TLI.isTypeLegal(VT)) {
+
+ // Peek through the bitcast only if there is one user.
+ SDValue BC0 = N0;
+ while (BC0.getOpcode() == ISD::BITCAST) {
+ if (!BC0.hasOneUse())
+ break;
+ BC0 = BC0.getOperand(0);
+ }
+
+ auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
+ if (Scale == 1)
+ return SmallVector<int, 8>(Mask.begin(), Mask.end());
+
+ SmallVector<int, 8> NewMask;
+ for (int M : Mask)
+ for (int s = 0; s != Scale; ++s)
+ NewMask.push_back(M < 0 ? -1 : Scale * M + s);
+ return NewMask;
+ };
+
+ if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
+ EVT SVT = VT.getScalarType();
+ EVT InnerVT = BC0->getValueType(0);
+ EVT InnerSVT = InnerVT.getScalarType();
+
+ // Determine which shuffle works with the smaller scalar type.
+ EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
+ EVT ScaleSVT = ScaleVT.getScalarType();
+
+ if (TLI.isTypeLegal(ScaleVT) &&
+ 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
+ 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
+
+ int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
+ int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
+
+ // Scale the shuffle masks to the smaller scalar type.
+ ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
+ SmallVector<int, 8> InnerMask =
+ ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
+ SmallVector<int, 8> OuterMask =
+ ScaleShuffleMask(SVN->getMask(), OuterScale);
+
+ // Merge the shuffle masks.
+ SmallVector<int, 8> NewMask;
+ for (int M : OuterMask)
+ NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
+
+ // Test for shuffle mask legality over both commutations.
+ SDValue SV0 = BC0->getOperand(0);
+ SDValue SV1 = BC0->getOperand(1);
+ bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
+ if (!LegalMask) {
+ std::swap(SV0, SV1);
+ ShuffleVectorSDNode::commuteMask(NewMask);
+ LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
+ }
+
+ if (LegalMask) {
+ SV0 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV0);
+ SV1 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV1);
+ return DAG.getNode(
+ ISD::BITCAST, SDLoc(N), VT,
+ DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
+ }
+ }
+ }
+ }
+
// Canonicalize shuffles according to rules:
// shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
// shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
@@ -11543,8 +12747,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
// Don't try to fold shuffles with illegal type.
- if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
- TLI.isTypeLegal(VT)) {
+ // Only fold if this shuffle is the only user of the other shuffle.
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
+ Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
// The incoming shuffle must be of the same type as the result of the
@@ -11624,20 +12829,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// Avoid introducing shuffles with illegal mask.
if (!TLI.isShuffleMaskLegal(Mask, VT)) {
- // Compute the commuted shuffle mask and test again.
- for (unsigned i = 0; i != NumElts; ++i) {
- int idx = Mask[i];
- if (idx < 0)
- continue;
- else if (idx < (int)NumElts)
- Mask[i] = idx + NumElts;
- else
- Mask[i] = idx - NumElts;
- }
+ ShuffleVectorSDNode::commuteMask(Mask);
if (!TLI.isShuffleMaskLegal(Mask, VT))
return SDValue();
-
+
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
@@ -11653,6 +12849,34 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
+ SDValue InVal = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
+ // with a VECTOR_SHUFFLE.
+ if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ SDValue InVec = InVal->getOperand(0);
+ SDValue EltNo = InVal->getOperand(1);
+
+ // FIXME: We could support implicit truncation if the shuffle can be
+ // scaled to a smaller vector scalar type.
+ ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo);
+ if (C0 && VT == InVec.getValueType() &&
+ VT.getScalarType() == InVal.getValueType()) {
+ SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1);
+ int Elt = C0->getZExtValue();
+ NewMask[0] = Elt;
+
+ if (TLI.isShuffleMaskLegal(NewMask, VT))
+ return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT),
+ NewMask);
+ }
+ }
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N2 = N->getOperand(2);
@@ -11680,48 +12904,64 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+
+ // fold (fp_to_fp16 (fp16_to_fp op)) -> op
+ if (N0->getOpcode() == ISD::FP16_TO_FP)
+ return N0->getOperand(0);
+
+ return SDValue();
+}
+
/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
/// vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
EVT VT = N->getValueType(0);
- SDLoc dl(N);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
- if (N->getOpcode() == ISD::AND) {
- if (RHS.getOpcode() == ISD::BITCAST)
- RHS = RHS.getOperand(0);
- if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
- SmallVector<int, 8> Indices;
- unsigned NumElts = RHS.getNumOperands();
- for (unsigned i = 0; i != NumElts; ++i) {
- SDValue Elt = RHS.getOperand(i);
- if (!isa<ConstantSDNode>(Elt))
- return SDValue();
+ SDLoc dl(N);
- if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
- Indices.push_back(i);
- else if (cast<ConstantSDNode>(Elt)->isNullValue())
- Indices.push_back(NumElts+i);
- else
- return SDValue();
- }
+ // Make sure we're not running after operation legalization where it
+ // may have custom lowered the vector shuffles.
+ if (LegalOperations)
+ return SDValue();
- // Let's see if the target supports this vector_shuffle.
- EVT RVT = RHS.getValueType();
- if (!TLI.isVectorClearMaskLegal(Indices, RVT))
- return SDValue();
+ if (N->getOpcode() != ISD::AND)
+ return SDValue();
- // Return the new VECTOR_SHUFFLE node.
- EVT EltVT = RVT.getVectorElementType();
- SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
- DAG.getConstant(0, EltVT));
- SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), RVT, ZeroOps);
- LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
- SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
- return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
+ if (RHS.getOpcode() == ISD::BITCAST)
+ RHS = RHS.getOperand(0);
+
+ if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<int, 8> Indices;
+ unsigned NumElts = RHS.getNumOperands();
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue Elt = RHS.getOperand(i);
+ if (isAllOnesConstant(Elt))
+ Indices.push_back(i);
+ else if (isNullConstant(Elt))
+ Indices.push_back(NumElts+i);
+ else
+ return SDValue();
}
+
+ // Let's see if the target supports this vector_shuffle.
+ EVT RVT = RHS.getValueType();
+ if (!TLI.isVectorClearMaskLegal(Indices, RVT))
+ return SDValue();
+
+ // Return the new VECTOR_SHUFFLE node.
+ EVT EltVT = RVT.getVectorElementType();
+ SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
+ DAG.getConstant(0, dl, EltVT));
+ SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, RVT, ZeroOps);
+ LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
+ SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
}
return SDValue();
@@ -11734,8 +12974,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
- SDValue Shuffle = XformToShuffleWithZero(N);
- if (Shuffle.getNode()) return Shuffle;
+
+ if (SDValue Shuffle = XformToShuffleWithZero(N))
+ return Shuffle;
// If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
// this operation.
@@ -11754,10 +12995,8 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
// Can't fold divide by zero.
if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
N->getOpcode() == ISD::FDIV) {
- if ((RHSOp.getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) ||
- (RHSOp.getOpcode() == ISD::ConstantFP &&
- cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero()))
+ if (isNullConstant(RHSOp) || (RHSOp.getOpcode() == ISD::ConstantFP &&
+ cast<ConstantFPSDNode>(RHSOp.getNode())->isZero()))
break;
}
@@ -11813,38 +13052,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
return SDValue();
}
-/// Visit a binary vector operation, like FABS/FNEG.
-SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) {
- assert(N->getValueType(0).isVector() &&
- "SimplifyVUnaryOp only works on vectors!");
-
- SDValue N0 = N->getOperand(0);
-
- if (N0.getOpcode() != ISD::BUILD_VECTOR)
- return SDValue();
-
- // Operand is a BUILD_VECTOR node, see if we can constant fold it.
- SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
- SDValue Op = N0.getOperand(i);
- if (Op.getOpcode() != ISD::UNDEF &&
- Op.getOpcode() != ISD::ConstantFP)
- break;
- EVT EltVT = Op.getValueType();
- SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(N0), EltVT, Op);
- if (FoldOp.getOpcode() != ISD::UNDEF &&
- FoldOp.getOpcode() != ISD::ConstantFP)
- break;
- Ops.push_back(FoldOp);
- AddToWorklist(FoldOp.getNode());
- }
-
- if (Ops.size() != N0.getNumOperands())
- return SDValue();
-
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N0.getValueType(), Ops);
-}
-
SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0,
SDValue N1, SDValue N2){
assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
@@ -11881,6 +13088,38 @@ SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0,
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
SDValue RHS) {
+ // fold (select (setcc x, -0.0, *lt), NaN, (fsqrt x))
+ // The select + setcc is redundant, because fsqrt returns NaN for X < -0.
+ if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
+ if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
+ // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
+ SDValue Sqrt = RHS;
+ ISD::CondCode CC;
+ SDValue CmpLHS;
+ const ConstantFPSDNode *NegZero = nullptr;
+
+ if (TheSelect->getOpcode() == ISD::SELECT_CC) {
+ CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
+ CmpLHS = TheSelect->getOperand(0);
+ NegZero = isConstOrConstSplatFP(TheSelect->getOperand(1));
+ } else {
+ // SELECT or VSELECT
+ SDValue Cmp = TheSelect->getOperand(0);
+ if (Cmp.getOpcode() == ISD::SETCC) {
+ CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
+ CmpLHS = Cmp.getOperand(0);
+ NegZero = isConstOrConstSplatFP(Cmp.getOperand(1));
+ }
+ }
+ if (NegZero && NegZero->isNegative() && NegZero->isZero() &&
+ Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
+ CC == ISD::SETULT || CC == ISD::SETLT)) {
+ // We have: (select (setcc x, -0.0, *lt), NaN, (fsqrt x))
+ CombineTo(TheSelect, Sqrt);
+ return true;
+ }
+ }
+ }
// Cannot simplify select with vector condition
if (TheSelect->getOperand(0).getValueType().isVector()) return false;
@@ -11902,6 +13141,9 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
if (LHS.getOperand(0) != RHS.getOperand(0) ||
// Do not let this transformation reduce the number of volatile loads.
LLD->isVolatile() || RLD->isVolatile() ||
+ // FIXME: If either is a pre/post inc/dec load,
+ // we'd need to split out the address adjustment.
+ LLD->isIndexed() || RLD->isIndexed() ||
// If this is an EXTLOAD, the VT's must match.
LLD->getMemoryVT() != RLD->getMemoryVT() ||
// If this is an EXTLOAD, the kind of extension must match.
@@ -12003,25 +13245,22 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
EVT VT = N2.getValueType();
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
- ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
// Determine if the condition we're dealing with is constant
SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
N0, N1, CC, DL, false);
if (SCC.getNode()) AddToWorklist(SCC.getNode());
- ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode());
- // fold select_cc true, x, y -> x
- if (SCCC && !SCCC->isNullValue())
- return N2;
- // fold select_cc false, x, y -> y
- if (SCCC && SCCC->isNullValue())
- return N3;
+ if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
+ // fold select_cc true, x, y -> x
+ // fold select_cc false, x, y -> y
+ return !SCCC->isNullValue() ? N2 : N3;
+ }
// Check to see if we can simplify the select into an fabs node
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
// Allow either -0.0 or 0.0
- if (CFP->getValueAPF().isZero()) {
+ if (CFP->isZero()) {
// select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
N0 == N2 && N3.getOpcode() == ISD::FNEG &&
@@ -12069,9 +13308,9 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
// Get the offsets to the 0 and 1 element of the array so that we can
// select between them.
- SDValue Zero = DAG.getIntPtrConstant(0);
+ SDValue Zero = DAG.getIntPtrConstant(0, DL);
unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
- SDValue One = DAG.getIntPtrConstant(EltSize);
+ SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
SDValue Cond = DAG.getSetCC(DL,
getSetCCResultType(N0.getValueType()),
@@ -12086,24 +13325,23 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(), false,
false, false, Alignment);
-
}
}
// Check to see if we can perform the "gzip trick", transforming
// (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A)
- if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT &&
- (N1C->isNullValue() || // (a < 0) ? b : 0
- (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0
+ if (isNullConstant(N3) && CC == ISD::SETLT &&
+ (isNullConstant(N1) || // (a < 0) ? b : 0
+ (isOneConstant(N1) && N0 == N2))) { // (a < 1) ? a : 0
EVT XType = N0.getValueType();
EVT AType = N2.getValueType();
if (XType.bitsGE(AType)) {
// and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a
// single-bit constant.
- if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) {
+ if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
unsigned ShCtV = N2C->getAPIntValue().logBase2();
- ShCtV = XType.getSizeInBits()-ShCtV-1;
- SDValue ShCt = DAG.getConstant(ShCtV,
+ ShCtV = XType.getSizeInBits() - ShCtV - 1;
+ SDValue ShCt = DAG.getConstant(ShCtV, SDLoc(N0),
getShiftAmountTy(N0.getValueType()));
SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0),
XType, N0, ShCt);
@@ -12119,7 +13357,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0),
XType, N0,
- DAG.getConstant(XType.getSizeInBits()-1,
+ DAG.getConstant(XType.getSizeInBits() - 1,
+ SDLoc(N0),
getShiftAmountTy(N0.getValueType())));
AddToWorklist(Shift.getNode());
@@ -12139,23 +13378,21 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
// single bit-test can be materialized as an all-ones register with
// shift-left and shift-right-arith.
if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
- N0->getValueType(0) == VT &&
- N1C && N1C->isNullValue() &&
- N2C && N2C->isNullValue()) {
+ N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
SDValue AndLHS = N0->getOperand(0);
ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
// Shift the tested bit over the sign bit.
APInt AndMask = ConstAndRHS->getAPIntValue();
SDValue ShlAmt =
- DAG.getConstant(AndMask.countLeadingZeros(),
+ DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
getShiftAmountTy(AndLHS.getValueType()));
SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
// Now arithmetic right shift it all the way over, so the result is either
// all-ones, or zero.
SDValue ShrAmt =
- DAG.getConstant(AndMask.getBitWidth()-1,
+ DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
getShiftAmountTy(Shl.getValueType()));
SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
@@ -12164,13 +13401,13 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
}
// fold select C, 16, 0 -> shl C, 4
- if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
+ if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
TLI.getBooleanContents(N0.getValueType()) ==
TargetLowering::ZeroOrOneBooleanContent) {
// If the caller doesn't want us to simplify this into a zext of a compare,
// don't do it.
- if (NotExtCompare && N2C->getAPIntValue() == 1)
+ if (NotExtCompare && N2C->isOne())
return SDValue();
// Get a SetCC of the condition
@@ -12198,13 +13435,13 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
AddToWorklist(SCC.getNode());
AddToWorklist(Temp.getNode());
- if (N2C->getAPIntValue() == 1)
+ if (N2C->isOne())
return Temp;
// shl setcc result by log2 n2c
return DAG.getNode(
ISD::SHL, DL, N2.getValueType(), Temp,
- DAG.getConstant(N2C->getAPIntValue().logBase2(),
+ DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
getShiftAmountTy(Temp.getValueType())));
}
}
@@ -12212,7 +13449,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
// Check to see if this is the equivalent of setcc
// FIXME: Turn all of these into setcc if setcc if setcc is legal
// otherwise, go ahead with the folds.
- if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) {
+ if (0 && isNullConstant(N3) && isOneConstant(N2)) {
EVT XType = N0.getValueType();
if (!LegalOperations ||
TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(XType))) {
@@ -12223,30 +13460,34 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
}
// fold (seteq X, 0) -> (srl (ctlz X, log2(size(X))))
- if (N1C && N1C->isNullValue() && CC == ISD::SETEQ &&
+ if (isNullConstant(N1) && CC == ISD::SETEQ &&
(!LegalOperations ||
TLI.isOperationLegal(ISD::CTLZ, XType))) {
SDValue Ctlz = DAG.getNode(ISD::CTLZ, SDLoc(N0), XType, N0);
return DAG.getNode(ISD::SRL, DL, XType, Ctlz,
DAG.getConstant(Log2_32(XType.getSizeInBits()),
+ SDLoc(Ctlz),
getShiftAmountTy(Ctlz.getValueType())));
}
// fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1))
- if (N1C && N1C->isNullValue() && CC == ISD::SETGT) {
- SDValue NegN0 = DAG.getNode(ISD::SUB, SDLoc(N0),
- XType, DAG.getConstant(0, XType), N0);
- SDValue NotN0 = DAG.getNOT(SDLoc(N0), N0, XType);
+ if (isNullConstant(N1) && CC == ISD::SETGT) {
+ SDLoc DL(N0);
+ SDValue NegN0 = DAG.getNode(ISD::SUB, DL,
+ XType, DAG.getConstant(0, DL, XType), N0);
+ SDValue NotN0 = DAG.getNOT(DL, N0, XType);
return DAG.getNode(ISD::SRL, DL, XType,
DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0),
- DAG.getConstant(XType.getSizeInBits()-1,
+ DAG.getConstant(XType.getSizeInBits() - 1, DL,
getShiftAmountTy(XType)));
}
// fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1))
- if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) {
- SDValue Sign = DAG.getNode(ISD::SRL, SDLoc(N0), XType, N0,
- DAG.getConstant(XType.getSizeInBits()-1,
+ if (isAllOnesConstant(N1) && CC == ISD::SETGT) {
+ SDLoc DL(N0);
+ SDValue Sign = DAG.getNode(ISD::SRL, DL, XType, N0,
+ DAG.getConstant(XType.getSizeInBits() - 1, DL,
getShiftAmountTy(N0.getValueType())));
- return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType));
+ return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, DL,
+ XType));
}
}
@@ -12269,11 +13510,12 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
EVT XType = N0.getValueType();
if (SubC && SubC->isNullValue() && XType.isInteger()) {
- SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), XType,
+ SDLoc DL(N0);
+ SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
N0,
- DAG.getConstant(XType.getSizeInBits()-1,
+ DAG.getConstant(XType.getSizeInBits() - 1, DL,
getShiftAmountTy(N0.getValueType())));
- SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0),
+ SDValue Add = DAG.getNode(ISD::ADD, DL,
XType, N0, Shift);
AddToWorklist(Shift.getNode());
AddToWorklist(Add.getNode());
@@ -12303,7 +13545,7 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) {
return SDValue();
// Avoid division by zero.
- if (!C->getAPIntValue())
+ if (C->isNullValue())
return SDValue();
std::vector<SDNode*> Built;
@@ -12323,7 +13565,7 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
return SDValue();
// Avoid division by zero.
- if (!C->getAPIntValue())
+ if (C->isNullValue())
return SDValue();
std::vector<SDNode *> Built;
@@ -12344,7 +13586,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
return SDValue();
// Avoid division by zero.
- if (!C->getAPIntValue())
+ if (C->isNullValue())
return SDValue();
std::vector<SDNode*> Built;
@@ -12374,7 +13616,7 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) {
// does not require additional intermediate precision]
EVT VT = Op.getValueType();
SDLoc DL(Op);
- SDValue FPOne = DAG.getConstantFP(1.0, VT);
+ SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
AddToWorklist(Est.getNode());
@@ -12409,7 +13651,7 @@ SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est,
unsigned Iterations) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
- SDValue ThreeHalves = DAG.getConstantFP(1.5, VT);
+ SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
// We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
// this entire sequence requires only one FP constant.
@@ -12445,8 +13687,8 @@ SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est,
unsigned Iterations) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
- SDValue MinusThree = DAG.getConstantFP(-3.0, VT);
- SDValue MinusHalf = DAG.getConstantFP(-0.5, VT);
+ SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
+ SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
// Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est)
for (unsigned i = 0; i < Iterations; ++i) {
OpenPOWER on IntegriCloud