diff options
Diffstat (limited to 'contrib/llvm/lib/CodeGen/SelectionDAG')
26 files changed, 7618 insertions, 4452 deletions
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 1df61e4..2c2dc85 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -246,10 +246,11 @@ namespace { SDValue visitSDIVREM(SDNode *N); SDValue visitUDIVREM(SDNode *N); SDValue visitAND(SDNode *N); + SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference); SDValue visitOR(SDNode *N); + SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference); SDValue visitXOR(SDNode *N); SDValue SimplifyVBinOp(SDNode *N); - SDValue SimplifyVUnaryOp(SDNode *N); SDValue visitSHL(SDNode *N); SDValue visitSRA(SDNode *N); SDValue visitSRL(SDNode *N); @@ -267,6 +268,7 @@ namespace { SDValue visitZERO_EXTEND(SDNode *N); SDValue visitANY_EXTEND(SDNode *N); SDValue visitSIGN_EXTEND_INREG(SDNode *N); + SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N); SDValue visitTRUNCATE(SDNode *N); SDValue visitBITCAST(SDNode *N); SDValue visitBUILD_PAIR(SDNode *N); @@ -302,9 +304,16 @@ namespace { SDValue visitCONCAT_VECTORS(SDNode *N); SDValue visitEXTRACT_SUBVECTOR(SDNode *N); SDValue visitVECTOR_SHUFFLE(SDNode *N); + SDValue visitSCALAR_TO_VECTOR(SDNode *N); SDValue visitINSERT_SUBVECTOR(SDNode *N); SDValue visitMLOAD(SDNode *N); SDValue visitMSTORE(SDNode *N); + SDValue visitMGATHER(SDNode *N); + SDValue visitMSCATTER(SDNode *N); + SDValue visitFP_TO_FP16(SDNode *N); + + SDValue visitFADDForFMACombine(SDNode *N); + SDValue visitFSUBForFMACombine(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS); @@ -327,6 +336,7 @@ namespace { SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); + SDValue CombineExtLoad(SDNode *N); SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); SDValue BuildSDIVPow2(SDNode *N); @@ -363,6 +373,28 @@ namespace { /// chain (aliasing node.) SDValue FindBetterChain(SDNode *N, SDValue Chain); + /// Holds a pointer to an LSBaseSDNode as well as information on where it + /// is located in a sequence of memory operations connected by a chain. + struct MemOpLink { + MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq): + MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { } + // Ptr to the mem node. + LSBaseSDNode *MemNode; + // Offset from the base ptr. + int64_t OffsetFromBase; + // What is the sequence number of this mem node. + // Lowest mem operand in the DAG starts at zero. + unsigned SequenceNum; + }; + + /// This is a helper function for MergeConsecutiveStores. When the source + /// elements of the consecutive stores are all constants or all extracted + /// vector elements, try to merge them into one larger store. + /// \return True if a merged store was created. + bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes, + EVT MemVT, unsigned NumElem, + bool IsConstantSrc, bool UseVector); + /// Merge consecutive store operations into a wide store. /// This optimization uses wide integers or vectors when possible. /// \return True if some memory operations were changed. @@ -380,12 +412,9 @@ namespace { DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) { - AttributeSet FnAttrs = - DAG.getMachineFunction().getFunction()->getAttributes(); - ForCodeSize = - FnAttrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::OptimizeForSize) || - FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); + auto *F = DAG.getMachineFunction().getFunction(); + ForCodeSize = F->hasFnAttribute(Attribute::OptimizeForSize) || + F->hasFnAttribute(Attribute::MinSize); } /// Runs the dag combiner on all nodes in the work list @@ -446,7 +475,7 @@ void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) { } SDValue TargetLowering::DAGCombinerInfo:: -CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) { +CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) { return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); } @@ -566,7 +595,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, case ISD::ConstantFP: { APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); V.changeSign(); - return DAG.getConstantFP(V, Op.getValueType()); + return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType()); } case ISD::FADD: // FIXME: determine better conditions for this xform. @@ -683,13 +712,23 @@ static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { EltVT.getSizeInBits() >= SplatBitSize); } -// \brief Returns the SDNode if it is a constant BuildVector or constant. -static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) { +// \brief Returns the SDNode if it is a constant integer BuildVector +// or constant integer. +static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) { if (isa<ConstantSDNode>(N)) return N.getNode(); - BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N); - if (BV && BV->isConstant()) - return BV; + if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) + return N.getNode(); + return nullptr; +} + +// \brief Returns the SDNode if it is a constant float BuildVector +// or constant float. +static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { + if (isa<ConstantFPSDNode>(N)) + return N.getNode(); + if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode())) + return N.getNode(); return nullptr; } @@ -735,10 +774,10 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, SDValue N0, SDValue N1) { EVT VT = N0.getValueType(); if (N0.getOpcode() == Opc) { - if (SDNode *L = isConstantBuildVectorOrConstantInt(N0.getOperand(1))) { - if (SDNode *R = isConstantBuildVectorOrConstantInt(N1)) { + if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { + if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1)) { // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) - if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R)) + if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R)) return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); return SDValue(); } @@ -755,10 +794,10 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, } if (N1.getOpcode() == Opc) { - if (SDNode *R = isConstantBuildVectorOrConstantInt(N1.getOperand(1))) { - if (SDNode *L = isConstantBuildVectorOrConstantInt(N0)) { + if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) { + if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0)) { // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) - if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L)) + if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L)) return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); return SDValue(); } @@ -1309,6 +1348,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); case ISD::ANY_EXTEND: return visitANY_EXTEND(N); case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); + case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N); case ISD::TRUNCATE: return visitTRUNCATE(N); case ISD::BITCAST: return visitBITCAST(N); case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); @@ -1344,9 +1384,13 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); + case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N); case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N); + case ISD::MGATHER: return visitMGATHER(N); case ISD::MLOAD: return visitMLOAD(N); + case ISD::MSCATTER: return visitMSCATTER(N); case ISD::MSTORE: return visitMSTORE(N); + case ISD::FP_TO_FP16: return visitFP_TO_FP16(N); } return SDValue(); } @@ -1412,9 +1456,10 @@ SDValue DAGCombiner::combine(SDNode *N) { SDNode *CSENode; if (const BinaryWithFlagsSDNode *BinNode = dyn_cast<BinaryWithFlagsSDNode>(N)) { - CSENode = DAG.getNodeIfExists( - N->getOpcode(), N->getVTList(), Ops, BinNode->hasNoUnsignedWrap(), - BinNode->hasNoSignedWrap(), BinNode->isExact()); + CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops, + BinNode->Flags.hasNoUnsignedWrap(), + BinNode->Flags.hasNoSignedWrap(), + BinNode->Flags.hasExact()); } else { CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops); } @@ -1471,7 +1516,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { switch (Op.getOpcode()) { case ISD::EntryToken: // Entry tokens don't need to be added to the list. They are - // rededundant. + // redundant. Changed = true; break; @@ -1500,7 +1545,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { SDValue Result; - // If we've change things around then replace token factor. + // If we've changed things around then replace token factor. if (Changed) { if (Ops.empty()) { // The entry token is the only possible outcome. @@ -1510,8 +1555,11 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops); } - // Don't add users to work list. - return CombineTo(N, Result, false); + // Add users to worklist if AA is enabled, since it may introduce + // a lot of new chained token factors while removing memory deps. + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA + : DAG.getSubtarget().useAA(); + return CombineTo(N, Result, UseAA /*add to worklist*/); } return Result; @@ -1534,17 +1582,37 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } +static bool isNullConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); + return Const != nullptr && Const->isNullValue(); +} + +static bool isAllOnesConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); + return Const != nullptr && Const->isAllOnesValue(); +} + +static bool isOneConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); + return Const != nullptr && Const->isOne(); +} + +/// If \p N is a ContantSDNode with isOpaque() == false return it casted to a +/// ContantSDNode pointer else nullptr. +static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N); + return Const != nullptr && !Const->isOpaque() ? Const : nullptr; +} + SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (add x, 0) -> x, vector edition if (ISD::isBuildVectorAllZeros(N1.getNode())) @@ -1559,13 +1627,16 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (N1.getOpcode() == ISD::UNDEF) return N1; // fold (add c1, c2) -> c1+c2 + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C); + return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if (N0C && !N1C) + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0); // fold (add x, 0) -> x - if (N1C && N1C->isNullValue()) + if (isNullConstant(N1)) return N0; // fold (add Sym, c) -> Sym+c if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) @@ -1576,22 +1647,21 @@ SDValue DAGCombiner::visitADD(SDNode *N) { (uint64_t)N1C->getSExtValue()); // fold ((c1-A)+c2) -> (c1+c2)-A if (N1C && N0.getOpcode() == ISD::SUB) - if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0))) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, + if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) { + SDLoc DL(N); + return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(N1C->getAPIntValue()+ - N0C->getAPIntValue(), VT), + N0C->getAPIntValue(), DL, VT), N0.getOperand(1)); + } // reassociate add - SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1); - if (RADD.getNode()) + if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1)) return RADD; // fold ((0-A) + B) -> B-A - if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) && - cast<ConstantSDNode>(N0.getOperand(0))->isNullValue()) + if (N0.getOpcode() == ISD::SUB && isNullConstant(N0.getOperand(0))) return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1)); // fold (A + (0-B)) -> A-B - if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) && - cast<ConstantSDNode>(N1.getOperand(0))->isNullValue()) + if (N1.getOpcode() == ISD::SUB && isNullConstant(N1.getOperand(0))) return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1)); // fold (A+(B-A)) -> B if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1)) @@ -1651,34 +1721,27 @@ SDValue DAGCombiner::visitADD(SDNode *N) { } // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) - if (N1.getOpcode() == ISD::SHL && - N1.getOperand(0).getOpcode() == ISD::SUB) - if (ConstantSDNode *C = - dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0))) - if (C->getAPIntValue() == 0) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, - DAG.getNode(ISD::SHL, SDLoc(N), VT, - N1.getOperand(0).getOperand(1), - N1.getOperand(1))); - if (N0.getOpcode() == ISD::SHL && - N0.getOperand(0).getOpcode() == ISD::SUB) - if (ConstantSDNode *C = - dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0))) - if (C->getAPIntValue() == 0) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, - DAG.getNode(ISD::SHL, SDLoc(N), VT, - N0.getOperand(0).getOperand(1), - N0.getOperand(1))); + if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB && + isNullConstant(N1.getOperand(0).getOperand(0))) + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, + DAG.getNode(ISD::SHL, SDLoc(N), VT, + N1.getOperand(0).getOperand(1), + N1.getOperand(1))); + if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB && + isNullConstant(N0.getOperand(0).getOperand(0))) + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, + DAG.getNode(ISD::SHL, SDLoc(N), VT, + N0.getOperand(0).getOperand(1), + N0.getOperand(1))); if (N1.getOpcode() == ISD::AND) { SDValue AndOp0 = N1.getOperand(0); - ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1)); unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0); unsigned DestBits = VT.getScalarType().getSizeInBits(); // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) // and similar xforms where the inner op is either ~0 or 0. - if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) { + if (NumSignBits == DestBits && isOneConstant(N1->getOperand(1))) { SDLoc DL(N); return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0); } @@ -1699,7 +1762,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (TN->getVT() == MVT::i1) { SDLoc DL(N); SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), - DAG.getConstant(1, VT)); + DAG.getConstant(1, DL, VT)); return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt); } } @@ -1710,8 +1773,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) { SDValue DAGCombiner::visitADDC(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); // If the flag result is dead, turn this into an ADD. @@ -1721,11 +1782,13 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { SDLoc(N), MVT::Glue)); // canonicalize constant to RHS. + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (N0C && !N1C) return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0); // fold (addc x, 0) -> x + no carry out - if (N1C && N1C->isNullValue()) + if (isNullConstant(N1)) return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); @@ -1752,10 +1815,10 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue CarryIn = N->getOperand(2); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); // canonicalize constant to RHS + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (N0C && !N1C) return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(), N1, N0, CarryIn); @@ -1773,25 +1836,21 @@ static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations, bool LegalTypes) { if (!VT.isVector()) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, DL, VT); if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, DL, VT); return SDValue(); } SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); - ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr : - dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode()); EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (sub x, 0) -> x, vector edition if (ISD::isBuildVectorAllZeros(N1.getNode())) @@ -1803,14 +1862,18 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N0 == N1) return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); // fold (sub c1, c2) -> c1-c2 + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C); + return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, N0C, N1C); // fold (sub x, c) -> (add x, -c) - if (N1C) - return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, - DAG.getConstant(-N1C->getAPIntValue(), VT)); + if (N1C) { + SDLoc DL(N); + return DAG.getNode(ISD::ADD, DL, VT, N0, + DAG.getConstant(-N1C->getAPIntValue(), DL, VT)); + } // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) - if (N0C && N0C->isAllOnesValue()) + if (isAllOnesConstant(N0)) return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); // fold A-(A-B) -> B if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0)) @@ -1822,10 +1885,13 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) return N0.getOperand(0); // fold C2-(A+C1) -> (C2-C1)-A + ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr : + dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode()); if (N1.getOpcode() == ISD::ADD && N0C && N1C1) { + SDLoc DL(N); SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(), - VT); - return DAG.getNode(ISD::SUB, SDLoc(N), VT, NewC, + DL, VT); + return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0)); } // fold ((A+(B+or-C))-B) -> A+or-C @@ -1866,7 +1932,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1)) if (GA->getGlobal() == GB->getGlobal()) return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(), - VT); + SDLoc(N), VT); } // sub X, (sextinreg Y i1) -> add X, (and Y 1) @@ -1875,7 +1941,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (TN->getVT() == MVT::i1) { SDLoc DL(N); SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), - DAG.getConstant(1, VT)); + DAG.getConstant(1, DL, VT)); return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt); } } @@ -1886,8 +1952,6 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue DAGCombiner::visitSUBC(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); // If the flag result is dead, turn this into an SUB. @@ -1897,18 +1961,20 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) { MVT::Glue)); // fold (subc x, x) -> 0 + no borrow - if (N0 == N1) - return CombineTo(N, DAG.getConstant(0, VT), - DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), + if (N0 == N1) { + SDLoc DL(N); + return CombineTo(N, DAG.getConstant(0, DL, VT), + DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); + } // fold (subc x, 0) -> x + no borrow - if (N1C && N1C->isNullValue()) + if (isNullConstant(N1)) return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow - if (N0C && N0C->isAllOnesValue()) + if (isAllOnesConstant(N0)) return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0), DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); @@ -1935,33 +2001,41 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // fold (mul x, undef) -> 0 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); bool N0IsConst = false; bool N1IsConst = false; + bool N1IsOpaqueConst = false; + bool N0IsOpaqueConst = false; APInt ConstValue0, ConstValue1; // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0); N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1); } else { - N0IsConst = dyn_cast<ConstantSDNode>(N0) != nullptr; - ConstValue0 = N0IsConst ? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue() - : APInt(); - N1IsConst = dyn_cast<ConstantSDNode>(N1) != nullptr; - ConstValue1 = N1IsConst ? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue() - : APInt(); + N0IsConst = isa<ConstantSDNode>(N0); + if (N0IsConst) { + ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue(); + N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque(); + } + N1IsConst = isa<ConstantSDNode>(N1); + if (N1IsConst) { + ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue(); + N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque(); + } } // fold (mul c1, c2) -> c1*c2 - if (N0IsConst && N1IsConst) - return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0.getNode(), N1.getNode()); + if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst) + return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, + N0.getNode(), N1.getNode()); - // canonicalize constant to RHS - if (N0IsConst && !N1IsConst) + // canonicalize constant to RHS (vector doesn't have to splat) + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); // fold (mul x, 0) -> 0 if (N1IsConst && ConstValue1 == 0) @@ -1974,23 +2048,30 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { if (N1IsConst && ConstValue1 == 1 && IsFullSplat) return N0; // fold (mul x, -1) -> 0-x - if (N1IsConst && ConstValue1.isAllOnesValue()) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, - DAG.getConstant(0, VT), N0); + if (N1IsConst && ConstValue1.isAllOnesValue()) { + SDLoc DL(N); + return DAG.getNode(ISD::SUB, DL, VT, + DAG.getConstant(0, DL, VT), N0); + } // fold (mul x, (1 << c)) -> x << c - if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat) - return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, - DAG.getConstant(ConstValue1.logBase2(), + if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() && + IsFullSplat) { + SDLoc DL(N); + return DAG.getNode(ISD::SHL, DL, VT, N0, + DAG.getConstant(ConstValue1.logBase2(), DL, getShiftAmountTy(N0.getValueType()))); + } // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c - if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) { + if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() && + IsFullSplat) { unsigned Log2Val = (-ConstValue1).logBase2(); + SDLoc DL(N); // FIXME: If the input is something that is easily negated (e.g. a // single-use add), we should put the negate there. - return DAG.getNode(ISD::SUB, SDLoc(N), VT, - DAG.getConstant(0, VT), - DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, - DAG.getConstant(Log2Val, + return DAG.getNode(ISD::SUB, DL, VT, + DAG.getConstant(0, DL, VT), + DAG.getNode(ISD::SHL, DL, VT, N0, + DAG.getConstant(Log2Val, DL, getShiftAmountTy(N0.getValueType())))); } @@ -2041,8 +2122,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { N0.getOperand(1), N1)); // reassociate mul - SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1); - if (RMUL.getNode()) + if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1)) return RMUL; return SDValue(); @@ -2051,26 +2131,27 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = isConstOrConstSplat(N0); - ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold vector ops - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (sdiv c1, c2) -> c1/c2 - if (N0C && N1C && !N1C->isNullValue()) - return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C); + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); + if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::SDIV, SDLoc(N), VT, N0C, N1C); // fold (sdiv X, 1) -> X - if (N1C && N1C->getAPIntValue() == 1LL) + if (N1C && N1C->isOne()) return N0; // fold (sdiv X, -1) -> 0-X - if (N1C && N1C->isAllOnesValue()) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, - DAG.getConstant(0, VT), N0); + if (N1C && N1C->isAllOnesValue()) { + SDLoc DL(N); + return DAG.getNode(ISD::SUB, DL, VT, + DAG.getConstant(0, DL, VT), N0); + } // If we know the sign bits of both operands are zero, strength reduce to a // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 if (!VT.isVector()) { @@ -2080,8 +2161,9 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { } // fold (sdiv X, pow2) -> simple ops after legalize - if (N1C && !N1C->isNullValue() && (N1C->getAPIntValue().isPowerOf2() || - (-N1C->getAPIntValue()).isPowerOf2())) { + if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && + (N1C->getAPIntValue().isPowerOf2() || + (-N1C->getAPIntValue()).isPowerOf2())) { // If dividing by powers of two is cheap, then don't perform the following // fold. if (TLI.isPow2SDivCheap()) @@ -2093,24 +2175,26 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return Res; unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); + SDLoc DL(N); // Splat the sign bit into the register SDValue SGN = - DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, - DAG.getConstant(VT.getScalarSizeInBits() - 1, + DAG.getNode(ISD::SRA, DL, VT, N0, + DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, getShiftAmountTy(N0.getValueType()))); AddToWorklist(SGN.getNode()); // Add (N0 < 0) ? abs2 - 1 : 0; SDValue SRL = - DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN, - DAG.getConstant(VT.getScalarSizeInBits() - lg2, + DAG.getNode(ISD::SRL, DL, VT, SGN, + DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL, getShiftAmountTy(SGN.getValueType()))); - SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL); + SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL); AddToWorklist(SRL.getNode()); AddToWorklist(ADD.getNode()); // Divide by pow2 - SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD, - DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType()))); + SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD, + DAG.getConstant(lg2, DL, + getShiftAmountTy(ADD.getValueType()))); // If we're dividing by a positive value, we're done. Otherwise, we must // negate the result. @@ -2118,10 +2202,10 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return SRA; AddToWorklist(SRA.getNode()); - return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA); + return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA); } - // if integer divide is expensive and we satisfy the requirements, emit an + // If integer divide is expensive and we satisfy the requirements, emit an // alternate sequence. if (N1C && !TLI.isIntDivCheap()) { SDValue Op = BuildSDIV(N); @@ -2130,7 +2214,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // undef / X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // X / undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2141,36 +2225,40 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue DAGCombiner::visitUDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = isConstOrConstSplat(N0); - ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold vector ops - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (udiv c1, c2) -> c1/c2 - if (N0C && N1C && !N1C->isNullValue()) - return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C); + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); + if (N0C && N1C) + if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, SDLoc(N), VT, + N0C, N1C)) + return Folded; // fold (udiv x, (1 << c)) -> x >>u c - if (N1C && N1C->getAPIntValue().isPowerOf2()) - return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, - DAG.getConstant(N1C->getAPIntValue().logBase2(), + if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2()) { + SDLoc DL(N); + return DAG.getNode(ISD::SRL, DL, VT, N0, + DAG.getConstant(N1C->getAPIntValue().logBase2(), DL, getShiftAmountTy(N0.getValueType()))); + } // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 if (N1.getOpcode() == ISD::SHL) { - if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { + if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { if (SHC->getAPIntValue().isPowerOf2()) { EVT ADDVT = N1.getOperand(1).getValueType(); - SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N), ADDVT, + SDLoc DL(N); + SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), DAG.getConstant(SHC->getAPIntValue() .logBase2(), - ADDVT)); + DL, ADDVT)); AddToWorklist(Add.getNode()); - return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add); + return DAG.getNode(ISD::SRL, DL, VT, N0, Add); } } } @@ -2182,7 +2270,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { // undef / X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // X / undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2193,13 +2281,15 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { SDValue DAGCombiner::visitSREM(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = isConstOrConstSplat(N0); - ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold (srem c1, c2) -> c1%c2 - if (N0C && N1C && !N1C->isNullValue()) - return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C); + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); + if (N0C && N1C) + if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::SREM, SDLoc(N), VT, + N0C, N1C)) + return Folded; // If we know the sign bits of both operands are zero, strength reduce to a // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 if (!VT.isVector()) { @@ -2224,7 +2314,7 @@ SDValue DAGCombiner::visitSREM(SDNode *N) { // undef % X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // X % undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2235,27 +2325,33 @@ SDValue DAGCombiner::visitSREM(SDNode *N) { SDValue DAGCombiner::visitUREM(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = isConstOrConstSplat(N0); - ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold (urem c1, c2) -> c1%c2 - if (N0C && N1C && !N1C->isNullValue()) - return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C); + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); + if (N0C && N1C) + if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UREM, SDLoc(N), VT, + N0C, N1C)) + return Folded; // fold (urem x, pow2) -> (and x, pow2-1) - if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2()) - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, - DAG.getConstant(N1C->getAPIntValue()-1,VT)); + if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && + N1C->getAPIntValue().isPowerOf2()) { + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, N0, + DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT)); + } // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) if (N1.getOpcode() == ISD::SHL) { - if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { + if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { if (SHC->getAPIntValue().isPowerOf2()) { + SDLoc DL(N); SDValue Add = - DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, - DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), + DAG.getNode(ISD::ADD, DL, VT, N1, + DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT)); AddToWorklist(Add.getNode()); - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add); + return DAG.getNode(ISD::AND, DL, VT, N0, Add); } } } @@ -2277,7 +2373,7 @@ SDValue DAGCombiner::visitUREM(SDNode *N) { // undef % X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // X % undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2288,21 +2384,23 @@ SDValue DAGCombiner::visitUREM(SDNode *N) { SDValue DAGCombiner::visitMULHS(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N->getValueType(0); SDLoc DL(N); // fold (mulhs x, 0) -> 0 - if (N1C && N1C->isNullValue()) + if (isNullConstant(N1)) return N1; // fold (mulhs x, 1) -> (sra x, size(x)-1) - if (N1C && N1C->getAPIntValue() == 1) - return DAG.getNode(ISD::SRA, SDLoc(N), N0.getValueType(), N0, + if (isOneConstant(N1)) { + SDLoc DL(N); + return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0, DAG.getConstant(N0.getValueType().getSizeInBits() - 1, + DL, getShiftAmountTy(N0.getValueType()))); + } // fold (mulhs x, undef) -> 0 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // If the type twice as wide is legal, transform the mulhs to a wider multiply // plus a shift. @@ -2315,7 +2413,8 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1); N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, - DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType()))); + DAG.getConstant(SimpleSize, DL, + getShiftAmountTy(N1.getValueType()))); return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); } } @@ -2326,19 +2425,18 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { SDValue DAGCombiner::visitMULHU(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N->getValueType(0); SDLoc DL(N); // fold (mulhu x, 0) -> 0 - if (N1C && N1C->isNullValue()) + if (isNullConstant(N1)) return N1; // fold (mulhu x, 1) -> 0 - if (N1C && N1C->getAPIntValue() == 1) - return DAG.getConstant(0, N0.getValueType()); + if (isOneConstant(N1)) + return DAG.getConstant(0, DL, N0.getValueType()); // fold (mulhu x, undef) -> 0 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, DL, VT); // If the type twice as wide is legal, transform the mulhu to a wider multiply // plus a shift. @@ -2351,7 +2449,8 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, - DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType()))); + DAG.getConstant(SimpleSize, DL, + getShiftAmountTy(N1.getValueType()))); return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); } } @@ -2417,8 +2516,8 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); - // If the type twice as wide is legal, transform the mulhu to a wider multiply - // plus a shift. + // If the type is twice as wide is legal, transform the mulhu to a wider + // multiply plus a shift. if (VT.isSimple() && !VT.isVector()) { MVT Simple = VT.getSimpleVT(); unsigned SimpleSize = Simple.getSizeInBits(); @@ -2429,7 +2528,8 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); // Compute the high part as N1. Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, - DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); + DAG.getConstant(SimpleSize, DL, + getShiftAmountTy(Lo.getValueType()))); Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); // Compute the low part as N0. Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); @@ -2447,8 +2547,8 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); - // If the type twice as wide is legal, transform the mulhu to a wider multiply - // plus a shift. + // If the type is twice as wide is legal, transform the mulhu to a wider + // multiply plus a shift. if (VT.isSimple() && !VT.isVector()) { MVT Simple = VT.getSimpleVT(); unsigned SimpleSize = Simple.getSizeInBits(); @@ -2459,7 +2559,8 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); // Compute the high part as N1. Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, - DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); + DAG.getConstant(SimpleSize, DL, + getShiftAmountTy(Lo.getValueType()))); Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); // Compute the low part as N0. Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); @@ -2615,7 +2716,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // build vector of all zeros that might be illegal at this stage. if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { if (!LegalTypes) - ShOp = DAG.getConstant(0, VT); + ShOp = DAG.getConstant(0, SDLoc(N), VT); else ShOp = SDValue(); } @@ -2636,7 +2737,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { ShOp = N0->getOperand(0); if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { if (!LegalTypes) - ShOp = DAG.getConstant(0, VT); + ShOp = DAG.getConstant(0, SDLoc(N), VT); else ShOp = SDValue(); } @@ -2657,19 +2758,122 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { return SDValue(); } +/// This contains all DAGCombine rules which reduce two values combined by +/// an And operation to a single value. This makes them reusable in the context +/// of visitSELECT(). Rules involving constants are not included as +/// visitSELECT() already handles those cases. +SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, + SDNode *LocReference) { + EVT VT = N1.getValueType(); + + // fold (and x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, SDLoc(LocReference), VT); + // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) + SDValue LL, LR, RL, RR, CC0, CC1; + if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ + ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); + ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); + + if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && + LL.getValueType().isInteger()) { + // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) + if (isNullConstant(LR) && Op1 == ISD::SETEQ) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } + if (isAllOnesConstant(LR)) { + // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) + if (Op1 == ISD::SETEQ) { + SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ANDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); + } + // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) + if (Op1 == ISD::SETGT) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } + } + } + // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2) + if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) && + Op0 == Op1 && LL.getValueType().isInteger() && + Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) || + (isAllOnesConstant(LR) && isNullConstant(RR)))) { + SDLoc DL(N0); + SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(), + LL, DAG.getConstant(1, DL, + LL.getValueType())); + AddToWorklist(ADDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode, + DAG.getConstant(2, DL, LL.getValueType()), + ISD::SETUGE); + } + // canonicalize equivalent to ll == rl + if (LL == RR && LR == RL) { + Op1 = ISD::getSetCCSwappedOperands(Op1); + std::swap(RL, RR); + } + if (LL == RL && LR == RR) { + bool isInteger = LL.getValueType().isInteger(); + ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); + if (Result != ISD::SETCC_INVALID && + (!LegalOperations || + (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && + TLI.isOperationLegal(ISD::SETCC, + getSetCCResultType(N0.getSimpleValueType()))))) + return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), + LL, LR, Result); + } + } + + if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && + VT.getSizeInBits() <= 64) { + if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + APInt ADDC = ADDI->getAPIntValue(); + if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { + // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal + // immediate for an add, but it is legal if its top c2 bits are set, + // transform the ADD so the immediate doesn't need to be materialized + // in a register. + if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { + APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), + SRLI->getZExtValue()); + if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { + ADDC |= Mask; + if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { + SDLoc DL(N0); + SDValue NewAdd = + DAG.getNode(ISD::ADD, DL, VT, + N0.getOperand(0), DAG.getConstant(ADDC, DL, VT)); + CombineTo(N0.getNode(), NewAdd); + // Return N so it doesn't get rechecked! + return SDValue(LocReference, 0); + } + } + } + } + } + } + + return SDValue(); +} + SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - SDValue LL, LR, RL, RR, CC0, CC1; - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N1.getValueType(); - unsigned BitWidth = VT.getScalarType().getSizeInBits(); // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (and x, 0) -> 0, vector edition if (ISD::isBuildVectorAllZeros(N0.getNode())) @@ -2677,13 +2881,13 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return DAG.getConstant( APInt::getNullValue( N0.getValueType().getScalarType().getSizeInBits()), - N0.getValueType()); + SDLoc(N), N0.getValueType()); if (ISD::isBuildVectorAllZeros(N1.getNode())) // do not return N1, because undef node may exist in N1 return DAG.getConstant( APInt::getNullValue( N1.getValueType().getScalarType().getSizeInBits()), - N1.getValueType()); + SDLoc(N), N1.getValueType()); // fold (and x, -1) -> x, vector edition if (ISD::isBuildVectorAllOnes(N0.getNode())) @@ -2692,25 +2896,25 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return N0; } - // fold (and x, undef) -> 0 - if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); // fold (and c1, c2) -> c1&c2 - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C); + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + if (N0C && N1C && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if (N0C && !N1C) + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); // fold (and x, -1) -> x - if (N1C && N1C->isAllOnesValue()) + if (isAllOnesConstant(N1)) return N0; // if (and x, c) is known to be zero, return 0 + unsigned BitWidth = VT.getScalarType().getSizeInBits(); if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnesValue(BitWidth))) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // reassociate and - SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1); - if (RAND.getNode()) + if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1)) return RAND; // fold (and (or x, C), D) -> D if (C & D) == D if (N1C && N0.getOpcode() == ISD::OR) @@ -2840,117 +3044,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } } - // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) - if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ - ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); - ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); - - if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && - LL.getValueType().isInteger()) { - // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) - if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) { - SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), - LR.getValueType(), LL, RL); - AddToWorklist(ORNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); - } - // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) - if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { - SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), - LR.getValueType(), LL, RL); - AddToWorklist(ANDNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); - } - // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) - if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { - SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), - LR.getValueType(), LL, RL); - AddToWorklist(ORNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); - } - } - // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2) - if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) && - Op0 == Op1 && LL.getValueType().isInteger() && - Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() && - cast<ConstantSDNode>(RR)->isAllOnesValue()) || - (cast<ConstantSDNode>(LR)->isAllOnesValue() && - cast<ConstantSDNode>(RR)->isNullValue()))) { - SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(), - LL, DAG.getConstant(1, LL.getValueType())); - AddToWorklist(ADDNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ADDNode, - DAG.getConstant(2, LL.getValueType()), ISD::SETUGE); - } - // canonicalize equivalent to ll == rl - if (LL == RR && LR == RL) { - Op1 = ISD::getSetCCSwappedOperands(Op1); - std::swap(RL, RR); - } - if (LL == RL && LR == RR) { - bool isInteger = LL.getValueType().isInteger(); - ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); - if (Result != ISD::SETCC_INVALID && - (!LegalOperations || - (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && - TLI.isOperationLegal(ISD::SETCC, - getSetCCResultType(N0.getSimpleValueType()))))) - return DAG.getSetCC(SDLoc(N), N0.getValueType(), - LL, LR, Result); - } - } - - // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) - if (N0.getOpcode() == N1.getOpcode()) { - SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); - if (Tmp.getNode()) return Tmp; - } - - // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) - // fold (and (sra)) -> (and (srl)) when possible. - if (!VT.isVector() && - SimplifyDemandedBits(SDValue(N, 0))) - return SDValue(N, 0); - - // fold (zext_inreg (extload x)) -> (zextload x) - if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { - LoadSDNode *LN0 = cast<LoadSDNode>(N0); - EVT MemVT = LN0->getMemoryVT(); - // If we zero all the possible extended bits, then we can turn this into - // a zextload if we are running before legalize or the operation is legal. - unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); - if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - MemVT.getScalarType().getSizeInBits())) && - ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, - LN0->getChain(), LN0->getBasePtr(), - MemVT, LN0->getMemOperand()); - AddToWorklist(N); - CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - } - // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use - if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && - N0.hasOneUse()) { - LoadSDNode *LN0 = cast<LoadSDNode>(N0); - EVT MemVT = LN0->getMemoryVT(); - // If we zero all the possible extended bits, then we can turn this into - // a zextload if we are running before legalize or the operation is legal. - unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); - if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - MemVT.getScalarType().getSizeInBits())) && - ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, - LN0->getChain(), LN0->getBasePtr(), - MemVT, LN0->getMemOperand()); - AddToWorklist(N); - CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - } // fold (and (load x), 255) -> (zextload x, i8) // fold (and (extload x, i16), 255) -> (zextload x, i8) @@ -3001,8 +3094,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { unsigned LVTStoreBytes = LoadedVT.getStoreSize(); unsigned EVTStoreBytes = ExtVT.getStoreSize(); unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; - NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), PtrType, - NewPtr, DAG.getConstant(PtrOff, PtrType)); + SDLoc DL(LN0); + NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, + NewPtr, DAG.getConstant(PtrOff, DL, PtrType)); Alignment = MinAlign(Alignment, PtrOff); } @@ -3022,33 +3116,60 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } } - if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && - VT.getSizeInBits() <= 64) { - if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - APInt ADDC = ADDI->getAPIntValue(); - if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { - // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal - // immediate for an add, but it is legal if its top c2 bits are set, - // transform the ADD so the immediate doesn't need to be materialized - // in a register. - if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { - APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), - SRLI->getZExtValue()); - if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { - ADDC |= Mask; - if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { - SDValue NewAdd = - DAG.getNode(ISD::ADD, SDLoc(N0), VT, - N0.getOperand(0), DAG.getConstant(ADDC, VT)); - CombineTo(N0.getNode(), NewAdd); - return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - } - } - } - } + if (SDValue Combined = visitANDLike(N0, N1, N)) + return Combined; + + // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) + if (N0.getOpcode() == N1.getOpcode()) { + SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); + if (Tmp.getNode()) return Tmp; } + // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) + // fold (and (sra)) -> (and (srl)) when possible. + if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + // fold (zext_inreg (extload x)) -> (zextload x) + if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT MemVT = LN0->getMemoryVT(); + // If we zero all the possible extended bits, then we can turn this into + // a zextload if we are running before legalize or the operation is legal. + unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); + if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, + BitWidth - MemVT.getScalarType().getSizeInBits())) && + ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, + LN0->getChain(), LN0->getBasePtr(), + MemVT, LN0->getMemOperand()); + AddToWorklist(N); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use + if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT MemVT = LN0->getMemoryVT(); + // If we zero all the possible extended bits, then we can turn this into + // a zextload if we are running before legalize or the operation is legal. + unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); + if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, + BitWidth - MemVT.getScalarType().getSizeInBits())) && + ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, + LN0->getChain(), LN0->getBasePtr(), + MemVT, LN0->getMemOperand()); + AddToWorklist(N); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const) if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) { SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), @@ -3159,9 +3280,12 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, } SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00); - if (OpSizeInBits > 16) - Res = DAG.getNode(ISD::SRL, SDLoc(N), VT, Res, - DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT))); + if (OpSizeInBits > 16) { + SDLoc DL(N); + Res = DAG.getNode(ISD::SRL, DL, VT, Res, + DAG.getConstant(OpSizeInBits - 16, DL, + getShiftAmountTy(VT))); + } return Res; } @@ -3299,33 +3423,125 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) return SDValue(); - SDValue BSwap = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, - SDValue(Parts[0],0)); + SDLoc DL(N); + SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, + SDValue(Parts[0], 0)); // Result of the bswap should be rotated by 16. If it's not legal, then // do (x << 16) | (x >> 16). - SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT)); + SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT)); if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) - return DAG.getNode(ISD::ROTL, SDLoc(N), VT, BSwap, ShAmt); + return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt); if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) - return DAG.getNode(ISD::ROTR, SDLoc(N), VT, BSwap, ShAmt); - return DAG.getNode(ISD::OR, SDLoc(N), VT, - DAG.getNode(ISD::SHL, SDLoc(N), VT, BSwap, ShAmt), - DAG.getNode(ISD::SRL, SDLoc(N), VT, BSwap, ShAmt)); + return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt); + return DAG.getNode(ISD::OR, DL, VT, + DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt), + DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt)); +} + +/// This contains all DAGCombine rules which reduce two values combined by +/// an Or operation to a single value \see visitANDLike(). +SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) { + EVT VT = N1.getValueType(); + // fold (or x, undef) -> -1 + if (!LegalOperations && + (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) { + EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; + return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), + SDLoc(LocReference), VT); + } + // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) + SDValue LL, LR, RL, RR, CC0, CC1; + if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ + ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); + ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); + + if (LR == RR && Op0 == Op1 && LL.getValueType().isInteger()) { + // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) + // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) + if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } + // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) + // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) + if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { + SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), + LR.getValueType(), LL, RL); + AddToWorklist(ANDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); + } + } + // canonicalize equivalent to ll == rl + if (LL == RR && LR == RL) { + Op1 = ISD::getSetCCSwappedOperands(Op1); + std::swap(RL, RR); + } + if (LL == RL && LR == RR) { + bool isInteger = LL.getValueType().isInteger(); + ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger); + if (Result != ISD::SETCC_INVALID && + (!LegalOperations || + (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && + TLI.isOperationLegal(ISD::SETCC, + getSetCCResultType(N0.getValueType()))))) + return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), + LL, LR, Result); + } + } + + // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. + if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND && + // Don't increase # computations. + (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { + // We can only do this xform if we know that bits from X that are set in C2 + // but not in C1 are already zero. Likewise for Y. + if (const ConstantSDNode *N0O1C = + getAsNonOpaqueConstant(N0.getOperand(1))) { + if (const ConstantSDNode *N1O1C = + getAsNonOpaqueConstant(N1.getOperand(1))) { + // We can only do this xform if we know that bits from X that are set in + // C2 but not in C1 are already zero. Likewise for Y. + const APInt &LHSMask = N0O1C->getAPIntValue(); + const APInt &RHSMask = N1O1C->getAPIntValue(); + + if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && + DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { + SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, + N0.getOperand(0), N1.getOperand(0)); + SDLoc DL(LocReference); + return DAG.getNode(ISD::AND, DL, VT, X, + DAG.getConstant(LHSMask | RHSMask, DL, VT)); + } + } + } + } + + // (or (and X, M), (and X, N)) -> (and X, (or M, N)) + if (N0.getOpcode() == ISD::AND && + N1.getOpcode() == ISD::AND && + N0.getOperand(0) == N1.getOperand(0) && + // Don't increase # computations. + (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { + SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, + N0.getOperand(1), N1.getOperand(1)); + return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X); + } + + return SDValue(); } SDValue DAGCombiner::visitOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - SDValue LL, LR, RL, RR, CC0, CC1; - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N1.getValueType(); // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (or x, 0) -> x, vector edition if (ISD::isBuildVectorAllZeros(N0.getNode())) @@ -3339,13 +3555,13 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return DAG.getConstant( APInt::getAllOnesValue( N0.getValueType().getScalarType().getSizeInBits()), - N0.getValueType()); + SDLoc(N), N0.getValueType()); if (ISD::isBuildVectorAllOnes(N1.getNode())) // do not return N1, because undef node may exist in N1 return DAG.getConstant( APInt::getAllOnesValue( N1.getValueType().getScalarType().getSizeInBits()), - N1.getValueType()); + SDLoc(N), N1.getValueType()); // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1) // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2) @@ -3404,28 +3620,28 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } } - // fold (or x, undef) -> -1 - if (!LegalOperations && - (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) { - EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; - return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); - } // fold (or c1, c2) -> c1|c2 - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C); + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + if (N0C && N1C && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if (N0C && !N1C) + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); // fold (or x, 0) -> x - if (N1C && N1C->isNullValue()) + if (isNullConstant(N1)) return N0; // fold (or x, -1) -> -1 - if (N1C && N1C->isAllOnesValue()) + if (isAllOnesConstant(N1)) return N1; // fold (or x, c) -> c iff (x & ~c) == 0 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) return N1; + if (SDValue Combined = visitORLike(N0, N1, N)) + return Combined; + // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) SDValue BSwap = MatchBSwapHWord(N, N0, N1); if (BSwap.getNode()) @@ -3435,8 +3651,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return BSwap; // reassociate or - SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1); - if (ROR.getNode()) + if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1)) return ROR; // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) // iff (c1 & c2) == 0. @@ -3444,86 +3659,20 @@ SDValue DAGCombiner::visitOR(SDNode *N) { isa<ConstantSDNode>(N0.getOperand(1))) { ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1)); if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) { - if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)) + if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, + N1C, C1)) return DAG.getNode( ISD::AND, SDLoc(N), VT, DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR); return SDValue(); } } - // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) - if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ - ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); - ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); - - if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && - LL.getValueType().isInteger()) { - // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) - // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) - if (cast<ConstantSDNode>(LR)->isNullValue() && - (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { - SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), - LR.getValueType(), LL, RL); - AddToWorklist(ORNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); - } - // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) - // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) - if (cast<ConstantSDNode>(LR)->isAllOnesValue() && - (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { - SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), - LR.getValueType(), LL, RL); - AddToWorklist(ANDNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); - } - } - // canonicalize equivalent to ll == rl - if (LL == RR && LR == RL) { - Op1 = ISD::getSetCCSwappedOperands(Op1); - std::swap(RL, RR); - } - if (LL == RL && LR == RR) { - bool isInteger = LL.getValueType().isInteger(); - ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger); - if (Result != ISD::SETCC_INVALID && - (!LegalOperations || - (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && - TLI.isOperationLegal(ISD::SETCC, - getSetCCResultType(N0.getValueType()))))) - return DAG.getSetCC(SDLoc(N), N0.getValueType(), - LL, LR, Result); - } - } - // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) if (N0.getOpcode() == N1.getOpcode()) { SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); if (Tmp.getNode()) return Tmp; } - // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. - if (N0.getOpcode() == ISD::AND && - N1.getOpcode() == ISD::AND && - N0.getOperand(1).getOpcode() == ISD::Constant && - N1.getOperand(1).getOpcode() == ISD::Constant && - // Don't increase # computations. - (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { - // We can only do this xform if we know that bits from X that are set in C2 - // but not in C1 are already zero. Likewise for Y. - const APInt &LHSMask = - cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); - const APInt &RHSMask = - cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue(); - - if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && - DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { - SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, - N0.getOperand(0), N1.getOperand(0)); - return DAG.getNode(ISD::AND, SDLoc(N), VT, X, - DAG.getConstant(LHSMask | RHSMask, VT)); - } - } - // See if this is some rotate idiom. if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) return SDValue(Rot, 0); @@ -3751,7 +3900,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits; } - Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT)); + Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, DL, VT)); } return Rot.getNode(); @@ -3793,15 +3942,12 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - SDValue LHS, RHS, CC; - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (xor x, 0) -> x, vector edition if (ISD::isBuildVectorAllZeros(N0.getNode())) @@ -3812,27 +3958,30 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { // fold (xor undef, undef) -> 0. This is a common idiom (misuse). if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // fold (xor x, undef) -> undef if (N0.getOpcode() == ISD::UNDEF) return N0; if (N1.getOpcode() == ISD::UNDEF) return N1; // fold (xor c1, c2) -> c1^c2 + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C); + return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if (N0C && !N1C) + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); // fold (xor x, 0) -> x - if (N1C && N1C->isNullValue()) + if (isNullConstant(N1)) return N0; // reassociate xor - SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1); - if (RXOR.getNode()) + if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1)) return RXOR; // fold !(x cc y) -> (x !cc y) + SDValue LHS, RHS, CC; if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) { bool isInt = LHS.getValueType().isInteger(); ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), @@ -3853,18 +4002,19 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y))) - if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND && + if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND && N0.getNode()->hasOneUse() && isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){ SDValue V = N0.getOperand(0); - V = DAG.getNode(ISD::XOR, SDLoc(N0), V.getValueType(), V, - DAG.getConstant(1, V.getValueType())); + SDLoc DL(N0); + V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V, + DAG.getConstant(1, DL, V.getValueType())); AddToWorklist(V.getNode()); return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V); } // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc - if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 && + if (isOneConstant(N1) && VT == MVT::i1 && (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) { @@ -3876,7 +4026,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } } // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants - if (N1C && N1C->isAllOnesValue() && + if (isAllOnesConstant(N1) && (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) { @@ -3897,21 +4047,48 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2)) if (N1C && N0.getOpcode() == ISD::XOR) { - ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0)); - ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); - if (N00C) - return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(1), + if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) { + SDLoc DL(N); + return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), DAG.getConstant(N1C->getAPIntValue() ^ - N00C->getAPIntValue(), VT)); - if (N01C) - return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(0), + N00C->getAPIntValue(), DL, VT)); + } + if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) { + SDLoc DL(N); + return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0), DAG.getConstant(N1C->getAPIntValue() ^ - N01C->getAPIntValue(), VT)); + N01C->getAPIntValue(), DL, VT)); + } } // fold (xor x, x) -> 0 if (N0 == N1) return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); + // fold (xor (shl 1, x), -1) -> (rotl ~1, x) + // Here is a concrete example of this equivalence: + // i16 x == 14 + // i16 shl == 1 << 14 == 16384 == 0b0100000000000000 + // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111 + // + // => + // + // i16 ~1 == 0b1111111111111110 + // i16 rol(~1, 14) == 0b1011111111111111 + // + // Some additional tips to help conceptualize this transform: + // - Try to see the operation as placing a single zero in a value of all ones. + // - There exists no value for x which would allow the result to contain zero. + // - Values of x larger than the bitwidth are undefined and do not require a + // consistent result. + // - Pushing the zero left requires shifting one bits in from the right. + // A rotate left of ~1 is a nice way of achieving the desired result. + if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL + && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) { + SDLoc DL(N); + return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT), + N0.getOperand(1)); + } + // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) if (N0.getOpcode() == N1.getOpcode()) { SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); @@ -3929,10 +4106,6 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { /// Handle transforms common to the three shifts, when the shift amount is a /// constant. SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { - // We can't and shouldn't fold opaque constants. - if (Amt->isOpaque()) - return SDValue(); - SDNode *LHS = N->getOperand(0).getNode(); if (!LHS->hasOneUse()) return SDValue(); @@ -3959,8 +4132,8 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { } // We require the RHS of the binop to be a constant and not opaque as well. - ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1)); - if (!BinOpCst || BinOpCst->isOpaque()) return SDValue(); + ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1)); + if (!BinOpCst) return SDValue(); // FIXME: disable this unless the input to the binop is a shift by a constant. // If it is not a shift, it pessimizes some common cases like: @@ -4013,14 +4186,17 @@ SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { SDValue N01 = N->getOperand(0).getOperand(1); if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) { - EVT TruncVT = N->getValueType(0); - SDValue N00 = N->getOperand(0).getOperand(0); - APInt TruncC = N01C->getAPIntValue(); - TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits()); + if (!N01C->isOpaque()) { + EVT TruncVT = N->getValueType(0); + SDValue N00 = N->getOperand(0).getOperand(0); + APInt TruncC = N01C->getAPIntValue(); + TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits()); + SDLoc DL(N); - return DAG.getNode(ISD::AND, SDLoc(N), TruncVT, - DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, N00), - DAG.getConstant(TruncC, TruncVT)); + return DAG.getNode(ISD::AND, DL, TruncVT, + DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00), + DAG.getConstant(TruncC, DL, TruncVT)); + } } } @@ -4042,15 +4218,14 @@ SDValue DAGCombiner::visitRotate(SDNode *N) { SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); // fold vector ops + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1); // If setcc produces all-one true value then: @@ -4064,7 +4239,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC && TLI.getBooleanContents(N00.getOperand(0).getValueType()) == TargetLowering::ZeroOrNegativeOneBooleanContent) { - if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV)) + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, + N01CV, N1CV)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C); } } else { @@ -4074,10 +4250,11 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } // fold (shl c1, c2) -> c1<<c2 - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C); + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + if (N0C && N1C && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C); // fold (shl 0, x) -> 0 - if (N0C && N0C->isNullValue()) + if (isNullConstant(N0)) return N0; // fold (shl x, c >= size(x)) -> undef if (N1C && N1C->getZExtValue() >= OpSizeInBits) @@ -4087,11 +4264,11 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return N0; // fold (shl undef, x) -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // if (shl x, c) is known to be zero, return 0 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnesValue(OpSizeInBits))) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { @@ -4108,10 +4285,11 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { uint64_t c1 = N0C1->getZExtValue(); uint64_t c2 = N1C->getZExtValue(); + SDLoc DL(N); if (c1 + c2 >= OpSizeInBits) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c1 + c2, N1.getValueType())); + return DAG.getConstant(0, DL, VT); + return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), + DAG.getConstant(c1 + c2, DL, N1.getValueType())); } } @@ -4131,12 +4309,13 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { EVT InnerShiftVT = N0Op0.getValueType(); uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); if (c2 >= OpSizeInBits - InnerShiftSize) { + SDLoc DL(N0); if (c1 + c2 >= OpSizeInBits) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SHL, SDLoc(N0), VT, - DAG.getNode(N0.getOpcode(), SDLoc(N0), VT, + return DAG.getConstant(0, DL, VT); + return DAG.getNode(ISD::SHL, DL, VT, + DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)), - DAG.getConstant(c1 + c2, N1.getValueType())); + DAG.getConstant(c1 + c2, DL, N1.getValueType())); } } } @@ -4154,8 +4333,10 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (c1 == c2) { SDValue NewOp0 = N0.getOperand(0); EVT CountVT = NewOp0.getOperand(1).getValueType(); - SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(), - NewOp0, DAG.getConstant(c2, CountVT)); + SDLoc DL(N); + SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(), + NewOp0, + DAG.getConstant(c2, DL, CountVT)); AddToWorklist(NewSHL.getNode()); return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL); } @@ -4176,25 +4357,30 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue Shift; if (c2 > c1) { Mask = Mask.shl(c2 - c1); - Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c2 - c1, N1.getValueType())); + SDLoc DL(N); + Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), + DAG.getConstant(c2 - c1, DL, N1.getValueType())); } else { Mask = Mask.lshr(c1 - c2); - Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c1 - c2, N1.getValueType())); + SDLoc DL(N); + Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), + DAG.getConstant(c1 - c2, DL, N1.getValueType())); } - return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift, - DAG.getConstant(Mask, VT)); + SDLoc DL(N0); + return DAG.getNode(ISD::AND, DL, VT, Shift, + DAG.getConstant(Mask, DL, VT)); } } } // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) { unsigned BitSize = VT.getScalarSizeInBits(); + SDLoc DL(N); SDValue HiBitsMask = DAG.getConstant(APInt::getHighBitsSet(BitSize, - BitSize - N1C->getZExtValue()), VT); - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), + BitSize - N1C->getZExtValue()), + DL, VT); + return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask); } @@ -4210,7 +4396,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1); } - if (N1C) { + if (N1C && !N1C->isOpaque()) { SDValue NewSHL = visitShiftByConstant(N, N1C); if (NewSHL.getNode()) return NewSHL; @@ -4222,27 +4408,27 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); // fold vector ops + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; N1C = isConstOrConstSplat(N1); } // fold (sra c1, c2) -> (sra c1, c2) - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C); + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + if (N0C && N1C && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C); // fold (sra 0, x) -> 0 - if (N0C && N0C->isNullValue()) + if (isNullConstant(N0)) return N0; // fold (sra -1, x) -> -1 - if (N0C && N0C->isAllOnesValue()) + if (isAllOnesConstant(N0)) return N0; // fold (sra x, (setge c, size(x))) -> undef if (N1C && N1C->getZExtValue() >= OpSizeInBits) @@ -4270,8 +4456,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { unsigned Sum = N1C->getZExtValue() + C1->getZExtValue(); if (Sum >= OpSizeInBits) Sum = OpSizeInBits - 1; - return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(Sum, N1.getValueType())); + SDLoc DL(N); + return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), + DAG.getConstant(Sum, DL, N1.getValueType())); } } @@ -4303,14 +4490,15 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) && TLI.isTruncateFree(VT, TruncVT)) { - SDValue Amt = DAG.getConstant(ShiftAmt, - getShiftAmountTy(N0.getOperand(0).getValueType())); - SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), VT, - N0.getOperand(0), Amt); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), TruncVT, - Shift); - return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), - N->getValueType(0), Trunc); + SDLoc DL(N); + SDValue Amt = DAG.getConstant(ShiftAmt, DL, + getShiftAmountTy(N0.getOperand(0).getValueType())); + SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, + N0.getOperand(0), Amt); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, + Shift); + return DAG.getNode(ISD::SIGN_EXTEND, DL, + N->getValueType(0), Trunc); } } } @@ -4337,12 +4525,13 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { EVT LargeVT = N0Op0.getValueType(); if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) { + SDLoc DL(N); SDValue Amt = - DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), + DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL, getShiftAmountTy(N0Op0.getOperand(0).getValueType())); - SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT, + SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt); - return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA); + return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA); } } } @@ -4356,7 +4545,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); - if (N1C) { + if (N1C && !N1C->isOpaque()) { SDValue NewSRA = visitShiftByConstant(N, N1C); if (NewSRA.getNode()) return NewSRA; @@ -4368,24 +4557,24 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); // fold vector ops + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; N1C = isConstOrConstSplat(N1); } // fold (srl c1, c2) -> c1 >>u c2 - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C); + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + if (N0C && N1C && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C); // fold (srl 0, x) -> 0 - if (N0C && N0C->isNullValue()) + if (isNullConstant(N0)) return N0; // fold (srl x, c >= size(x)) -> undef if (N1C && N1C->getZExtValue() >= OpSizeInBits) @@ -4396,17 +4585,18 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // if (srl x, c) is known to be zero, return 0 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnesValue(OpSizeInBits))) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) if (N1C && N0.getOpcode() == ISD::SRL) { if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) { uint64_t c1 = N01C->getZExtValue(); uint64_t c2 = N1C->getZExtValue(); + SDLoc DL(N); if (c1 + c2 >= OpSizeInBits) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c1 + c2, N1.getValueType())); + return DAG.getConstant(0, DL, VT); + return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), + DAG.getConstant(c1 + c2, DL, N1.getValueType())); } } @@ -4422,12 +4612,14 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits(); // This is only valid if the OpSizeInBits + c1 = size of inner shift. if (c1 + OpSizeInBits == InnerShiftSize) { + SDLoc DL(N0); if (c1 + c2 >= InnerShiftSize) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, - DAG.getNode(ISD::SRL, SDLoc(N0), InnerShiftVT, + return DAG.getConstant(0, DL, VT); + return DAG.getNode(ISD::TRUNCATE, DL, VT, + DAG.getNode(ISD::SRL, DL, InnerShiftVT, N0.getOperand(0)->getOperand(0), - DAG.getConstant(c1 + c2, ShiftCountVT))); + DAG.getConstant(c1 + c2, DL, + ShiftCountVT))); } } @@ -4436,8 +4628,9 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { unsigned BitSize = N0.getScalarValueSizeInBits(); if (BitSize <= 64) { uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize; - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(~0ULL >> ShAmt, VT)); + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), + DAG.getConstant(~0ULL >> ShAmt, DL, VT)); } } @@ -4451,14 +4644,17 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { uint64_t ShiftAmt = N1C->getZExtValue(); - SDValue SmallShift = DAG.getNode(ISD::SRL, SDLoc(N0), SmallVT, + SDLoc DL0(N0); + SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT, N0.getOperand(0), - DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT))); + DAG.getConstant(ShiftAmt, DL0, + getShiftAmountTy(SmallVT))); AddToWorklist(SmallShift.getNode()); APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt); - return DAG.getNode(ISD::AND, SDLoc(N), VT, - DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift), - DAG.getConstant(Mask, VT)); + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, + DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift), + DAG.getConstant(Mask, DL, VT)); } } @@ -4477,12 +4673,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // If any of the input bits are KnownOne, then the input couldn't be all // zeros, thus the result of the srl will always be zero. - if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT); + if (KnownOne.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT); // If all of the bits input the to ctlz node are known to be zero, then // the result of the ctlz is "32" and the result of the shift is one. APInt UnknownBits = ~KnownZero; - if (UnknownBits == 0) return DAG.getConstant(1, VT); + if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT); // Otherwise, check to see if there is exactly one bit input to the ctlz. if ((UnknownBits & (UnknownBits - 1)) == 0) { @@ -4494,13 +4690,16 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue Op = N0.getOperand(0); if (ShAmt) { - Op = DAG.getNode(ISD::SRL, SDLoc(N0), VT, Op, - DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType()))); + SDLoc DL(N0); + Op = DAG.getNode(ISD::SRL, DL, VT, Op, + DAG.getConstant(ShAmt, DL, + getShiftAmountTy(Op.getValueType()))); AddToWorklist(Op.getNode()); } - return DAG.getNode(ISD::XOR, SDLoc(N), VT, - Op, DAG.getConstant(1, VT)); + SDLoc DL(N); + return DAG.getNode(ISD::XOR, DL, VT, + Op, DAG.getConstant(1, DL, VT)); } } @@ -4517,7 +4716,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N1C && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); - if (N1C) { + if (N1C && !N1C->isOpaque()) { SDValue NewSRL = visitShiftByConstant(N, N1C); if (NewSRL.getNode()) return NewSRL; @@ -4651,23 +4850,19 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); EVT VT = N->getValueType(0); EVT VT0 = N0.getValueType(); // fold (select C, X, X) -> X if (N1 == N2) return N1; - // fold (select true, X, Y) -> X - if (N0C && !N0C->isNullValue()) - return N1; - // fold (select false, X, Y) -> Y - if (N0C && N0C->isNullValue()) - return N2; + if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) { + // fold (select true, X, Y) -> X + // fold (select false, X, Y) -> Y + return !N0C->isNullValue() ? N1 : N2; + } // fold (select C, 1, X) -> (or C, X) - if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1) + if (VT == MVT::i1 && isOneConstant(N1)) return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); // fold (select C, 0, 1) -> (xor C, 1) // We can't do this reliably if integer based booleans have different contents @@ -4684,40 +4879,43 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { TLI.getBooleanContents(false, true) && TLI.getBooleanContents(false, false) == TargetLowering::ZeroOrOneBooleanContent)) && - N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) { + isNullConstant(N1) && isOneConstant(N2)) { SDValue XORNode; - if (VT == VT0) - return DAG.getNode(ISD::XOR, SDLoc(N), VT0, - N0, DAG.getConstant(1, VT0)); - XORNode = DAG.getNode(ISD::XOR, SDLoc(N0), VT0, - N0, DAG.getConstant(1, VT0)); + if (VT == VT0) { + SDLoc DL(N); + return DAG.getNode(ISD::XOR, DL, VT0, + N0, DAG.getConstant(1, DL, VT0)); + } + SDLoc DL0(N0); + XORNode = DAG.getNode(ISD::XOR, DL0, VT0, + N0, DAG.getConstant(1, DL0, VT0)); AddToWorklist(XORNode.getNode()); if (VT.bitsGT(VT0)) return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode); return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode); } // fold (select C, 0, X) -> (and (not C), X) - if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) { + if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) { SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); AddToWorklist(NOTNode.getNode()); return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2); } // fold (select C, X, 1) -> (or (not C), X) - if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) { + if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) { SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); AddToWorklist(NOTNode.getNode()); return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1); } // fold (select C, X, 0) -> (and C, X) - if (VT == MVT::i1 && N2C && N2C->isNullValue()) + if (VT == MVT::i1 && isNullConstant(N2)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); // fold (select X, X, Y) -> (or X, Y) // fold (select X, 1, Y) -> (or X, Y) - if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1))) + if (VT == MVT::i1 && (N0 == N1 || isOneConstant(N1))) return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); // fold (select X, Y, X) -> (and X, Y) // fold (select X, Y, 0) -> (and X, Y) - if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0))) + if (VT == MVT::i1 && (N0 == N2 || isNullConstant(N2))) return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); // If we can fold this based on the true/false value, do so. @@ -4757,6 +4955,69 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { return SimplifySelect(SDLoc(N), N0, N1, N2); } + if (VT0 == MVT::i1) { + if (TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { + // select (and Cond0, Cond1), X, Y + // -> select Cond0, (select Cond1, X, Y), Y + if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) { + SDValue Cond0 = N0->getOperand(0); + SDValue Cond1 = N0->getOperand(1); + SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), + N1.getValueType(), Cond1, N1, N2); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, + InnerSelect, N2); + } + // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y) + if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) { + SDValue Cond0 = N0->getOperand(0); + SDValue Cond1 = N0->getOperand(1); + SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), + N1.getValueType(), Cond1, N1, N2); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1, + InnerSelect); + } + } + + // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y + if (N1->getOpcode() == ISD::SELECT) { + SDValue N1_0 = N1->getOperand(0); + SDValue N1_1 = N1->getOperand(1); + SDValue N1_2 = N1->getOperand(2); + if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) { + // Create the actual and node if we can generate good code for it. + if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { + SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(), + N0, N1_0); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And, + N1_1, N2); + } + // Otherwise see if we can optimize the "and" to a better pattern. + if (SDValue Combined = visitANDLike(N0, N1_0, N)) + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, + N1_1, N2); + } + } + // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y + if (N2->getOpcode() == ISD::SELECT) { + SDValue N2_0 = N2->getOperand(0); + SDValue N2_1 = N2->getOperand(1); + SDValue N2_2 = N2->getOperand(2); + if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) { + // Create the actual or node if we can generate good code for it. + if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { + SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(), + N0, N2_0); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or, + N1, N2_2); + } + // Otherwise see if we can optimize to a better pattern. + if (SDValue Combined = visitORLike(N0, N2_0, N)) + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, + N1, N2_2); + } + } + } + return SDValue(); } @@ -4832,6 +5093,67 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1)); } +SDValue DAGCombiner::visitMSCATTER(SDNode *N) { + + if (Level >= AfterLegalizeTypes) + return SDValue(); + + MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N); + SDValue Mask = MSC->getMask(); + SDValue Data = MSC->getValue(); + SDLoc DL(N); + + // If the MSCATTER data type requires splitting and the mask is provided by a + // SETCC, then split both nodes and its operands before legalization. This + // prevents the type legalizer from unrolling SETCC into scalar comparisons + // and enables future optimizations (e.g. min/max pattern matching on X86). + if (Mask.getOpcode() != ISD::SETCC) + return SDValue(); + + // Check if any splitting is required. + if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) != + TargetLowering::TypeSplitVector) + return SDValue(); + SDValue MaskLo, MaskHi, Lo, Hi; + std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0)); + + SDValue Chain = MSC->getChain(); + + EVT MemoryVT = MSC->getMemoryVT(); + unsigned Alignment = MSC->getOriginalAlignment(); + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue DataLo, DataHi; + std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + + SDValue BasePtr = MSC->getBasePtr(); + SDValue IndexLo, IndexHi; + std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL); + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MSC->getPointerInfo(), + MachineMemOperand::MOStore, LoMemVT.getStoreSize(), + Alignment, MSC->getAAInfo(), MSC->getRanges()); + + SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo }; + Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), + DL, OpsLo, MMO); + + SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi}; + Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), + DL, OpsHi, MMO); + + AddToWorklist(Lo.getNode()); + AddToWorklist(Hi.getNode()); + + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); +} + SDValue DAGCombiner::visitMSTORE(SDNode *N) { if (Level >= AfterLegalizeTypes) @@ -4878,7 +5200,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MST->getPointerInfo(), + getMachineMemOperand(MST->getPointerInfo(), MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, MST->getAAInfo(), MST->getRanges()); @@ -4887,10 +5209,10 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); MMO = DAG.getMachineFunction(). - getMachineMemOperand(MST->getPointerInfo(), + getMachineMemOperand(MST->getPointerInfo(), MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment, MST->getAAInfo(), MST->getRanges()); @@ -4906,6 +5228,83 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitMGATHER(SDNode *N) { + + if (Level >= AfterLegalizeTypes) + return SDValue(); + + MaskedGatherSDNode *MGT = dyn_cast<MaskedGatherSDNode>(N); + SDValue Mask = MGT->getMask(); + SDLoc DL(N); + + // If the MGATHER result requires splitting and the mask is provided by a + // SETCC, then split both nodes and its operands before legalization. This + // prevents the type legalizer from unrolling SETCC into scalar comparisons + // and enables future optimizations (e.g. min/max pattern matching on X86). + + if (Mask.getOpcode() != ISD::SETCC) + return SDValue(); + + EVT VT = N->getValueType(0); + + // Check if any splitting is required. + if (TLI.getTypeAction(*DAG.getContext(), VT) != + TargetLowering::TypeSplitVector) + return SDValue(); + + SDValue MaskLo, MaskHi, Lo, Hi; + std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); + + SDValue Src0 = MGT->getValue(); + SDValue Src0Lo, Src0Hi; + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); + + SDValue Chain = MGT->getChain(); + EVT MemoryVT = MGT->getMemoryVT(); + unsigned Alignment = MGT->getOriginalAlignment(); + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue BasePtr = MGT->getBasePtr(); + SDValue Index = MGT->getIndex(); + SDValue IndexLo, IndexHi; + std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MGT->getPointerInfo(), + MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), + Alignment, MGT->getAAInfo(), MGT->getRanges()); + + SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo }; + Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo, + MMO); + + SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi}; + Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi, + MMO); + + AddToWorklist(Lo.getNode()); + AddToWorklist(Hi.getNode()); + + // Build a factor node to remember that this load is independent of the + // other one. + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain); + + SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); + + SDValue RetOps[] = { GatherRes, Chain }; + return DAG.getMergeValues(RetOps, DL); +} + SDValue DAGCombiner::visitMLOAD(SDNode *N) { if (Level >= AfterLegalizeTypes) @@ -4953,7 +5352,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MLD->getPointerInfo(), + getMachineMemOperand(MLD->getPointerInfo(), MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MLD->getAAInfo(), MLD->getRanges()); @@ -4962,10 +5361,10 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); MMO = DAG.getMachineFunction(). - getMachineMemOperand(MLD->getPointerInfo(), + getMachineMemOperand(MLD->getPointerInfo(), MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); @@ -5021,7 +5420,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { EVT VT = LHS.getValueType(); SDValue Shift = DAG.getNode( ISD::SRA, DL, VT, LHS, - DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT)); + DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, DL, VT)); SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); AddToWorklist(Shift.getNode()); AddToWorklist(Add.getNode()); @@ -5029,6 +5428,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { } } + if (SimplifySelectOps(N, N1, N2)) + return SDValue(N, 0); // Don't revisit N. + // If the VSELECT result requires splitting and the mask is provided by a // SETCC, then split both nodes and its operands before legalization. This // prevents the type legalizer from unrolling SETCC into scalar comparisons @@ -5141,7 +5543,8 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, EVT VT = N->getValueType(0); assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND || - Opcode == ISD::ANY_EXTEND) && "Expected EXTEND dag node in input!"); + Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG) + && "Expected EXTEND dag node in input!"); // fold (sext c1) -> c1 // fold (zext c1) -> c1 @@ -5163,7 +5566,7 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits(); unsigned ShAmt = VTBits - EVTBits; SmallVector<SDValue, 8> Elts; - unsigned NumElts = N0->getNumOperands(); + unsigned NumElts = VT.getVectorNumElements(); SDLoc DL(N); for (unsigned i=0; i != NumElts; ++i) { @@ -5173,14 +5576,15 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, continue; } + SDLoc DL(Op); ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op); const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue()); - if (Opcode == ISD::SIGN_EXTEND) + if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG) Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(), - SVT)); + DL, SVT)); else Elts.push_back(DAG.getConstant(C.shl(ShAmt).lshr(ShAmt).getZExtValue(), - SVT)); + DL, SVT)); } return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode(); @@ -5271,6 +5675,102 @@ void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, } } +// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?). +SDValue DAGCombiner::CombineExtLoad(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT DstVT = N->getValueType(0); + EVT SrcVT = N0.getValueType(); + + assert((N->getOpcode() == ISD::SIGN_EXTEND || + N->getOpcode() == ISD::ZERO_EXTEND) && + "Unexpected node type (not an extend)!"); + + // fold (sext (load x)) to multiple smaller sextloads; same for zext. + // For example, on a target with legal v4i32, but illegal v8i32, turn: + // (v8i32 (sext (v8i16 (load x)))) + // into: + // (v8i32 (concat_vectors (v4i32 (sextload x)), + // (v4i32 (sextload (x + 16))))) + // Where uses of the original load, i.e.: + // (v8i16 (load x)) + // are replaced with: + // (v8i16 (truncate + // (v8i32 (concat_vectors (v4i32 (sextload x)), + // (v4i32 (sextload (x + 16))))))) + // + // This combine is only applicable to illegal, but splittable, vectors. + // All legal types, and illegal non-vector types, are handled elsewhere. + // This combine is controlled by TargetLowering::isVectorLoadExtDesirable. + // + if (N0->getOpcode() != ISD::LOAD) + return SDValue(); + + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + + if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) || + !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() || + !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0))) + return SDValue(); + + SmallVector<SDNode *, 4> SetCCs; + if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI)) + return SDValue(); + + ISD::LoadExtType ExtType = + N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD; + + // Try to split the vector types to get down to legal types. + EVT SplitSrcVT = SrcVT; + EVT SplitDstVT = DstVT; + while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) && + SplitSrcVT.getVectorNumElements() > 1) { + SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first; + SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first; + } + + if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT)) + return SDValue(); + + SDLoc DL(N); + const unsigned NumSplits = + DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements(); + const unsigned Stride = SplitSrcVT.getStoreSize(); + SmallVector<SDValue, 4> Loads; + SmallVector<SDValue, 4> Chains; + + SDValue BasePtr = LN0->getBasePtr(); + for (unsigned Idx = 0; Idx < NumSplits; Idx++) { + const unsigned Offset = Idx * Stride; + const unsigned Align = MinAlign(LN0->getAlignment(), Offset); + + SDValue SplitLoad = DAG.getExtLoad( + ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr, + LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, + LN0->isVolatile(), LN0->isNonTemporal(), LN0->isInvariant(), + Align, LN0->getAAInfo()); + + BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, + DAG.getConstant(Stride, DL, BasePtr.getValueType())); + + Loads.push_back(SplitLoad.getValue(0)); + Chains.push_back(SplitLoad.getValue(1)); + } + + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads); + + CombineTo(N, NewValue); + + // Replace uses of the original load (before extension) + // with a truncate of the concatenated sextloaded vectors. + SDValue Trunc = + DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue); + CombineTo(N0.getNode(), Trunc, NewChain); + ExtendSetCCUses(SetCCs, Trunc, NewValue, DL, + (ISD::NodeType)N->getOpcode()); + return SDValue(N, 0); // Return N so it doesn't get rechecked! +} + SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -5337,17 +5837,18 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } // fold (sext (load x)) -> (sext (truncate (sextload x))) - // None of the supported targets knows how to perform load and sign extend - // on vectors in one instruction. We only perform this transformation on - // scalars. - if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && - ISD::isUNINDEXEDLoad(N0.getNode()) && - ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + // Only generate vector extloads when 1) they're legal, and 2) they are + // deemed desirable by the target. + if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + ((!LegalOperations && !VT.isVector() && + !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) { bool DoXform = true; SmallVector<SDNode*, 4> SetCCs; if (!N0.hasOneUse()) DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); + if (VT.isVector()) + DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, @@ -5364,6 +5865,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } } + // fold (sext (load x)) to multiple smaller sextloads. + // Only on illegal but splittable vectors. + if (SDValue ExtLoad = CombineExtLoad(N)) + return ExtLoad; + // fold (sext (sextload x)) -> (sext (truncate (sextload x))) // fold (sext ( extload x)) -> (sext (truncate (sextload x))) if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && @@ -5407,14 +5913,15 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { LN0->getMemOperand()); APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.sext(VT.getSizeInBits()); - SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT, - ExtLoad, DAG.getConstant(Mask, VT)); + SDLoc DL(N); + SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, + ExtLoad, DAG.getConstant(Mask, DL, VT)); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0.getOperand(0)), N0.getOperand(0).getValueType(), ExtLoad); CombineTo(N, And); CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -5457,11 +5964,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0) unsigned ElementWidth = VT.getScalarType().getSizeInBits(); + SDLoc DL(N); SDValue NegOne = - DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT); + DAG.getConstant(APInt::getAllOnesValue(ElementWidth), DL, VT); SDValue SCC = - SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1), - NegOne, DAG.getConstant(0, VT), + SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), + NegOne, DAG.getConstant(0, DL, VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; @@ -5473,7 +5981,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N0.getOperand(0), N0.getOperand(1), CC); return DAG.getSelect(DL, VT, SetCC, - NegOne, DAG.getConstant(0, VT)); + NegOne, DAG.getConstant(0, DL, VT)); } } } @@ -5507,11 +6015,9 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, SDValue Op1 = N->getOperand(1); assert(Op0.getValueType() == Op1.getValueType()); - ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0); - ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1); - if (COp0 && COp0->isNullValue()) + if (isNullConstant(Op0)) Op = Op1; - else if (COp1 && COp1->isNullValue()) + else if (isNullConstant(Op1)) Op = Op0; else return false; @@ -5622,22 +6128,24 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); - return DAG.getNode(ISD::AND, SDLoc(N), VT, - X, DAG.getConstant(Mask, VT)); + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, + X, DAG.getConstant(Mask, DL, VT)); } // fold (zext (load x)) -> (zext (truncate (zextload x))) - // None of the supported targets knows how to perform load and vector_zext - // on vectors in one instruction. We only perform this transformation on - // scalars. - if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && - ISD::isUNINDEXEDLoad(N0.getNode()) && - ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + // Only generate vector extloads when 1) they're legal, and 2) they are + // deemed desirable by the target. + if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + ((!LegalOperations && !VT.isVector() && + !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) { bool DoXform = true; SmallVector<SDNode*, 4> SetCCs; if (!N0.hasOneUse()) DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI); + if (VT.isVector()) + DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, @@ -5655,6 +6163,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } } + // fold (zext (load x)) to multiple smaller zextloads. + // Only on illegal but splittable vectors. + if (SDValue ExtLoad = CombineExtLoad(N)) + return ExtLoad; + // fold (zext (and/or/xor (load x), cst)) -> // (and/or/xor (zextload x), (zext cst)) if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || @@ -5677,14 +6190,15 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { LN0->getMemOperand()); APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); - SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT, - ExtLoad, DAG.getConstant(Mask, VT)); + SDLoc DL(N); + SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, + ExtLoad, DAG.getConstant(Mask, DL, VT)); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0.getOperand(0)), N0.getOperand(0).getValueType(), ExtLoad); CombineTo(N, And); CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -5722,19 +6236,20 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors. // Only do this before legalize for now. EVT EltVT = VT.getVectorElementType(); + SDLoc DL(N); SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(), - DAG.getConstant(1, EltVT)); + DAG.getConstant(1, DL, EltVT)); if (VT.getSizeInBits() == N0VT.getSizeInBits()) // We know that the # elements of the results is the same as the // # elements of the compare (and the # elements of the compare result // for that matter). Check to see that they are the same size. If so, // we know that the element size of the sext'd result matches the // element size of the compare operands. - return DAG.getNode(ISD::AND, SDLoc(N), VT, - DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), + return DAG.getNode(ISD::AND, DL, VT, + DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()), - DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, + DAG.getNode(ISD::BUILD_VECTOR, DL, VT, OneOps)); // If the desired elements are smaller or larger than the source @@ -5747,18 +6262,19 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { EVT::getVectorVT(*DAG.getContext(), MatchingElementType, N0VT.getVectorNumElements()); SDValue VsetCC = - DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), + DAG.getSetCC(DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getNode(ISD::AND, SDLoc(N), VT, - DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT), - DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, OneOps)); + return DAG.getNode(ISD::AND, DL, VT, + DAG.getSExtOrTrunc(VsetCC, DL, VT), + DAG.getNode(ISD::BUILD_VECTOR, DL, VT, OneOps)); } // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc + SDLoc DL(N); SDValue SCC = - SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1), - DAG.getConstant(1, VT), DAG.getConstant(0, VT), + SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), + DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; } @@ -5850,8 +6366,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { } APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); - return DAG.getNode(ISD::AND, SDLoc(N), VT, - X, DAG.getConstant(Mask, VT)); + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, + X, DAG.getConstant(Mask, DL, VT)); } // fold (aext (load x)) -> (aext (truncate (extload x))) @@ -5934,9 +6451,10 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { } // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc + SDLoc DL(N); SDValue SCC = - SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1), - DAG.getConstant(1, VT), DAG.getConstant(0, VT), + SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), + DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; @@ -5957,7 +6475,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { const APInt &CVal = CV->getAPIntValue(); APInt NewVal = CVal & Mask; if (NewVal != CVal) - return DAG.getConstant(NewVal, V.getValueType()); + return DAG.getConstant(NewVal, SDLoc(V), V.getValueType()); break; } case ISD::OR: @@ -5972,7 +6490,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { // Only look at single-use SRLs. if (!V.getNode()->hasOneUse()) break; - if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) { + if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) { // See if we can recursively simplify the LHS. unsigned Amt = RHSC->getZExtValue(); @@ -6117,9 +6635,10 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { uint64_t PtrOff = ShAmt / 8; unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); - SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), + SDLoc DL(LN0); + SDValue NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, LN0->getBasePtr(), - DAG.getConstant(PtrOff, PtrType)); + DAG.getConstant(PtrOff, DL, PtrType)); AddToWorklist(NewPtr.getNode()); SDValue Load; @@ -6148,11 +6667,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // no larger than the source) then the useful bits of the result are // zero; we can't simply return the shortened shift, because the result // of that operation is undefined. + SDLoc DL(N0); if (ShLeftAmt >= VT.getSizeInBits()) - Result = DAG.getConstant(0, VT); + Result = DAG.getConstant(0, DL, VT); else - Result = DAG.getNode(ISD::SHL, SDLoc(N0), VT, - Result, DAG.getConstant(ShLeftAmt, ShImmTy)); + Result = DAG.getNode(ISD::SHL, DL, VT, + Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy)); } // Return the new loaded value. @@ -6279,7 +6799,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op); const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue()); Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(), - Op.getValueType())); + SDLoc(Op), Op.getValueType())); } return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts); @@ -6288,6 +6808,20 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(VT); + + if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, + LegalOperations)) + return SDValue(Res, 0); + + return SDValue(); +} + SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -6297,7 +6831,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (N0.getValueType() == N->getValueType(0)) return N0; // fold (truncate c1) -> c1 - if (isa<ConstantSDNode>(N0)) + if (isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0); // fold (truncate (truncate x)) -> (truncate x) if (N0.getOpcode() == ISD::TRUNCATE) @@ -6350,9 +6884,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, N0.getOperand(0)); + SDLoc DL(N); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - SDLoc(N), TrTy, V, - DAG.getConstant(Index, IndexTy)); + DL, TrTy, V, + DAG.getConstant(Index, DL, IndexTy)); } } @@ -6598,13 +7133,14 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { N0.getOperand(0)); AddToWorklist(NewConv.getNode()); + SDLoc DL(N); APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); if (N0.getOpcode() == ISD::FNEG) - return DAG.getNode(ISD::XOR, SDLoc(N), VT, - NewConv, DAG.getConstant(SignBit, VT)); + return DAG.getNode(ISD::XOR, DL, VT, + NewConv, DAG.getConstant(SignBit, DL, VT)); assert(N0.getOpcode() == ISD::FABS); - return DAG.getNode(ISD::AND, SDLoc(N), VT, - NewConv, DAG.getConstant(~SignBit, VT)); + return DAG.getNode(ISD::AND, DL, VT, + NewConv, DAG.getConstant(~SignBit, DL, VT)); } // fold (bitconvert (fcopysign cst, x)) -> @@ -6629,9 +7165,11 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { } else if (OrigXWidth > VTWidth) { // To get the sign bit in the right place, we have to shift it right // before truncating. - X = DAG.getNode(ISD::SRL, SDLoc(X), + SDLoc DL(X); + X = DAG.getNode(ISD::SRL, DL, X.getValueType(), X, - DAG.getConstant(OrigXWidth-VTWidth, X.getValueType())); + DAG.getConstant(OrigXWidth-VTWidth, DL, + X.getValueType())); AddToWorklist(X.getNode()); X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); AddToWorklist(X.getNode()); @@ -6639,13 +7177,13 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); X = DAG.getNode(ISD::AND, SDLoc(X), VT, - X, DAG.getConstant(SignBit, VT)); + X, DAG.getConstant(SignBit, SDLoc(X), VT)); AddToWorklist(X.getNode()); SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT, N0.getOperand(0)); Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT, - Cst, DAG.getConstant(~SignBit, VT)); + Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT)); AddToWorklist(Cst.getNode()); return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst); @@ -6659,6 +7197,51 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { return CombineLD; } + // Remove double bitcasts from shuffles - this is often a legacy of + // XformToShuffleWithZero being used to combine bitmaskings (of + // float vectors bitcast to integer vectors) into shuffles. + // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1) + if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() && + N0->getOpcode() == ISD::VECTOR_SHUFFLE && + VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() && + !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) { + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0); + + // If operands are a bitcast, peek through if it casts the original VT. + // If operands are a UNDEF or constant, just bitcast back to original VT. + auto PeekThroughBitcast = [&](SDValue Op) { + if (Op.getOpcode() == ISD::BITCAST && + Op.getOperand(0)->getValueType(0) == VT) + return SDValue(Op.getOperand(0)); + if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || + ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) + return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); + return SDValue(); + }; + + SDValue SV0 = PeekThroughBitcast(N0->getOperand(0)); + SDValue SV1 = PeekThroughBitcast(N0->getOperand(1)); + if (!(SV0 && SV1)) + return SDValue(); + + int MaskScale = + VT.getVectorNumElements() / N0.getValueType().getVectorNumElements(); + SmallVector<int, 8> NewMask; + for (int M : SVN->getMask()) + for (int i = 0; i != MaskScale; ++i) + NewMask.push_back(M < 0 ? -1 : M * MaskScale + i); + + bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); + if (!LegalMask) { + std::swap(SV0, SV1); + ShuffleVectorSDNode::commuteMask(NewMask); + LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); + } + + if (LegalMask) + return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask); + } + return SDValue(); } @@ -6727,6 +7310,8 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT); } + SDLoc DL(BV); + // Okay, we know the src/dst types are both integers of differing types. // Handling growing first. assert(SrcEltVT.isInteger() && DstEltVT.isInteger()); @@ -6753,16 +7338,15 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { if (EltIsUndef) Ops.push_back(DAG.getUNDEF(DstEltVT)); else - Ops.push_back(DAG.getConstant(NewBits, DstEltVT)); + Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT)); } EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops); + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops); } // Finally, this must be the case where we are shrinking elements: each input // turns into multiple outputs. - bool isS2V = ISD::isScalarToVector(BV); unsigned NumOutputsPerInput = SrcBitSize/DstBitSize; EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, NumOutputsPerInput*BV->getNumOperands()); @@ -6770,8 +7354,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { if (BV->getOperand(i).getOpcode() == ISD::UNDEF) { - for (unsigned j = 0; j != NumOutputsPerInput; ++j) - Ops.push_back(DAG.getUNDEF(DstEltVT)); + Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT)); continue; } @@ -6780,11 +7363,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { for (unsigned j = 0; j != NumOutputsPerInput; ++j) { APInt ThisVal = OpVal.trunc(DstBitSize); - Ops.push_back(DAG.getConstant(ThisVal, DstEltVT)); - if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal) - // Simply turn this into a SCALAR_TO_VECTOR of the new type. - return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT, - Ops[0]); + Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT)); OpVal = OpVal.lshr(DstBitSize); } @@ -6793,7 +7372,450 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); } - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops); + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops); +} + +/// Try to perform FMA combining on a given FADD node. +SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + SDLoc SL(N); + + const TargetOptions &Options = DAG.getTarget().Options; + bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast || + Options.UnsafeFPMath); + + // Floating-point multiply-add with intermediate rounding. + bool HasFMAD = (LegalOperations && + TLI.isOperationLegal(ISD::FMAD, VT)); + + // Floating-point multiply-add without intermediate rounding. + bool HasFMA = ((!LegalOperations || + TLI.isOperationLegalOrCustom(ISD::FMA, VT)) && + TLI.isFMAFasterThanFMulAndFAdd(VT) && + UnsafeFPMath); + + // No valid opcode, do not combine. + if (!HasFMAD && !HasFMA) + return SDValue(); + + // Always prefer FMAD to FMA for precision. + unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; + bool Aggressive = TLI.enableAggressiveFMAFusion(VT); + bool LookThroughFPExt = TLI.isFPExtFree(VT); + + // fold (fadd (fmul x, y), z) -> (fma x, y, z) + if (N0.getOpcode() == ISD::FMUL && + (Aggressive || N0->hasOneUse())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(0), N0.getOperand(1), N1); + } + + // fold (fadd x, (fmul y, z)) -> (fma y, z, x) + // Note: Commutes FADD operands. + if (N1.getOpcode() == ISD::FMUL && + (Aggressive || N1->hasOneUse())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N1.getOperand(0), N1.getOperand(1), N0); + } + + // Look through FP_EXTEND nodes to do more combining. + if (UnsafeFPMath && LookThroughFPExt) { + // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FMUL) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(1)), N1); + } + + // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x) + // Note: Commutes FADD operands. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == ISD::FMUL) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(1)), N0); + } + } + + // More folding opportunities when target permits. + if ((UnsafeFPMath || HasFMAD) && Aggressive) { + // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) + if (N0.getOpcode() == PreferredFusedOpcode && + N0.getOperand(2).getOpcode() == ISD::FMUL) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(2).getOperand(0), + N0.getOperand(2).getOperand(1), + N1)); + } + + // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) + if (N1->getOpcode() == PreferredFusedOpcode && + N1.getOperand(2).getOpcode() == ISD::FMUL) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N1.getOperand(0), N1.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + N1.getOperand(2).getOperand(0), + N1.getOperand(2).getOperand(1), + N0)); + } + + if (UnsafeFPMath && LookThroughFPExt) { + // fold (fadd (fma x, y, (fpext (fmul u, v))), z) + // -> (fma x, y, (fma (fpext u), (fpext v), z)) + auto FoldFAddFMAFPExtFMul = [&] ( + SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y, + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, U), + DAG.getNode(ISD::FP_EXTEND, SL, VT, V), + Z)); + }; + if (N0.getOpcode() == PreferredFusedOpcode) { + SDValue N02 = N0.getOperand(2); + if (N02.getOpcode() == ISD::FP_EXTEND) { + SDValue N020 = N02.getOperand(0); + if (N020.getOpcode() == ISD::FMUL) + return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1), + N020.getOperand(0), N020.getOperand(1), + N1); + } + } + + // fold (fadd (fpext (fma x, y, (fmul u, v))), z) + // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. + auto FoldFAddFPExtFMAFMul = [&] ( + SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, X), + DAG.getNode(ISD::FP_EXTEND, SL, VT, Y), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, U), + DAG.getNode(ISD::FP_EXTEND, SL, VT, V), + Z)); + }; + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == PreferredFusedOpcode) { + SDValue N002 = N00.getOperand(2); + if (N002.getOpcode() == ISD::FMUL) + return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1), + N002.getOperand(0), N002.getOperand(1), + N1); + } + } + + // fold (fadd x, (fma y, z, (fpext (fmul u, v))) + // -> (fma y, z, (fma (fpext u), (fpext v), x)) + if (N1.getOpcode() == PreferredFusedOpcode) { + SDValue N12 = N1.getOperand(2); + if (N12.getOpcode() == ISD::FP_EXTEND) { + SDValue N120 = N12.getOperand(0); + if (N120.getOpcode() == ISD::FMUL) + return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1), + N120.getOperand(0), N120.getOperand(1), + N0); + } + } + + // fold (fadd x, (fpext (fma y, z, (fmul u, v))) + // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == PreferredFusedOpcode) { + SDValue N102 = N10.getOperand(2); + if (N102.getOpcode() == ISD::FMUL) + return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1), + N102.getOperand(0), N102.getOperand(1), + N0); + } + } + } + } + + return SDValue(); +} + +/// Try to perform FMA combining on a given FSUB node. +SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + SDLoc SL(N); + + const TargetOptions &Options = DAG.getTarget().Options; + bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast || + Options.UnsafeFPMath); + + // Floating-point multiply-add with intermediate rounding. + bool HasFMAD = (LegalOperations && + TLI.isOperationLegal(ISD::FMAD, VT)); + + // Floating-point multiply-add without intermediate rounding. + bool HasFMA = ((!LegalOperations || + TLI.isOperationLegalOrCustom(ISD::FMA, VT)) && + TLI.isFMAFasterThanFMulAndFAdd(VT) && + UnsafeFPMath); + + // No valid opcode, do not combine. + if (!HasFMAD && !HasFMA) + return SDValue(); + + // Always prefer FMAD to FMA for precision. + unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; + bool Aggressive = TLI.enableAggressiveFMAFusion(VT); + bool LookThroughFPExt = TLI.isFPExtFree(VT); + + // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) + if (N0.getOpcode() == ISD::FMUL && + (Aggressive || N0->hasOneUse())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(ISD::FNEG, SL, VT, N1)); + } + + // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) + // Note: Commutes FSUB operands. + if (N1.getOpcode() == ISD::FMUL && + (Aggressive || N1->hasOneUse())) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + N1.getOperand(0)), + N1.getOperand(1), N0); + + // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) + if (N0.getOpcode() == ISD::FNEG && + N0.getOperand(0).getOpcode() == ISD::FMUL && + (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) { + SDValue N00 = N0.getOperand(0).getOperand(0); + SDValue N01 = N0.getOperand(0).getOperand(1); + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, N00), N01, + DAG.getNode(ISD::FNEG, SL, VT, N1)); + } + + // Look through FP_EXTEND nodes to do more combining. + if (UnsafeFPMath && LookThroughFPExt) { + // fold (fsub (fpext (fmul x, y)), z) + // -> (fma (fpext x), (fpext y), (fneg z)) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FMUL) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(1)), + DAG.getNode(ISD::FNEG, SL, VT, N1)); + } + + // fold (fsub x, (fpext (fmul y, z))) + // -> (fma (fneg (fpext y)), (fpext z), x) + // Note: Commutes FSUB operands. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == ISD::FMUL) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(0))), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(1)), + N0); + } + + // fold (fsub (fpext (fneg (fmul, x, y))), z) + // -> (fneg (fma (fpext x), (fpext y), z)) + // Note: This could be removed with appropriate canonicalization of the + // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the + // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent + // from implementing the canonicalization in visitFSUB. + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FNEG) { + SDValue N000 = N00.getOperand(0); + if (N000.getOpcode() == ISD::FMUL) { + return DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(1)), + N1)); + } + } + } + + // fold (fsub (fneg (fpext (fmul, x, y))), z) + // -> (fneg (fma (fpext x)), (fpext y), z) + // Note: This could be removed with appropriate canonicalization of the + // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the + // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent + // from implementing the canonicalization in visitFSUB. + if (N0.getOpcode() == ISD::FNEG) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FP_EXTEND) { + SDValue N000 = N00.getOperand(0); + if (N000.getOpcode() == ISD::FMUL) { + return DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(1)), + N1)); + } + } + } + + } + + // More folding opportunities when target permits. + if ((UnsafeFPMath || HasFMAD) && Aggressive) { + // fold (fsub (fma x, y, (fmul u, v)), z) + // -> (fma x, y (fma u, v, (fneg z))) + if (N0.getOpcode() == PreferredFusedOpcode && + N0.getOperand(2).getOpcode() == ISD::FMUL) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(2).getOperand(0), + N0.getOperand(2).getOperand(1), + DAG.getNode(ISD::FNEG, SL, VT, + N1))); + } + + // fold (fsub x, (fma y, z, (fmul u, v))) + // -> (fma (fneg y), z, (fma (fneg u), v, x)) + if (N1.getOpcode() == PreferredFusedOpcode && + N1.getOperand(2).getOpcode() == ISD::FMUL) { + SDValue N20 = N1.getOperand(2).getOperand(0); + SDValue N21 = N1.getOperand(2).getOperand(1); + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + N1.getOperand(0)), + N1.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, N20), + + N21, N0)); + } + + if (UnsafeFPMath && LookThroughFPExt) { + // fold (fsub (fma x, y, (fpext (fmul u, v))), z) + // -> (fma x, y (fma (fpext u), (fpext v), (fneg z))) + if (N0.getOpcode() == PreferredFusedOpcode) { + SDValue N02 = N0.getOperand(2); + if (N02.getOpcode() == ISD::FP_EXTEND) { + SDValue N020 = N02.getOperand(0); + if (N020.getOpcode() == ISD::FMUL) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N020.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N020.getOperand(1)), + DAG.getNode(ISD::FNEG, SL, VT, + N1))); + } + } + + // fold (fsub (fpext (fma x, y, (fmul u, v))), z) + // -> (fma (fpext x), (fpext y), + // (fma (fpext u), (fpext v), (fneg z))) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == PreferredFusedOpcode) { + SDValue N002 = N00.getOperand(2); + if (N002.getOpcode() == ISD::FMUL) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(1)), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N002.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N002.getOperand(1)), + DAG.getNode(ISD::FNEG, SL, VT, + N1))); + } + } + + // fold (fsub x, (fma y, z, (fpext (fmul u, v)))) + // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x)) + if (N1.getOpcode() == PreferredFusedOpcode && + N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) { + SDValue N120 = N1.getOperand(2).getOperand(0); + if (N120.getOpcode() == ISD::FMUL) { + SDValue N1200 = N120.getOperand(0); + SDValue N1201 = N120.getOperand(1); + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), + N1.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, + VT, N1200)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N1201), + N0)); + } + } + + // fold (fsub x, (fpext (fma y, z, (fmul u, v)))) + // -> (fma (fneg (fpext y)), (fpext z), + // (fma (fneg (fpext u)), (fpext v), x)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. + if (N1.getOpcode() == ISD::FP_EXTEND && + N1.getOperand(0).getOpcode() == PreferredFusedOpcode) { + SDValue N100 = N1.getOperand(0).getOperand(0); + SDValue N101 = N1.getOperand(0).getOperand(1); + SDValue N102 = N1.getOperand(0).getOperand(2); + if (N102.getOpcode() == ISD::FMUL) { + SDValue N1020 = N102.getOperand(0); + SDValue N1021 = N102.getOperand(1); + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N100)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, N101), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, + VT, N1020)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N1021), + N0)); + } + } + } + } + + return SDValue(); } SDValue DAGCombiner::visitFADD(SDNode *N) { @@ -6802,32 +7824,32 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); + SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (fadd c1, c2) -> c1 + c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N1); + return DAG.getNode(ISD::FADD, DL, VT, N0, N1); // canonicalize constant to RHS if (N0CFP && !N1CFP) - return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0); + return DAG.getNode(ISD::FADD, DL, VT, N1, N0); // fold (fadd A, (fneg B)) -> (fsub A, B) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2) - return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, + return DAG.getNode(ISD::FSUB, DL, VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); // fold (fadd (fneg A), B) -> (fsub B, A) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2) - return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N1, + return DAG.getNode(ISD::FSUB, DL, VT, N1, GetNegatedExpression(N0, DAG, LegalOperations)); // If 'unsafe math' is enabled, fold lots of things. @@ -6843,17 +7865,16 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1))) - return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0), - DAG.getNode(ISD::FADD, SDLoc(N), VT, - N0.getOperand(1), N1)); + return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), + DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1)); // If allowed, fold (fadd (fneg x), x) -> 0.0 if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) - return DAG.getConstantFP(0.0, VT); + return DAG.getConstantFP(0.0, DL, VT); // If allowed, fold (fadd x, (fneg x)) -> 0.0 if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) - return DAG.getConstantFP(0.0, VT); + return DAG.getConstantFP(0.0, DL, VT); // We can fold chains of FADD's of the same value into multiplications. // This transform is not safe in general because we are reducing the number @@ -6865,21 +7886,18 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // (fadd (fmul x, c), x) -> (fmul x, c+1) if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP01, 0), - DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0), + DAG.getConstantFP(1.0, DL, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP); } // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP01, 0), - DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0.getOperand(0), NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0), + DAG.getConstantFP(2.0, DL, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP); } } @@ -6889,20 +7907,18 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // (fadd x, (fmul x, c)) -> (fmul x, c+1) if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP11, 0), - DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0), + DAG.getConstantFP(1.0, DL, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP); } // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N0.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP11, 0), - DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1.getOperand(0), NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0), + DAG.getConstantFP(2.0, DL, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP); } } @@ -6910,18 +7926,20 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); // (fadd (fadd x, x), x) -> (fmul x, 3.0) if (!CFP && N0.getOperand(0) == N0.getOperand(1) && - (N0.getOperand(0) == N1)) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N1, DAG.getConstantFP(3.0, VT)); + (N0.getOperand(0) == N1)) { + return DAG.getNode(ISD::FMUL, DL, VT, + N1, DAG.getConstantFP(3.0, DL, VT)); + } } if (N1.getOpcode() == ISD::FADD && AllowNewConst) { ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); // (fadd x, (fadd x, x)) -> (fmul x, 3.0) if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && - N1.getOperand(0) == N0) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0, DAG.getConstantFP(3.0, VT)); + N1.getOperand(0) == N0) { + return DAG.getNode(ISD::FMUL, DL, VT, + N0, DAG.getConstantFP(3.0, DL, VT)); + } } // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0) @@ -6929,81 +7947,18 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(0) == N1.getOperand(0)) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0.getOperand(0), DAG.getConstantFP(4.0, VT)); + N0.getOperand(0) == N1.getOperand(0)) { + return DAG.getNode(ISD::FMUL, DL, VT, + N0.getOperand(0), DAG.getConstantFP(4.0, DL, VT)); + } } } // enable-unsafe-fp-math // FADD -> FMA combines: - if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && - TLI.isFMAFasterThanFMulAndFAdd(VT) && - (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { - - // fold (fadd (fmul x, y), z) -> (fma x, y, z) - if (N0.getOpcode() == ISD::FMUL && - (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1), N1); - - // fold (fadd x, (fmul y, z)) -> (fma y, z, x) - // Note: Commutes FADD operands. - if (N1.getOpcode() == ISD::FMUL && - (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - N1.getOperand(0), N1.getOperand(1), N0); - - // When FP_EXTEND nodes are free on the target, and there is an opportunity - // to combine into FMA, arrange such nodes accordingly. - if (TLI.isFPExtFree(VT)) { - - // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) - if (N0.getOpcode() == ISD::FP_EXTEND) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N00.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N00.getOperand(1)), N1); - } - - // fold (fadd x, (fpext (fmul y, z)), z) -> (fma (fpext y), (fpext z), x) - // Note: Commutes FADD operands. - if (N1.getOpcode() == ISD::FP_EXTEND) { - SDValue N10 = N1.getOperand(0); - if (N10.getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N10.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N10.getOperand(1)), N0); - } - } - - // More folding opportunities when target permits. - if (TLI.enableAggressiveFMAFusion(VT)) { - - // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) - if (N0.getOpcode() == ISD::FMA && - N0.getOperand(2).getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1), - DAG.getNode(ISD::FMA, SDLoc(N), VT, - N0.getOperand(2).getOperand(0), - N0.getOperand(2).getOperand(1), - N1)); - - // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) - if (N1->getOpcode() == ISD::FMA && - N1.getOperand(2).getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - N1.getOperand(0), N1.getOperand(1), - DAG.getNode(ISD::FMA, SDLoc(N), VT, - N1.getOperand(2).getOperand(0), - N1.getOperand(2).getOperand(1), - N0)); - } + SDValue Fused = visitFADDForFMACombine(N); + if (Fused) { + AddToWorklist(Fused.getNode()); + return Fused; } return SDValue(); @@ -7019,14 +7974,13 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (fsub c1, c2) -> c1-c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, N1); + return DAG.getNode(ISD::FSUB, dl, VT, N0, N1); // fold (fsub A, (fneg B)) -> (fadd A, B) if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) @@ -7049,7 +8003,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // (fsub x, x) -> 0.0 if (N0 == N1) - return DAG.getConstantFP(0.0f, VT); + return DAG.getConstantFP(0.0f, dl, VT); // (fsub x, (fadd x, y)) -> (fneg y) // (fsub x, (fadd y, x)) -> (fneg y) @@ -7066,138 +8020,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } // FSUB -> FMA combines: - if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && - TLI.isFMAFasterThanFMulAndFAdd(VT) && - (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { - - // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) - if (N0.getOpcode() == ISD::FMUL && - (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) - return DAG.getNode(ISD::FMA, dl, VT, - N0.getOperand(0), N0.getOperand(1), - DAG.getNode(ISD::FNEG, dl, VT, N1)); - - // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) - // Note: Commutes FSUB operands. - if (N1.getOpcode() == ISD::FMUL && - (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) - return DAG.getNode(ISD::FMA, dl, VT, - DAG.getNode(ISD::FNEG, dl, VT, - N1.getOperand(0)), - N1.getOperand(1), N0); - - // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) - if (N0.getOpcode() == ISD::FNEG && - N0.getOperand(0).getOpcode() == ISD::FMUL && - ((N0->hasOneUse() && N0.getOperand(0).hasOneUse()) || - TLI.enableAggressiveFMAFusion(VT))) { - SDValue N00 = N0.getOperand(0).getOperand(0); - SDValue N01 = N0.getOperand(0).getOperand(1); - return DAG.getNode(ISD::FMA, dl, VT, - DAG.getNode(ISD::FNEG, dl, VT, N00), N01, - DAG.getNode(ISD::FNEG, dl, VT, N1)); - } - - // When FP_EXTEND nodes are free on the target, and there is an opportunity - // to combine into FMA, arrange such nodes accordingly. - if (TLI.isFPExtFree(VT)) { - - // fold (fsub (fpext (fmul x, y)), z) - // -> (fma (fpext x), (fpext y), (fneg z)) - if (N0.getOpcode() == ISD::FP_EXTEND) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N00.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N00.getOperand(1)), - DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1)); - } - - // fold (fsub x, (fpext (fmul y, z))) - // -> (fma (fneg (fpext y)), (fpext z), x) - // Note: Commutes FSUB operands. - if (N1.getOpcode() == ISD::FP_EXTEND) { - SDValue N10 = N1.getOperand(0); - if (N10.getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - DAG.getNode(ISD::FNEG, SDLoc(N), VT, - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), - VT, N10.getOperand(0))), - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N10.getOperand(1)), - N0); - } - - // fold (fsub (fpext (fneg (fmul, x, y))), z) - // -> (fma (fneg (fpext x)), (fpext y), (fneg z)) - if (N0.getOpcode() == ISD::FP_EXTEND) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == ISD::FNEG) { - SDValue N000 = N00.getOperand(0); - if (N000.getOpcode() == ISD::FMUL) { - return DAG.getNode(ISD::FMA, dl, VT, - DAG.getNode(ISD::FNEG, dl, VT, - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), - VT, N000.getOperand(0))), - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N000.getOperand(1)), - DAG.getNode(ISD::FNEG, dl, VT, N1)); - } - } - } - - // fold (fsub (fneg (fpext (fmul, x, y))), z) - // -> (fma (fneg (fpext x)), (fpext y), (fneg z)) - if (N0.getOpcode() == ISD::FNEG) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == ISD::FP_EXTEND) { - SDValue N000 = N00.getOperand(0); - if (N000.getOpcode() == ISD::FMUL) { - return DAG.getNode(ISD::FMA, dl, VT, - DAG.getNode(ISD::FNEG, dl, VT, - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), - VT, N000.getOperand(0))), - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N000.getOperand(1)), - DAG.getNode(ISD::FNEG, dl, VT, N1)); - } - } - } - } - - // More folding opportunities when target permits. - if (TLI.enableAggressiveFMAFusion(VT)) { - - // fold (fsub (fma x, y, (fmul u, v)), z) - // -> (fma x, y (fma u, v, (fneg z))) - if (N0.getOpcode() == ISD::FMA && - N0.getOperand(2).getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1), - DAG.getNode(ISD::FMA, SDLoc(N), VT, - N0.getOperand(2).getOperand(0), - N0.getOperand(2).getOperand(1), - DAG.getNode(ISD::FNEG, SDLoc(N), VT, - N1))); - - // fold (fsub x, (fma y, z, (fmul u, v))) - // -> (fma (fneg y), z, (fma (fneg u), v, x)) - if (N1.getOpcode() == ISD::FMA && - N1.getOperand(2).getOpcode() == ISD::FMUL) { - SDValue N20 = N1.getOperand(2).getOperand(0); - SDValue N21 = N1.getOperand(2).getOperand(1); - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - DAG.getNode(ISD::FNEG, SDLoc(N), VT, - N1.getOperand(0)), - N1.getOperand(1), - DAG.getNode(ISD::FMA, SDLoc(N), VT, - DAG.getNode(ISD::FNEG, SDLoc(N), VT, - N20), - N21, N0)); - } - } + SDValue Fused = visitFSUBForFMACombine(N); + if (Fused) { + AddToWorklist(Fused.getNode()); + return Fused; } return SDValue(); @@ -7209,29 +8035,24 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); EVT VT = N->getValueType(0); + SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops if (VT.isVector()) { // This just handles C1 * C2 for vectors. Other vector folds are below. - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; - // Canonicalize vector constant to RHS. - if (N0.getOpcode() == ISD::BUILD_VECTOR && - N1.getOpcode() != ISD::BUILD_VECTOR) - if (auto *BV0 = dyn_cast<BuildVectorSDNode>(N0)) - if (BV0->isConstant()) - return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); } // fold (fmul c1, c2) -> c1*c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1); + return DAG.getNode(ISD::FMUL, DL, VT, N0, N1); // canonicalize constant to RHS - if (N0CFP && !N1CFP) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0); + if (isConstantFPBuildVectorOrConstantFP(N0) && + !isConstantFPBuildVectorOrConstantFP(N1)) + return DAG.getNode(ISD::FMUL, DL, VT, N1, N0); // fold (fmul A, 1.0) -> A if (N1CFP && N1CFP->isExactlyValue(1.0)) @@ -7247,14 +8068,22 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // Fold scalars or any vector constants (not just splats). // This fold is done in general by InstCombine, but extra fmul insts // may have been generated during lowering. + SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); auto *BV1 = dyn_cast<BuildVectorSDNode>(N1); + auto *BV00 = dyn_cast<BuildVectorSDNode>(N00); auto *BV01 = dyn_cast<BuildVectorSDNode>(N01); - if ((N1CFP && isConstOrConstSplatFP(N01)) || - (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) { - SDLoc SL(N); - SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1); - return DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(0), MulConsts); + + // Check 1: Make sure that the first operand of the inner multiply is NOT + // a constant. Otherwise, we may induce infinite looping. + if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) { + // Check 2: Make sure that the second operand of the inner multiply and + // the second operand of the outer multiply are constants. + if ((N1CFP && isConstOrConstSplatFP(N01)) || + (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) { + SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1); + return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts); + } } } @@ -7263,21 +8092,20 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // during an early run of DAGCombiner can prevent folding with fmuls // inserted during lowering. if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) { - SDLoc SL(N); - const SDValue Two = DAG.getConstantFP(2.0, VT); - SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, Two, N1); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), MulConsts); + const SDValue Two = DAG.getConstantFP(2.0, DL, VT); + SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1); + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts); } } // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) - return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N0); + return DAG.getNode(ISD::FADD, DL, VT, N0, N0); // fold (fmul X, -1.0) -> (fneg X) if (N1CFP && N1CFP->isExactlyValue(-1.0)) if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); + return DAG.getNode(ISD::FNEG, DL, VT, N0); // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) { @@ -7285,7 +8113,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // Both can be negated for free, check to see if at least one is cheaper // negated. if (LHSNeg == 2 || RHSNeg == 2) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, + return DAG.getNode(ISD::FMUL, DL, VT, GetNegatedExpression(N0, DAG, LegalOperations), GetNegatedExpression(N1, DAG, LegalOperations)); } @@ -7364,14 +8192,14 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { if (Options.UnsafeFPMath && N1CFP && N0 == N2) return DAG.getNode(ISD::FMUL, dl, VT, N0, DAG.getNode(ISD::FADD, dl, VT, - N1, DAG.getConstantFP(1.0, VT))); + N1, DAG.getConstantFP(1.0, dl, VT))); // (fma x, c, (fneg x)) -> (fmul x, (c-1)) if (Options.UnsafeFPMath && N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) return DAG.getNode(ISD::FMUL, dl, VT, N0, DAG.getNode(ISD::FADD, dl, VT, - N1, DAG.getConstantFP(-1.0, VT))); + N1, DAG.getConstantFP(-1.0, dl, VT))); return SDValue(); @@ -7387,10 +8215,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (fdiv c1, c2) -> c1/c2 if (N0CFP && N1CFP) @@ -7412,8 +8239,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || TLI.isFPImmLegal(Recip, VT))) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, - DAG.getConstantFP(Recip, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N0, + DAG.getConstantFP(Recip, DL, VT)); } // If this FDIV is part of a reciprocal square root, it may be folded @@ -7492,24 +8319,22 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { SmallVector<SDNode *, 4> Users; // Find all FDIV users of the same divisor. - for (SDNode::use_iterator UI = N1.getNode()->use_begin(), - UE = N1.getNode()->use_end(); - UI != UE; ++UI) { - SDNode *User = UI.getUse().getUser(); - if (User->getOpcode() == ISD::FDIV && User->getOperand(1) == N1) - Users.push_back(User); + for (auto *U : N1->uses()) { + if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) + Users.push_back(U); } if (TLI.combineRepeatedFPDivisors(Users.size())) { - SDValue FPOne = DAG.getConstantFP(1.0, VT); // floating point 1.0 - SDValue Reciprocal = DAG.getNode(ISD::FDIV, SDLoc(N), VT, FPOne, N1); + SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); + SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1); // Dividend / Divisor -> Dividend * Reciprocal - for (auto I = Users.begin(), E = Users.end(); I != E; ++I) { - if ((*I)->getOperand(0) != FPOne) { - SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(*I), VT, - (*I)->getOperand(0), Reciprocal); - DAG.ReplaceAllUsesWith(*I, NewNode.getNode()); + for (auto *U : Users) { + SDValue Dividend = U->getOperand(0); + if (Dividend != FPOne) { + SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend, + Reciprocal); + DAG.ReplaceAllUsesWith(U, NewNode.getNode()); } } return SDValue(); @@ -7539,20 +8364,21 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5) if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) { EVT VT = RV.getValueType(); - RV = DAG.getNode(ISD::FMUL, SDLoc(N), VT, N->getOperand(0), RV); + SDLoc DL(N); + RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV); AddToWorklist(RV.getNode()); // Unfortunately, RV is now NaN if the input was exactly 0. // Select out this case and force the answer to 0. - SDValue Zero = DAG.getConstantFP(0.0, VT); + SDValue Zero = DAG.getConstantFP(0.0, DL, VT); SDValue ZeroCmp = - DAG.getSetCC(SDLoc(N), TLI.getSetCCResultType(*DAG.getContext(), VT), + DAG.getSetCC(DL, TLI.getSetCCResultType(*DAG.getContext(), VT), N->getOperand(0), Zero, ISD::SETEQ); AddToWorklist(ZeroCmp.getNode()); AddToWorklist(RV.getNode()); RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, - SDLoc(N), VT, ZeroCmp, Zero, RV); + DL, VT, ZeroCmp, Zero, RV); return RV; } } @@ -7611,12 +8437,11 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); EVT VT = N->getValueType(0); EVT OpVT = N0.getValueType(); // fold (sint_to_fp c1) -> c1fp - if (N0C && + if (isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) @@ -7638,11 +8463,12 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { !VT.isVector() && (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { + SDLoc DL(N); SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1), - DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT), + DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), N0.getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops); + return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); } // fold (sint_to_fp (zext (setcc x, y, cc))) -> @@ -7651,11 +8477,12 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() && (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { + SDLoc DL(N); SDValue Ops[] = { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1), - DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT), + DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), N0.getOperand(0).getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops); + return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); } } @@ -7664,12 +8491,11 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); EVT VT = N->getValueType(0); EVT OpVT = N0.getValueType(); // fold (uint_to_fp c1) -> c1fp - if (N0C && + if (isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) @@ -7691,39 +8517,82 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { if (N0.getOpcode() == ISD::SETCC && !VT.isVector() && (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { + SDLoc DL(N); SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1), - DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT), + DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), N0.getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops); + return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); } } return SDValue(); } +// Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x +static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP) + return SDValue(); + + SDValue Src = N0.getOperand(0); + EVT SrcVT = Src.getValueType(); + bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP; + bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT; + + // We can safely assume the conversion won't overflow the output range, + // because (for example) (uint8_t)18293.f is undefined behavior. + + // Since we can assume the conversion won't overflow, our decision as to + // whether the input will fit in the float should depend on the minimum + // of the input range and output range. + + // This means this is also safe for a signed input and unsigned output, since + // a negative input would lead to undefined behavior. + unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned; + unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned; + unsigned ActualSize = std::min(InputSize, OutputSize); + const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType()); + + // We can only fold away the float conversion if the input range can be + // represented exactly in the float range. + if (APFloat::semanticsPrecision(sem) >= ActualSize) { + if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) { + unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND + : ISD::ZERO_EXTEND; + return DAG.getNode(ExtOp, SDLoc(N), VT, Src); + } + if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits()) + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src); + if (SrcVT == VT) + return Src; + return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Src); + } + return SDValue(); +} + SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); EVT VT = N->getValueType(0); // fold (fp_to_sint c1fp) -> c1 - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0); - return SDValue(); + return FoldIntToFPToInt(N, DAG); } SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); EVT VT = N->getValueType(0); // fold (fp_to_uint c1fp) -> c1 - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0); - return SDValue(); + return FoldIntToFPToInt(N, DAG); } SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { @@ -7742,11 +8611,18 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { // fold (fp_round (fp_round x)) -> (fp_round x) if (N0.getOpcode() == ISD::FP_ROUND) { - // This is a value preserving truncation if both round's are. - bool IsTrunc = N->getConstantOperandVal(1) == 1 && - N0.getNode()->getConstantOperandVal(1) == 1; - return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0), - DAG.getIntPtrConstant(IsTrunc)); + const bool NIsTrunc = N->getConstantOperandVal(1) == 1; + const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1; + // If the first fp_round isn't a value preserving truncation, it might + // introduce a tie in the second fp_round, that wouldn't occur in the + // single-step fp_round we want to fold to. + // In other words, double rounding isn't the same as rounding. + // Also, this is a value preserving truncation iff both fp_round's are. + if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) { + SDLoc DL(N); + return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0), + DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL)); + } } // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y) @@ -7769,8 +8645,9 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { // fold (fp_round_inreg c1fp) -> c1fp if (N0CFP && isTypeLegal(EVT)) { - SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT); - return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Round); + SDLoc DL(N); + SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT); + return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round); } return SDValue(); @@ -7778,7 +8655,6 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); EVT VT = N->getValueType(0); // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded. @@ -7787,9 +8663,14 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { return SDValue(); // fold (fp_extend c1fp) -> c1fp - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0); + // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op) + if (N0.getOpcode() == ISD::FP16_TO_FP && + TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal) + return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0)); + // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the // value of X. if (N0.getOpcode() == ISD::FP_ROUND @@ -7813,7 +8694,8 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::FP_ROUND, SDLoc(N0), - N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)), + N0.getValueType(), ExtLoad, + DAG.getIntPtrConstant(1, SDLoc(N0))), ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -7823,11 +8705,10 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue DAGCombiner::visitFCEIL(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); EVT VT = N->getValueType(0); // fold (fceil c1) -> fceil(c1) - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0); return SDValue(); @@ -7835,11 +8716,10 @@ SDValue DAGCombiner::visitFCEIL(SDNode *N) { SDValue DAGCombiner::visitFTRUNC(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); EVT VT = N->getValueType(0); // fold (ftrunc c1) -> ftrunc(c1) - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0); return SDValue(); @@ -7847,11 +8727,10 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) { SDValue DAGCombiner::visitFFLOOR(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); EVT VT = N->getValueType(0); // fold (ffloor c1) -> ffloor(c1) - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0); return SDValue(); @@ -7862,14 +8741,9 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVUnaryOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } - // Constant fold FNEG. - if (isa<ConstantFPSDNode>(N0)) - return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N->getOperand(0)); + if (isConstantFPBuildVectorOrConstantFP(N0)) + return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), &DAG.getTarget().Options)) @@ -7893,8 +8767,9 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { // For a scalar, just generate 0x80... SignMask = APInt::getSignBit(IntVT.getSizeInBits()); } - Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int, - DAG.getConstant(SignMask, IntVT)); + SDLoc DL0(N0); + Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int, + DAG.getConstant(SignMask, DL0, IntVT)); AddToWorklist(Int.getNode()); return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int); } @@ -7927,7 +8802,7 @@ SDValue DAGCombiner::visitFMINNUM(SDNode *N) { if (N0CFP && N1CFP) { const APFloat &C0 = N0CFP->getValueAPF(); const APFloat &C1 = N1CFP->getValueAPF(); - return DAG.getConstantFP(minnum(C0, C1), N->getValueType(0)); + return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), N->getValueType(0)); } if (N0CFP) { @@ -7948,7 +8823,7 @@ SDValue DAGCombiner::visitFMAXNUM(SDNode *N) { if (N0CFP && N1CFP) { const APFloat &C0 = N0CFP->getValueAPF(); const APFloat &C1 = N1CFP->getValueAPF(); - return DAG.getConstantFP(maxnum(C0, C1), N->getValueType(0)); + return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), N->getValueType(0)); } if (N0CFP) { @@ -7964,13 +8839,8 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVUnaryOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } - // fold (fabs c1) -> fabs(c1) - if (isa<ConstantFPSDNode>(N0)) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); // fold (fabs (fabs x)) -> (fabs x) @@ -8000,8 +8870,9 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { // For a scalar, just generate 0x7f... SignMask = ~APInt::getSignBit(IntVT.getSizeInBits()); } - Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int, - DAG.getConstant(SignMask, IntVT)); + SDLoc DL(N0); + Int = DAG.getNode(ISD::AND, DL, IntVT, Int, + DAG.getConstant(SignMask, DL, IntVT)); AddToWorklist(Int.getNode()); return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int); } @@ -8071,13 +8942,14 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (AndConst.isPowerOf2() && cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) { + SDLoc DL(N); SDValue SetCC = - DAG.getSetCC(SDLoc(N), + DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()), - Op0, DAG.getConstant(0, Op0.getValueType()), + Op0, DAG.getConstant(0, DL, Op0.getValueType()), ISD::SETNE); - SDValue NewBRCond = DAG.getNode(ISD::BRCOND, SDLoc(N), + SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other, Chain, SetCC, N2); // Don't add the new BRCond into the worklist or else SimplifySelectCC // will convert it back to (X & C1) >> C2. @@ -8130,12 +9002,11 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) { bool Equal = false; - if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0)) - if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() && - Op0.getOpcode() == ISD::XOR) { - TheXor = Op0.getNode(); - Equal = true; - } + if (isOneConstant(Op0) && Op0.hasOneUse() && + Op0.getOpcode() == ISD::XOR) { + TheXor = Op0.getNode(); + Equal = true; + } EVT SetCCVT = N1.getValueType(); if (LegalTypes) @@ -8193,11 +9064,11 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { if (LD->isIndexed() || LD->getBasePtr().getNode() != N) return false; - VT = Use->getValueType(0); + VT = LD->getMemoryVT(); } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { if (ST->isIndexed() || ST->getBasePtr().getNode() != N) return false; - VT = ST->getValue().getValueType(); + VT = ST->getMemoryVT(); } else return false; @@ -8280,8 +9151,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { } // Don't create a indexed load / store with zero offset. - if (isa<ConstantSDNode>(Offset) && - cast<ConstantSDNode>(Offset)->isNullValue()) + if (isNullConstant(Offset)) return false; // Try turning it into a pre-indexed load / store except when: @@ -8309,24 +9179,25 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // a copy of the original base pointer. SmallVector<SDNode *, 16> OtherUses; if (isa<ConstantSDNode>(Offset)) - for (SDNode *Use : BasePtr.getNode()->uses()) { - if (Use == Ptr.getNode()) + for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(), + UE = BasePtr.getNode()->use_end(); + UI != UE; ++UI) { + SDUse &Use = UI.getUse(); + // Skip the use that is Ptr and uses of other results from BasePtr's + // node (important for nodes that return multiple results). + if (Use.getUser() == Ptr.getNode() || Use != BasePtr) continue; - if (Use->isPredecessorOf(N)) + if (Use.getUser()->isPredecessorOf(N)) continue; - if (Use->getOpcode() != ISD::ADD && Use->getOpcode() != ISD::SUB) { + if (Use.getUser()->getOpcode() != ISD::ADD && + Use.getUser()->getOpcode() != ISD::SUB) { OtherUses.clear(); break; } - SDValue Op0 = Use->getOperand(0), Op1 = Use->getOperand(1); - if (Op1.getNode() == BasePtr.getNode()) - std::swap(Op0, Op1); - assert(Op0.getNode() == BasePtr.getNode() && - "Use of ADD/SUB but not an operand"); - + SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1); if (!isa<ConstantSDNode>(Op1)) { OtherUses.clear(); break; @@ -8338,7 +9209,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { break; } - OtherUses.push_back(Use); + OtherUses.push_back(Use.getUser()); } if (Swapped) @@ -8431,12 +9302,14 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1; else CNV = CNV - Offset1; + SDLoc DL(OtherUses[i]); + // We can now generate the new expression. - SDValue NewOp1 = DAG.getConstant(CNV, CN->getValueType(0)); + SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0)); SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0); SDValue NewUse = DAG.getNode(Opcode, - SDLoc(OtherUses[i]), + DL, OtherUses[i]->getValueType(0), NewOp1, NewOp2); DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse); deleteAndRecombine(OtherUses[i]); @@ -8494,8 +9367,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { ISD::MemIndexedMode AM = ISD::UNINDEXED; if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) { // Don't create a indexed load / store with zero offset. - if (isa<ConstantSDNode>(Offset) && - cast<ConstantSDNode>(Offset)->isNullValue()) + if (isNullConstant(Offset)) continue; // Try turning it into a post-indexed load / store except when @@ -8585,7 +9457,7 @@ SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) { "Cannot split out indexing using opaque target constants"); if (Inc.getOpcode() == ISD::TargetConstant) { ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc); - Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), + Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc), ConstInc->getValueType(0)); } @@ -8686,7 +9558,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), Align, LD->getAAInfo()); - return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); + if (NewLoad.getNode() != N) + return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); } } } @@ -8851,9 +9724,6 @@ struct LoadedSlice { unsigned Shift = 0, SelectionDAG *DAG = nullptr) : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {} - LoadedSlice(const LoadedSlice &LS) - : Inst(LS.Inst), Origin(LS.Origin), Shift(LS.Shift), DAG(LS.DAG) {} - /// \brief Get the bits used in a chunk of bits \p BitWidth large. /// \return Result is \p BitWidth and has used bits set to 1 and /// not used bits set to 0. @@ -8980,8 +9850,9 @@ struct LoadedSlice { if (Offset) { // BaseAddr = BaseAddr + Offset. EVT ArithType = BaseAddr.getValueType(); - BaseAddr = DAG->getNode(ISD::ADD, SDLoc(Origin), ArithType, BaseAddr, - DAG->getConstant(Offset, ArithType)); + SDLoc DL(Origin); + BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr, + DAG->getConstant(Offset, DL, ArithType)); } // Create the type of the loaded slice according to its size. @@ -9336,7 +10207,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { if (NotMaskLZ == 64) return Result; // All zero mask. // See if we have a continuous run of bits. If so, we have 0*1+0* - if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64) + if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64) return Result; // Adjust NotMaskLZ down to be from the actual size of the int instead of i64. @@ -9387,10 +10258,12 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, // Okay, we can do this! Replace the 'St' store with a store of IVal that is // shifted by ByteShift and truncated down to NumBytes. - if (ByteShift) - IVal = DAG.getNode(ISD::SRL, SDLoc(IVal), IVal.getValueType(), IVal, - DAG.getConstant(ByteShift*8, + if (ByteShift) { + SDLoc DL(IVal); + IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal, + DAG.getConstant(ByteShift*8, DL, DC->getShiftAmountTy(IVal.getValueType()))); + } // Figure out the offset for the store and the alignment of the access. unsigned StOffset; @@ -9403,8 +10276,9 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, SDValue Ptr = St->getBasePtr(); if (StOffset) { - Ptr = DAG.getNode(ISD::ADD, SDLoc(IVal), Ptr.getValueType(), - Ptr, DAG.getConstant(StOffset, Ptr.getValueType())); + SDLoc DL(IVal); + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), + Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType())); NewAlign = MinAlign(NewAlign, StOffset); } @@ -9486,8 +10360,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { // The narrowing should be profitable, the load/store operation should be // legal (or custom) and the store size should be equal to the NewVT width. while (NewBW < BitWidth && - !(TLI.isOperationLegalOrCustom(Opc, NewVT) && - TLI.isNarrowingProfitable(VT, NewVT))) { + (NewVT.getStoreSizeInBits() != NewBW || + !TLI.isOperationLegalOrCustom(Opc, NewVT) || + !TLI.isNarrowingProfitable(VT, NewVT))) { NewBW = NextPowerOf2(NewBW); NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); } @@ -9517,7 +10392,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD), Ptr.getValueType(), Ptr, - DAG.getConstant(PtrOff, Ptr.getValueType())); + DAG.getConstant(PtrOff, SDLoc(LD), + Ptr.getValueType())); SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr, LD->getPointerInfo().getWithOffset(PtrOff), @@ -9525,7 +10401,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { LD->isInvariant(), NewAlign, LD->getAAInfo()); SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD, - DAG.getConstant(NewImm, NewVT)); + DAG.getConstant(NewImm, SDLoc(Value), + NewVT)); SDValue NewST = DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr, ST->getPointerInfo().getWithOffset(PtrOff), @@ -9599,6 +10476,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { return SDValue(); } +namespace { /// Helper struct to parse and store a memory address as base + index + offset. /// We ignore sign extensions when it is safe to do so. /// The following two expressions are not equivalent. To differentiate we need @@ -9686,37 +10564,156 @@ struct BaseIndexOffset { return BaseIndexOffset(Base, Index, Off, IsIndexSignExt); } }; +} // namespace -/// Holds a pointer to an LSBaseSDNode as well as information on where it -/// is located in a sequence of memory operations connected by a chain. -struct MemOpLink { - MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq): - MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { } - // Ptr to the mem node. - LSBaseSDNode *MemNode; - // Offset from the base ptr. - int64_t OffsetFromBase; - // What is the sequence number of this mem node. - // Lowest mem operand in the DAG starts at zero. - unsigned SequenceNum; -}; +bool DAGCombiner::MergeStoresOfConstantsOrVecElts( + SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, + unsigned NumElem, bool IsConstantSrc, bool UseVector) { + // Make sure we have something to merge. + if (NumElem < 2) + return false; + + int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned LatestNodeUsed = 0; + + for (unsigned i=0; i < NumElem; ++i) { + // Find a chain for the new wide-store operand. Notice that some + // of the store nodes that we found may not be selected for inclusion + // in the wide store. The chain we use needs to be the chain of the + // latest store node which is *used* and replaced by the wide store. + if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum) + LatestNodeUsed = i; + } + + // The latest Node in the DAG. + LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; + SDLoc DL(StoreNodes[0].MemNode); + + SDValue StoredVal; + if (UseVector) { + // Find a legal type for the vector store. + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); + assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); + if (IsConstantSrc) { + // A vector store with a constant source implies that the constant is + // zero; we only handle merging stores of constant zeros because the zero + // can be materialized without a load. + // It may be beneficial to loosen this restriction to allow non-zero + // store merging. + StoredVal = DAG.getConstant(0, DL, Ty); + } else { + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i < NumElem ; ++i) { + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + SDValue Val = St->getValue(); + // All of the operands of a BUILD_VECTOR must have the same type. + if (Val.getValueType() != MemVT) + return false; + Ops.push_back(Val); + } + + // Build the extracted vector elements back into a vector. + StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops); + } + } else { + // We should always use a vector store when merging extracted vector + // elements, so this path implies a store of constants. + assert(IsConstantSrc && "Merged vector elements should use vector store"); + + unsigned StoreBW = NumElem * ElementSizeBytes * 8; + APInt StoreInt(StoreBW, 0); + + // Construct a single integer constant which is made of the smaller + // constant inputs. + bool IsLE = TLI.isLittleEndian(); + for (unsigned i = 0; i < NumElem ; ++i) { + unsigned Idx = IsLE ? (NumElem - 1 - i) : i; + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); + SDValue Val = St->getValue(); + StoreInt <<= ElementSizeBytes*8; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) { + StoreInt |= C->getAPIntValue().zext(StoreBW); + } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) { + StoreInt |= C->getValueAPF().bitcastToAPInt().zext(StoreBW); + } else { + llvm_unreachable("Invalid constant element type"); + } + } + + // Create the new Load and Store operations. + EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); + StoredVal = DAG.getConstant(StoreInt, DL, StoreTy); + } + + SDValue NewStore = DAG.getStore(LatestOp->getChain(), DL, StoredVal, + FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), + false, false, + FirstInChain->getAlignment()); + + // Replace the last store with the new store + CombineTo(LatestOp, NewStore); + // Erase all other stores. + for (unsigned i = 0; i < NumElem ; ++i) { + if (StoreNodes[i].MemNode == LatestOp) + continue; + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + // ReplaceAllUsesWith will replace all uses that existed when it was + // called, but graph optimizations may cause new ones to appear. For + // example, the case in pr14333 looks like + // + // St's chain -> St -> another store -> X + // + // And the only difference from St to the other store is the chain. + // When we change it's chain to be St's chain they become identical, + // get CSEed and the net result is that X is now a use of St. + // Since we know that St is redundant, just iterate. + while (!St->use_empty()) + DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); + deleteAndRecombine(St); + } + + return true; +} + +static bool allowableAlignment(const SelectionDAG &DAG, + const TargetLowering &TLI, EVT EVTTy, + unsigned AS, unsigned Align) { + if (TLI.allowsMisalignedMemoryAccesses(EVTTy, AS, Align)) + return true; + + Type *Ty = EVTTy.getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = TLI.getDataLayout()->getPrefTypeAlignment(Ty); + return (Align >= ABIAlignment); +} bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { + if (OptLevel == CodeGenOpt::None) + return false; + EVT MemVT = St->getMemoryVT(); int64_t ElementSizeBytes = MemVT.getSizeInBits()/8; - bool NoVectors = DAG.getMachineFunction().getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat); + bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute( + Attribute::NoImplicitFloat); + + // This function cannot currently deal with non-byte-sized memory sizes. + if (ElementSizeBytes * 8 != MemVT.getSizeInBits()) + return false; // Don't merge vectors into wider inputs. if (MemVT.isVector() || !MemVT.isSimple()) return false; // Perform an early exit check. Do not bother looking at stored values that - // are not constants or loads. + // are not constants, loads, or extracted vector elements. SDValue StoredVal = St->getValue(); bool IsLoadSrc = isa<LoadSDNode>(StoredVal); - if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) && - !IsLoadSrc) + bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) || + isa<ConstantFPSDNode>(StoredVal); + bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT); + + if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc) return false; // Only look at ends of store sequences. @@ -9761,10 +10758,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (!Ptr.equalBaseIndex(BasePtr)) break; - // Check that the alignment is the same. - if (Index->getAlignment() != St->getAlignment()) - break; - // The memory operands must not be volatile. if (Index->isVolatile() || Index->isIndexed()) break; @@ -9778,11 +10771,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (Index->getMemoryVT() != MemVT) break; - // We do not allow unaligned stores because we want to prevent overriding - // stores. - if (Index->getAlignment()*8 != MemVT.getSizeInBits()) - break; - // We found a potential memory operand to merge. StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++)); @@ -9856,9 +10844,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // The node with the lowest store address. LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); // Store the constants into memory as one consecutive store. - if (!IsLoadSrc) { + if (IsConstantSrc) { unsigned LastLegalType = 0; unsigned LastLegalVectorType = 0; bool NonZero = false; @@ -9878,27 +10868,41 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // Find a legal type for the constant store. unsigned StoreBW = (i+1) * ElementSizeBytes * 8; EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); - if (TLI.isTypeLegal(StoreTy)) + if (TLI.isTypeLegal(StoreTy) && + allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, + FirstStoreAlign)) { LastLegalType = i+1; // Or check whether a truncstore is legal. - else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == - TargetLowering::TypePromoteInteger) { + } else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == + TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); - if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy)) - LastLegalType = i+1; + if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && + allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS, + FirstStoreAlign)) { + LastLegalType = i + 1; + } } // Find a legal type for the vector store. EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); - if (TLI.isTypeLegal(Ty)) + if (TLI.isTypeLegal(Ty) && + allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign)) { LastLegalVectorType = i + 1; + } } - // We only use vectors if the constant is known to be zero and the - // function is not marked with the noimplicitfloat attribute. - if (NonZero || NoVectors) + + // We only use vectors if the constant is known to be zero or the target + // allows it and the function is not marked with the noimplicitfloat + // attribute. + if (NoVectors) { LastLegalVectorType = 0; + } else if (NonZero && !TLI.storeOfVectorConstantIsCheap(MemVT, + LastLegalVectorType, + FirstStoreAS)) { + LastLegalVectorType = 0; + } // Check if we found a legal integer type to store. if (LastLegalType == 0 && LastLegalVectorType == 0) @@ -9907,85 +10911,34 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType; - // Make sure we have something to merge. - if (NumElem < 2) - return false; - - unsigned EarliestNodeUsed = 0; - for (unsigned i=0; i < NumElem; ++i) { - // Find a chain for the new wide-store operand. Notice that some - // of the store nodes that we found may not be selected for inclusion - // in the wide store. The chain we use needs to be the chain of the - // earliest store node which is *used* and replaced by the wide store. - if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) - EarliestNodeUsed = i; - } + return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, + true, UseVector); + } - // The earliest Node in the DAG. - LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; - SDLoc DL(StoreNodes[0].MemNode); + // When extracting multiple vector elements, try to store them + // in one vector store rather than a sequence of scalar stores. + if (IsExtractVecEltSrc) { + unsigned NumElem = 0; + for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) { + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + SDValue StoredVal = St->getValue(); + // This restriction could be loosened. + // Bail out if any stored values are not elements extracted from a vector. + // It should be possible to handle mixed sources, but load sources need + // more careful handling (see the block of code below that handles + // consecutive loads). + if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return false; - SDValue StoredVal; - if (UseVector) { // Find a legal type for the vector store. - EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); - assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); - StoredVal = DAG.getConstant(0, Ty); - } else { - unsigned StoreBW = NumElem * ElementSizeBytes * 8; - APInt StoreInt(StoreBW, 0); - - // Construct a single integer constant which is made of the smaller - // constant inputs. - bool IsLE = TLI.isLittleEndian(); - for (unsigned i = 0; i < NumElem ; ++i) { - unsigned Idx = IsLE ?(NumElem - 1 - i) : i; - StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); - SDValue Val = St->getValue(); - StoreInt<<=ElementSizeBytes*8; - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) { - StoreInt|=C->getAPIntValue().zext(StoreBW); - } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) { - StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW); - } else { - llvm_unreachable("Invalid constant element type"); - } - } - - // Create the new Load and Store operations. - EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); - StoredVal = DAG.getConstant(StoreInt, StoreTy); - } - - SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal, - FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), - false, false, - FirstInChain->getAlignment()); - - // Replace the first store with the new store - CombineTo(EarliestOp, NewStore); - // Erase all other stores. - for (unsigned i = 0; i < NumElem ; ++i) { - if (StoreNodes[i].MemNode == EarliestOp) - continue; - StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); - // ReplaceAllUsesWith will replace all uses that existed when it was - // called, but graph optimizations may cause new ones to appear. For - // example, the case in pr14333 looks like - // - // St's chain -> St -> another store -> X - // - // And the only difference from St to the other store is the chain. - // When we change it's chain to be St's chain they become identical, - // get CSEed and the net result is that X is now a use of St. - // Since we know that St is redundant, just iterate. - while (!St->use_empty()) - DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); - deleteAndRecombine(St); + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); + if (TLI.isTypeLegal(Ty) && + allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign)) + NumElem = i + 1; } - return true; + return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, + false, true); } // Below we handle the case of multiple consecutive stores that @@ -10007,10 +10960,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (!Ld->hasNUsesOfValue(1, 0)) break; - // Check that the alignment is the same as the stores. - if (Ld->getAlignment() != St->getAlignment()) - break; - // The memory operands must not be volatile. if (Ld->isVolatile() || Ld->isIndexed()) break; @@ -10048,6 +10997,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { St->getAlignment() >= RequiredAlignment) return false; + LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); + unsigned FirstLoadAS = FirstLoad->getAddressSpace(); + unsigned FirstLoadAlign = FirstLoad->getAlignment(); + // Scan the memory operations on the chain and find the first non-consecutive // load memory address. These variables hold the index in the store node // array. @@ -10056,7 +11009,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { unsigned LastLegalVectorType = 0; unsigned LastLegalIntegerType = 0; StartAddress = LoadNodes[0].OffsetFromBase; - SDValue FirstChain = LoadNodes[0].MemNode->getChain(); + SDValue FirstChain = FirstLoad->getChain(); for (unsigned i = 1; i < LoadNodes.size(); ++i) { // All loads much share the same chain. if (LoadNodes[i].MemNode->getChain() != FirstChain) @@ -10069,13 +11022,18 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // Find a legal type for the vector store. EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); - if (TLI.isTypeLegal(StoreTy)) + if (TLI.isTypeLegal(StoreTy) && + allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) && + allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign)) { LastLegalVectorType = i + 1; + } // Find a legal type for the integer store. unsigned StoreBW = (i+1) * ElementSizeBytes * 8; StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); - if (TLI.isTypeLegal(StoreTy)) + if (TLI.isTypeLegal(StoreTy) && + allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) && + allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign)) LastLegalIntegerType = i + 1; // Or check whether a truncstore and extload is legal. else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == @@ -10085,7 +11043,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) && - TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy)) + TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) && + allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS, + FirstStoreAlign) && + allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstLoadAS, + FirstLoadAlign)) LastLegalIntegerType = i+1; } } @@ -10103,18 +11065,19 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (NumElem < 2) return false; - // The earliest Node in the DAG. - unsigned EarliestNodeUsed = 0; - LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; + // The latest Node in the DAG. + unsigned LatestNodeUsed = 0; for (unsigned i=1; i<NumElem; ++i) { // Find a chain for the new wide-store operand. Notice that some // of the store nodes that we found may not be selected for inclusion // in the wide store. The chain we use needs to be the chain of the - // earliest store node which is *used* and replaced by the wide store. - if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) - EarliestNodeUsed = i; + // latest store node which is *used* and replaced by the wide store. + if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum) + LatestNodeUsed = i; } + LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; + // Find if it is better to use vectors or integers to load and store // to memory. EVT JointMemOpVT; @@ -10128,18 +11091,13 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { SDLoc LoadDL(LoadNodes[0].MemNode); SDLoc StoreDL(StoreNodes[0].MemNode); - LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); - SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, - FirstLoad->getChain(), - FirstLoad->getBasePtr(), - FirstLoad->getPointerInfo(), - false, false, false, - FirstLoad->getAlignment()); - - SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad, - FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), false, false, - FirstInChain->getAlignment()); + SDValue NewLoad = DAG.getLoad( + JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(), + FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign); + + SDValue NewStore = DAG.getStore( + LatestOp->getChain(), StoreDL, NewLoad, FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), false, false, FirstStoreAlign); // Replace one of the loads with the new load. LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode); @@ -10154,12 +11112,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain()); } - // Replace the first store with the new store. - CombineTo(EarliestOp, NewStore); + // Replace the last store with the new store. + CombineTo(LatestOp, NewStore); // Erase all other stores. for (unsigned i = 0; i < NumElem ; ++i) { // Remove all Store nodes. - if (StoreNodes[i].MemNode == EarliestOp) + if (StoreNodes[i].MemNode == LatestOp) continue; StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain()); @@ -10214,8 +11172,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { case MVT::f32: if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { + ; Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). - bitcastToAPInt().getZExtValue(), MVT::i32); + bitcastToAPInt().getZExtValue(), SDLoc(CFP), + MVT::i32); return DAG.getStore(Chain, SDLoc(N), Tmp, Ptr, ST->getMemOperand()); } @@ -10224,8 +11184,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && !ST->isVolatile()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { + ; Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). - getZExtValue(), MVT::i64); + getZExtValue(), SDLoc(CFP), MVT::i64); return DAG.getStore(Chain, SDLoc(N), Tmp, Ptr, ST->getMemOperand()); } @@ -10236,8 +11197,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // argument passing. Since this is so common, custom legalize the // 64-bit integer store into two 32-bit stores. uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); - SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32); - SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32); + SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32); + SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32); if (TLI.isBigEndian()) std::swap(Lo, Hi); unsigned Alignment = ST->getAlignment(); @@ -10245,18 +11206,20 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { bool isNonTemporal = ST->isNonTemporal(); AAMDNodes AAInfo = ST->getAAInfo(); + SDLoc DL(N); + SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, ST->getAlignment(), AAInfo); - Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr, - DAG.getConstant(4, Ptr.getValueType())); + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(4, DL, Ptr.getValueType())); Alignment = MinAlign(Alignment, 4U); SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi, Ptr, ST->getPointerInfo().getWithOffset(4), isVolatile, isNonTemporal, Alignment, AAInfo); - return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, St0, St1); } @@ -10268,11 +11231,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Try to infer better alignment information than the store already has. if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { - if (Align > ST->getAlignment()) - return DAG.getTruncStore(Chain, SDLoc(N), Value, + if (Align > ST->getAlignment()) { + SDValue NewStore = + DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(), ST->getMemoryVT(), ST->isVolatile(), ST->isNonTemporal(), Align, ST->getAAInfo()); + if (NewStore.getNode() != N) + return CombineTo(ST, NewStore, true); + } } } @@ -10493,24 +11460,20 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( SDValue Offset; EVT PtrType = NewPtr.getValueType(); MachinePointerInfo MPI; + SDLoc DL(EVE); if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) { int Elt = ConstEltNo->getZExtValue(); unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8; - if (TLI.isBigEndian()) - PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff; - Offset = DAG.getConstant(PtrOff, PtrType); + Offset = DAG.getConstant(PtrOff, DL, PtrType); MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff); } else { + Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType); Offset = DAG.getNode( - ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo, - DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType())); - if (TLI.isBigEndian()) - Offset = DAG.getNode( - ISD::SUB, SDLoc(EVE), EltNo.getValueType(), - DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset); + ISD::MUL, DL, PtrType, Offset, + DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType)); MPI = OriginalLoad->getPointerInfo(); } - NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset); + NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset); // The replacement we need to do here is a little tricky: we need to // replace an extractelement of a load with a load. @@ -10620,8 +11583,8 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (!LegalOperations) { EVT IndexTy = TLI.getVectorIdxTy(); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, - SVInVec, DAG.getConstant(OrigElt, IndexTy)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec, + DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy)); } } @@ -10710,7 +11673,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast<LoadSDNode>(InVec); Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; - EltNo = DAG.getConstant(Elt, EltNo.getValueType()); + EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType()); } } @@ -10800,7 +11763,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); assert(ElemRatio > 1 && "Invalid element size ratio"); SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType): - DAG.getConstant(0, SourceType); + DAG.getConstant(0, SDLoc(N), SourceType); unsigned NewBVElems = ElemRatio * VT.getVectorNumElements(); SmallVector<SDValue, 8> Ops(NewBVElems, Filler); @@ -10890,6 +11853,11 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { if (!TLI.isOperationLegalOrCustom(Opcode, NVT)) return SDValue(); + // Just because the floating-point vector type is legal does not necessarily + // mean that the corresponding integer vector type is. + if (!isTypeLegal(NVT)) + return SDValue(); + SmallVector<SDValue, 8> Opnds; for (unsigned i = 0; i != NumInScalars; ++i) { SDValue In = N->getOperand(i); @@ -10914,12 +11882,10 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (ISD::allOperandsUndef(N)) return DAG.getUNDEF(VT); - SDValue V = reduceBuildVecExtToExtBuildVec(N); - if (V.getNode()) + if (SDValue V = reduceBuildVecExtToExtBuildVec(N)) return V; - V = reduceBuildVecConvertToConvertBuildVec(N); - if (V.getNode()) + if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N)) return V; // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT @@ -10942,8 +11908,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (Op.getOpcode() == ISD::UNDEF) continue; // See if we can combine this build_vector into a blend with a zero vector. - if (!VecIn2.getNode() && ((Op.getOpcode() == ISD::Constant && - cast<ConstantSDNode>(Op.getNode())->isNullValue()) || + if (!VecIn2.getNode() && (isNullConstant(Op) || (Op.getOpcode() == ISD::ConstantFP && cast<ConstantFPSDNode>(Op.getNode())->getValueAPF().isZero()))) { UsesZeroVector = true; @@ -11047,20 +12012,20 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements())) return SDValue(); - + // Try to replace VecIn1 with two extract_subvectors // No need to update the masks, they should still be correct. - VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, - DAG.getConstant(VT.getVectorNumElements(), TLI.getVectorIdxTy())); + VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, + DAG.getConstant(VT.getVectorNumElements(), dl, TLI.getVectorIdxTy())); VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, dl, TLI.getVectorIdxTy())); } else return SDValue(); } if (UsesZeroVector) - VecIn2 = VT.isInteger() ? DAG.getConstant(0, VT) : - DAG.getConstantFP(0.0, VT); + VecIn2 = VT.isInteger() ? DAG.getConstant(0, dl, VT) : + DAG.getConstantFP(0.0, dl, VT); else // If VecIn2 is unused then change it to undef. VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); @@ -11081,6 +12046,68 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { return SDValue(); } +static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT OpVT = N->getOperand(0).getValueType(); + + // If the operands are legal vectors, leave them alone. + if (TLI.isTypeLegal(OpVT)) + return SDValue(); + + SDLoc DL(N); + EVT VT = N->getValueType(0); + SmallVector<SDValue, 8> Ops; + + EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits()); + SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT); + + // Keep track of what we encounter. + bool AnyInteger = false; + bool AnyFP = false; + for (const SDValue &Op : N->ops()) { + if (ISD::BITCAST == Op.getOpcode() && + !Op.getOperand(0).getValueType().isVector()) + Ops.push_back(Op.getOperand(0)); + else if (ISD::UNDEF == Op.getOpcode()) + Ops.push_back(ScalarUndef); + else + return SDValue(); + + // Note whether we encounter an integer or floating point scalar. + // If it's neither, bail out, it could be something weird like x86mmx. + EVT LastOpVT = Ops.back().getValueType(); + if (LastOpVT.isFloatingPoint()) + AnyFP = true; + else if (LastOpVT.isInteger()) + AnyInteger = true; + else + return SDValue(); + } + + // If any of the operands is a floating point scalar bitcast to a vector, + // use floating point types throughout, and bitcast everything. + // Replace UNDEFs by another scalar UNDEF node, of the final desired type. + if (AnyFP) { + SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits()); + ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT); + if (AnyInteger) { + for (SDValue &Op : Ops) { + if (Op.getValueType() == SVT) + continue; + if (Op.getOpcode() == ISD::UNDEF) + Op = ScalarUndef; + else + Op = DAG.getNode(ISD::BITCAST, DL, SVT, Op); + } + } + } + + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT, + VT.getSizeInBits() / SVT.getSizeInBits()); + return DAG.getNode(ISD::BITCAST, DL, VT, + DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops)); +} + SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector @@ -11096,9 +12123,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (ISD::allOperandsUndef(N)) return DAG.getUNDEF(VT); - // Optimize concat_vectors where one of the vectors is undef. - if (N->getNumOperands() == 2 && - N->getOperand(1)->getOpcode() == ISD::UNDEF) { + // Optimize concat_vectors where all but the first of the vectors are undef. + if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) { + return Op.getOpcode() == ISD::UNDEF; + })) { SDValue In = N->getOperand(0); assert(In.getValueType().isVector() && "Must concat vectors"); @@ -11106,6 +12134,15 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (In->getOpcode() == ISD::BITCAST && !In->getOperand(0)->getValueType(0).isVector()) { SDValue Scalar = In->getOperand(0); + + // If the bitcast type isn't legal, it might be a trunc of a legal type; + // look through the trunc so we can still do the transform: + // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar) + if (Scalar->getOpcode() == ISD::TRUNCATE && + !TLI.isTypeLegal(Scalar.getValueType()) && + TLI.isTypeLegal(Scalar->getOperand(0).getValueType())) + Scalar = Scalar->getOperand(0); + EVT SclTy = Scalar->getValueType(0); if (!SclTy.isFloatingPoint() && !SclTy.isInteger()) @@ -11122,39 +12159,61 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { } } + // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR. + // We have already tested above for an UNDEF only concatenation. // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...)) // -> (BUILD_VECTOR A, B, ..., C, D, ...) - if (N->getNumOperands() == 2 && - N->getOperand(0).getOpcode() == ISD::BUILD_VECTOR && - N->getOperand(1).getOpcode() == ISD::BUILD_VECTOR) { - EVT VT = N->getValueType(0); - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); + auto IsBuildVectorOrUndef = [](const SDValue &Op) { + return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode(); + }; + bool AllBuildVectorsOrUndefs = + std::all_of(N->op_begin(), N->op_end(), IsBuildVectorOrUndef); + if (AllBuildVectorsOrUndefs) { SmallVector<SDValue, 8> Opnds; - unsigned BuildVecNumElts = N0.getNumOperands(); - - EVT SclTy0 = N0.getOperand(0)->getValueType(0); - EVT SclTy1 = N1.getOperand(0)->getValueType(0); - if (SclTy0.isFloatingPoint()) { - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(N0.getOperand(i)); - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(N1.getOperand(i)); - } else { + EVT SVT = VT.getScalarType(); + + EVT MinVT = SVT; + if (!SVT.isFloatingPoint()) { // If BUILD_VECTOR are from built from integer, they may have different - // operand types. Get the smaller type and truncate all operands to it. - EVT MinTy = SclTy0.bitsLE(SclTy1) ? SclTy0 : SclTy1; - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy, - N0.getOperand(i))); - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy, - N1.getOperand(i))); + // operand types. Get the smallest type and truncate all operands to it. + bool FoundMinVT = false; + for (const SDValue &Op : N->ops()) + if (ISD::BUILD_VECTOR == Op.getOpcode()) { + EVT OpSVT = Op.getOperand(0)->getValueType(0); + MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT; + FoundMinVT = true; + } + assert(FoundMinVT && "Concat vector type mismatch"); } + for (const SDValue &Op : N->ops()) { + EVT OpVT = Op.getValueType(); + unsigned NumElts = OpVT.getVectorNumElements(); + + if (ISD::UNDEF == Op.getOpcode()) + Opnds.append(NumElts, DAG.getUNDEF(MinVT)); + + if (ISD::BUILD_VECTOR == Op.getOpcode()) { + if (SVT.isFloatingPoint()) { + assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch"); + Opnds.append(Op->op_begin(), Op->op_begin() + NumElts); + } else { + for (unsigned i = 0; i != NumElts; ++i) + Opnds.push_back( + DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i))); + } + } + } + + assert(VT.getVectorNumElements() == Opnds.size() && + "Concat vector type mismatch"); return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); } + // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR. + if (SDValue V = combineConcatVectorOfScalars(N, DAG)) + return V; + // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR // nodes often generate nop CONCAT_VECTOR nodes. // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that @@ -11216,7 +12275,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { // type. if (V->getOperand(0).getValueType() != NVT) return SDValue(); - unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned Idx = N->getConstantOperandVal(1); unsigned NumElems = NVT.getVectorNumElements(); assert((Idx % NumElems) == 0 && "IDX in concat is not a multiple of the result vector length."); @@ -11347,7 +12406,8 @@ static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0, return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask()); } -// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat. +// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, +// or turn a shuffle of a single concat into simpler shuffle then concat. static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); @@ -11361,6 +12421,18 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements(); unsigned NumConcats = NumElts / NumElemsPerConcat; + // Special case: shuffle(concat(A,B)) can be more efficiently represented + // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high + // half vector elements. + if (NumElemsPerConcat * 2 == NumElts && N1.getOpcode() == ISD::UNDEF && + std::all_of(SVN->getMask().begin() + NumElemsPerConcat, + SVN->getMask().end(), [](int i) { return i == -1; })) { + N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1), + ArrayRef<int>(SVN->getMask().begin(), NumElemsPerConcat)); + N1 = DAG.getUNDEF(ConcatVT); + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1); + } + // Look at every vector that's inserted. We're looking for exact // subvector-sized copies from a concatenated vector for (unsigned I = 0; I != NumConcats; ++I) { @@ -11459,7 +12531,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } // If it is a splat, check if the argument vector is another splat or a - // build_vector with all scalar elements the same. + // build_vector. if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { SDNode *V = N0.getNode(); @@ -11496,6 +12568,18 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // Splat of <x, x, x, x>, return <x, x, x, x> if (AllSame) return N0; + + // Canonicalize any other splat as a build_vector. + const SDValue &Splatted = V->getOperand(SVN->getSplatIndex()); + SmallVector<SDValue, 8> Ops(NumElts, Splatted); + SDValue NewBV = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), + V->getValueType(0), Ops); + + // We may have jumped through bitcasts, so the type of the + // BUILD_VECTOR may not match the type of the shuffle. + if (V->getValueType(0) != VT) + NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV); + return NewBV; } } @@ -11516,6 +12600,118 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return V; } + // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - + // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. + if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) { + SmallVector<SDValue, 8> Ops; + for (int M : SVN->getMask()) { + SDValue Op = DAG.getUNDEF(VT.getScalarType()); + if (M >= 0) { + int Idx = M % NumElts; + SDValue &S = (M < (int)NumElts ? N0 : N1); + if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) { + Op = S.getOperand(Idx); + } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) { + if (Idx == 0) + Op = S.getOperand(0); + } else { + // Operand can't be combined - bail out. + break; + } + } + Ops.push_back(Op); + } + if (Ops.size() == VT.getVectorNumElements()) { + // BUILD_VECTOR requires all inputs to be of the same type, find the + // maximum type and extend them all. + EVT SVT = VT.getScalarType(); + if (SVT.isInteger()) + for (SDValue &Op : Ops) + SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT); + if (SVT != VT.getScalarType()) + for (SDValue &Op : Ops) + Op = TLI.isZExtFree(Op.getValueType(), SVT) + ? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT) + : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Ops); + } + } + + // If this shuffle only has a single input that is a bitcasted shuffle, + // attempt to merge the 2 shuffles and suitably bitcast the inputs/output + // back to their original types. + if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && + N1.getOpcode() == ISD::UNDEF && Level < AfterLegalizeVectorOps && + TLI.isTypeLegal(VT)) { + + // Peek through the bitcast only if there is one user. + SDValue BC0 = N0; + while (BC0.getOpcode() == ISD::BITCAST) { + if (!BC0.hasOneUse()) + break; + BC0 = BC0.getOperand(0); + } + + auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) { + if (Scale == 1) + return SmallVector<int, 8>(Mask.begin(), Mask.end()); + + SmallVector<int, 8> NewMask; + for (int M : Mask) + for (int s = 0; s != Scale; ++s) + NewMask.push_back(M < 0 ? -1 : Scale * M + s); + return NewMask; + }; + + if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) { + EVT SVT = VT.getScalarType(); + EVT InnerVT = BC0->getValueType(0); + EVT InnerSVT = InnerVT.getScalarType(); + + // Determine which shuffle works with the smaller scalar type. + EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT; + EVT ScaleSVT = ScaleVT.getScalarType(); + + if (TLI.isTypeLegal(ScaleVT) && + 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) && + 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) { + + int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits(); + int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits(); + + // Scale the shuffle masks to the smaller scalar type. + ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0); + SmallVector<int, 8> InnerMask = + ScaleShuffleMask(InnerSVN->getMask(), InnerScale); + SmallVector<int, 8> OuterMask = + ScaleShuffleMask(SVN->getMask(), OuterScale); + + // Merge the shuffle masks. + SmallVector<int, 8> NewMask; + for (int M : OuterMask) + NewMask.push_back(M < 0 ? -1 : InnerMask[M]); + + // Test for shuffle mask legality over both commutations. + SDValue SV0 = BC0->getOperand(0); + SDValue SV1 = BC0->getOperand(1); + bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT); + if (!LegalMask) { + std::swap(SV0, SV1); + ShuffleVectorSDNode::commuteMask(NewMask); + LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT); + } + + if (LegalMask) { + SV0 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV0); + SV1 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV1); + return DAG.getNode( + ISD::BITCAST, SDLoc(N), VT, + DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask)); + } + } + } + } + // Canonicalize shuffles according to rules: // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A) // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B) @@ -11543,8 +12739,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) // Don't try to fold shuffles with illegal type. - if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && - TLI.isTypeLegal(VT)) { + // Only fold if this shuffle is the only user of the other shuffle. + if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) && + Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) { ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); // The incoming shuffle must be of the same type as the result of the @@ -11624,20 +12821,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // Avoid introducing shuffles with illegal mask. if (!TLI.isShuffleMaskLegal(Mask, VT)) { - // Compute the commuted shuffle mask and test again. - for (unsigned i = 0; i != NumElts; ++i) { - int idx = Mask[i]; - if (idx < 0) - continue; - else if (idx < (int)NumElts) - Mask[i] = idx + NumElts; - else - Mask[i] = idx - NumElts; - } + ShuffleVectorSDNode::commuteMask(Mask); if (!TLI.isShuffleMaskLegal(Mask, VT)) return SDValue(); - + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2) @@ -11653,6 +12841,34 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { + SDValue InVal = N->getOperand(0); + EVT VT = N->getValueType(0); + + // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern + // with a VECTOR_SHUFFLE. + if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + SDValue InVec = InVal->getOperand(0); + SDValue EltNo = InVal->getOperand(1); + + // FIXME: We could support implicit truncation if the shuffle can be + // scaled to a smaller vector scalar type. + ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo); + if (C0 && VT == InVec.getValueType() && + VT.getScalarType() == InVal.getValueType()) { + SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1); + int Elt = C0->getZExtValue(); + NewMask[0] = Elt; + + if (TLI.isShuffleMaskLegal(NewMask, VT)) + return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT), + NewMask); + } + } + + return SDValue(); +} + SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N2 = N->getOperand(2); @@ -11680,48 +12896,64 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) { + SDValue N0 = N->getOperand(0); + + // fold (fp_to_fp16 (fp16_to_fp op)) -> op + if (N0->getOpcode() == ISD::FP16_TO_FP) + return N0->getOperand(0); + + return SDValue(); +} + /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle /// with the destination vector and a zero vector. /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> /// vector_shuffle V, Zero, <0, 4, 2, 4> SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { EVT VT = N->getValueType(0); - SDLoc dl(N); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); - if (N->getOpcode() == ISD::AND) { - if (RHS.getOpcode() == ISD::BITCAST) - RHS = RHS.getOperand(0); - if (RHS.getOpcode() == ISD::BUILD_VECTOR) { - SmallVector<int, 8> Indices; - unsigned NumElts = RHS.getNumOperands(); - for (unsigned i = 0; i != NumElts; ++i) { - SDValue Elt = RHS.getOperand(i); - if (!isa<ConstantSDNode>(Elt)) - return SDValue(); + SDLoc dl(N); - if (cast<ConstantSDNode>(Elt)->isAllOnesValue()) - Indices.push_back(i); - else if (cast<ConstantSDNode>(Elt)->isNullValue()) - Indices.push_back(NumElts+i); - else - return SDValue(); - } + // Make sure we're not running after operation legalization where it + // may have custom lowered the vector shuffles. + if (LegalOperations) + return SDValue(); - // Let's see if the target supports this vector_shuffle. - EVT RVT = RHS.getValueType(); - if (!TLI.isVectorClearMaskLegal(Indices, RVT)) - return SDValue(); + if (N->getOpcode() != ISD::AND) + return SDValue(); + + if (RHS.getOpcode() == ISD::BITCAST) + RHS = RHS.getOperand(0); - // Return the new VECTOR_SHUFFLE node. - EVT EltVT = RVT.getVectorElementType(); - SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(), - DAG.getConstant(0, EltVT)); - SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), RVT, ZeroOps); - LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS); - SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); - return DAG.getNode(ISD::BITCAST, dl, VT, Shuf); + if (RHS.getOpcode() == ISD::BUILD_VECTOR) { + SmallVector<int, 8> Indices; + unsigned NumElts = RHS.getNumOperands(); + + for (unsigned i = 0; i != NumElts; ++i) { + SDValue Elt = RHS.getOperand(i); + if (isAllOnesConstant(Elt)) + Indices.push_back(i); + else if (isNullConstant(Elt)) + Indices.push_back(NumElts+i); + else + return SDValue(); } + + // Let's see if the target supports this vector_shuffle. + EVT RVT = RHS.getValueType(); + if (!TLI.isVectorClearMaskLegal(Indices, RVT)) + return SDValue(); + + // Return the new VECTOR_SHUFFLE node. + EVT EltVT = RVT.getVectorElementType(); + SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(), + DAG.getConstant(0, dl, EltVT)); + SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, RVT, ZeroOps); + LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS); + SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); + return DAG.getNode(ISD::BITCAST, dl, VT, Shuf); } return SDValue(); @@ -11734,8 +12966,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); - SDValue Shuffle = XformToShuffleWithZero(N); - if (Shuffle.getNode()) return Shuffle; + + if (SDValue Shuffle = XformToShuffleWithZero(N)) + return Shuffle; // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold // this operation. @@ -11754,9 +12987,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { // Can't fold divide by zero. if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV || N->getOpcode() == ISD::FDIV) { - if ((RHSOp.getOpcode() == ISD::Constant && - cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) || - (RHSOp.getOpcode() == ISD::ConstantFP && + if (isNullConstant(RHSOp) || (RHSOp.getOpcode() == ISD::ConstantFP && cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero())) break; } @@ -11813,38 +13044,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { return SDValue(); } -/// Visit a binary vector operation, like FABS/FNEG. -SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) { - assert(N->getValueType(0).isVector() && - "SimplifyVUnaryOp only works on vectors!"); - - SDValue N0 = N->getOperand(0); - - if (N0.getOpcode() != ISD::BUILD_VECTOR) - return SDValue(); - - // Operand is a BUILD_VECTOR node, see if we can constant fold it. - SmallVector<SDValue, 8> Ops; - for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) { - SDValue Op = N0.getOperand(i); - if (Op.getOpcode() != ISD::UNDEF && - Op.getOpcode() != ISD::ConstantFP) - break; - EVT EltVT = Op.getValueType(); - SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(N0), EltVT, Op); - if (FoldOp.getOpcode() != ISD::UNDEF && - FoldOp.getOpcode() != ISD::ConstantFP) - break; - Ops.push_back(FoldOp); - AddToWorklist(FoldOp.getNode()); - } - - if (Ops.size() != N0.getNumOperands()) - return SDValue(); - - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N0.getValueType(), Ops); -} - SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2){ assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!"); @@ -11881,6 +13080,38 @@ SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, SDValue RHS) { + // fold (select (setcc x, -0.0, *lt), NaN, (fsqrt x)) + // The select + setcc is redundant, because fsqrt returns NaN for X < -0. + if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) { + if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) { + // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?)) + SDValue Sqrt = RHS; + ISD::CondCode CC; + SDValue CmpLHS; + const ConstantFPSDNode *NegZero = nullptr; + + if (TheSelect->getOpcode() == ISD::SELECT_CC) { + CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get(); + CmpLHS = TheSelect->getOperand(0); + NegZero = isConstOrConstSplatFP(TheSelect->getOperand(1)); + } else { + // SELECT or VSELECT + SDValue Cmp = TheSelect->getOperand(0); + if (Cmp.getOpcode() == ISD::SETCC) { + CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get(); + CmpLHS = Cmp.getOperand(0); + NegZero = isConstOrConstSplatFP(Cmp.getOperand(1)); + } + } + if (NegZero && NegZero->isNegative() && NegZero->isZero() && + Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT || + CC == ISD::SETULT || CC == ISD::SETLT)) { + // We have: (select (setcc x, -0.0, *lt), NaN, (fsqrt x)) + CombineTo(TheSelect, Sqrt); + return true; + } + } + } // Cannot simplify select with vector condition if (TheSelect->getOperand(0).getValueType().isVector()) return false; @@ -11902,6 +13133,9 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, if (LHS.getOperand(0) != RHS.getOperand(0) || // Do not let this transformation reduce the number of volatile loads. LLD->isVolatile() || RLD->isVolatile() || + // FIXME: If either is a pre/post inc/dec load, + // we'd need to split out the address adjustment. + LLD->isIndexed() || RLD->isIndexed() || // If this is an EXTLOAD, the VT's must match. LLD->getMemoryVT() != RLD->getMemoryVT() || // If this is an EXTLOAD, the kind of extension must match. @@ -12003,20 +13237,17 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, EVT VT = N2.getValueType(); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); - ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode()); // Determine if the condition we're dealing with is constant SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1, CC, DL, false); if (SCC.getNode()) AddToWorklist(SCC.getNode()); - ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode()); - // fold select_cc true, x, y -> x - if (SCCC && !SCCC->isNullValue()) - return N2; - // fold select_cc false, x, y -> y - if (SCCC && SCCC->isNullValue()) - return N3; + if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) { + // fold select_cc true, x, y -> x + // fold select_cc false, x, y -> y + return !SCCC->isNullValue() ? N2 : N3; + } // Check to see if we can simplify the select into an fabs node if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) { @@ -12069,9 +13300,9 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, // Get the offsets to the 0 and 1 element of the array so that we can // select between them. - SDValue Zero = DAG.getIntPtrConstant(0); + SDValue Zero = DAG.getIntPtrConstant(0, DL); unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType()); - SDValue One = DAG.getIntPtrConstant(EltSize); + SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV)); SDValue Cond = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), @@ -12086,24 +13317,23 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), false, false, false, Alignment); - } } // Check to see if we can perform the "gzip trick", transforming // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A) - if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT && - (N1C->isNullValue() || // (a < 0) ? b : 0 - (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0 + if (isNullConstant(N3) && CC == ISD::SETLT && + (isNullConstant(N1) || // (a < 0) ? b : 0 + (isOneConstant(N1) && N0 == N2))) { // (a < 1) ? a : 0 EVT XType = N0.getValueType(); EVT AType = N2.getValueType(); if (XType.bitsGE(AType)) { // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a // single-bit constant. - if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) { + if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) { unsigned ShCtV = N2C->getAPIntValue().logBase2(); - ShCtV = XType.getSizeInBits()-ShCtV-1; - SDValue ShCt = DAG.getConstant(ShCtV, + ShCtV = XType.getSizeInBits() - ShCtV - 1; + SDValue ShCt = DAG.getConstant(ShCtV, SDLoc(N0), getShiftAmountTy(N0.getValueType())); SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), XType, N0, ShCt); @@ -12119,7 +13349,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), XType, N0, - DAG.getConstant(XType.getSizeInBits()-1, + DAG.getConstant(XType.getSizeInBits() - 1, + SDLoc(N0), getShiftAmountTy(N0.getValueType()))); AddToWorklist(Shift.getNode()); @@ -12139,23 +13370,21 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, // single bit-test can be materialized as an all-ones register with // shift-left and shift-right-arith. if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND && - N0->getValueType(0) == VT && - N1C && N1C->isNullValue() && - N2C && N2C->isNullValue()) { + N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) { SDValue AndLHS = N0->getOperand(0); ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1)); if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) { // Shift the tested bit over the sign bit. APInt AndMask = ConstAndRHS->getAPIntValue(); SDValue ShlAmt = - DAG.getConstant(AndMask.countLeadingZeros(), + DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS), getShiftAmountTy(AndLHS.getValueType())); SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt); // Now arithmetic right shift it all the way over, so the result is either // all-ones, or zero. SDValue ShrAmt = - DAG.getConstant(AndMask.getBitWidth()-1, + DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl), getShiftAmountTy(Shl.getValueType())); SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt); @@ -12164,13 +13393,13 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, } // fold select C, 16, 0 -> shl C, 4 - if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() && + if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() && TLI.getBooleanContents(N0.getValueType()) == TargetLowering::ZeroOrOneBooleanContent) { // If the caller doesn't want us to simplify this into a zext of a compare, // don't do it. - if (NotExtCompare && N2C->getAPIntValue() == 1) + if (NotExtCompare && N2C->isOne()) return SDValue(); // Get a SetCC of the condition @@ -12198,13 +13427,13 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, AddToWorklist(SCC.getNode()); AddToWorklist(Temp.getNode()); - if (N2C->getAPIntValue() == 1) + if (N2C->isOne()) return Temp; // shl setcc result by log2 n2c return DAG.getNode( ISD::SHL, DL, N2.getValueType(), Temp, - DAG.getConstant(N2C->getAPIntValue().logBase2(), + DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp), getShiftAmountTy(Temp.getValueType()))); } } @@ -12212,7 +13441,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, // Check to see if this is the equivalent of setcc // FIXME: Turn all of these into setcc if setcc if setcc is legal // otherwise, go ahead with the folds. - if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) { + if (0 && isNullConstant(N3) && isOneConstant(N2)) { EVT XType = N0.getValueType(); if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(XType))) { @@ -12223,30 +13452,34 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, } // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X)))) - if (N1C && N1C->isNullValue() && CC == ISD::SETEQ && + if (isNullConstant(N1) && CC == ISD::SETEQ && (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, XType))) { SDValue Ctlz = DAG.getNode(ISD::CTLZ, SDLoc(N0), XType, N0); return DAG.getNode(ISD::SRL, DL, XType, Ctlz, DAG.getConstant(Log2_32(XType.getSizeInBits()), + SDLoc(Ctlz), getShiftAmountTy(Ctlz.getValueType()))); } // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1)) - if (N1C && N1C->isNullValue() && CC == ISD::SETGT) { - SDValue NegN0 = DAG.getNode(ISD::SUB, SDLoc(N0), - XType, DAG.getConstant(0, XType), N0); - SDValue NotN0 = DAG.getNOT(SDLoc(N0), N0, XType); + if (isNullConstant(N1) && CC == ISD::SETGT) { + SDLoc DL(N0); + SDValue NegN0 = DAG.getNode(ISD::SUB, DL, + XType, DAG.getConstant(0, DL, XType), N0); + SDValue NotN0 = DAG.getNOT(DL, N0, XType); return DAG.getNode(ISD::SRL, DL, XType, DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0), - DAG.getConstant(XType.getSizeInBits()-1, + DAG.getConstant(XType.getSizeInBits() - 1, DL, getShiftAmountTy(XType))); } // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1)) - if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) { - SDValue Sign = DAG.getNode(ISD::SRL, SDLoc(N0), XType, N0, - DAG.getConstant(XType.getSizeInBits()-1, + if (isAllOnesConstant(N1) && CC == ISD::SETGT) { + SDLoc DL(N0); + SDValue Sign = DAG.getNode(ISD::SRL, DL, XType, N0, + DAG.getConstant(XType.getSizeInBits() - 1, DL, getShiftAmountTy(N0.getValueType()))); - return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType)); + return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, DL, + XType)); } } @@ -12269,11 +13502,12 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, EVT XType = N0.getValueType(); if (SubC && SubC->isNullValue() && XType.isInteger()) { - SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), XType, + SDLoc DL(N0); + SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, - DAG.getConstant(XType.getSizeInBits()-1, + DAG.getConstant(XType.getSizeInBits() - 1, DL, getShiftAmountTy(N0.getValueType()))); - SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), + SDValue Add = DAG.getNode(ISD::ADD, DL, XType, N0, Shift); AddToWorklist(Shift.getNode()); AddToWorklist(Add.getNode()); @@ -12303,7 +13537,7 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) { return SDValue(); // Avoid division by zero. - if (!C->getAPIntValue()) + if (C->isNullValue()) return SDValue(); std::vector<SDNode*> Built; @@ -12323,7 +13557,7 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) { return SDValue(); // Avoid division by zero. - if (!C->getAPIntValue()) + if (C->isNullValue()) return SDValue(); std::vector<SDNode *> Built; @@ -12344,7 +13578,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { return SDValue(); // Avoid division by zero. - if (!C->getAPIntValue()) + if (C->isNullValue()) return SDValue(); std::vector<SDNode*> Built; @@ -12374,7 +13608,7 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) { // does not require additional intermediate precision] EVT VT = Op.getValueType(); SDLoc DL(Op); - SDValue FPOne = DAG.getConstantFP(1.0, VT); + SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); AddToWorklist(Est.getNode()); @@ -12409,7 +13643,7 @@ SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); - SDValue ThreeHalves = DAG.getConstantFP(1.5, VT); + SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT); // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that // this entire sequence requires only one FP constant. @@ -12445,8 +13679,8 @@ SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); - SDValue MinusThree = DAG.getConstantFP(-3.0, VT); - SDValue MinusHalf = DAG.getConstantFP(-0.5, VT); + SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT); + SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT); // Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est) for (unsigned i = 0; i < Iterations; ++i) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index c46539b..0351c33 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -44,6 +44,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -61,8 +62,8 @@ #include "llvm/IR/Operator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -424,7 +425,7 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) { // Check if the second operand is a constant and handle it appropriately. if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { - uint64_t Imm = CI->getZExtValue(); + uint64_t Imm = CI->getSExtValue(); // Transform "sdiv exact X, 8" -> "sra X, 3". if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) && @@ -710,7 +711,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) { CallingConv::ID CC = I->getCallingConv(); bool IsAnyRegCC = CC == CallingConv::AnyReg; bool HasDef = !I->getType()->isVoidTy(); - Value *Callee = I->getOperand(PatchPointOpers::TargetPos); + Value *Callee = I->getOperand(PatchPointOpers::TargetPos)->stripPointerCasts(); // Get the real number of arguments participating in the call <numArgs> assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos)) && @@ -756,23 +757,25 @@ bool FastISel::selectPatchpoint(const CallInst *I) { cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)); Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue())); - // Assume that the callee is a constant address or null pointer. - // FIXME: handle function symbols in the future. - uint64_t CalleeAddr; - if (const auto *C = dyn_cast<IntToPtrInst>(Callee)) - CalleeAddr = cast<ConstantInt>(C->getOperand(0))->getZExtValue(); - else if (const auto *C = dyn_cast<ConstantExpr>(Callee)) { - if (C->getOpcode() == Instruction::IntToPtr) - CalleeAddr = cast<ConstantInt>(C->getOperand(0))->getZExtValue(); - else + // Add the call target. + if (const auto *C = dyn_cast<IntToPtrInst>(Callee)) { + uint64_t CalleeConstAddr = + cast<ConstantInt>(C->getOperand(0))->getZExtValue(); + Ops.push_back(MachineOperand::CreateImm(CalleeConstAddr)); + } else if (const auto *C = dyn_cast<ConstantExpr>(Callee)) { + if (C->getOpcode() == Instruction::IntToPtr) { + uint64_t CalleeConstAddr = + cast<ConstantInt>(C->getOperand(0))->getZExtValue(); + Ops.push_back(MachineOperand::CreateImm(CalleeConstAddr)); + } else llvm_unreachable("Unsupported ConstantExpr."); + } else if (const auto *GV = dyn_cast<GlobalValue>(Callee)) { + Ops.push_back(MachineOperand::CreateGA(GV, 0)); } else if (isa<ConstantPointerNull>(Callee)) - CalleeAddr = 0; + Ops.push_back(MachineOperand::CreateImm(0)); else llvm_unreachable("Unsupported callee address."); - Ops.push_back(MachineOperand::CreateImm(CalleeAddr)); - // Adjust <numArgs> to account for any arguments that have been passed on // the stack instead. unsigned NumCallRegArgs = IsAnyRegCC ? NumArgs : CLI.OutRegs.size(); @@ -801,7 +804,8 @@ bool FastISel::selectPatchpoint(const CallInst *I) { return false; // Push the register mask info. - Ops.push_back(MachineOperand::CreateRegMask(TRI.getCallPreservedMask(CC))); + Ops.push_back(MachineOperand::CreateRegMask( + TRI.getCallPreservedMask(*FuncInfo.MF, CC))); // Add scratch registers as implicit def and early clobber. const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC); @@ -1077,12 +1081,17 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { // The donothing intrinsic does, well, nothing. case Intrinsic::donothing: return true; + case Intrinsic::eh_actions: { + unsigned ResultReg = getRegForValue(UndefValue::get(II->getType())); + if (!ResultReg) + return false; + updateValueMap(II, ResultReg); + return true; + } case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast<DbgDeclareInst>(II); - DIVariable DIVar(DI->getVariable()); - assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgDeclareInst should be either null or a DIVariable."); - if (!DIVar || !FuncInfo.MF->getMMI().hasDebugInfo()) { + assert(DI->getVariable() && "Missing variable"); + if (!FuncInfo.MF->getMMI().hasDebugInfo()) { DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); return true; } @@ -1122,6 +1131,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { false); if (Op) { + assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) && + "Expected inlined-at fields to agree"); if (Op->isReg()) { Op->setIsDebug(true); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, @@ -1146,6 +1157,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { const DbgValueInst *DI = cast<DbgValueInst>(II); const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); const Value *V = DI->getValue(); + assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) && + "Expected inlined-at fields to agree"); if (!V) { // Currently the optimizer can produce this; insert an undef to // help debugging. Probably the optimizer should not do this. @@ -1580,7 +1593,7 @@ FastISel::FastISel(FunctionLoweringInfo &FuncInfo, bool SkipTargetIndependentISel) : FuncInfo(FuncInfo), MF(FuncInfo.MF), MRI(FuncInfo.MF->getRegInfo()), MFI(*FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()), - TM(FuncInfo.MF->getTarget()), DL(*MF->getSubtarget().getDataLayout()), + TM(FuncInfo.MF->getTarget()), DL(*TM.getDataLayout()), TII(*MF->getSubtarget().getInstrInfo()), TLI(*MF->getSubtarget().getTargetLowering()), TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo), @@ -1662,6 +1675,7 @@ unsigned FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, if (ResultReg) return ResultReg; unsigned MaterialReg = fastEmit_i(ImmType, ImmType, ISD::Constant, Imm); + bool IsImmKill = true; if (!MaterialReg) { // This is a bit ugly/slow, but failing here means falling out of // fast-isel, which would be very slow. @@ -1670,9 +1684,15 @@ unsigned FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm)); if (!MaterialReg) return 0; + // FIXME: If the materialized register here has no uses yet then this + // will be the first use and we should be able to mark it as killed. + // However, the local value area for materialising constant expressions + // grows down, not up, which means that any constant expressions we generate + // later which also use 'Imm' could be after this instruction and therefore + // after this kill. + IsImmKill = false; } - return fastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, MaterialReg, - /*IsKill=*/true); + return fastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, MaterialReg, IsImmKill); } unsigned FastISel::createResultReg(const TargetRegisterClass *RC) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 19aca6e..7b5b8c4 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" @@ -31,6 +32,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" @@ -78,12 +80,40 @@ static ISD::NodeType getPreferredExtendForValue(const Value *V) { return ExtendKind; } +namespace { +struct WinEHNumbering { + WinEHNumbering(WinEHFuncInfo &FuncInfo) : FuncInfo(FuncInfo), + CurrentBaseState(-1), NextState(0) {} + + WinEHFuncInfo &FuncInfo; + int CurrentBaseState; + int NextState; + + SmallVector<std::unique_ptr<ActionHandler>, 4> HandlerStack; + SmallPtrSet<const Function *, 4> VisitedHandlers; + + int currentEHNumber() const { + return HandlerStack.empty() ? CurrentBaseState : HandlerStack.back()->getEHState(); + } + + void createUnwindMapEntry(int ToState, ActionHandler *AH); + void createTryBlockMapEntry(int TryLow, int TryHigh, + ArrayRef<CatchHandler *> Handlers); + void processCallSite(MutableArrayRef<std::unique_ptr<ActionHandler>> Actions, + ImmutableCallSite CS); + void popUnmatchedActions(int FirstMismatch); + void calculateStateNumbers(const Function &F); + void findActionRootLPads(const Function &F); +}; +} + void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, SelectionDAG *DAG) { Fn = &fn; MF = &mf; TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); + MachineModuleInfo &MMI = MF->getMMI(); // Check whether the function can return without sret-demotion. SmallVector<ISD::OutputArg, 4> Outs; @@ -133,16 +163,17 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, ImmutableCallSite CS(I); if (isa<InlineAsm>(CS.getCalledValue())) { unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); std::vector<TargetLowering::AsmOperandInfo> Ops = - TLI->ParseConstraints(CS); + TLI->ParseConstraints(TRI, CS); for (size_t I = 0, E = Ops.size(); I != E; ++I) { TargetLowering::AsmOperandInfo &Op = Ops[I]; if (Op.Type == InlineAsm::isClobber) { // Clobbers don't have SDValue operands, hence SDValue(). TLI->ComputeConstraintToUse(Op, SDValue(), DAG); std::pair<unsigned, const TargetRegisterClass *> PhysReg = - TLI->getRegForInlineAsmConstraint(Op.ConstraintCode, - Op.ConstraintVT); + TLI->getRegForInlineAsmConstraint(TRI, Op.ConstraintCode, + Op.ConstraintVT); if (PhysReg.first == SP) MF->getFrameInfo()->setHasInlineAsmWithSPAdjust(true); } @@ -176,13 +207,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // during the initial isel pass through the IR so that it is done // in a predictable order. if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) { - MachineModuleInfo &MMI = MF->getMMI(); - DIVariable DIVar(DI->getVariable()); - assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgDeclareInst should be either null or a DIVariable."); - if (MMI.hasDebugInfo() && - DIVar && - !DI->getDebugLoc().isUnknown()) { + assert(DI->getVariable() && "Missing variable"); + assert(DI->getDebugLoc() && "Missing location"); + if (MMI.hasDebugInfo()) { // Don't handle byval struct arguments or VLAs, for example. // Non-byval arguments are handled here (they refer to the stack // temporary alloca at this point). @@ -249,9 +276,414 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, } // Mark landing pad blocks. - for (BB = Fn->begin(); BB != EB; ++BB) - if (const InvokeInst *Invoke = dyn_cast<InvokeInst>(BB->getTerminator())) + SmallVector<const LandingPadInst *, 4> LPads; + for (BB = Fn->begin(); BB != EB; ++BB) { + if (const auto *Invoke = dyn_cast<InvokeInst>(BB->getTerminator())) MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad(); + if (BB->isLandingPad()) + LPads.push_back(BB->getLandingPadInst()); + } + + // If this is an MSVC EH personality, we need to do a bit more work. + EHPersonality Personality = EHPersonality::Unknown; + if (!LPads.empty()) + Personality = classifyEHPersonality(LPads.back()->getPersonalityFn()); + if (!isMSVCEHPersonality(Personality)) + return; + + WinEHFuncInfo *EHInfo = nullptr; + if (Personality == EHPersonality::MSVC_Win64SEH) { + addSEHHandlersForLPads(LPads); + } else if (Personality == EHPersonality::MSVC_CXX) { + const Function *WinEHParentFn = MMI.getWinEHParent(&fn); + EHInfo = &MMI.getWinEHFuncInfo(WinEHParentFn); + if (EHInfo->LandingPadStateMap.empty()) { + WinEHNumbering Num(*EHInfo); + Num.findActionRootLPads(*WinEHParentFn); + // The VisitedHandlers list is used by both findActionRootLPads and + // calculateStateNumbers, but both functions need to visit all handlers. + Num.VisitedHandlers.clear(); + Num.calculateStateNumbers(*WinEHParentFn); + // Pop everything on the handler stack. + // It may be necessary to call this more than once because a handler can + // be pushed on the stack as a result of clearing the stack. + while (!Num.HandlerStack.empty()) + Num.processCallSite(None, ImmutableCallSite()); + } + + // Copy the state numbers to LandingPadInfo for the current function, which + // could be a handler or the parent. + for (const LandingPadInst *LP : LPads) { + MachineBasicBlock *LPadMBB = MBBMap[LP->getParent()]; + MMI.addWinEHState(LPadMBB, EHInfo->LandingPadStateMap[LP]); + } + } +} + +void FunctionLoweringInfo::addSEHHandlersForLPads( + ArrayRef<const LandingPadInst *> LPads) { + MachineModuleInfo &MMI = MF->getMMI(); + + // Iterate over all landing pads with llvm.eh.actions calls. + for (const LandingPadInst *LP : LPads) { + const IntrinsicInst *ActionsCall = + dyn_cast<IntrinsicInst>(LP->getNextNode()); + if (!ActionsCall || + ActionsCall->getIntrinsicID() != Intrinsic::eh_actions) + continue; + + // Parse the llvm.eh.actions call we found. + MachineBasicBlock *LPadMBB = MBBMap[LP->getParent()]; + SmallVector<std::unique_ptr<ActionHandler>, 4> Actions; + parseEHActions(ActionsCall, Actions); + + // Iterate EH actions from most to least precedence, which means + // iterating in reverse. + for (auto I = Actions.rbegin(), E = Actions.rend(); I != E; ++I) { + ActionHandler *Action = I->get(); + if (auto *CH = dyn_cast<CatchHandler>(Action)) { + const auto *Filter = + dyn_cast<Function>(CH->getSelector()->stripPointerCasts()); + assert((Filter || CH->getSelector()->isNullValue()) && + "expected function or catch-all"); + const auto *RecoverBA = + cast<BlockAddress>(CH->getHandlerBlockOrFunc()); + MMI.addSEHCatchHandler(LPadMBB, Filter, RecoverBA); + } else { + assert(isa<CleanupHandler>(Action)); + const auto *Fini = cast<Function>(Action->getHandlerBlockOrFunc()); + MMI.addSEHCleanupHandler(LPadMBB, Fini); + } + } + } +} + +void WinEHNumbering::createUnwindMapEntry(int ToState, ActionHandler *AH) { + WinEHUnwindMapEntry UME; + UME.ToState = ToState; + if (auto *CH = dyn_cast_or_null<CleanupHandler>(AH)) + UME.Cleanup = cast<Function>(CH->getHandlerBlockOrFunc()); + else + UME.Cleanup = nullptr; + FuncInfo.UnwindMap.push_back(UME); +} + +void WinEHNumbering::createTryBlockMapEntry(int TryLow, int TryHigh, + ArrayRef<CatchHandler *> Handlers) { + // See if we already have an entry for this set of handlers. + // This is using iterators rather than a range-based for loop because + // if we find the entry we're looking for we'll need the iterator to erase it. + int NumHandlers = Handlers.size(); + auto I = FuncInfo.TryBlockMap.begin(); + auto E = FuncInfo.TryBlockMap.end(); + for ( ; I != E; ++I) { + auto &Entry = *I; + if (Entry.HandlerArray.size() != (size_t)NumHandlers) + continue; + int N; + for (N = 0; N < NumHandlers; ++N) { + if (Entry.HandlerArray[N].Handler != Handlers[N]->getHandlerBlockOrFunc()) + break; // breaks out of inner loop + } + // If all the handlers match, this is what we were looking for. + if (N == NumHandlers) { + break; + } + } + + // If we found an existing entry for this set of handlers, extend the range + // but move the entry to the end of the map vector. The order of entries + // in the map is critical to the way that the runtime finds handlers. + // FIXME: Depending on what has happened with block ordering, this may + // incorrectly combine entries that should remain separate. + if (I != E) { + // Copy the existing entry. + WinEHTryBlockMapEntry Entry = *I; + Entry.TryLow = std::min(TryLow, Entry.TryLow); + Entry.TryHigh = std::max(TryHigh, Entry.TryHigh); + assert(Entry.TryLow <= Entry.TryHigh); + // Erase the old entry and add this one to the back. + FuncInfo.TryBlockMap.erase(I); + FuncInfo.TryBlockMap.push_back(Entry); + return; + } + + // If we didn't find an entry, create a new one. + WinEHTryBlockMapEntry TBME; + TBME.TryLow = TryLow; + TBME.TryHigh = TryHigh; + assert(TBME.TryLow <= TBME.TryHigh); + for (CatchHandler *CH : Handlers) { + WinEHHandlerType HT; + if (CH->getSelector()->isNullValue()) { + HT.Adjectives = 0x40; + HT.TypeDescriptor = nullptr; + } else { + auto *GV = cast<GlobalVariable>(CH->getSelector()->stripPointerCasts()); + // Selectors are always pointers to GlobalVariables with 'struct' type. + // The struct has two fields, adjectives and a type descriptor. + auto *CS = cast<ConstantStruct>(GV->getInitializer()); + HT.Adjectives = + cast<ConstantInt>(CS->getAggregateElement(0U))->getZExtValue(); + HT.TypeDescriptor = + cast<GlobalVariable>(CS->getAggregateElement(1)->stripPointerCasts()); + } + HT.Handler = cast<Function>(CH->getHandlerBlockOrFunc()); + HT.CatchObjRecoverIdx = CH->getExceptionVarIndex(); + TBME.HandlerArray.push_back(HT); + } + FuncInfo.TryBlockMap.push_back(TBME); +} + +static void print_name(const Value *V) { +#ifndef NDEBUG + if (!V) { + DEBUG(dbgs() << "null"); + return; + } + + if (const auto *F = dyn_cast<Function>(V)) + DEBUG(dbgs() << F->getName()); + else + DEBUG(V->dump()); +#endif +} + +void WinEHNumbering::processCallSite( + MutableArrayRef<std::unique_ptr<ActionHandler>> Actions, + ImmutableCallSite CS) { + DEBUG(dbgs() << "processCallSite (EH state = " << currentEHNumber() + << ") for: "); + print_name(CS ? CS.getCalledValue() : nullptr); + DEBUG(dbgs() << '\n'); + + DEBUG(dbgs() << "HandlerStack: \n"); + for (int I = 0, E = HandlerStack.size(); I < E; ++I) { + DEBUG(dbgs() << " "); + print_name(HandlerStack[I]->getHandlerBlockOrFunc()); + DEBUG(dbgs() << '\n'); + } + DEBUG(dbgs() << "Actions: \n"); + for (int I = 0, E = Actions.size(); I < E; ++I) { + DEBUG(dbgs() << " "); + print_name(Actions[I]->getHandlerBlockOrFunc()); + DEBUG(dbgs() << '\n'); + } + int FirstMismatch = 0; + for (int E = std::min(HandlerStack.size(), Actions.size()); FirstMismatch < E; + ++FirstMismatch) { + if (HandlerStack[FirstMismatch]->getHandlerBlockOrFunc() != + Actions[FirstMismatch]->getHandlerBlockOrFunc()) + break; + } + + // Remove unmatched actions from the stack and process their EH states. + popUnmatchedActions(FirstMismatch); + + DEBUG(dbgs() << "Pushing actions for CallSite: "); + print_name(CS ? CS.getCalledValue() : nullptr); + DEBUG(dbgs() << '\n'); + + bool LastActionWasCatch = false; + const LandingPadInst *LastRootLPad = nullptr; + for (size_t I = FirstMismatch; I != Actions.size(); ++I) { + // We can reuse eh states when pushing two catches for the same invoke. + bool CurrActionIsCatch = isa<CatchHandler>(Actions[I].get()); + auto *Handler = cast<Function>(Actions[I]->getHandlerBlockOrFunc()); + // Various conditions can lead to a handler being popped from the + // stack and re-pushed later. That shouldn't create a new state. + // FIXME: Can code optimization lead to re-used handlers? + if (FuncInfo.HandlerEnclosedState.count(Handler)) { + // If we already assigned the state enclosed by this handler re-use it. + Actions[I]->setEHState(FuncInfo.HandlerEnclosedState[Handler]); + continue; + } + const LandingPadInst* RootLPad = FuncInfo.RootLPad[Handler]; + if (CurrActionIsCatch && LastActionWasCatch && RootLPad == LastRootLPad) { + DEBUG(dbgs() << "setEHState for handler to " << currentEHNumber() << "\n"); + Actions[I]->setEHState(currentEHNumber()); + } else { + DEBUG(dbgs() << "createUnwindMapEntry(" << currentEHNumber() << ", "); + print_name(Actions[I]->getHandlerBlockOrFunc()); + DEBUG(dbgs() << ") with EH state " << NextState << "\n"); + createUnwindMapEntry(currentEHNumber(), Actions[I].get()); + DEBUG(dbgs() << "setEHState for handler to " << NextState << "\n"); + Actions[I]->setEHState(NextState); + NextState++; + } + HandlerStack.push_back(std::move(Actions[I])); + LastActionWasCatch = CurrActionIsCatch; + LastRootLPad = RootLPad; + } + + // This is used to defer numbering states for a handler until after the + // last time it appears in an invoke action list. + if (CS.isInvoke()) { + for (int I = 0, E = HandlerStack.size(); I < E; ++I) { + auto *Handler = cast<Function>(HandlerStack[I]->getHandlerBlockOrFunc()); + if (FuncInfo.LastInvoke[Handler] != cast<InvokeInst>(CS.getInstruction())) + continue; + FuncInfo.LastInvokeVisited[Handler] = true; + DEBUG(dbgs() << "Last invoke of "); + print_name(Handler); + DEBUG(dbgs() << " has been visited.\n"); + } + } + + DEBUG(dbgs() << "In EHState " << currentEHNumber() << " for CallSite: "); + print_name(CS ? CS.getCalledValue() : nullptr); + DEBUG(dbgs() << '\n'); +} + +void WinEHNumbering::popUnmatchedActions(int FirstMismatch) { + // Don't recurse while we are looping over the handler stack. Instead, defer + // the numbering of the catch handlers until we are done popping. + SmallVector<CatchHandler *, 4> PoppedCatches; + for (int I = HandlerStack.size() - 1; I >= FirstMismatch; --I) { + std::unique_ptr<ActionHandler> Handler = HandlerStack.pop_back_val(); + if (isa<CatchHandler>(Handler.get())) + PoppedCatches.push_back(cast<CatchHandler>(Handler.release())); + } + + int TryHigh = NextState - 1; + int LastTryLowIdx = 0; + for (int I = 0, E = PoppedCatches.size(); I != E; ++I) { + CatchHandler *CH = PoppedCatches[I]; + DEBUG(dbgs() << "Popped handler with state " << CH->getEHState() << "\n"); + if (I + 1 == E || CH->getEHState() != PoppedCatches[I + 1]->getEHState()) { + int TryLow = CH->getEHState(); + auto Handlers = + makeArrayRef(&PoppedCatches[LastTryLowIdx], I - LastTryLowIdx + 1); + DEBUG(dbgs() << "createTryBlockMapEntry(" << TryLow << ", " << TryHigh); + for (size_t J = 0; J < Handlers.size(); ++J) { + DEBUG(dbgs() << ", "); + print_name(Handlers[J]->getHandlerBlockOrFunc()); + } + DEBUG(dbgs() << ")\n"); + createTryBlockMapEntry(TryLow, TryHigh, Handlers); + LastTryLowIdx = I + 1; + } + } + + for (CatchHandler *CH : PoppedCatches) { + if (auto *F = dyn_cast<Function>(CH->getHandlerBlockOrFunc())) { + if (FuncInfo.LastInvokeVisited[F]) { + DEBUG(dbgs() << "Assigning base state " << NextState << " to "); + print_name(F); + DEBUG(dbgs() << '\n'); + FuncInfo.HandlerBaseState[F] = NextState; + DEBUG(dbgs() << "createUnwindMapEntry(" << currentEHNumber() + << ", null)\n"); + createUnwindMapEntry(currentEHNumber(), nullptr); + ++NextState; + calculateStateNumbers(*F); + } + else { + DEBUG(dbgs() << "Deferring handling of "); + print_name(F); + DEBUG(dbgs() << " until last invoke visited.\n"); + } + } + delete CH; + } +} + +void WinEHNumbering::calculateStateNumbers(const Function &F) { + auto I = VisitedHandlers.insert(&F); + if (!I.second) + return; // We've already visited this handler, don't renumber it. + + int OldBaseState = CurrentBaseState; + if (FuncInfo.HandlerBaseState.count(&F)) { + CurrentBaseState = FuncInfo.HandlerBaseState[&F]; + } + + size_t SavedHandlerStackSize = HandlerStack.size(); + + DEBUG(dbgs() << "Calculating state numbers for: " << F.getName() << '\n'); + SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList; + for (const BasicBlock &BB : F) { + for (const Instruction &I : BB) { + const auto *CI = dyn_cast<CallInst>(&I); + if (!CI || CI->doesNotThrow()) + continue; + processCallSite(None, CI); + } + const auto *II = dyn_cast<InvokeInst>(BB.getTerminator()); + if (!II) + continue; + const LandingPadInst *LPI = II->getLandingPadInst(); + auto *ActionsCall = dyn_cast<IntrinsicInst>(LPI->getNextNode()); + if (!ActionsCall) + continue; + assert(ActionsCall->getIntrinsicID() == Intrinsic::eh_actions); + parseEHActions(ActionsCall, ActionList); + if (ActionList.empty()) + continue; + processCallSite(ActionList, II); + ActionList.clear(); + FuncInfo.LandingPadStateMap[LPI] = currentEHNumber(); + DEBUG(dbgs() << "Assigning state " << currentEHNumber() + << " to landing pad at " << LPI->getParent()->getName() + << '\n'); + } + + // Pop any actions that were pushed on the stack for this function. + popUnmatchedActions(SavedHandlerStackSize); + + DEBUG(dbgs() << "Assigning max state " << NextState - 1 + << " to " << F.getName() << '\n'); + FuncInfo.CatchHandlerMaxState[&F] = NextState - 1; + + CurrentBaseState = OldBaseState; +} + +// This function follows the same basic traversal as calculateStateNumbers +// but it is necessary to identify the root landing pad associated +// with each action before we start assigning state numbers. +void WinEHNumbering::findActionRootLPads(const Function &F) { + auto I = VisitedHandlers.insert(&F); + if (!I.second) + return; // We've already visited this handler, don't revisit it. + + SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList; + for (const BasicBlock &BB : F) { + const auto *II = dyn_cast<InvokeInst>(BB.getTerminator()); + if (!II) + continue; + const LandingPadInst *LPI = II->getLandingPadInst(); + auto *ActionsCall = dyn_cast<IntrinsicInst>(LPI->getNextNode()); + if (!ActionsCall) + continue; + + assert(ActionsCall->getIntrinsicID() == Intrinsic::eh_actions); + parseEHActions(ActionsCall, ActionList); + if (ActionList.empty()) + continue; + for (int I = 0, E = ActionList.size(); I < E; ++I) { + if (auto *Handler + = dyn_cast<Function>(ActionList[I]->getHandlerBlockOrFunc())) { + FuncInfo.LastInvoke[Handler] = II; + // Don't replace the root landing pad if we previously saw this + // handler in a different function. + if (FuncInfo.RootLPad.count(Handler) && + FuncInfo.RootLPad[Handler]->getParent()->getParent() != &F) + continue; + DEBUG(dbgs() << "Setting root lpad for "); + print_name(Handler); + DEBUG(dbgs() << " to " << LPI->getParent()->getName() << '\n'); + FuncInfo.RootLPad[Handler] = LPI; + } + } + // Walk the actions again and look for nested handlers. This has to + // happen after all of the actions have been processed in the current + // function. + for (int I = 0, E = ActionList.size(); I < E; ++I) + if (auto *Handler + = dyn_cast<Function>(ActionList[I]->getHandlerBlockOrFunc())) + findActionRootLPads(*Handler); + ActionList.clear(); + } } /// clear - Clear out all the function-specific state. This returns this @@ -274,6 +706,7 @@ void FunctionLoweringInfo::clear() { ByValArgFrameIndexMap.clear(); RegFixups.clear(); StatepointStackSlots.clear(); + StatepointRelocatedValues.clear(); PreferredExtendType.clear(); } @@ -460,8 +893,7 @@ void llvm::ComputeUsesVAFloatArgument(const CallInst &I, if (FT->isVarArg() && !MMI->usesVAFloatArgument()) { for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { Type* T = I.getArgOperand(i)->getType(); - for (po_iterator<Type*> i = po_begin(T), e = po_end(T); - i != e; ++i) { + for (auto i : post_order(T)) { if (i->isFloatingPointTy()) { MMI->setUsesVAFloatArgument(true); return; @@ -471,60 +903,6 @@ void llvm::ComputeUsesVAFloatArgument(const CallInst &I, } } -/// AddCatchInfo - Extract the personality and type infos from an eh.selector -/// call, and add them to the specified machine basic block. -void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, - MachineBasicBlock *MBB) { - // Inform the MachineModuleInfo of the personality for this landing pad. - const ConstantExpr *CE = cast<ConstantExpr>(I.getArgOperand(1)); - assert(CE->getOpcode() == Instruction::BitCast && - isa<Function>(CE->getOperand(0)) && - "Personality should be a function"); - MMI->addPersonality(MBB, cast<Function>(CE->getOperand(0))); - - // Gather all the type infos for this landing pad and pass them along to - // MachineModuleInfo. - std::vector<const GlobalValue *> TyInfo; - unsigned N = I.getNumArgOperands(); - - for (unsigned i = N - 1; i > 1; --i) { - if (const ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(i))) { - unsigned FilterLength = CI->getZExtValue(); - unsigned FirstCatch = i + FilterLength + !FilterLength; - assert(FirstCatch <= N && "Invalid filter length"); - - if (FirstCatch < N) { - TyInfo.reserve(N - FirstCatch); - for (unsigned j = FirstCatch; j < N; ++j) - TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j))); - MMI->addCatchTypeInfo(MBB, TyInfo); - TyInfo.clear(); - } - - if (!FilterLength) { - // Cleanup. - MMI->addCleanup(MBB); - } else { - // Filter. - TyInfo.reserve(FilterLength - 1); - for (unsigned j = i + 1; j < FirstCatch; ++j) - TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j))); - MMI->addFilterTypeInfo(MBB, TyInfo); - TyInfo.clear(); - } - - N = i; - } - } - - if (N > 2) { - TyInfo.reserve(N - 2); - for (unsigned j = 2; j < N; ++j) - TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j))); - MMI->addCatchTypeInfo(MBB, TyInfo); - } -} - /// AddLandingPadInfo - Extract the exception handling information from the /// landingpad instruction and add them to the specified machine module info. void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI, diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index a65f33e..7abc0c4 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -406,10 +406,10 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, Type *Type = CP->getType(); // MachineConstantPool wants an explicit alignment. if (Align == 0) { - Align = MF->getSubtarget().getDataLayout()->getPrefTypeAlignment(Type); + Align = MF->getTarget().getDataLayout()->getPrefTypeAlignment(Type); if (Align == 0) { // Alignment of vector types. FIXME! - Align = MF->getSubtarget().getDataLayout()->getTypeAllocSize(Type); + Align = MF->getTarget().getDataLayout()->getTypeAllocSize(Type); } } @@ -650,6 +650,8 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, MDNode *Var = SD->getVariable(); MDNode *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); if (SD->getKind() == SDDbgValue::FRAMEIX) { // Stack address; this needs to be lowered in target-dependent fashion. @@ -951,6 +953,9 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, // Remember to operand index of the group flags. SmallVector<unsigned, 8> GroupIdx; + // Remember registers that are part of early-clobber defs. + SmallVector<unsigned, 8> ECRegs; + // Add all of the operand registers to the instruction. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = @@ -979,6 +984,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber | getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg))); + ECRegs.push_back(Reg); } break; case InlineAsm::Kind_RegUse: // Use of register. @@ -1004,6 +1010,19 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, } } + // GCC inline assembly allows input operands to also be early-clobber + // output operands (so long as the operand is written only after it's + // used), but this does not match the semantics of our early-clobber flag. + // If an early-clobber operand register is also an input operand register, + // then remove the early-clobber flag. + for (unsigned Reg : ECRegs) { + if (MIB->readsRegister(Reg, TRI)) { + MachineOperand *MO = MIB->findRegisterDefOperand(Reg, false, TRI); + assert(MO && "No def operand for clobbered register?"); + MO->setIsEarlyClobber(false); + } + } + // Get the mdnode from the asm if it exists and add it to the instruction. SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode); const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index e5473e3..7d98872 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -249,7 +249,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { ConstantFP *LLVMC = const_cast<ConstantFP*>(CFP->getConstantFPValue()); if (!UseCP) { assert((VT == MVT::f64 || VT == MVT::f32) && "Invalid type expansion"); - return DAG.getConstant(LLVMC->getValueAPF().bitcastToAPInt(), + return DAG.getConstant(LLVMC->getValueAPF().bitcastToAPInt(), dl, (VT == MVT::f64) ? MVT::i64 : MVT::i32); } @@ -331,7 +331,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Store = DAG.getTruncStore(Chain, dl, Val, StackPtr, MachinePointerInfo(), StoredVT, false, false, 0); - SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy(AS)); + SDValue Increment = DAG.getConstant(RegBytes, dl, TLI.getPointerTy(AS)); SmallVector<SDValue, 8> Stores; unsigned Offset = 0; @@ -385,7 +385,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, int IncrementSize = NumBits / 8; // Divide the stored value in two parts. - SDValue ShiftAmount = DAG.getConstant(NumBits, + SDValue ShiftAmount = DAG.getConstant(NumBits, dl, TLI.getShiftAmountTy(Val.getValueType())); SDValue Lo = Val; SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount); @@ -397,7 +397,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, ST->isVolatile(), ST->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, TLI.getPointerTy(AS))); + DAG.getConstant(IncrementSize, dl, TLI.getPointerTy(AS))); Alignment = MinAlign(Alignment, IncrementSize); Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), @@ -448,7 +448,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // Make sure the stack slot is also aligned for the register type. SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT); - SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); + SDValue Increment = DAG.getConstant(RegBytes, dl, TLI.getPointerTy()); SmallVector<SDValue, 8> Stores; SDValue StackPtr = StackBase; unsigned Offset = 0; @@ -528,7 +528,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, LD->isNonTemporal(), LD->isInvariant(), Alignment, LD->getAAInfo()); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), NewLoadedVT, LD->isVolatile(), @@ -540,7 +540,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, LD->isNonTemporal(), LD->isInvariant(), Alignment, LD->getAAInfo()); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), NewLoadedVT, LD->isVolatile(), @@ -549,7 +549,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, } // aggregate the two parts - SDValue ShiftAmount = DAG.getConstant(NumBits, + SDValue ShiftAmount = DAG.getConstant(NumBits, dl, TLI.getShiftAmountTy(Hi.getValueType())); SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount); Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo); @@ -596,7 +596,8 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, Tmp3 = DAG.getNode(CastOpc, dl, PtrVT, Tmp3); // Add the offset to the index. unsigned EltSize = EltVT.getSizeInBits()/8; - Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,DAG.getConstant(EltSize, IdxVT)); + Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3, + DAG.getConstant(EltSize, dl, IdxVT)); SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr); // Store the scalar value. Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT, @@ -655,7 +656,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { TLI.isTypeLegal(MVT::i32)) { SDValue Con = DAG.getConstant(CFP->getValueAPF(). bitcastToAPInt().zextOrTrunc(32), - MVT::i32); + SDLoc(CFP), MVT::i32); return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment, AAInfo); } @@ -664,7 +665,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { // If this target supports 64-bit registers, do a single 64-bit store. if (TLI.isTypeLegal(MVT::i64)) { SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). - zextOrTrunc(64), MVT::i64); + zextOrTrunc(64), SDLoc(CFP), MVT::i64); return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment, AAInfo); } @@ -673,15 +674,15 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { // Otherwise, if the target supports 32-bit registers, use 2 32-bit // stores. If the target supports neither 32- nor 64-bits, this // xform is certainly not worth it. - const APInt &IntVal =CFP->getValueAPF().bitcastToAPInt(); - SDValue Lo = DAG.getConstant(IntVal.trunc(32), MVT::i32); - SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32); + const APInt &IntVal = CFP->getValueAPF().bitcastToAPInt(); + SDValue Lo = DAG.getConstant(IntVal.trunc(32), dl, MVT::i32); + SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), dl, MVT::i32); if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment, AAInfo); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(4, Ptr.getValueType())); + DAG.getConstant(4, dl, Ptr.getValueType())); Hi = DAG.getStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), isVolatile, isNonTemporal, MinAlign(Alignment, 4U), @@ -731,7 +732,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { } case TargetLowering::Custom: { SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Res.getNode()) + if (Res && Res != SDValue(Node, 0)) ReplaceNode(SDValue(Node, 0), Res); return; } @@ -792,9 +793,10 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, + Ptr.getValueType())); Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, - DAG.getConstant(RoundWidth, + DAG.getConstant(RoundWidth, dl, TLI.getShiftAmountTy(Value.getValueType()))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), @@ -805,7 +807,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X // Store the top RoundWidth bits. Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, - DAG.getConstant(ExtraWidth, + DAG.getConstant(ExtraWidth, dl, TLI.getShiftAmountTy(Value.getValueType()))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), RoundVT, isVolatile, isNonTemporal, Alignment, @@ -814,7 +816,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, + Ptr.getValueType())); Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, @@ -843,7 +846,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { } case TargetLowering::Custom: { SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Res.getNode()) + if (Res && Res != SDValue(Node, 0)) ReplaceNode(SDValue(Node, 0), Res); return; } @@ -1004,7 +1007,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, + Ptr.getValueType())); Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, isInvariant, @@ -1017,7 +1021,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Move the top bits to the right place. Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, - DAG.getConstant(RoundWidth, + DAG.getConstant(RoundWidth, dl, TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. @@ -1033,7 +1037,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, + Ptr.getValueType())); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), @@ -1047,7 +1052,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Move the top bits to the right place. Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, - DAG.getConstant(ExtraWidth, + DAG.getConstant(ExtraWidth, dl, TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. @@ -1240,12 +1245,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { break; case ISD::EXTRACT_ELEMENT: case ISD::FLT_ROUNDS_: - case ISD::SADDO: - case ISD::SSUBO: - case ISD::UADDO: - case ISD::USUBO: - case ISD::SMULO: - case ISD::UMULO: case ISD::FPOWI: case ISD::MERGE_VALUES: case ISD::EH_RETURN: @@ -1437,18 +1436,32 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { unsigned EltSize = Vec.getValueType().getVectorElementType().getSizeInBits()/8; Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, - DAG.getConstant(EltSize, Idx.getValueType())); + DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType())); Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy()); StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); + SDValue NewLoad; + if (Op.getValueType().isVector()) - return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(), - false, false, false, 0); - return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, - MachinePointerInfo(), - Vec.getValueType().getVectorElementType(), - false, false, false, 0); + NewLoad = DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, + MachinePointerInfo(), false, false, false, 0); + else + NewLoad = DAG.getExtLoad( + ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, MachinePointerInfo(), + Vec.getValueType().getVectorElementType(), false, false, false, 0); + + // Replace the chain going out of the store, by the one out of the load. + DAG.ReplaceAllUsesOfValueWith(Ch, SDValue(NewLoad.getNode(), 1)); + + // We introduced a cycle though, so update the loads operands, making sure + // to use the original store's chain as an incoming chain. + SmallVector<SDValue, 6> NewLoadOperands(NewLoad->op_begin(), + NewLoad->op_end()); + NewLoadOperands[0] = Ch; + NewLoad = + SDValue(DAG.UpdateNodeOperands(NewLoad.getNode(), NewLoadOperands), 0); + return NewLoad; } SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { @@ -1476,7 +1489,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { Vec.getValueType().getVectorElementType().getSizeInBits()/8; Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, - DAG.getConstant(EltSize, Idx.getValueType())); + DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType())); Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy()); SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, @@ -1513,7 +1526,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { unsigned Offset = TypeByteSize*i; - SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType()); + SDValue Idx = DAG.getConstant(Offset, dl, FIPtr.getValueType()); Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx); // If the destination vector element type is narrower than the source @@ -1575,7 +1588,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits(); unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8; LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), LoadPtr, - DAG.getConstant(ByteOffset, LoadPtr.getValueType())); + DAG.getConstant(ByteOffset, dl, + LoadPtr.getValueType())); // Load a legal integer containing the sign bit. SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(), false, false, false, 0); @@ -1585,13 +1599,14 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { assert(BitShift < LoadTy.getSizeInBits() && "Pointer advanced wrong?"); if (BitShift) SignBit = DAG.getNode(ISD::SHL, dl, LoadTy, SignBit, - DAG.getConstant(BitShift, + DAG.getConstant(BitShift, dl, TLI.getShiftAmountTy(SignBit.getValueType()))); } } // Now get the sign bit proper, by seeing whether the value is negative. SignBit = DAG.getSetCC(dl, getSetCCResultType(SignBit.getValueType()), - SignBit, DAG.getConstant(0, SignBit.getValueType()), + SignBit, + DAG.getConstant(0, dl, SignBit.getValueType()), ISD::SETLT); // Get the absolute value of the result. SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1); @@ -1616,8 +1631,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, // Chain the dynamic stack allocation so that it doesn't modify the stack // pointer when other instructions are using the stack. - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true), - SDLoc(Node)); + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), dl); SDValue Size = Tmp2.getOperand(1); SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); @@ -1628,12 +1642,11 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value if (Align > StackAlign) Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1, - DAG.getConstant(-(uint64_t)Align, VT)); + DAG.getConstant(-(uint64_t)Align, dl, VT)); Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain - Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true), - DAG.getIntPtrConstant(0, true), SDValue(), - SDLoc(Node)); + Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true), + DAG.getIntPtrConstant(0, dl, true), SDValue(), dl); Results.push_back(Tmp1); Results.push_back(Tmp2); @@ -2404,7 +2417,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64); // word offset constant for Hi/Lo address computation - SDValue WordOff = DAG.getConstant(sizeof(int), StackSlot.getValueType()); + SDValue WordOff = DAG.getConstant(sizeof(int), dl, + StackSlot.getValueType()); // set up Hi and Lo (into buffer) address based on endian SDValue Hi = StackSlot; SDValue Lo = DAG.getNode(ISD::ADD, dl, StackSlot.getValueType(), @@ -2416,7 +2430,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0Mapped; if (isSigned) { // constant used to invert sign bit (signed to unsigned mapping) - SDValue SignBit = DAG.getConstant(0x80000000u, MVT::i32); + SDValue SignBit = DAG.getConstant(0x80000000u, dl, MVT::i32); Op0Mapped = DAG.getNode(ISD::XOR, dl, MVT::i32, Op0, SignBit); } else { Op0Mapped = Op0; @@ -2426,7 +2440,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, Op0Mapped, Lo, MachinePointerInfo(), false, false, 0); // initial hi portion of constructed double - SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32); + SDValue InitialHi = DAG.getConstant(0x43300000u, dl, MVT::i32); // store the hi of the constructed double - biased exponent SDValue Store2 = DAG.getStore(Store1, dl, InitialHi, Hi, MachinePointerInfo(), @@ -2438,7 +2452,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Bias = DAG.getConstantFP(isSigned ? BitsToDouble(0x4330000080000000ULL) : BitsToDouble(0x4330000000000000ULL), - MVT::f64); + dl, MVT::f64); // subtract the bias SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias); // final result @@ -2449,7 +2463,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, Result = Sub; } else if (DestVT.bitsLT(MVT::f64)) { Result = DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); } else if (DestVT.bitsGT(MVT::f64)) { Result = DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub); } @@ -2465,15 +2479,16 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, // TODO: Generalize this for use with other types. if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) { SDValue TwoP52 = - DAG.getConstant(UINT64_C(0x4330000000000000), MVT::i64); + DAG.getConstant(UINT64_C(0x4330000000000000), dl, MVT::i64); SDValue TwoP84PlusTwoP52 = - DAG.getConstantFP(BitsToDouble(UINT64_C(0x4530000000100000)), MVT::f64); + DAG.getConstantFP(BitsToDouble(UINT64_C(0x4530000000100000)), dl, + MVT::f64); SDValue TwoP84 = - DAG.getConstant(UINT64_C(0x4530000000000000), MVT::i64); + DAG.getConstant(UINT64_C(0x4530000000000000), dl, MVT::i64); SDValue Lo = DAG.getZeroExtendInReg(Op0, dl, MVT::i32); SDValue Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, - DAG.getConstant(32, MVT::i64)); + DAG.getConstant(32, dl, MVT::i64)); SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52); SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84); SDValue LoFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, LoOr); @@ -2492,9 +2507,9 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0); SDValue ShiftConst = - DAG.getConstant(1, TLI.getShiftAmountTy(Op0.getValueType())); + DAG.getConstant(1, dl, TLI.getShiftAmountTy(Op0.getValueType())); SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst); - SDValue AndConst = DAG.getConstant(1, MVT::i64); + SDValue AndConst = DAG.getConstant(1, dl, MVT::i64); SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst); SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr); @@ -2506,47 +2521,52 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, // thing most of the time. This would be a good candidate for a //pseudo-op, or, even better, for whole-function isel. SDValue SignBitTest = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), - Op0, DAG.getConstant(0, MVT::i64), ISD::SETLT); + Op0, DAG.getConstant(0, dl, MVT::i64), ISD::SETLT); return DAG.getSelect(dl, MVT::f32, SignBitTest, Slow, Fast); } // Otherwise, implement the fully general conversion. SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, - DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64)); + DAG.getConstant(UINT64_C(0xfffffffffffff800), dl, MVT::i64)); SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, - DAG.getConstant(UINT64_C(0x800), MVT::i64)); + DAG.getConstant(UINT64_C(0x800), dl, MVT::i64)); SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, - DAG.getConstant(UINT64_C(0x7ff), MVT::i64)); - SDValue Ne = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), - And2, DAG.getConstant(UINT64_C(0), MVT::i64), ISD::SETNE); + DAG.getConstant(UINT64_C(0x7ff), dl, MVT::i64)); + SDValue Ne = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), And2, + DAG.getConstant(UINT64_C(0), dl, MVT::i64), + ISD::SETNE); SDValue Sel = DAG.getSelect(dl, MVT::i64, Ne, Or, Op0); - SDValue Ge = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), - Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64), - ISD::SETUGE); + SDValue Ge = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), Op0, + DAG.getConstant(UINT64_C(0x0020000000000000), dl, + MVT::i64), + ISD::SETUGE); SDValue Sel2 = DAG.getSelect(dl, MVT::i64, Ge, Sel, Op0); EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType()); SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2, - DAG.getConstant(32, SHVT)); + DAG.getConstant(32, dl, SHVT)); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sh); SDValue Fcvt = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Trunc); SDValue TwoP32 = - DAG.getConstantFP(BitsToDouble(UINT64_C(0x41f0000000000000)), MVT::f64); + DAG.getConstantFP(BitsToDouble(UINT64_C(0x41f0000000000000)), dl, + MVT::f64); SDValue Fmul = DAG.getNode(ISD::FMUL, dl, MVT::f64, TwoP32, Fcvt); SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sel2); SDValue Fcvt2 = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Lo); SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2); return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); } SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(Op0.getValueType()), - Op0, DAG.getConstant(0, Op0.getValueType()), + Op0, + DAG.getConstant(0, dl, Op0.getValueType()), ISD::SETLT); - SDValue Zero = DAG.getIntPtrConstant(0), Four = DAG.getIntPtrConstant(4); + SDValue Zero = DAG.getIntPtrConstant(0, dl), + Four = DAG.getIntPtrConstant(4, dl); SDValue CstOffset = DAG.getSelect(dl, Zero.getValueType(), SignSet, Four, Zero); @@ -2681,34 +2701,41 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) { switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unhandled Expand type in BSWAP!"); case MVT::i16: - Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); - Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); + Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); return DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); case MVT::i32: - Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT)); - Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); - Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); - Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT)); - Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(0xFF0000, VT)); - Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, VT)); + Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, + DAG.getConstant(0xFF0000, dl, VT)); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT)); Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1); return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2); case MVT::i64: - Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, SHVT)); - Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, SHVT)); - Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT)); - Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); - Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); - Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT)); - Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, SHVT)); - Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, SHVT)); - Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7, DAG.getConstant(255ULL<<48, VT)); - Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6, DAG.getConstant(255ULL<<40, VT)); - Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5, DAG.getConstant(255ULL<<32, VT)); - Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4, DAG.getConstant(255ULL<<24, VT)); - Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(255ULL<<16, VT)); - Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(255ULL<<8 , VT)); + Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT)); + Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, dl, SHVT)); + Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); + Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT)); + Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7, + DAG.getConstant(255ULL<<48, dl, VT)); + Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6, + DAG.getConstant(255ULL<<40, dl, VT)); + Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5, + DAG.getConstant(255ULL<<32, dl, VT)); + Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4, + DAG.getConstant(255ULL<<24, dl, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, + DAG.getConstant(255ULL<<16, dl, VT)); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, + DAG.getConstant(255ULL<<8 , dl, VT)); Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7); Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5); Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); @@ -2735,34 +2762,38 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, // This is the "best" algorithm from // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel - SDValue Mask55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), VT); - SDValue Mask33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), VT); - SDValue Mask0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), VT); - SDValue Mask01 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), VT); + SDValue Mask55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), + dl, VT); + SDValue Mask33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), + dl, VT); + SDValue Mask0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), + dl, VT); + SDValue Mask01 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), + dl, VT); // v = v - ((v >> 1) & 0x55555555...) Op = DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getNode(ISD::AND, dl, VT, DAG.getNode(ISD::SRL, dl, VT, Op, - DAG.getConstant(1, ShVT)), + DAG.getConstant(1, dl, ShVT)), Mask55)); // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...) Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33), DAG.getNode(ISD::AND, dl, VT, DAG.getNode(ISD::SRL, dl, VT, Op, - DAG.getConstant(2, ShVT)), + DAG.getConstant(2, dl, ShVT)), Mask33)); // v = (v + (v >> 4)) & 0x0F0F0F0F... Op = DAG.getNode(ISD::AND, dl, VT, DAG.getNode(ISD::ADD, dl, VT, Op, DAG.getNode(ISD::SRL, dl, VT, Op, - DAG.getConstant(4, ShVT))), + DAG.getConstant(4, dl, ShVT))), Mask0F); // v = (v * 0x01010101...) >> (Len - 8) Op = DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01), - DAG.getConstant(Len - 8, ShVT)); + DAG.getConstant(Len - 8, dl, ShVT)); return Op; } @@ -2783,7 +2814,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, EVT ShVT = TLI.getShiftAmountTy(VT); unsigned len = VT.getSizeInBits(); for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { - SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT); + SDValue Tmp3 = DAG.getConstant(1ULL << i, dl, ShVT); Op = DAG.getNode(ISD::OR, dl, VT, Op, DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3)); } @@ -2802,12 +2833,12 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT), DAG.getNode(ISD::SUB, dl, VT, Op, - DAG.getConstant(1, VT))); + DAG.getConstant(1, dl, VT))); // If ISD::CTLZ is legal and CTPOP isn't, then do that instead. if (!TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) && TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) return DAG.getNode(ISD::SUB, dl, VT, - DAG.getConstant(VT.getSizeInBits(), VT), + DAG.getConstant(VT.getSizeInBits(), dl, VT), DAG.getNode(ISD::CTLZ, dl, VT, Tmp3)); return DAG.getNode(ISD::CTPOP, dl, VT, Tmp3); } @@ -2817,132 +2848,8 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { unsigned Opc = Node->getOpcode(); MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); - RTLIB::Libcall LC; - - switch (Opc) { - default: - llvm_unreachable("Unhandled atomic intrinsic Expand!"); - case ISD::ATOMIC_SWAP: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break; - case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break; - case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break; - case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break; - case MVT::i128:LC = RTLIB::SYNC_LOCK_TEST_AND_SET_16;break; - } - break; - case ISD::ATOMIC_CMP_SWAP: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break; - case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break; - case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break; - case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break; - case MVT::i128:LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16;break; - } - break; - case ISD::ATOMIC_LOAD_ADD: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_ADD_16;break; - } - break; - case ISD::ATOMIC_LOAD_SUB: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_SUB_16;break; - } - break; - case ISD::ATOMIC_LOAD_AND: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_AND_16;break; - } - break; - case ISD::ATOMIC_LOAD_OR: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_OR_16;break; - } - break; - case ISD::ATOMIC_LOAD_XOR: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_XOR_16;break; - } - break; - case ISD::ATOMIC_LOAD_NAND: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_NAND_16;break; - } - break; - case ISD::ATOMIC_LOAD_MAX: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_MAX_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_MAX_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_MAX_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_MAX_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_MAX_16;break; - } - break; - case ISD::ATOMIC_LOAD_UMAX: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_UMAX_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_UMAX_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_UMAX_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_UMAX_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_UMAX_16;break; - } - break; - case ISD::ATOMIC_LOAD_MIN: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_MIN_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_MIN_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_MIN_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_MIN_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_MIN_16;break; - } - break; - case ISD::ATOMIC_LOAD_UMIN: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_UMIN_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_UMIN_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_UMIN_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_UMIN_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_UMIN_16;break; - } - break; - } + RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); return ExpandChainLibCall(LC, Node, false); } @@ -2967,10 +2874,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::FRAMEADDR: case ISD::RETURNADDR: case ISD::FRAME_TO_ARGS_OFFSET: - Results.push_back(DAG.getConstant(0, Node->getValueType(0))); + Results.push_back(DAG.getConstant(0, dl, Node->getValueType(0))); break; case ISD::FLT_ROUNDS_: - Results.push_back(DAG.getConstant(1, Node->getValueType(0))); + Results.push_back(DAG.getConstant(1, dl, Node->getValueType(0))); break; case ISD::EH_RETURN: case ISD::EH_LABEL: @@ -2984,7 +2891,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::EH_SJLJ_SETJMP: // If the target didn't expand this, just return 'zero' and preserve the // chain. - Results.push_back(DAG.getConstant(0, MVT::i32)); + Results.push_back(DAG.getConstant(0, dl, MVT::i32)); Results.push_back(Node->getOperand(0)); break; case ISD::ATOMIC_FENCE: { @@ -3005,7 +2912,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } case ISD::ATOMIC_LOAD: { // There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP. - SDValue Zero = DAG.getConstant(0, Node->getValueType(0)); + SDValue Zero = DAG.getConstant(0, dl, Node->getValueType(0)); SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other); SDValue Swap = DAG.getAtomicCmpSwap( ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs, @@ -3081,10 +2988,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::UNDEF: { EVT VT = Node->getValueType(0); if (VT.isInteger()) - Results.push_back(DAG.getConstant(0, VT)); + Results.push_back(DAG.getConstant(0, dl, VT)); else { assert(VT.isFloatingPoint() && "Unknown value type!"); - Results.push_back(DAG.getConstantFP(0, VT)); + Results.push_back(DAG.getConstantFP(0, dl, VT)); } break; } @@ -3123,7 +3030,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { ShiftAmountTy = VT; unsigned BitsDiff = VT.getScalarType().getSizeInBits() - ExtraVT.getScalarType().getSizeInBits(); - SDValue ShiftCst = DAG.getConstant(BitsDiff, ShiftAmountTy); + SDValue ShiftCst = DAG.getConstant(BitsDiff, dl, ShiftAmountTy); Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0), Node->getOperand(0), ShiftCst); Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, ShiftCst); @@ -3161,7 +3068,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { APInt::getNullValue(VT.getSizeInBits())); APInt x = APInt::getSignBit(NVT.getSizeInBits()); (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven); - Tmp1 = DAG.getConstantFP(apf, VT); + Tmp1 = DAG.getConstantFP(apf, dl, VT); Tmp2 = DAG.getSetCC(dl, getSetCCResultType(VT), Node->getOperand(0), Tmp1, ISD::SETLT); @@ -3170,7 +3077,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { DAG.getNode(ISD::FSUB, dl, VT, Node->getOperand(0), Tmp1)); False = DAG.getNode(ISD::XOR, dl, NVT, False, - DAG.getConstant(x, NVT)); + DAG.getConstant(x, dl, NVT)); Tmp1 = DAG.getSelect(dl, NVT, Tmp2, True, False); Results.push_back(Tmp1); break; @@ -3191,11 +3098,11 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); VAList = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList, - DAG.getConstant(Align - 1, + DAG.getConstant(Align - 1, dl, VAList.getValueType())); VAList = DAG.getNode(ISD::AND, dl, VAList.getValueType(), VAList, - DAG.getConstant(-(int64_t)Align, + DAG.getConstant(-(int64_t)Align, dl, VAList.getValueType())); } @@ -3203,6 +3110,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp3 = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList, DAG.getConstant(TLI.getDataLayout()-> getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), + dl, VAList.getValueType())); // Store the incremented VAList to the legalized pointer Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, @@ -3317,11 +3225,11 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (Idx < NumElems) Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0, - DAG.getConstant(Idx, TLI.getVectorIdxTy()))); + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy()))); else Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op1, - DAG.getConstant(Idx - NumElems, + DAG.getConstant(Idx - NumElems, dl, TLI.getVectorIdxTy()))); } @@ -3336,7 +3244,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) { // 1 -> Hi Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0), - DAG.getConstant(OpTy.getSizeInBits()/2, + DAG.getConstant(OpTy.getSizeInBits()/2, dl, TLI.getShiftAmountTy(Node->getOperand(0).getValueType()))); Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1); } else { @@ -3374,7 +3282,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; case ISD::FNEG: // Expand Y = FNEG(X) -> Y = SUB -0.0, X - Tmp1 = DAG.getConstantFP(-0.0, Node->getValueType(0)); + Tmp1 = DAG.getConstantFP(-0.0, dl, Node->getValueType(0)); Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1, Node->getOperand(0)); Results.push_back(Tmp1); @@ -3383,7 +3291,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X). EVT VT = Node->getValueType(0); Tmp1 = Node->getOperand(0); - Tmp2 = DAG.getConstantFP(0.0, VT); + Tmp2 = DAG.getConstantFP(0.0, dl, VT); Tmp2 = DAG.getSetCC(dl, getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, ISD::SETUGT); Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1); @@ -3391,6 +3299,26 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; } + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: { + // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B + ISD::CondCode Pred; + switch (Node->getOpcode()) { + default: llvm_unreachable("How did we get here?"); + case ISD::SMAX: Pred = ISD::SETGT; break; + case ISD::SMIN: Pred = ISD::SETLT; break; + case ISD::UMAX: Pred = ISD::SETUGT; break; + case ISD::UMIN: Pred = ISD::SETULT; break; + } + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp1, Tmp2, Pred); + Results.push_back(Tmp1); + break; + } + case ISD::FMINNUM: Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64, RTLIB::FMIN_F80, RTLIB::FMIN_F128, @@ -3519,6 +3447,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128)); break; + case ISD::FMAD: + llvm_unreachable("Illegal fmad should never be formed"); + case ISD::FADD: Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64, RTLIB::ADD_F80, RTLIB::ADD_F128, @@ -3545,6 +3476,21 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } case ISD::FP_TO_FP16: { + if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) { + SDValue Op = Node->getOperand(0); + MVT SVT = Op.getSimpleValueType(); + if ((SVT == MVT::f64 || SVT == MVT::f80) && + TLI.isOperationLegalOrCustom(ISD::FP_TO_FP16, MVT::f32)) { + // Under fastmath, we can expand this node into a fround followed by + // a float-half conversion. + SDValue FloatVal = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op, + DAG.getIntPtrConstant(0, dl)); + Results.push_back( + DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, FloatVal)); + break; + } + } + RTLIB::Libcall LC = RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16"); @@ -3579,8 +3525,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { TLI.isOperationLegalOrCustom(ISD::XOR, VT) && "Don't know how to expand this subtraction!"); Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1), - DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT)); - Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, VT)); + DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl, + VT)); + Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, dl, VT)); Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1)); break; } @@ -3696,7 +3643,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { TLI.expandMUL(Node, Lo, Hi, HalfType, DAG)) { Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo); Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Hi); - SDValue Shift = DAG.getConstant(HalfType.getSizeInBits(), + SDValue Shift = DAG.getConstant(HalfType.getSizeInBits(), dl, TLI.getShiftAmountTy(HalfType)); Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift); Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi)); @@ -3721,7 +3668,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { EVT ResultType = Node->getValueType(1); EVT OType = getSetCCResultType(Node->getValueType(0)); - SDValue Zero = DAG.getConstant(0, LHS.getValueType()); + SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType()); // LHSSign -> LHS >= 0 // RHSSign -> RHS >= 0 @@ -3787,9 +3734,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS); BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1, - DAG.getIntPtrConstant(1)); + DAG.getIntPtrConstant(1, dl)); } else { // We can fall back to a libcall with an illegal type for the MUL if we // have a libcall big enough. @@ -3810,9 +3757,11 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // part. unsigned LoSize = VT.getSizeInBits(); SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, RHS, - DAG.getConstant(LoSize-1, TLI.getPointerTy())); + DAG.getConstant(LoSize - 1, dl, + TLI.getPointerTy())); SDValue HiRHS = DAG.getNode(ISD::SRA, dl, VT, LHS, - DAG.getConstant(LoSize-1, TLI.getPointerTy())); + DAG.getConstant(LoSize - 1, dl, + TLI.getPointerTy())); // Here we're passing the 2 arguments explicitly as 4 arguments that are // pre-lowered to the correct types. This all depends upon WideVT not @@ -3821,9 +3770,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue Args[] = { LHS, HiLHS, RHS, HiRHS }; SDValue Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl); BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, - DAG.getIntPtrConstant(1)); + DAG.getIntPtrConstant(1, dl)); // Ret is a node with an illegal type. Because such things are not // generally permitted during this phase of legalization, make sure the // node has no more uses. The above EXTRACT_ELEMENT nodes should have been @@ -3833,14 +3782,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } if (isSigned) { - Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, + Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, dl, TLI.getShiftAmountTy(BottomHalf.getValueType())); Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1); TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf, Tmp1, ISD::SETNE); } else { TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf, - DAG.getConstant(0, VT), ISD::SETNE); + DAG.getConstant(0, dl, VT), ISD::SETNE); } Results.push_back(BottomHalf); Results.push_back(TopHalf); @@ -3851,7 +3800,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1)); Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2, - DAG.getConstant(PairTy.getSizeInBits()/2, + DAG.getConstant(PairTy.getSizeInBits()/2, dl, TLI.getShiftAmountTy(PairTy))); Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2)); break; @@ -3866,7 +3815,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { cast<CondCodeSDNode>(Tmp1.getOperand(2))->get()); } else { Tmp1 = DAG.getSelectCC(dl, Tmp1, - DAG.getConstant(0, Tmp1.getValueType()), + DAG.getConstant(0, dl, Tmp1.getValueType()), Tmp2, Tmp3, ISD::SETNE); } Results.push_back(Tmp1); @@ -3882,8 +3831,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { unsigned EntrySize = DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); - Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), - Index, DAG.getConstant(EntrySize, Index.getValueType())); + Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, + DAG.getConstant(EntrySize, dl, Index.getValueType())); SDValue Addr = DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, Table); @@ -3917,10 +3866,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // We test only the i1 bit. Skip the AND if UNDEF. Tmp3 = (Tmp2.getOpcode() == ISD::UNDEF) ? Tmp2 : DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2, - DAG.getConstant(1, Tmp2.getValueType())); + DAG.getConstant(1, dl, Tmp2.getValueType())); Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1, DAG.getCondCode(ISD::SETNE), Tmp3, - DAG.getConstant(0, Tmp3.getValueType()), + DAG.getConstant(0, dl, Tmp3.getValueType()), Node->getOperand(2)); } Results.push_back(Tmp1); @@ -3962,7 +3911,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2, - DAG.getConstant(TrueValue, VT), DAG.getConstant(0, VT), + DAG.getConstant(TrueValue, dl, VT), + DAG.getConstant(0, dl, VT), Tmp3); Results.push_back(Tmp1); break; @@ -4030,7 +3980,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, Tmp3, Tmp4, CC); } else { - Tmp2 = DAG.getConstant(0, Tmp1.getValueType()); + Tmp2 = DAG.getConstant(0, dl, Tmp1.getValueType()); CC = DAG.getCondCode(ISD::SETNE); Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, Tmp3, Tmp4, CC); @@ -4061,7 +4011,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2, Tmp3, Node->getOperand(4)); } else { - Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); + Tmp3 = DAG.getConstant(0, dl, Tmp2.getValueType()); Tmp4 = DAG.getCondCode(ISD::SETNE); Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2, Tmp3, Node->getOperand(4)); @@ -4085,12 +4035,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { for (unsigned Idx = 0; Idx < NumElem; Idx++) { SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), - Node->getOperand(0), DAG.getConstant(Idx, - TLI.getVectorIdxTy())); + Node->getOperand(0), + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy())); SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), - Node->getOperand(1), DAG.getConstant(Idx, - TLI.getVectorIdxTy())); + Node->getOperand(1), + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy())); Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, VT.getScalarType(), Ex, Sh)); } @@ -4125,6 +4075,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Node->getOpcode() == ISD::SETCC) { OVT = Node->getOperand(0).getSimpleValueType(); } + if (Node->getOpcode() == ISD::BR_CC) + OVT = Node->getOperand(2).getSimpleValueType(); MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); SDLoc dl(Node); SDValue Tmp1, Tmp2, Tmp3; @@ -4142,16 +4094,16 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { if (Node->getOpcode() == ISD::CTTZ) { // FIXME: This should set a bit in the zero extended value instead. Tmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT), - Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT), + Tmp1, DAG.getConstant(NVT.getSizeInBits(), dl, NVT), ISD::SETEQ); Tmp1 = DAG.getSelect(dl, NVT, Tmp2, - DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1); + DAG.getConstant(OVT.getSizeInBits(), dl, NVT), Tmp1); } else if (Node->getOpcode() == ISD::CTLZ || Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) { // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT)) Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1, DAG.getConstant(NVT.getSizeInBits() - - OVT.getSizeInBits(), NVT)); + OVT.getSizeInBits(), dl, NVT)); } Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1)); break; @@ -4160,7 +4112,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1); Tmp1 = DAG.getNode(ISD::SRL, dl, NVT, Tmp1, - DAG.getConstant(DiffBits, TLI.getShiftAmountTy(NVT))); + DAG.getConstant(DiffBits, dl, + TLI.getShiftAmountTy(NVT))); Results.push_back(Tmp1); break; } @@ -4250,7 +4203,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1); else Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); Results.push_back(Tmp1); break; } @@ -4280,27 +4233,74 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp1, Tmp2, Node->getOperand(2))); break; } + case ISD::BR_CC: { + unsigned ExtOp = ISD::FP_EXTEND; + if (NVT.isInteger()) { + ISD::CondCode CCCode = + cast<CondCodeSDNode>(Node->getOperand(1))->get(); + ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + } + Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2)); + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(3)); + Results.push_back(DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), + Node->getOperand(0), Node->getOperand(1), + Tmp1, Tmp2, Node->getOperand(4))); + break; + } case ISD::FADD: case ISD::FSUB: case ISD::FMUL: case ISD::FDIV: case ISD::FREM: + case ISD::FMINNUM: + case ISD::FMAXNUM: + case ISD::FCOPYSIGN: case ISD::FPOW: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, - Tmp3, DAG.getIntPtrConstant(0))); + Tmp3, DAG.getIntPtrConstant(0, dl))); break; } - case ISD::FLOG2: - case ISD::FEXP2: + case ISD::FMA: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); + Tmp3 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(2)); + Results.push_back( + DAG.getNode(ISD::FP_ROUND, dl, OVT, + DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3), + DAG.getIntPtrConstant(0, dl))); + break; + } + case ISD::FPOWI: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = Node->getOperand(1); + Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); + Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, + Tmp3, DAG.getIntPtrConstant(0, dl))); + break; + } + case ISD::FFLOOR: + case ISD::FCEIL: + case ISD::FRINT: + case ISD::FNEARBYINT: + case ISD::FROUND: + case ISD::FTRUNC: + case ISD::FNEG: + case ISD::FSQRT: + case ISD::FSIN: + case ISD::FCOS: case ISD::FLOG: - case ISD::FEXP: { + case ISD::FLOG2: + case ISD::FLOG10: + case ISD::FABS: + case ISD::FEXP: + case ISD::FEXP2: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, - Tmp2, DAG.getIntPtrConstant(0))); + Tmp2, DAG.getIntPtrConstant(0, dl))); break; } } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index b596715..37fdf44 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -131,7 +131,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) { - return DAG.getConstant(N->getValueAPF().bitcastToAPInt(), + return DAG.getConstant(N->getValueAPF().bitcastToAPInt(), SDLoc(N), TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0))); } @@ -149,8 +149,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { // Mask = ~(1 << (Size-1)) APInt API = APInt::getAllOnesValue(Size); - API.clearBit(Size-1); - SDValue Mask = DAG.getConstant(API, NVT); + API.clearBit(Size - 1); + SDValue Mask = DAG.getConstant(API, SDLoc(N), NVT); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return DAG.getNode(ISD::AND, SDLoc(N), NVT, Op, Mask); } @@ -218,8 +218,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { unsigned RSize = RVT.getSizeInBits(); // First get the sign bit of second operand. - SDValue SignBit = DAG.getNode(ISD::SHL, dl, RVT, DAG.getConstant(1, RVT), - DAG.getConstant(RSize - 1, + SDValue SignBit = DAG.getNode(ISD::SHL, dl, RVT, DAG.getConstant(1, dl, RVT), + DAG.getConstant(RSize - 1, dl, TLI.getShiftAmountTy(RVT))); SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit); @@ -227,21 +227,21 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits(); if (SizeDiff > 0) { SignBit = DAG.getNode(ISD::SRL, dl, RVT, SignBit, - DAG.getConstant(SizeDiff, + DAG.getConstant(SizeDiff, dl, TLI.getShiftAmountTy(SignBit.getValueType()))); SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit); } else if (SizeDiff < 0) { SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit); SignBit = DAG.getNode(ISD::SHL, dl, LVT, SignBit, - DAG.getConstant(-SizeDiff, + DAG.getConstant(-SizeDiff, dl, TLI.getShiftAmountTy(SignBit.getValueType()))); } // Clear the sign bit of the first operand. - SDValue Mask = DAG.getNode(ISD::SHL, dl, LVT, DAG.getConstant(1, LVT), - DAG.getConstant(LSize - 1, + SDValue Mask = DAG.getNode(ISD::SHL, dl, LVT, DAG.getConstant(1, dl, LVT), + DAG.getConstant(LSize - 1, dl, TLI.getShiftAmountTy(LVT))); - Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, LVT)); + Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, dl, LVT)); LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask); // Or the value with the sign bit. @@ -386,8 +386,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDLoc dl(N); // Expand Y = FNEG(X) -> Y = SUB -0.0, X - SDValue Ops[2] = { DAG.getConstantFP(-0.0, N->getValueType(0)), + SDValue Ops[2] = { DAG.getConstantFP(-0.0, dl, N->getValueType(0)), GetSoftenedFloat(N->getOperand(0)) }; return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::SUB_F32, @@ -395,7 +396,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, 2, false, dl).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { @@ -755,7 +756,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. if (!NewRHS.getNode()) { - NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -794,7 +795,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. if (!NewRHS.getNode()) { - NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -837,7 +838,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { if (ST->isTruncatingStore()) // Do an FP_ROUND followed by a non-truncating store. Val = BitConvertToInteger(DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(), - Val, DAG.getIntPtrConstant(0))); + Val, DAG.getIntPtrConstant(0, dl))); else Val = GetSoftenedFloat(Val); @@ -927,12 +928,13 @@ void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, assert(NVT.getSizeInBits() == integerPartWidth && "Do not know how to expand this float constant!"); APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt(); + SDLoc dl(N); Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), APInt(integerPartWidth, C.getRawData()[1])), - NVT); + dl, NVT); Hi = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), APInt(integerPartWidth, C.getRawData()[0])), - NVT); + dl, NVT); } void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo, @@ -1136,9 +1138,10 @@ void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - Hi = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), NVT, N->getOperand(0)); + SDLoc dl(N); + Hi = DAG.getNode(ISD::FP_EXTEND, dl, NVT, N->getOperand(0)); Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), - APInt(NVT.getSizeInBits(), 0)), NVT); + APInt(NVT.getSizeInBits(), 0)), dl, NVT); } void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N, @@ -1262,7 +1265,7 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, // The low part is zero. Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), - APInt(NVT.getSizeInBits(), 0)), NVT); + APInt(NVT.getSizeInBits(), 0)), dl, NVT); // Modified the chain - switch anything that used the old chain to use the // new one. @@ -1287,7 +1290,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, MVT::i32, Src); Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), - APInt(NVT.getSizeInBits(), 0)), NVT); + APInt(NVT.getSizeInBits(), 0)), dl, NVT); Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src); } else { RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; @@ -1335,8 +1338,8 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, Lo = DAG.getNode(ISD::FADD, dl, VT, Hi, DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble, APInt(128, Parts)), - MVT::ppcf128)); - Lo = DAG.getSelectCC(dl, Src, DAG.getConstant(0, SrcVT), + dl, MVT::ppcf128)); + Lo = DAG.getSelectCC(dl, Src, DAG.getConstant(0, dl, SrcVT), Lo, Hi, ISD::SETLT); GetPairElements(Lo, Lo, Hi); } @@ -1436,7 +1439,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) { // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. if (!NewRHS.getNode()) { - NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -1479,7 +1482,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { SDValue Res = DAG.getNode(ISD::FP_ROUND_INREG, dl, MVT::ppcf128, N->getOperand(0), DAG.getValueType(MVT::f64)); Res = DAG.getNode(ISD::FP_ROUND, dl, MVT::f64, Res, - DAG.getIntPtrConstant(1)); + DAG.getIntPtrConstant(1, dl)); return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res); } @@ -1499,7 +1502,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { "Logic only correct for ppcf128!"); const uint64_t TwoE31[] = {0x41e0000000000000LL, 0}; APFloat APF = APFloat(APFloat::PPCDoubleDouble, APInt(128, TwoE31)); - SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128); + SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128); // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X // FIXME: generated code sucks. return DAG.getSelectCC(dl, N->getOperand(0), Tmp, @@ -1509,7 +1512,8 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { MVT::ppcf128, N->getOperand(0), Tmp)), - DAG.getConstant(0x80000000, MVT::i32)), + DAG.getConstant(0x80000000, dl, + MVT::i32)), DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, N->getOperand(0)), ISD::SETGE); @@ -1529,7 +1533,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. if (!NewRHS.getNode()) { - NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -1579,3 +1583,420 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { return DAG.getTruncStore(Chain, SDLoc(N), Hi, Ptr, ST->getMemoryVT(), ST->getMemOperand()); } + +//===----------------------------------------------------------------------===// +// Float Operand Promotion +//===----------------------------------------------------------------------===// +// + +static ISD::NodeType GetPromotionOpcode(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::f16) { + return ISD::FP16_TO_FP; + } else if (RetVT == MVT::f16) { + return ISD::FP_TO_FP16; + } + + report_fatal_error("Attempt at an invalid promotion-related conversion"); +} + +bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) { + SDValue R = SDValue(); + + // Nodes that use a promotion-requiring floating point operand, but doesn't + // produce a promotion-requiring floating point result, need to be legalized + // to use the promoted float operand. Nodes that produce at least one + // promotion-requiring floating point result have their operands legalized as + // a part of PromoteFloatResult. + switch (N->getOpcode()) { + default: + llvm_unreachable("Do not know how to promote this operator's operand!"); + + case ISD::BITCAST: R = PromoteFloatOp_BITCAST(N, OpNo); break; + case ISD::FCOPYSIGN: R = PromoteFloatOp_FCOPYSIGN(N, OpNo); break; + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: R = PromoteFloatOp_FP_TO_XINT(N, OpNo); break; + case ISD::FP_EXTEND: R = PromoteFloatOp_FP_EXTEND(N, OpNo); break; + case ISD::SELECT_CC: R = PromoteFloatOp_SELECT_CC(N, OpNo); break; + case ISD::SETCC: R = PromoteFloatOp_SETCC(N, OpNo); break; + case ISD::STORE: R = PromoteFloatOp_STORE(N, OpNo); break; + } + + if (R.getNode()) + ReplaceValueWith(SDValue(N, 0), R); + return false; +} + +SDValue DAGTypeLegalizer::PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo) { + SDValue Op = N->getOperand(0); + EVT OpVT = Op->getValueType(0); + + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits()); + assert (IVT == N->getValueType(0) && "Bitcast to type of different size"); + + SDValue Promoted = GetPromotedFloat(N->getOperand(0)); + EVT PromotedVT = Promoted->getValueType(0); + + // Convert the promoted float value to the desired IVT. + return DAG.getNode(GetPromotionOpcode(PromotedVT, OpVT), SDLoc(N), IVT, + Promoted); +} + +// Promote Operand 1 of FCOPYSIGN. Operand 0 ought to be handled by +// PromoteFloatRes_FCOPYSIGN. +SDValue DAGTypeLegalizer::PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo) { + assert (OpNo == 1 && "Only Operand 1 must need promotion here"); + SDValue Op1 = GetPromotedFloat(N->getOperand(1)); + + return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), + N->getOperand(0), Op1); +} + +// Convert the promoted float value to the desired integer type +SDValue DAGTypeLegalizer::PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo) { + SDValue Op = GetPromotedFloat(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), Op); +} + +SDValue DAGTypeLegalizer::PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo) { + SDValue Op = GetPromotedFloat(N->getOperand(0)); + EVT VT = N->getValueType(0); + + // Desired VT is same as promoted type. Use promoted float directly. + if (VT == Op->getValueType(0)) + return Op; + + // Else, extend the promoted float value to the desired VT. + return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Op); +} + +// Promote the float operands used for comparison. The true- and false- +// operands have the same type as the result and are promoted, if needed, by +// PromoteFloatRes_SELECT_CC +SDValue DAGTypeLegalizer::PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo) { + SDValue LHS = GetPromotedFloat(N->getOperand(0)); + SDValue RHS = GetPromotedFloat(N->getOperand(1)); + + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0), + LHS, RHS, N->getOperand(2), N->getOperand(3), + N->getOperand(4)); +} + +// Construct a SETCC that compares the promoted values and sets the conditional +// code. +SDValue DAGTypeLegalizer::PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op0 = GetPromotedFloat(N->getOperand(0)); + SDValue Op1 = GetPromotedFloat(N->getOperand(1)); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); + + return DAG.getSetCC(SDLoc(N), NVT, Op0, Op1, CCCode); + +} + +// Lower the promoted Float down to the integer value of same size and construct +// a STORE of the integer value. +SDValue DAGTypeLegalizer::PromoteFloatOp_STORE(SDNode *N, unsigned OpNo) { + StoreSDNode *ST = cast<StoreSDNode>(N); + SDValue Val = ST->getValue(); + SDLoc DL(N); + + SDValue Promoted = GetPromotedFloat(Val); + EVT VT = ST->getOperand(1)->getValueType(0); + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + + SDValue NewVal; + NewVal = DAG.getNode(GetPromotionOpcode(Promoted.getValueType(), VT), DL, + IVT, Promoted); + + return DAG.getStore(ST->getChain(), DL, NewVal, ST->getBasePtr(), + ST->getMemOperand()); +} + +//===----------------------------------------------------------------------===// +// Float Result Promotion +//===----------------------------------------------------------------------===// + +void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { + SDValue R = SDValue(); + + switch (N->getOpcode()) { + // These opcodes cannot appear if promotion of FP16 is done in the backend + // instead of Clang + case ISD::FP16_TO_FP: + case ISD::FP_TO_FP16: + default: + llvm_unreachable("Do not know how to promote this operator's result!"); + + case ISD::BITCAST: R = PromoteFloatRes_BITCAST(N); break; + case ISD::ConstantFP: R = PromoteFloatRes_ConstantFP(N); break; + case ISD::EXTRACT_VECTOR_ELT: + R = PromoteFloatRes_EXTRACT_VECTOR_ELT(N); break; + case ISD::FCOPYSIGN: R = PromoteFloatRes_FCOPYSIGN(N); break; + + // Unary FP Operations + case ISD::FABS: + case ISD::FCEIL: + case ISD::FCOS: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FFLOOR: + case ISD::FLOG: + case ISD::FLOG2: + case ISD::FLOG10: + case ISD::FNEARBYINT: + case ISD::FNEG: + case ISD::FRINT: + case ISD::FROUND: + case ISD::FSIN: + case ISD::FSQRT: + case ISD::FTRUNC: R = PromoteFloatRes_UnaryOp(N); break; + + // Binary FP Operations + case ISD::FADD: + case ISD::FDIV: + case ISD::FMAXNUM: + case ISD::FMINNUM: + case ISD::FMUL: + case ISD::FPOW: + case ISD::FREM: + case ISD::FSUB: R = PromoteFloatRes_BinOp(N); break; + + case ISD::FMA: // FMA is same as FMAD + case ISD::FMAD: R = PromoteFloatRes_FMAD(N); break; + + case ISD::FPOWI: R = PromoteFloatRes_FPOWI(N); break; + + case ISD::FP_ROUND: R = PromoteFloatRes_FP_ROUND(N); break; + case ISD::LOAD: R = PromoteFloatRes_LOAD(N); break; + case ISD::SELECT: R = PromoteFloatRes_SELECT(N); break; + case ISD::SELECT_CC: R = PromoteFloatRes_SELECT_CC(N); break; + + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: R = PromoteFloatRes_XINT_TO_FP(N); break; + case ISD::UNDEF: R = PromoteFloatRes_UNDEF(N); break; + + } + + if (R.getNode()) + SetPromotedFloat(SDValue(N, ResNo), R); +} + +// Bitcast from i16 to f16: convert the i16 to a f32 value instead. +// At this point, it is not possible to determine if the bitcast value is +// eventually stored to memory or promoted to f32 or promoted to a floating +// point at a higher precision. Some of these cases are handled by FP_EXTEND, +// STORE promotion handlers. +SDValue DAGTypeLegalizer::PromoteFloatRes_BITCAST(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, + N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::PromoteFloatRes_ConstantFP(SDNode *N) { + ConstantFPSDNode *CFPNode = cast<ConstantFPSDNode>(N); + EVT VT = N->getValueType(0); + SDLoc DL(N); + + // Get the (bit-cast) APInt of the APFloat and build an integer constant + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + SDValue C = DAG.getConstant(CFPNode->getValueAPF().bitcastToAPInt(), DL, + IVT); + + // Convert the Constant to the desired FP type + // FIXME We might be able to do the conversion during compilation and get rid + // of it from the object code + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + return DAG.getNode(GetPromotionOpcode(VT, NVT), DL, NVT, C); +} + +// If the Index operand is a constant, try to redirect the extract operation to +// the correct legalized vector. If not, bit-convert the input vector to +// equivalent integer vector. Extract the element as an (bit-cast) integer +// value and convert it to the promoted type. +SDValue DAGTypeLegalizer::PromoteFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { + SDLoc DL(N); + + // If the index is constant, try to extract the value from the legalized + // vector type. + if (isa<ConstantSDNode>(N->getOperand(1))) { + SDValue Vec = N->getOperand(0); + SDValue Idx = N->getOperand(1); + EVT VecVT = Vec->getValueType(0); + EVT EltVT = VecVT.getVectorElementType(); + + uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + + switch (getTypeAction(VecVT)) { + default: break; + case TargetLowering::TypeScalarizeVector: { + SDValue Res = GetScalarizedVector(N->getOperand(0)); + ReplaceValueWith(SDValue(N, 0), Res); + return SDValue(); + } + case TargetLowering::TypeWidenVector: { + Vec = GetWidenedVector(Vec); + SDValue Res = DAG.getNode(N->getOpcode(), DL, EltVT, Vec, Idx); + ReplaceValueWith(SDValue(N, 0), Res); + return SDValue(); + } + case TargetLowering::TypeSplitVector: { + SDValue Lo, Hi; + GetSplitVector(Vec, Lo, Hi); + + uint64_t LoElts = Lo.getValueType().getVectorNumElements(); + SDValue Res; + if (IdxVal < LoElts) + Res = DAG.getNode(N->getOpcode(), DL, EltVT, Lo, Idx); + else + Res = DAG.getNode(N->getOpcode(), DL, EltVT, Hi, + DAG.getConstant(IdxVal - LoElts, DL, + Idx.getValueType())); + ReplaceValueWith(SDValue(N, 0), Res); + return SDValue(); + } + + } + } + + // Bit-convert the input vector to the equivalent integer vector + SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0)); + EVT IVT = NewOp.getValueType().getVectorElementType(); + + // Extract the element as an (bit-cast) integer value + SDValue NewVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IVT, + NewOp, N->getOperand(1)); + + // Convert the element to the desired FP type + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, NewVal); +} + +// FCOPYSIGN(X, Y) returns the value of X with the sign of Y. If the result +// needs promotion, so does the argument X. Note that Y, if needed, will be +// handled during operand promotion. +SDValue DAGTypeLegalizer::PromoteFloatRes_FCOPYSIGN(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op0 = GetPromotedFloat(N->getOperand(0)); + + SDValue Op1 = N->getOperand(1); + + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1); +} + +// Unary operation where the result and the operand have PromoteFloat type +// action. Construct a new SDNode with the promoted float value of the old +// operand. +SDValue DAGTypeLegalizer::PromoteFloatRes_UnaryOp(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op = GetPromotedFloat(N->getOperand(0)); + + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op); +} + +// Binary operations where the result and both operands have PromoteFloat type +// action. Construct a new SDNode with the promoted float values of the old +// operands. +SDValue DAGTypeLegalizer::PromoteFloatRes_BinOp(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op0 = GetPromotedFloat(N->getOperand(0)); + SDValue Op1 = GetPromotedFloat(N->getOperand(1)); + + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1); +} + +SDValue DAGTypeLegalizer::PromoteFloatRes_FMAD(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op0 = GetPromotedFloat(N->getOperand(0)); + SDValue Op1 = GetPromotedFloat(N->getOperand(1)); + SDValue Op2 = GetPromotedFloat(N->getOperand(2)); + + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1, Op2); +} + +// Promote the Float (first) operand and retain the Integer (second) operand +SDValue DAGTypeLegalizer::PromoteFloatRes_FPOWI(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op0 = GetPromotedFloat(N->getOperand(0)); + SDValue Op1 = N->getOperand(1); + + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1); +} + +// Explicit operation to reduce precision. Reduce the value to half precision +// and promote it back to the legal type. +SDValue DAGTypeLegalizer::PromoteFloatRes_FP_ROUND(SDNode *N) { + SDLoc DL(N); + + SDValue Op = N->getOperand(0); + EVT VT = N->getValueType(0); + EVT OpVT = Op->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + + // Round promoted float to desired precision + SDValue Round = DAG.getNode(GetPromotionOpcode(OpVT, VT), DL, IVT, Op); + // Promote it back to the legal output type + return DAG.getNode(GetPromotionOpcode(VT, NVT), DL, NVT, Round); +} + +SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) { + LoadSDNode *L = cast<LoadSDNode>(N); + EVT VT = N->getValueType(0); + + // Load the value as an integer value with the same number of bits + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + SDValue newL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), + IVT, SDLoc(N), L->getChain(), L->getBasePtr(), + L->getOffset(), L->getPointerInfo(), IVT, L->isVolatile(), + L->isNonTemporal(), false, L->getAlignment(), + L->getAAInfo()); + // Legalize the chain result by replacing uses of the old value chain with the + // new one + ReplaceValueWith(SDValue(N, 1), newL.getValue(1)); + + // Convert the integer value to the desired FP type + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, newL); +} + +// Construct a new SELECT node with the promoted true- and false- values. +SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT(SDNode *N) { + SDValue TrueVal = GetPromotedFloat(N->getOperand(1)); + SDValue FalseVal = GetPromotedFloat(N->getOperand(2)); + + return DAG.getNode(ISD::SELECT, SDLoc(N), TrueVal->getValueType(0), + N->getOperand(0), TrueVal, FalseVal); +} + +// Construct a new SELECT_CC node with the promoted true- and false- values. +// The operands used for comparison are promoted by PromoteFloatOp_SELECT_CC. +SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT_CC(SDNode *N) { + SDValue TrueVal = GetPromotedFloat(N->getOperand(2)); + SDValue FalseVal = GetPromotedFloat(N->getOperand(3)); + + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0), + N->getOperand(0), N->getOperand(1), TrueVal, FalseVal, + N->getOperand(4)); +} + +// Construct a SDNode that transforms the SINT or UINT operand to the promoted +// float type. +SDValue DAGTypeLegalizer::PromoteFloatRes_XINT_TO_FP(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) { + return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0))); +} + diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index a4e44cc..eeaebf78 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -71,6 +71,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break; case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break; case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break; + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: Res = PromoteIntRes_SimpleIntBinOp(N); break; case ISD::SHL: Res = PromoteIntRes_SHL(N); break; case ISD::SIGN_EXTEND_INREG: Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break; @@ -251,6 +255,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { case TargetLowering::TypeSoftenFloat: // Promote the integer operand by hand. return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp)); + case TargetLowering::TypePromoteFloat: { + // Convert the promoted float by hand. + if (NOutVT.bitsEq(NInVT)) { + SDValue PromotedOp = GetPromotedFloat(InOp); + SDValue Trunc = DAG.getNode(ISD::FP_TO_FP16, dl, NOutVT, PromotedOp); + return DAG.getNode(ISD::AssertZext, dl, NOutVT, Trunc, + DAG.getValueType(OutVT)); + } + break; + } case TargetLowering::TypeExpandInteger: case TargetLowering::TypeExpandFloat: break; @@ -297,7 +311,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), - DAG.getConstant(DiffBits, TLI.getShiftAmountTy(NVT))); + DAG.getConstant(DiffBits, dl, TLI.getShiftAmountTy(NVT))); } SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { @@ -345,7 +359,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { // Subtract off the extra leading bits in the bigger type. return DAG.getNode( ISD::SUB, dl, NVT, Op, - DAG.getConstant(NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), + DAG.getConstant(NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl, NVT)); } @@ -366,7 +380,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { // the top of the original type. auto TopBit = APInt::getOneBitSet(NVT.getScalarSizeInBits(), OVT.getScalarSizeInBits()); - Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT)); + Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT)); } return DAG.getNode(N->getOpcode(), dl, NVT, Op); } @@ -723,9 +737,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { if (N->getOpcode() == ISD::UMULO) { // Unsigned overflow occurred if the high part is non-zero. SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul, - DAG.getIntPtrConstant(SmallVT.getSizeInBits())); + DAG.getIntPtrConstant(SmallVT.getSizeInBits(), + DL)); Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi, - DAG.getConstant(0, Hi.getValueType()), ISD::SETNE); + DAG.getConstant(0, DL, Hi.getValueType()), + ISD::SETNE); } else { // Signed overflow occurred if the high part does not sign extend the low. SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(), @@ -784,7 +800,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]); // Shift it to the right position and "or" it in. Part = DAG.getNode(ISD::SHL, dl, NVT, Part, - DAG.getConstant(i * RegVT.getSizeInBits(), + DAG.getConstant(i*RegVT.getSizeInBits(), dl, TLI.getPointerTy())); Res = DAG.getNode(ISD::OR, dl, NVT, Res, Part); } @@ -852,6 +868,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::FP16_TO_FP: case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break; case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break; + case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntOp_EXTRACT_SUBVECTOR(N); break; case ISD::SHL: case ISD::SRA: @@ -977,7 +994,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) { SDLoc dl(N); Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi, - DAG.getConstant(OVT.getSizeInBits(), TLI.getPointerTy())); + DAG.getConstant(OVT.getSizeInBits(), dl, + TLI.getPointerTy())); return DAG.getNode(ISD::OR, dl, N->getValueType(0), Lo, Hi); } @@ -1116,7 +1134,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){ - assert(OpNo == 2 && "Only know how to promote the mask!"); SDValue DataOp = N->getValue(); EVT DataVT = DataOp.getValueType(); SDValue Mask = N->getMask(); @@ -1127,7 +1144,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpN if (!TLI.isTypeLegal(DataVT)) { if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) { DataOp = GetPromotedInteger(DataOp); - Mask = PromoteTargetBoolean(Mask, DataOp.getValueType()); + if (!TLI.isTypeLegal(MaskVT)) + Mask = PromoteTargetBoolean(Mask, DataOp.getValueType()); TruncateStore = true; } else { @@ -1147,7 +1165,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpN unsigned NumConcat = WidenNumElts / MaskNumElts; SmallVector<SDValue, 16> Ops(NumConcat); - SDValue ZeroVal = DAG.getConstant(0, MaskVT); + SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT); Ops[0] = Mask; for (unsigned i = 1; i != NumConcat; ++i) Ops[i] = ZeroVal; @@ -1323,92 +1341,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { unsigned Opc = Node->getOpcode(); MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); - RTLIB::Libcall LC; - - switch (Opc) { - default: - llvm_unreachable("Unhandled atomic intrinsic Expand!"); - case ISD::ATOMIC_SWAP: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break; - case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break; - case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break; - case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break; - case MVT::i128:LC = RTLIB::SYNC_LOCK_TEST_AND_SET_16;break; - } - break; - case ISD::ATOMIC_CMP_SWAP: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break; - case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break; - case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break; - case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break; - case MVT::i128:LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16;break; - } - break; - case ISD::ATOMIC_LOAD_ADD: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_ADD_16;break; - } - break; - case ISD::ATOMIC_LOAD_SUB: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_SUB_16;break; - } - break; - case ISD::ATOMIC_LOAD_AND: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_AND_16;break; - } - break; - case ISD::ATOMIC_LOAD_OR: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_OR_16;break; - } - break; - case ISD::ATOMIC_LOAD_XOR: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_XOR_16;break; - } - break; - case ISD::ATOMIC_LOAD_NAND: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_NAND_16;break; - } - break; - } + RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); return ExpandChainLibCall(LC, Node, false); } @@ -1417,12 +1351,19 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { /// and the shift amount is a constant 'Amt'. Expand the operation. void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue &Lo, SDValue &Hi) { - assert(Amt && "Expected zero shifts to be already optimized away."); SDLoc DL(N); // Expand the incoming operand to be shifted, so that we have its parts SDValue InL, InH; GetExpandedInteger(N->getOperand(0), InL, InH); + // Though Amt shouldn't usually be 0, it's possible. E.g. when legalization + // splitted a vector shift, like this: <op1, op2> SHL <0, 2>. + if (!Amt) { + Lo = InL; + Hi = InH; + return; + } + EVT NVT = InL.getValueType(); unsigned VTBits = N->getValueType(0).getSizeInBits(); unsigned NVTBits = NVT.getSizeInBits(); @@ -1430,13 +1371,13 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, if (N->getOpcode() == ISD::SHL) { if (Amt > VTBits) { - Lo = Hi = DAG.getConstant(0, NVT); + Lo = Hi = DAG.getConstant(0, DL, NVT); } else if (Amt > NVTBits) { - Lo = DAG.getConstant(0, NVT); + Lo = DAG.getConstant(0, DL, NVT); Hi = DAG.getNode(ISD::SHL, DL, - NVT, InL, DAG.getConstant(Amt-NVTBits, ShTy)); + NVT, InL, DAG.getConstant(Amt - NVTBits, DL, ShTy)); } else if (Amt == NVTBits) { - Lo = DAG.getConstant(0, NVT); + Lo = DAG.getConstant(0, DL, NVT); Hi = InL; } else if (Amt == 1 && TLI.isOperationLegalOrCustom(ISD::ADDC, @@ -1448,34 +1389,34 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue HiOps[3] = { InH, InH, Lo.getValue(1) }; Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps); } else { - Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, ShTy)); + Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, DL, ShTy)); Hi = DAG.getNode(ISD::OR, DL, NVT, DAG.getNode(ISD::SHL, DL, NVT, InH, - DAG.getConstant(Amt, ShTy)), + DAG.getConstant(Amt, DL, ShTy)), DAG.getNode(ISD::SRL, DL, NVT, InL, - DAG.getConstant(NVTBits-Amt, ShTy))); + DAG.getConstant(NVTBits - Amt, DL, ShTy))); } return; } if (N->getOpcode() == ISD::SRL) { if (Amt > VTBits) { - Lo = DAG.getConstant(0, NVT); - Hi = DAG.getConstant(0, NVT); + Lo = DAG.getConstant(0, DL, NVT); + Hi = DAG.getConstant(0, DL, NVT); } else if (Amt > NVTBits) { Lo = DAG.getNode(ISD::SRL, DL, - NVT, InH, DAG.getConstant(Amt-NVTBits,ShTy)); - Hi = DAG.getConstant(0, NVT); + NVT, InH, DAG.getConstant(Amt - NVTBits, DL, ShTy)); + Hi = DAG.getConstant(0, DL, NVT); } else if (Amt == NVTBits) { Lo = InH; - Hi = DAG.getConstant(0, NVT); + Hi = DAG.getConstant(0, DL, NVT); } else { Lo = DAG.getNode(ISD::OR, DL, NVT, DAG.getNode(ISD::SRL, DL, NVT, InL, - DAG.getConstant(Amt, ShTy)), + DAG.getConstant(Amt, DL, ShTy)), DAG.getNode(ISD::SHL, DL, NVT, InH, - DAG.getConstant(NVTBits-Amt, ShTy))); - Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, ShTy)); + DAG.getConstant(NVTBits - Amt, DL, ShTy))); + Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy)); } return; } @@ -1483,23 +1424,23 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); if (Amt > VTBits) { Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH, - DAG.getConstant(NVTBits-1, ShTy)); + DAG.getConstant(NVTBits - 1, DL, ShTy)); } else if (Amt > NVTBits) { Lo = DAG.getNode(ISD::SRA, DL, NVT, InH, - DAG.getConstant(Amt-NVTBits, ShTy)); + DAG.getConstant(Amt-NVTBits, DL, ShTy)); Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, - DAG.getConstant(NVTBits-1, ShTy)); + DAG.getConstant(NVTBits - 1, DL, ShTy)); } else if (Amt == NVTBits) { Lo = InH; Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, - DAG.getConstant(NVTBits-1, ShTy)); + DAG.getConstant(NVTBits - 1, DL, ShTy)); } else { Lo = DAG.getNode(ISD::OR, DL, NVT, DAG.getNode(ISD::SRL, DL, NVT, InL, - DAG.getConstant(Amt, ShTy)), + DAG.getConstant(Amt, DL, ShTy)), DAG.getNode(ISD::SHL, DL, NVT, InH, - DAG.getConstant(NVTBits-Amt, ShTy))); - Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, ShTy)); + DAG.getConstant(NVTBits - Amt, DL, ShTy))); + Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy)); } } @@ -1535,21 +1476,21 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { if (KnownOne.intersects(HighBitMask)) { // Mask out the high bit, which we know is set. Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt, - DAG.getConstant(~HighBitMask, ShTy)); + DAG.getConstant(~HighBitMask, dl, ShTy)); switch (N->getOpcode()) { default: llvm_unreachable("Unknown shift"); case ISD::SHL: - Lo = DAG.getConstant(0, NVT); // Low part is zero. + Lo = DAG.getConstant(0, dl, NVT); // Low part is zero. Hi = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part. return true; case ISD::SRL: - Hi = DAG.getConstant(0, NVT); // Hi part is zero. + Hi = DAG.getConstant(0, dl, NVT); // Hi part is zero. Lo = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part. return true; case ISD::SRA: Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part. - DAG.getConstant(NVTBits-1, ShTy)); + DAG.getConstant(NVTBits - 1, dl, ShTy)); Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part. return true; } @@ -1562,7 +1503,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { // shift if x is zero. We can use XOR here because x is known to be smaller // than 32. SDValue Amt2 = DAG.getNode(ISD::XOR, dl, ShTy, Amt, - DAG.getConstant(NVTBits-1, ShTy)); + DAG.getConstant(NVTBits - 1, dl, ShTy)); unsigned Op1, Op2; switch (N->getOpcode()) { @@ -1578,7 +1519,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { // Use a little trick to get the bits that move from Lo to Hi. First // shift by one bit. - SDValue Sh1 = DAG.getNode(Op2, dl, NVT, InL, DAG.getConstant(1, ShTy)); + SDValue Sh1 = DAG.getNode(Op2, dl, NVT, InL, DAG.getConstant(1, dl, ShTy)); // Then compute the remaining shift with amount-1. SDValue Sh2 = DAG.getNode(Op2, dl, NVT, Sh1, Amt2); @@ -1609,11 +1550,14 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue InL, InH; GetExpandedInteger(N->getOperand(0), InL, InH); - SDValue NVBitsNode = DAG.getConstant(NVTBits, ShTy); + SDValue NVBitsNode = DAG.getConstant(NVTBits, dl, ShTy); SDValue AmtExcess = DAG.getNode(ISD::SUB, dl, ShTy, Amt, NVBitsNode); SDValue AmtLack = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt); SDValue isShort = DAG.getSetCC(dl, getSetCCResultType(ShTy), Amt, NVBitsNode, ISD::SETULT); + SDValue isZero = DAG.getSetCC(dl, getSetCCResultType(ShTy), + Amt, DAG.getConstant(0, dl, ShTy), + ISD::SETEQ); SDValue LoS, HiS, LoL, HiL; switch (N->getOpcode()) { @@ -1623,16 +1567,15 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { LoS = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); HiS = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(ISD::SHL, dl, NVT, InH, Amt), - // FIXME: If Amt is zero, the following shift generates an undefined result - // on some architectures. DAG.getNode(ISD::SRL, dl, NVT, InL, AmtLack)); // Long: ShAmt >= NVTBits - LoL = DAG.getConstant(0, NVT); // Lo part is zero. + LoL = DAG.getConstant(0, dl, NVT); // Lo part is zero. HiL = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // Hi from Lo part. Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL); - Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL); + Hi = DAG.getSelect(dl, NVT, isZero, InH, + DAG.getSelect(dl, NVT, isShort, HiS, HiL)); return true; case ISD::SRL: // Short: ShAmt < NVTBits @@ -1644,10 +1587,11 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack)); // Long: ShAmt >= NVTBits - HiL = DAG.getConstant(0, NVT); // Hi part is zero. + HiL = DAG.getConstant(0, dl, NVT); // Hi part is zero. LoL = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Lo from Hi part. - Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL); + Lo = DAG.getSelect(dl, NVT, isZero, InL, + DAG.getSelect(dl, NVT, isShort, LoS, LoL)); Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL); return true; case ISD::SRA: @@ -1655,16 +1599,15 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { HiS = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); LoS = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(ISD::SRL, dl, NVT, InL, Amt), - // FIXME: If Amt is zero, the following shift generates an undefined result - // on some architectures. DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack)); // Long: ShAmt >= NVTBits HiL = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign of Hi part. - DAG.getConstant(NVTBits-1, ShTy)); + DAG.getConstant(NVTBits - 1, dl, ShTy)); LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part. - Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL); + Lo = DAG.getSelect(dl, NVT, isZero, InL, + DAG.getSelect(dl, NVT, isShort, LoS, LoL)); Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL); return true; } @@ -1706,18 +1649,50 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, return; } + bool hasOVF = + TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ? + ISD::UADDO : ISD::USUBO, + TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); + if (hasOVF) { + SDVTList VTList = DAG.getVTList(NVT, NVT); + TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT); + int RevOpc; + if (N->getOpcode() == ISD::ADD) { + RevOpc = ISD::SUB; + Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps); + Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2)); + } else { + RevOpc = ISD::ADD; + Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps); + Hi = DAG.getNode(ISD::SUB, dl, NVT, makeArrayRef(HiOps, 2)); + } + SDValue OVF = Lo.getValue(1); + + switch (BoolType) { + case TargetLoweringBase::UndefinedBooleanContent: + OVF = DAG.getNode(ISD::AND, dl, NVT, DAG.getConstant(1, dl, NVT), OVF); + // Fallthrough + case TargetLoweringBase::ZeroOrOneBooleanContent: + Hi = DAG.getNode(N->getOpcode(), dl, NVT, Hi, OVF); + break; + case TargetLoweringBase::ZeroOrNegativeOneBooleanContent: + Hi = DAG.getNode(RevOpc, dl, NVT, Hi, OVF); + } + return; + } + if (N->getOpcode() == ISD::ADD) { Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps); Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2)); SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0], ISD::SETULT); SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1, - DAG.getConstant(1, NVT), - DAG.getConstant(0, NVT)); + DAG.getConstant(1, dl, NVT), + DAG.getConstant(0, dl, NVT)); SDValue Cmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[1], ISD::SETULT); SDValue Carry2 = DAG.getSelect(dl, NVT, Cmp2, - DAG.getConstant(1, NVT), Carry1); + DAG.getConstant(1, dl, NVT), Carry1); Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2); } else { Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps); @@ -1726,8 +1701,8 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()), LoOps[0], LoOps[1], ISD::SETULT); SDValue Borrow = DAG.getSelect(dl, NVT, Cmp, - DAG.getConstant(1, NVT), - DAG.getConstant(0, NVT)); + DAG.getConstant(1, dl, NVT), + DAG.getConstant(0, dl, NVT)); Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow); } } @@ -1824,7 +1799,7 @@ void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N, Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT)); // The high part replicates the sign bit of Lo, make it explicit. Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, - DAG.getConstant(NVTBits-1, TLI.getPointerTy())); + DAG.getConstant(NVTBits - 1, dl, TLI.getPointerTy())); } } @@ -1844,7 +1819,7 @@ void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N, } else { Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT)); // The high part must be zero, make it explicit. - Hi = DAG.getConstant(0, NVT); + Hi = DAG.getConstant(0, dl, NVT); } } @@ -1864,8 +1839,9 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, const APInt &Cst = Constant->getAPIntValue(); bool IsTarget = Constant->isTargetOpcode(); bool IsOpaque = Constant->isOpaque(); - Lo = DAG.getConstant(Cst.trunc(NBitWidth), NVT, IsTarget, IsOpaque); - Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT, IsTarget, + SDLoc dl(N); + Lo = DAG.getConstant(Cst.trunc(NBitWidth), dl, NVT, IsTarget, IsOpaque); + Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), dl, NVT, IsTarget, IsOpaque); } @@ -1877,15 +1853,16 @@ void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N, EVT NVT = Lo.getValueType(); SDValue HiNotZero = DAG.getSetCC(dl, getSetCCResultType(NVT), Hi, - DAG.getConstant(0, NVT), ISD::SETNE); + DAG.getConstant(0, dl, NVT), ISD::SETNE); SDValue LoLZ = DAG.getNode(N->getOpcode(), dl, NVT, Lo); SDValue HiLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Hi); Lo = DAG.getSelect(dl, NVT, HiNotZero, HiLZ, DAG.getNode(ISD::ADD, dl, NVT, LoLZ, - DAG.getConstant(NVT.getSizeInBits(), NVT))); - Hi = DAG.getConstant(0, NVT); + DAG.getConstant(NVT.getSizeInBits(), dl, + NVT))); + Hi = DAG.getConstant(0, dl, NVT); } void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N, @@ -1896,7 +1873,7 @@ void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N, EVT NVT = Lo.getValueType(); Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo), DAG.getNode(ISD::CTPOP, dl, NVT, Hi)); - Hi = DAG.getConstant(0, NVT); + Hi = DAG.getConstant(0, dl, NVT); } void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, @@ -1907,22 +1884,27 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, EVT NVT = Lo.getValueType(); SDValue LoNotZero = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, - DAG.getConstant(0, NVT), ISD::SETNE); + DAG.getConstant(0, dl, NVT), ISD::SETNE); SDValue LoLZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, NVT, Lo); SDValue HiLZ = DAG.getNode(N->getOpcode(), dl, NVT, Hi); Lo = DAG.getSelect(dl, NVT, LoNotZero, LoLZ, DAG.getNode(ISD::ADD, dl, NVT, HiLZ, - DAG.getConstant(NVT.getSizeInBits(), NVT))); - Hi = DAG.getConstant(0, NVT); + DAG.getConstant(NVT.getSizeInBits(), dl, + NVT))); + Hi = DAG.getConstant(0, dl, NVT); } void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); EVT VT = N->getValueType(0); + SDValue Op = N->getOperand(0); + if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) + Op = GetPromotedFloat(Op); + RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, true/*irrelevant*/, @@ -1934,7 +1916,11 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); EVT VT = N->getValueType(0); + SDValue Op = N->getOperand(0); + if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) + Op = GetPromotedFloat(Op); + RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/, @@ -1980,10 +1966,10 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // lo part. unsigned LoSize = Lo.getValueType().getSizeInBits(); Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, - DAG.getConstant(LoSize-1, TLI.getPointerTy())); + DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy())); } else if (ExtType == ISD::ZEXTLOAD) { // The high part is just a zero. - Hi = DAG.getConstant(0, NVT); + Hi = DAG.getConstant(0, dl, NVT); } else { assert(ExtType == ISD::EXTLOAD && "Unknown extload!"); // The high part is undefined. @@ -2002,7 +1988,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, isVolatile, isNonTemporal, isInvariant, @@ -2029,7 +2015,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); // Load the rest of the low bits. Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), @@ -2046,12 +2032,12 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // Transfer low bits from the bottom of Hi to the top of Lo. Lo = DAG.getNode(ISD::OR, dl, NVT, Lo, DAG.getNode(ISD::SHL, dl, NVT, Hi, - DAG.getConstant(ExcessBits, + DAG.getConstant(ExcessBits, dl, TLI.getPointerTy()))); // Move high bits to the right position in Hi. Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl, NVT, Hi, - DAG.getConstant(NVT.getSizeInBits() - ExcessBits, + DAG.getConstant(NVT.getSizeInBits() - ExcessBits, dl, TLI.getPointerTy())); } } @@ -2127,7 +2113,7 @@ void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) // EVT OType = Node->getValueType(1); - SDValue Zero = DAG.getConstant(0, LHS.getValueType()); + SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType()); SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); @@ -2148,6 +2134,13 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); SDLoc dl(N); + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + + if (TLI.getOperationAction(ISD::SDIVREM, VT) == TargetLowering::Custom) { + SDValue Res = DAG.getNode(ISD::SDIVREM, dl, DAG.getVTList(VT, VT), Ops); + SplitInteger(Res.getValue(0), Lo, Hi); + return; + } RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i16) @@ -2160,7 +2153,6 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, LC = RTLIB::SDIV_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi); } @@ -2276,7 +2268,7 @@ void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, // The high part is obtained by SRA'ing all but one of the bits of low part. unsigned LoSize = NVT.getSizeInBits(); Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, - DAG.getConstant(LoSize-1, TLI.getPointerTy())); + DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy())); } else { // For example, extension of an i48 to an i64. The operand type necessarily // promotes to the result type, so will end up being expanded too. @@ -2310,7 +2302,7 @@ ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) { // The high part gets the sign extension from the lo-part. This handles // things like sextinreg V:i64 from i8. Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo, - DAG.getConstant(Hi.getValueType().getSizeInBits()-1, + DAG.getConstant(Hi.getValueType().getSizeInBits() - 1, dl, TLI.getPointerTy())); } else { // For example, extension of an i48 to an i64. Leave the low part alone, @@ -2327,6 +2319,13 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); SDLoc dl(N); + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + + if (TLI.getOperationAction(ISD::SDIVREM, VT) == TargetLowering::Custom) { + SDValue Res = DAG.getNode(ISD::SDIVREM, dl, DAG.getVTList(VT, VT), Ops); + SplitInteger(Res.getValue(1), Lo, Hi); + return; + } RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i16) @@ -2339,7 +2338,6 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, LC = RTLIB::SREM_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi); } @@ -2350,7 +2348,8 @@ void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0)); Hi = DAG.getNode(ISD::SRL, dl, N->getOperand(0).getValueType(), N->getOperand(0), - DAG.getConstant(NVT.getSizeInBits(), TLI.getPointerTy())); + DAG.getConstant(NVT.getSizeInBits(), dl, + TLI.getPointerTy())); Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi); } @@ -2392,14 +2391,14 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, // A divide for UMULO will be faster than a function call. Select to // make sure we aren't using 0. SDValue isZero = DAG.getSetCC(dl, getSetCCResultType(VT), - RHS, DAG.getConstant(0, VT), ISD::SETEQ); + RHS, DAG.getConstant(0, dl, VT), ISD::SETEQ); SDValue NotZero = DAG.getSelect(dl, VT, isZero, - DAG.getConstant(1, VT), RHS); + DAG.getConstant(1, dl, VT), RHS); SDValue DIV = DAG.getNode(ISD::UDIV, dl, VT, MUL, NotZero); SDValue Overflow = DAG.getSetCC(dl, N->getValueType(1), DIV, LHS, ISD::SETNE); Overflow = DAG.getSelect(dl, N->getValueType(1), isZero, - DAG.getConstant(0, N->getValueType(1)), + DAG.getConstant(0, dl, N->getValueType(1)), Overflow); ReplaceValueWith(SDValue(N, 1), Overflow); return; @@ -2422,7 +2421,7 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, SDValue Temp = DAG.CreateStackTemporary(PtrVT); // Temporary for the overflow value, default it to zero. SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, - DAG.getConstant(0, PtrVT), Temp, + DAG.getConstant(0, dl, PtrVT), Temp, MachinePointerInfo(), false, false, 0); TargetLowering::ArgListTy Args; @@ -2457,7 +2456,7 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp, MachinePointerInfo(), false, false, false, 0); SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2, - DAG.getConstant(0, PtrVT), + DAG.getConstant(0, dl, PtrVT), ISD::SETNE); // Use the overflow from the libcall everywhere. ReplaceValueWith(SDValue(N, 1), Ofl); @@ -2467,6 +2466,13 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); SDLoc dl(N); + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + + if (TLI.getOperationAction(ISD::UDIVREM, VT) == TargetLowering::Custom) { + SDValue Res = DAG.getNode(ISD::UDIVREM, dl, DAG.getVTList(VT, VT), Ops); + SplitInteger(Res.getValue(0), Lo, Hi); + return; + } RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i16) @@ -2479,7 +2485,6 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, LC = RTLIB::UDIV_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi); } @@ -2487,6 +2492,13 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); SDLoc dl(N); + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + + if (TLI.getOperationAction(ISD::UDIVREM, VT) == TargetLowering::Custom) { + SDValue Res = DAG.getNode(ISD::UDIVREM, dl, DAG.getVTList(VT, VT), Ops); + SplitInteger(Res.getValue(1), Lo, Hi); + return; + } RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i16) @@ -2499,7 +2511,6 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, LC = RTLIB::UREM_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi); } @@ -2511,7 +2522,7 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, if (Op.getValueType().bitsLE(NVT)) { // The low part is zero extension of the input (degenerates to a copy). Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0)); - Hi = DAG.getConstant(0, NVT); // The high part is just a zero. + Hi = DAG.getConstant(0, dl, NVT); // The high part is just a zero. } else { // For example, extension of an i48 to an i64. The operand type necessarily // promotes to the result type, so will end up being expanded too. @@ -2536,7 +2547,7 @@ void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N, SDLoc dl(N); EVT VT = cast<AtomicSDNode>(N)->getMemoryVT(); SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other); - SDValue Zero = DAG.getConstant(0, VT); + SDValue Zero = DAG.getConstant(0, dl, VT); SDValue Swap = DAG.getAtomicCmpSwap( ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, cast<AtomicSDNode>(N)->getMemoryVT(), VTs, N->getOperand(0), @@ -2637,7 +2648,7 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, NewLHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSLo, RHSLo); NewRHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSHi, RHSHi); NewLHS = DAG.getNode(ISD::OR, dl, NewLHS.getValueType(), NewLHS, NewRHS); - NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + NewRHS = DAG.getConstant(0, dl, NewLHS.getValueType()); return; } @@ -2726,7 +2737,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) { // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. if (!NewRHS.getNode()) { - NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -2744,7 +2755,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) { // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. if (!NewRHS.getNode()) { - NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -2839,7 +2850,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, isVolatile, isNonTemporal, @@ -2861,11 +2872,11 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { if (ExcessBits < NVT.getSizeInBits()) { // Transfer high bits from the top of Lo to the bottom of Hi. Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi, - DAG.getConstant(NVT.getSizeInBits() - ExcessBits, + DAG.getConstant(NVT.getSizeInBits() - ExcessBits, dl, TLI.getPointerTy())); Hi = DAG.getNode(ISD::OR, dl, NVT, Hi, DAG.getNode(ISD::SRL, dl, NVT, Lo, - DAG.getConstant(ExcessBits, + DAG.getConstant(ExcessBits, dl, TLI.getPointerTy()))); } @@ -2875,7 +2886,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); // Store the lowest ExcessBits bits in the second half. Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), @@ -2931,7 +2942,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { GetExpandedInteger(Op, Lo, Hi); SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(Hi.getValueType()), - Hi, DAG.getConstant(0, Hi.getValueType()), + Hi, + DAG.getConstant(0, dl, Hi.getValueType()), ISD::SETLT); // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits. @@ -2940,8 +2952,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { TLI.getPointerTy()); // Get a pointer to FF if the sign bit was set, or to 0 otherwise. - SDValue Zero = DAG.getIntPtrConstant(0); - SDValue Four = DAG.getIntPtrConstant(4); + SDValue Zero = DAG.getIntPtrConstant(0, dl); + SDValue Four = DAG.getIntPtrConstant(4, dl); if (TLI.isBigEndian()) std::swap(Zero, Four); SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, Zero, Four); @@ -2999,7 +3011,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { // Extract the element from the original vector. SDValue Index = DAG.getNode(ISD::ADD, dl, BaseIdx.getValueType(), - BaseIdx, DAG.getConstant(i, BaseIdx.getValueType())); + BaseIdx, DAG.getConstant(i, dl, BaseIdx.getValueType())); SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InVT.getVectorElementType(), N->getOperand(0), Index); @@ -3017,17 +3029,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) { EVT VT = N->getValueType(0); SDLoc dl(N); - unsigned NumElts = VT.getVectorNumElements(); - SmallVector<int, 8> NewMask; - for (unsigned i = 0; i != NumElts; ++i) { - NewMask.push_back(SV->getMaskElt(i)); - } + ArrayRef<int> NewMask = SV->getMask().slice(0, VT.getVectorNumElements()); SDValue V0 = GetPromotedInteger(N->getOperand(0)); SDValue V1 = GetPromotedInteger(N->getOperand(1)); EVT OutVT = V0.getValueType(); - return DAG.getVectorShuffle(OutVT, dl, V0, V1, &NewMask[0]); + return DAG.getVectorShuffle(OutVT, dl, V0, V1, NewMask); } @@ -3097,7 +3105,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { SDValue Op = N->getOperand(i); for (unsigned j = 0; j < NumElem; ++j) { SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - InElemTy, Op, DAG.getConstant(j, + InElemTy, Op, DAG.getConstant(j, dl, TLI.getVectorIdxTy())); Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); } @@ -3135,6 +3143,16 @@ SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) { return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0)); } +SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N) { + SDLoc dl(N); + SDValue V0 = GetPromotedInteger(N->getOperand(0)); + MVT InVT = V0.getValueType().getSimpleVT(); + MVT OutVT = MVT::getVectorVT(InVT.getVectorElementType(), + N->getValueType(0).getVectorNumElements()); + SDValue Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, V0, N->getOperand(1)); + return DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), Ext); +} + SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { SDLoc dl(N); unsigned NumElems = N->getNumOperands(); @@ -3153,7 +3171,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { for (unsigned i=0; i<NumElem; ++i) { // Extract element from incoming vector SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, - Incoming, DAG.getConstant(i, TLI.getVectorIdxTy())); + Incoming, DAG.getConstant(i, dl, TLI.getVectorIdxTy())); SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex); NewOps.push_back(Tr); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index ebf6b28..9c29769 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -259,6 +259,10 @@ bool DAGTypeLegalizer::run() { WidenVectorResult(N, i); Changed = true; goto NodeDone; + case TargetLowering::TypePromoteFloat: + PromoteFloatResult(N, i); + Changed = true; + goto NodeDone; } } @@ -308,6 +312,10 @@ ScanOperands: NeedsReanalyzing = WidenVectorOperand(N, i); Changed = true; break; + case TargetLowering::TypePromoteFloat: + NeedsReanalyzing = PromoteFloatOperand(N, i); + Changed = true; + break; } break; } @@ -753,6 +761,17 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { OpEntry = Result; } +void DAGTypeLegalizer::SetPromotedFloat(SDValue Op, SDValue Result) { + assert(Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + "Invalid type for promoted float"); + AnalyzeNewValue(Result); + + SDValue &OpEntry = PromotedFloats[Op]; + assert(!OpEntry.getNode() && "Node is already promoted!"); + OpEntry = Result; +} + void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { // Note that in some cases vector operation operands may be greater than // the vector element type. For example BUILD_VECTOR of type <1 x i1> with @@ -978,9 +997,9 @@ void DAGTypeLegalizer::GetPairElements(SDValue Pair, SDLoc dl(Pair); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Pair.getValueType()); Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair, - DAG.getIntPtrConstant(1)); + DAG.getIntPtrConstant(1, dl)); } SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT, @@ -993,7 +1012,7 @@ SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT, unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size. Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, - DAG.getConstant(EltSize, Index.getValueType())); + DAG.getConstant(EltSize, dl, Index.getValueType())); return DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, VecPtr); } @@ -1010,7 +1029,8 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo); Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi); Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi, - DAG.getConstant(LVT.getSizeInBits(), TLI.getPointerTy())); + DAG.getConstant(LVT.getSizeInBits(), dlHi, + TLI.getPointerTy())); return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi); } @@ -1096,7 +1116,8 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op, Op.getValueType().getSizeInBits() && "Invalid integer splitting!"); Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op); Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op, - DAG.getConstant(LoVT.getSizeInBits(), TLI.getPointerTy())); + DAG.getConstant(LoVT.getSizeInBits(), dl, + TLI.getPointerTy())); Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index cef3fc9..2f27789 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -93,6 +93,11 @@ private: /// the same size, this map indicates the converted value to use. SmallDenseMap<SDValue, SDValue, 8> SoftenedFloats; + /// PromotedFloats - For floating point nodes that have a smaller precision + /// than the smallest supported precision, this map indicates what promoted + /// value to use. + SmallDenseMap<SDValue, SDValue, 8> PromotedFloats; + /// ExpandedFloats - For float nodes that need to be expanded this map /// indicates which operands are the expanded version of the input. SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedFloats; @@ -273,6 +278,7 @@ private: SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_EXTRACT_ELEMENT(SDNode *N); SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N); SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N); SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); @@ -499,6 +505,44 @@ private: void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, SDLoc dl); + + //===--------------------------------------------------------------------===// + // Float promotion support: LegalizeFloatTypes.cpp + //===--------------------------------------------------------------------===// + + SDValue GetPromotedFloat(SDValue Op) { + SDValue &PromotedOp = PromotedFloats[Op]; + RemapValue(PromotedOp); + assert(PromotedOp.getNode() && "Operand wasn't promoted?"); + return PromotedOp; + } + void SetPromotedFloat(SDValue Op, SDValue Result); + + void PromoteFloatResult(SDNode *N, unsigned ResNo); + SDValue PromoteFloatRes_BITCAST(SDNode *N); + SDValue PromoteFloatRes_BinOp(SDNode *N); + SDValue PromoteFloatRes_ConstantFP(SDNode *N); + SDValue PromoteFloatRes_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue PromoteFloatRes_FCOPYSIGN(SDNode *N); + SDValue PromoteFloatRes_FMAD(SDNode *N); + SDValue PromoteFloatRes_FPOWI(SDNode *N); + SDValue PromoteFloatRes_FP_ROUND(SDNode *N); + SDValue PromoteFloatRes_LOAD(SDNode *N); + SDValue PromoteFloatRes_SELECT(SDNode *N); + SDValue PromoteFloatRes_SELECT_CC(SDNode *N); + SDValue PromoteFloatRes_UnaryOp(SDNode *N); + SDValue PromoteFloatRes_UNDEF(SDNode *N); + SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N); + + bool PromoteFloatOperand(SDNode *N, unsigned ResNo); + SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo); + //===--------------------------------------------------------------------===// // Scalarization Support: LegalizeVectorTypes.cpp //===--------------------------------------------------------------------===// @@ -582,6 +626,7 @@ private: void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_MLOAD(MaskedLoadSDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_MGATHER(MaskedGatherSDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -593,14 +638,16 @@ private: bool SplitVectorOperand(SDNode *N, unsigned OpNo); SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo); SDValue SplitVecOp_UnaryOp(SDNode *N); + SDValue SplitVecOp_TruncateHelper(SDNode *N); SDValue SplitVecOp_BITCAST(SDNode *N); SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo); + SDValue SplitVecOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo); + SDValue SplitVecOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo); SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N); - SDValue SplitVecOp_TRUNCATE(SDNode *N); SDValue SplitVecOp_VSETCC(SDNode *N); SDValue SplitVecOp_FP_ROUND(SDNode *N); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 38829b6..330c31c 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -50,6 +50,9 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { case TargetLowering::TypeLegal: case TargetLowering::TypePromoteInteger: break; + case TargetLowering::TypePromoteFloat: + llvm_unreachable("Bitcast of a promotion-needing float should never need" + "expansion"); case TargetLowering::TypeSoftenFloat: // Convert the integer operand instead. SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); @@ -117,7 +120,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { SmallVector<SDValue, 8> Vals; for (unsigned i = 0; i < NumElems; ++i) Vals.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemVT, - CastInOp, DAG.getConstant(i, + CastInOp, DAG.getConstant(i, dl, TLI.getVectorIdxTy()))); // Build Lo, Hi pair by pairing extracted elements if needed. @@ -170,7 +173,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Increment the pointer to the other half. unsigned IncrementSize = NOutVT.getSizeInBits() / 8; StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getConstant(IncrementSize, + DAG.getConstant(IncrementSize, dl, StackPtr.getValueType())); // Load the second half from the stack slot. @@ -235,7 +238,7 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx); Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, - DAG.getConstant(1, Idx.getValueType())); + DAG.getConstant(1, dl, Idx.getValueType())); Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx); if (TLI.isBigEndian()) @@ -267,7 +270,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits() / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), isVolatile, isNonTemporal, isInvariant, @@ -436,7 +439,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx); Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, - DAG.getConstant(1, Idx.getValueType())); + DAG.getConstant(1, dl, Idx.getValueType())); NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx); // Convert the new vector to the old vector type. @@ -485,7 +488,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { isVolatile, isNonTemporal, Alignment, AAInfo); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getPointerInfo().getWithOffset(IncrementSize), isVolatile, isNonTemporal, diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 3a8c276..c06227b 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -196,6 +196,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), 0); + bool HasVectorValue = false; if (Op.getOpcode() == ISD::LOAD) { LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); ISD::LoadExtType ExtType = LD->getExtensionType(); @@ -207,6 +208,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { return TranslateLegalizeResults(Op, Result); case TargetLowering::Custom: if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) { + if (Lowered == Result) + return TranslateLegalizeResults(Op, Lowered); Changed = true; if (Lowered->getNumValues() != Op->getNumValues()) { // This expanded to something other than the load. Assume the @@ -232,16 +235,18 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: return TranslateLegalizeResults(Op, Result); - case TargetLowering::Custom: - Changed = true; - return TranslateLegalizeResults(Op, TLI.LowerOperation(Result, DAG)); + case TargetLowering::Custom: { + SDValue Lowered = TLI.LowerOperation(Result, DAG); + Changed = Lowered != Result; + return TranslateLegalizeResults(Op, Lowered); + } case TargetLowering::Expand: Changed = true; return LegalizeOp(ExpandStore(Op)); } - } + } else if (Op.getOpcode() == ISD::MSCATTER) + HasVectorValue = true; - bool HasVectorValue = false; for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end(); J != E; ++J) @@ -317,6 +322,10 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::ANY_EXTEND_VECTOR_INREG: case ISD::SIGN_EXTEND_VECTOR_INREG: case ISD::ZERO_EXTEND_VECTOR_INREG: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: QueryType = Node->getValueType(0); break; case ISD::FP_ROUND_INREG: @@ -326,6 +335,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::UINT_TO_FP: QueryType = Node->getOperand(0).getValueType(); break; + case ISD::MSCATTER: + QueryType = cast<MaskedScatterSDNode>(Node)->getValue().getValueType(); + break; } switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) { @@ -375,8 +387,8 @@ SDValue VectorLegalizer::Promote(SDValue Op) { // There are currently two cases of vector promotion: // 1) Bitcasting a vector of integers to a different type to a vector of the - // same overall length. For example, x86 promotes ISD::AND on v2i32 to v1i64. - // 2) Extending a vector of floats to a vector of the same number oflarger + // same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64. + // 2) Extending a vector of floats to a vector of the same number of larger // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32. MVT VT = Op.getSimpleValueType(); assert(Op.getNode()->getNumValues() == 1 && @@ -403,7 +415,7 @@ SDValue VectorLegalizer::Promote(SDValue Op) { if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) || (VT.isVector() && VT.getVectorElementType().isFloatingPoint() && NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())) - return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0)); + return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0, dl)); else return DAG.getNode(ISD::BITCAST, dl, VT, Op); } @@ -512,7 +524,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR, LD->getPointerInfo().getWithOffset(Offset), LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment(), + LD->isInvariant(), + MinAlign(LD->getAlignment(), Offset), LD->getAAInfo()); } else { EVT LoadVT = WideVT; @@ -524,13 +537,15 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { LD->getPointerInfo().getWithOffset(Offset), LoadVT, LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), - LD->getAlignment(), LD->getAAInfo()); + MinAlign(LD->getAlignment(), Offset), + LD->getAAInfo()); } RemainingBytes -= LoadBytes; Offset += LoadBytes; BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getConstant(LoadBytes, BasePTR.getValueType())); + DAG.getConstant(LoadBytes, dl, + BasePTR.getValueType())); LoadVals.push_back(ScalarLoad.getValue(0)); LoadChains.push_back(ScalarLoad.getValue(1)); @@ -538,7 +553,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { // Extract bits, pack and extend/trunc them into destination type. unsigned SrcEltBits = SrcEltVT.getSizeInBits(); - SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, WideVT); + SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, dl, WideVT); unsigned BitOffset = 0; unsigned WideIdx = 0; @@ -548,7 +563,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { SDValue Lo, Hi, ShAmt; if (BitOffset < WideBits) { - ShAmt = DAG.getConstant(BitOffset, TLI.getShiftAmountTy(WideVT)); + ShAmt = DAG.getConstant(BitOffset, dl, TLI.getShiftAmountTy(WideVT)); Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt); Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask); } @@ -558,7 +573,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { WideIdx++; BitOffset -= WideBits; if (BitOffset > 0) { - ShAmt = DAG.getConstant(SrcEltBits - BitOffset, + ShAmt = DAG.getConstant(SrcEltBits - BitOffset, dl, TLI.getShiftAmountTy(WideVT)); Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt); Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask); @@ -577,7 +592,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT); break; case ISD::SEXTLOAD: - ShAmt = DAG.getConstant(WideBits - SrcEltBits, + ShAmt = DAG.getConstant(WideBits - SrcEltBits, dl, TLI.getShiftAmountTy(WideVT)); Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt); Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt); @@ -595,10 +610,10 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride), SrcVT.getScalarType(), LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), - LD->getAlignment(), LD->getAAInfo()); + MinAlign(LD->getAlignment(), Idx * Stride), LD->getAAInfo()); BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getConstant(Stride, BasePTR.getValueType())); + DAG.getConstant(Stride, dl, BasePTR.getValueType())); Vals.push_back(ScalarLoad.getValue(0)); LoadChains.push_back(ScalarLoad.getValue(1)); @@ -649,15 +664,16 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { SmallVector<SDValue, 8> Stores; for (unsigned Idx = 0; Idx < NumElem; Idx++) { SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - RegSclVT, Value, DAG.getConstant(Idx, TLI.getVectorIdxTy())); + RegSclVT, Value, DAG.getConstant(Idx, dl, TLI.getVectorIdxTy())); // This scalar TruncStore may be illegal, but we legalize it later. SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR, ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT, - isVolatile, isNonTemporal, Alignment, AAInfo); + isVolatile, isNonTemporal, MinAlign(Alignment, Idx*Stride), + AAInfo); BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getConstant(Stride, BasePTR.getValueType())); + DAG.getConstant(Stride, dl, BasePTR.getValueType())); Stores.push_back(Store); } @@ -727,8 +743,9 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { EVT BitTy = MaskTy.getScalarType(); Mask = DAG.getSelect(DL, BitTy, Mask, - DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), BitTy), - DAG.getConstant(0, BitTy)); + DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, + BitTy), + DAG.getConstant(0, DL, BitTy)); // Broadcast the mask so that the entire vector is all-one or all zero. SmallVector<SDValue, 8> Ops(NumElem, Mask); @@ -741,7 +758,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2); SDValue AllOnes = DAG.getConstant( - APInt::getAllOnesValue(BitTy.getSizeInBits()), MaskTy); + APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, MaskTy); SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes); Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask); @@ -763,7 +780,7 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { unsigned BW = VT.getScalarType().getSizeInBits(); unsigned OrigBW = OrigTy.getScalarType().getSizeInBits(); - SDValue ShiftSz = DAG.getConstant(BW - OrigBW, VT); + SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT); Op = Op.getOperand(0); Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz); @@ -810,7 +827,7 @@ SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) { // without full scalarization than the sign extension does. unsigned EltWidth = VT.getVectorElementType().getSizeInBits(); unsigned SrcEltWidth = SrcVT.getVectorElementType().getSizeInBits(); - SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, VT); + SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT); return DAG.getNode(ISD::SRA, DL, VT, DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount), ShiftAmount); @@ -829,7 +846,7 @@ SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) { // Build up a zero vector to blend into this one. EVT SrcScalarVT = SrcVT.getScalarType(); - SDValue ScalarZero = DAG.getTargetConstant(0, SrcScalarVT); + SDValue ScalarZero = DAG.getTargetConstant(0, DL, SrcScalarVT); SmallVector<SDValue, 4> BuildVectorOperands(NumSrcElements, ScalarZero); SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, DL, SrcVT, BuildVectorOperands); @@ -910,7 +927,7 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2); SDValue AllOnes = DAG.getConstant( - APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()), VT); + APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()), DL, VT); SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes); Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask); @@ -933,16 +950,16 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide"); unsigned BW = SVT.getSizeInBits(); - SDValue HalfWord = DAG.getConstant(BW/2, VT); + SDValue HalfWord = DAG.getConstant(BW/2, DL, VT); // Constants to clear the upper part of the word. // Notice that we can also use SHL+SHR, but using a constant is slightly // faster on x86. uint64_t HWMask = (SVT.getSizeInBits()==64)?0x00000000FFFFFFFF:0x0000FFFF; - SDValue HalfWordMask = DAG.getConstant(HWMask, VT); + SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT); // Two to the power of half-word-size. - SDValue TWOHW = DAG.getConstantFP((1<<(BW/2)), Op.getValueType()); + SDValue TWOHW = DAG.getConstantFP(1 << (BW/2), DL, Op.getValueType()); // Clear upper part of LO, lower HI SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord); @@ -961,8 +978,9 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { - SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType()); - return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), + SDLoc DL(Op); + SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType()); + return DAG.getNode(ISD::FSUB, DL, Op.getValueType(), Zero, Op.getOperand(0)); } return DAG.UnrollVectorOp(Op.getNode()); @@ -978,16 +996,16 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { SmallVector<SDValue, 8> Ops(NumElems); for (unsigned i = 0; i < NumElems; ++i) { SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, - DAG.getConstant(i, TLI.getVectorIdxTy())); + DAG.getConstant(i, dl, TLI.getVectorIdxTy())); SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, - DAG.getConstant(i, TLI.getVectorIdxTy())); + DAG.getConstant(i, dl, TLI.getVectorIdxTy())); Ops[i] = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(*DAG.getContext(), TmpEltVT), LHSElem, RHSElem, CC); Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], DAG.getConstant(APInt::getAllOnesValue - (EltVT.getSizeInBits()), EltVT), - DAG.getConstant(0, EltVT)); + (EltVT.getSizeInBits()), dl, EltVT), + DAG.getConstant(0, dl, EltVT)); } return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 63671f7..445e882 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -252,7 +252,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { } else { EVT VT = OpVT.getVectorElementType(); Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, DL, TLI.getVectorIdxTy())); } return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op); } @@ -308,7 +308,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent); // Vector read from all ones, scalar expects a single 1 so mask. Cond = DAG.getNode(ISD::AND, SDLoc(N), CondVT, - Cond, DAG.getConstant(1, CondVT)); + Cond, DAG.getConstant(1, SDLoc(N), CondVT)); break; case TargetLowering::ZeroOrNegativeOneBooleanContent: assert(VecBool == TargetLowering::UndefinedBooleanContent || @@ -385,9 +385,9 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { } else { EVT VT = OpVT.getVectorElementType(); LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, DL, TLI.getVectorIdxTy())); RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, DL, TLI.getVectorIdxTy())); } // Turn it into a scalar SETCC. @@ -600,6 +600,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::MLOAD: SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi); break; + case ISD::MGATHER: + SplitVecRes_MGATHER(cast<MaskedGatherSDNode>(N), Lo, Hi); + break; case ISD::SETCC: SplitVecRes_SETCC(N, Lo, Hi); break; @@ -668,6 +671,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::UREM: case ISD::SREM: case ISD::FREM: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: SplitVecRes_BinOp(N, Lo, Hi); break; case ISD::FMA: @@ -723,6 +730,7 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, switch (getTypeAction(InVT)) { case TargetLowering::TypeLegal: case TargetLowering::TypePromoteInteger: + case TargetLowering::TypePromoteFloat: case TargetLowering::TypeSoftenFloat: case TargetLowering::TypeScalarizeVector: case TargetLowering::TypeWidenVector: @@ -810,7 +818,7 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, - DAG.getConstant(IdxVal + LoVT.getVectorNumElements(), + DAG.getConstant(IdxVal + LoVT.getVectorNumElements(), dl, TLI.getVectorIdxTy())); } @@ -844,7 +852,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getConstant(IncrementSize, StackPtr.getValueType())); + DAG.getConstant(IncrementSize, dl, StackPtr.getValueType())); // Load the Hi part from the stack slot. Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), @@ -891,7 +899,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, Lo.getValueType(), Lo, Elt, Idx); else Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt, - DAG.getConstant(IdxVal - LoNumElts, + DAG.getConstant(IdxVal - LoNumElts, dl, TLI.getVectorIdxTy())); return; } @@ -923,7 +931,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Increment the pointer to the other part. unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getConstant(IncrementSize, StackPtr.getValueType())); + DAG.getConstant(IncrementSize, dl, + StackPtr.getValueType())); // Load the Hi part from the stack slot. Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), @@ -966,7 +975,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, LD->getPointerInfo().getWithOffset(IncrementSize), HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment, @@ -1021,7 +1030,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); MMO = DAG.getMachineFunction(). getMachineMemOperand(MLD->getPointerInfo(), @@ -1043,6 +1052,54 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, } +void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, + SDValue &Lo, SDValue &Hi) { + EVT LoVT, HiVT; + SDLoc dl(MGT); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0)); + + SDValue Ch = MGT->getChain(); + SDValue Ptr = MGT->getBasePtr(); + SDValue Mask = MGT->getMask(); + unsigned Alignment = MGT->getOriginalAlignment(); + + SDValue MaskLo, MaskHi; + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + + EVT MemoryVT = MGT->getMemoryVT(); + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue Src0Lo, Src0Hi; + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(MGT->getValue(), dl); + + SDValue IndexHi, IndexLo; + std::tie(IndexLo, IndexHi) = DAG.SplitVector(MGT->getIndex(), dl); + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MGT->getPointerInfo(), + MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), + Alignment, MGT->getAAInfo(), MGT->getRanges()); + + SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo}; + Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo, + MMO); + + SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi}; + Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi, + MMO); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(MGT, 1), Ch); +} + + void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(N->getValueType(0).isVector() && N->getOperand(0).getValueType().isVector() && @@ -1236,8 +1293,9 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, // Extract the vector element by hand. SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, - Inputs[Input], DAG.getConstant(Idx, - TLI.getVectorIdxTy()))); + Inputs[Input], + DAG.getConstant(Idx, dl, + TLI.getVectorIdxTy()))); } // Construct the Lo/Hi output using a BUILD_VECTOR. @@ -1293,7 +1351,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break; - case ISD::TRUNCATE: Res = SplitVecOp_TRUNCATE(N); break; + case ISD::TRUNCATE: + Res = SplitVecOp_TruncateHelper(N); + break; case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break; case ISD::STORE: Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo); @@ -1301,21 +1361,37 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::MSTORE: Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo); break; + case ISD::MSCATTER: + Res = SplitVecOp_MSCATTER(cast<MaskedScatterSDNode>(N), OpNo); + break; + case ISD::MGATHER: + Res = SplitVecOp_MGATHER(cast<MaskedGatherSDNode>(N), OpNo); + break; case ISD::VSELECT: Res = SplitVecOp_VSELECT(N, OpNo); break; - case ISD::CTTZ: - case ISD::CTLZ: - case ISD::CTPOP: - case ISD::FP_EXTEND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: + if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0))) + Res = SplitVecOp_TruncateHelper(N); + else + Res = SplitVecOp_UnaryOp(N); + break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: - case ISD::FTRUNC: + if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0))) + Res = SplitVecOp_TruncateHelper(N); + else + Res = SplitVecOp_UnaryOp(N); + break; + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::FP_EXTEND: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: + case ISD::FTRUNC: Res = SplitVecOp_UnaryOp(N); break; } @@ -1420,7 +1496,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx); } else { return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi, - DAG.getConstant(IdxVal - LoElts, Idx.getValueType())); + DAG.getConstant(IdxVal - LoElts, dl, + Idx.getValueType())); } } @@ -1441,7 +1518,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { if (IdxVal < LoElts) return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0); return SDValue(DAG.UpdateNodeOperands(N, Hi, - DAG.getConstant(IdxVal - LoElts, + DAG.getConstant(IdxVal - LoElts, SDLoc(N), Idx.getValueType())), 0); } @@ -1462,6 +1539,68 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { MachinePointerInfo(), EltVT, false, false, false, 0); } +SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, + unsigned OpNo) { + EVT LoVT, HiVT; + SDLoc dl(MGT); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0)); + + SDValue Ch = MGT->getChain(); + SDValue Ptr = MGT->getBasePtr(); + SDValue Index = MGT->getIndex(); + SDValue Mask = MGT->getMask(); + unsigned Alignment = MGT->getOriginalAlignment(); + + SDValue MaskLo, MaskHi; + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + + EVT MemoryVT = MGT->getMemoryVT(); + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue Src0Lo, Src0Hi; + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(MGT->getValue(), dl); + + SDValue IndexHi, IndexLo; + if (Index.getNode()) + std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl); + else + IndexLo = IndexHi = Index; + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MGT->getPointerInfo(), + MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), + Alignment, MGT->getAAInfo(), MGT->getRanges()); + + SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo}; + SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, + OpsLo, MMO); + + MMO = DAG.getMachineFunction(). + getMachineMemOperand(MGT->getPointerInfo(), + MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), + Alignment, MGT->getAAInfo(), + MGT->getRanges()); + + SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi}; + SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, + OpsHi, MMO); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(MGT, 1), Ch); + + SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MGT->getValueType(0), Lo, + Hi); + ReplaceValueWith(SDValue(MGT, 0), Res); + return SDValue(); +} + SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo) { SDValue Ch = N->getChain(); @@ -1497,7 +1636,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); MMO = DAG.getMachineFunction(). getMachineMemOperand(N->getPointerInfo(), @@ -1507,11 +1646,64 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, N->isTruncatingStore()); - // Build a factor node to remember that this store is independent of the // other one. return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); +} +SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, + unsigned OpNo) { + SDValue Ch = N->getChain(); + SDValue Ptr = N->getBasePtr(); + SDValue Mask = N->getMask(); + SDValue Index = N->getIndex(); + SDValue Data = N->getValue(); + EVT MemoryVT = N->getMemoryVT(); + unsigned Alignment = N->getOriginalAlignment(); + SDLoc DL(N); + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue DataLo, DataHi; + GetSplitVector(Data, DataLo, DataHi); + SDValue MaskLo, MaskHi; + GetSplitVector(Mask, MaskLo, MaskHi); + + SDValue PtrLo, PtrHi; + if (Ptr.getValueType().isVector()) // gather form vector of pointers + std::tie(PtrLo, PtrHi) = DAG.SplitVector(Ptr, DL); + else + PtrLo = PtrHi = Ptr; + + SDValue IndexHi, IndexLo; + if (Index.getNode()) + std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); + else + IndexLo = IndexHi = Index; + + SDValue Lo, Hi; + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(N->getPointerInfo(), + MachineMemOperand::MOStore, LoMemVT.getStoreSize(), + Alignment, N->getAAInfo(), N->getRanges()); + + SDValue OpsLo[] = {Ch, DataLo, MaskLo, PtrLo, IndexLo}; + Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), + DL, OpsLo, MMO); + + MMO = DAG.getMachineFunction(). + getMachineMemOperand(N->getPointerInfo(), + MachineMemOperand::MOStore, HiMemVT.getStoreSize(), + Alignment, N->getAAInfo(), N->getRanges()); + + SDValue OpsHi[] = {Ch, DataHi, MaskHi, PtrHi, IndexHi}; + Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), + DL, OpsHi, MMO); + + // Build a factor node to remember that this store is independent of the + // other one. + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); } SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -1544,7 +1736,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); if (isTruncating) Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, @@ -1573,7 +1765,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { for (unsigned i = 0, e = Op.getValueType().getVectorNumElements(); i != e; ++i) { Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, - Op, DAG.getConstant(i, TLI.getVectorIdxTy()))); + Op, DAG.getConstant(i, DL, TLI.getVectorIdxTy()))); } } @@ -1581,7 +1773,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), Elts); } -SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { +SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { // The result type is legal, but the input type is illegal. If splitting // ends up with the result type of each half still being legal, just // do that. If, however, that would result in an illegal result type, @@ -1603,6 +1795,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { EVT InVT = InVec->getValueType(0); EVT OutVT = N->getValueType(0); unsigned NumElements = OutVT.getVectorNumElements(); + bool IsFloat = OutVT.isFloatingPoint(); + // Widening should have already made sure this is a power-two vector // if we're trying to split it at all. assert() that's true, just in case. assert(!(NumElements & 1) && "Splitting vector, but not in half!"); @@ -1621,11 +1815,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { SDValue InLoVec, InHiVec; std::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL); // Truncate them to 1/2 the element size. - EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); + EVT HalfElementVT = IsFloat ? + EVT::getFloatingPointVT(InElementSize/2) : + EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements/2); - SDValue HalfLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InLoVec); - SDValue HalfHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InHiVec); + SDValue HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec); + SDValue HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec); // Concatenate them to get the full intermediate truncation result. EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements); SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo, @@ -1634,7 +1830,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { // type. This should normally be something that ends up being legal directly, // but in theory if a target has very wide vectors and an annoyingly // restricted set of legal types, this split can chain to build things up. - return DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec); + return IsFloat ? + DAG.getNode(ISD::FP_ROUND, DL, OutVT, InterVec, + DAG.getTargetConstant(0, DL, TLI.getPointerTy())) : + DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec); } SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { @@ -1865,9 +2064,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { while (CurNumElts != 0) { while (CurNumElts >= NumElts) { SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, - DAG.getConstant(Idx, TLI.getVectorIdxTy())); + DAG.getConstant(Idx, dl, + TLI.getVectorIdxTy())); SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, - DAG.getConstant(Idx, TLI.getVectorIdxTy())); + DAG.getConstant(Idx, dl, + TLI.getVectorIdxTy())); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2); Idx += NumElts; CurNumElts -= NumElts; @@ -1880,11 +2081,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { if (NumElts == 1) { for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, - InOp1, DAG.getConstant(Idx, - TLI.getVectorIdxTy())); + InOp1, + DAG.getConstant(Idx, dl, + TLI.getVectorIdxTy())); SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, - InOp2, DAG.getConstant(Idx, - TLI.getVectorIdxTy())); + InOp2, + DAG.getConstant(Idx, dl, + TLI.getVectorIdxTy())); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, EOp1, EOp2); } @@ -1922,8 +2125,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { unsigned NumToInsert = ConcatEnd - Idx - 1; for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) { VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, - ConcatOps[OpIdx], DAG.getConstant(i, - TLI.getVectorIdxTy())); + ConcatOps[OpIdx], + DAG.getConstant(i, dl, TLI.getVectorIdxTy())); } ConcatOps[Idx+1] = VecOp; ConcatEnd = Idx + 2; @@ -2009,9 +2212,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { } if (InVTNumElts % WidenNumElts == 0) { - SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, - InOp, DAG.getConstant(0, - TLI.getVectorIdxTy())); + SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp, + DAG.getConstant(0, DL, TLI.getVectorIdxTy())); // Extract the input and convert the shorten input vector. if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InVal); @@ -2026,7 +2228,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { unsigned i; for (i=0; i < MinElts; ++i) { SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, - DAG.getConstant(i, TLI.getVectorIdxTy())); + DAG.getConstant(i, DL, TLI.getVectorIdxTy())); if (N->getNumOperands() == 1) Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val); else @@ -2114,6 +2316,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp); break; case TargetLowering::TypeSoftenFloat: + case TargetLowering::TypePromoteFloat: case TargetLowering::TypeExpandInteger: case TargetLowering::TypeExpandFloat: case TargetLowering::TypeScalarizeVector: @@ -2252,7 +2455,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { InOp = GetWidenedVector(InOp); for (unsigned j=0; j < NumInElts; ++j) Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(j, TLI.getVectorIdxTy())); + DAG.getConstant(j, dl, TLI.getVectorIdxTy())); } SDValue UndefVal = DAG.getUNDEF(EltVT); for (; Idx < WidenNumElts; ++Idx) @@ -2310,7 +2513,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { if (InVTNumElts % WidenNumElts == 0) { // Extract the input and convert the shorten input vector. InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, dl, TLI.getVectorIdxTy())); return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, SatOp, CvtCode); } @@ -2326,7 +2529,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { unsigned i; for (i=0; i < MinElts; ++i) { SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getConstant(i, TLI.getVectorIdxTy())); + DAG.getConstant(i, dl, TLI.getVectorIdxTy())); Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp, SatOp, CvtCode); } @@ -2369,7 +2572,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { unsigned i; for (i=0; i < NumElts; ++i) Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(IdxVal+i, TLI.getVectorIdxTy())); + DAG.getConstant(IdxVal + i, dl, TLI.getVectorIdxTy())); SDValue UndefVal = DAG.getUNDEF(EltVT); for (; i < WidenNumElts; ++i) @@ -2432,7 +2635,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { unsigned NumConcat = WidenNumElts / MaskNumElts; SmallVector<SDValue, 16> Ops(NumConcat); - SDValue ZeroVal = DAG.getConstant(0, MaskVT); + SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT); Ops[0] = Mask; for (unsigned i = 1; i != NumConcat; ++i) Ops[i] = ZeroVal; @@ -2553,6 +2756,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { assert(InVT.isVector() && "can not widen non-vector type"); EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), WidenNumElts); + + // The input and output types often differ here, and it could be that while + // we'd prefer to widen the result type, the input operands have been split. + // In this case, we also need to split the result of this node as well. + if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) { + SDValue SplitVSetCC = SplitVecOp_VSETCC(N); + SDValue Res = ModifyToType(SplitVSetCC, WidenVT); + return Res; + } + InOp1 = GetWidenedVector(InOp1); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); @@ -2662,10 +2875,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) { if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements()) InOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FixedVT, DAG.getUNDEF(FixedVT), InOp, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, DL, TLI.getVectorIdxTy())); else InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, DL, TLI.getVectorIdxTy())); break; } } @@ -2710,7 +2923,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { for (unsigned i=0; i < NumElts; ++i) Ops[i] = DAG.getNode(Opcode, dl, EltVT, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getConstant(i, TLI.getVectorIdxTy()))); + DAG.getConstant(i, dl, + TLI.getVectorIdxTy()))); return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } @@ -2731,7 +2945,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { if (TLI.isTypeLegal(NewVT)) { SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, dl, TLI.getVectorIdxTy())); } } @@ -2759,7 +2973,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { InOp = GetWidenedVector(InOp); for (unsigned j=0; j < NumInElts; ++j) Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(j, TLI.getVectorIdxTy())); + DAG.getConstant(j, dl, TLI.getVectorIdxTy())); } return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } @@ -2814,7 +3028,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { unsigned NumConcat = WidenNumElts / MaskNumElts; SmallVector<SDValue, 16> Ops(NumConcat); - SDValue ZeroVal = DAG.getConstant(0, MaskVT); + SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT); Ops[0] = Mask; for (unsigned i = 1; i != NumConcat; ++i) Ops[i] = ZeroVal; @@ -2849,8 +3063,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { SVT.getVectorElementType(), N->getValueType(0).getVectorNumElements()); SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, - ResVT, WideSETCC, DAG.getConstant(0, - TLI.getVectorIdxTy())); + ResVT, WideSETCC, + DAG.getConstant(0, dl, TLI.getVectorIdxTy())); return PromoteTargetBoolean(CC, N->getValueType(0)); } @@ -2888,7 +3102,10 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, unsigned MemVTWidth = MemVT.getSizeInBits(); if (MemVT.getSizeInBits() <= WidenEltWidth) break; - if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 && + auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT); + if ((Action == TargetLowering::TypeLegal || + Action == TargetLowering::TypePromoteInteger) && + (WidenWidth % MemVTWidth) == 0 && isPowerOf2_32(WidenWidth / MemVTWidth) && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { @@ -2944,7 +3161,7 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, LdTy = NewLdTy; } VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i], - DAG.getConstant(Idx++, TLI.getVectorIdxTy())); + DAG.getConstant(Idx++, dl, TLI.getVectorIdxTy())); } return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp); } @@ -3015,7 +3232,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, unsigned Increment = NewVTWidth / 8; Offset += Increment; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getConstant(Increment, BasePtr.getValueType())); + DAG.getConstant(Increment, dl, BasePtr.getValueType())); SDValue L; if (LdWidth < NewVTWidth) { @@ -3141,7 +3358,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, for (i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getConstant(Offset, + DAG.getConstant(Offset, dl, BasePtr.getValueType())); Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, LD->getPointerInfo().getWithOffset(Offset), LdEltVT, @@ -3192,7 +3409,8 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, unsigned NumVTElts = NewVT.getVectorNumElements(); do { SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp, - DAG.getConstant(Idx, TLI.getVectorIdxTy())); + DAG.getConstant(Idx, dl, + TLI.getVectorIdxTy())); StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, @@ -3201,7 +3419,8 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, Offset += Increment; Idx += NumVTElts; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getConstant(Increment, BasePtr.getValueType())); + DAG.getConstant(Increment, dl, + BasePtr.getValueType())); } while (StWidth != 0 && StWidth >= NewVTWidth); } else { // Cast the vector to the scalar type we can store @@ -3212,7 +3431,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, Idx = Idx * ValEltWidth / NewVTWidth; do { SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp, - DAG.getConstant(Idx++, TLI.getVectorIdxTy())); + DAG.getConstant(Idx++, dl, TLI.getVectorIdxTy())); StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, @@ -3220,7 +3439,8 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StWidth -= NewVTWidth; Offset += Increment; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getConstant(Increment, BasePtr.getValueType())); + DAG.getConstant(Increment, dl, + BasePtr.getValueType())); } while (StWidth != 0 && StWidth >= NewVTWidth); // Restore index back to be relative to the original widen element type Idx = Idx * NewVTWidth / ValEltWidth; @@ -3258,7 +3478,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, unsigned Increment = ValEltVT.getSizeInBits() / 8; unsigned NumElts = StVT.getVectorNumElements(); SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, dl, TLI.getVectorIdxTy())); StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo(), StEltVT, isVolatile, isNonTemporal, Align, @@ -3266,10 +3486,11 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, unsigned Offset = Increment; for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), - BasePtr, DAG.getConstant(Offset, - BasePtr.getValueType())); + BasePtr, + DAG.getConstant(Offset, dl, + BasePtr.getValueType())); SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, dl, TLI.getVectorIdxTy())); StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, ST->getPointerInfo().getWithOffset(Offset), StEltVT, isVolatile, isNonTemporal, @@ -3306,7 +3527,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { if (WidenNumElts < InNumElts && InNumElts % WidenNumElts) return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, dl, TLI.getVectorIdxTy())); // Fall back to extract and build. SmallVector<SDValue, 16> Ops(WidenNumElts); @@ -3315,7 +3536,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { unsigned Idx; for (Idx = 0; Idx < MinNumElts; ++Idx) Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(Idx, TLI.getVectorIdxTy())); + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy())); SDValue UndefVal = DAG.getUNDEF(EltVT); for ( ; Idx < WidenNumElts; ++Idx) diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index db38b76..6303422 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -47,7 +47,7 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) TRI = STI.getRegisterInfo(); TLI = IS->TLI; TII = STI.getInstrInfo(); - ResourcesModel = TII->CreateTargetScheduleState(STI); + ResourcesModel.reset(TII->CreateTargetScheduleState(STI)); // This hard requirement could be relaxed, but for now // do not let it procede. assert(ResourcesModel && "Unimplemented CreateTargetScheduleState."); @@ -637,17 +637,3 @@ void ResourcePriorityQueue::remove(SUnit *SU) { Queue.pop_back(); } - - -#ifdef NDEBUG -void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {} -#else -void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const { - ResourcePriorityQueue q = *this; - while (!q.empty()) { - SUnit *su = q.pop(); - dbgs() << "Height " << su->getHeight() << ": "; - su->dump(DAG); - } -} -#endif diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index bce69d7..c27f8de 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -35,7 +35,6 @@ public: FRAMEIX = 2 // value is contents of a stack location }; private: - enum DbgValueKind kind; union { struct { SDNode *Node; // valid for expressions @@ -46,17 +45,18 @@ private: } u; MDNode *Var; MDNode *Expr; - bool IsIndirect; uint64_t Offset; DebugLoc DL; unsigned Order; - bool Invalid; + enum DbgValueKind kind; + bool IsIndirect; + bool Invalid = false; + public: // Constructor for non-constants. SDDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R, bool indir, uint64_t off, DebugLoc dl, unsigned O) - : Var(Var), Expr(Expr), IsIndirect(indir), Offset(off), DL(dl), Order(O), - Invalid(false) { + : Var(Var), Expr(Expr), Offset(off), DL(dl), Order(O), IsIndirect(indir) { kind = SDNODE; u.s.Node = N; u.s.ResNo = R; @@ -65,8 +65,7 @@ public: // Constructor for constants. SDDbgValue(MDNode *Var, MDNode *Expr, const Value *C, uint64_t off, DebugLoc dl, unsigned O) - : Var(Var), Expr(Expr), IsIndirect(false), Offset(off), DL(dl), Order(O), - Invalid(false) { + : Var(Var), Expr(Expr), Offset(off), DL(dl), Order(O), IsIndirect(false) { kind = CONST; u.Const = C; } @@ -74,8 +73,7 @@ public: // Constructor for frame indices. SDDbgValue(MDNode *Var, MDNode *Expr, unsigned FI, uint64_t off, DebugLoc dl, unsigned O) - : Var(Var), Expr(Expr), IsIndirect(false), Offset(off), DL(dl), Order(O), - Invalid(false) { + : Var(Var), Expr(Expr), Offset(off), DL(dl), Order(O), IsIndirect(false) { kind = FRAMEIX; u.FrameIx = FI; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 5222de1..fd0fa31 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -173,7 +173,7 @@ public: HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this); } - ~ScheduleDAGRRList() { + ~ScheduleDAGRRList() override { delete HazardRec; delete AvailableQueue; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index f2b18fc..3853ada 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -137,13 +137,9 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, } // Helper for AddGlue to clone node operands. -static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, - SmallVectorImpl<EVT> &VTs, +static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, ArrayRef<EVT> VTs, SDValue ExtraOper = SDValue()) { - SmallVector<SDValue, 8> Ops; - for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I) - Ops.push_back(N->getOperand(I)); - + SmallVector<SDValue, 8> Ops(N->op_begin(), N->op_end()); if (ExtraOper.getNode()) Ops.push_back(ExtraOper); @@ -165,7 +161,6 @@ static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, } static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) { - SmallVector<EVT, 4> VTs; SDNode *GlueDestNode = Glue.getNode(); // Don't add glue from a node to itself. @@ -179,9 +174,7 @@ static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) { // Don't add glue to something that already has a glue value. if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return false; - for (unsigned I = 0, E = N->getNumValues(); I != E; ++I) - VTs.push_back(N->getValueType(I)); - + SmallVector<EVT, 4> VTs(N->value_begin(), N->value_end()); if (AddGlue) VTs.push_back(MVT::Glue); @@ -197,11 +190,8 @@ static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) { !N->hasAnyUseOfValue(N->getNumValues() - 1)) && "expected an unused glue value"); - SmallVector<EVT, 4> VTs; - for (unsigned I = 0, E = N->getNumValues()-1; I != E; ++I) - VTs.push_back(N->getValueType(I)); - - CloneNodeWithValues(N, DAG, VTs); + CloneNodeWithValues(N, DAG, + makeArrayRef(N->value_begin(), N->getNumValues() - 1)); } /// ClusterNeighboringLoads - Force nearby loads together by "gluing" them. diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 2cd1f4b..6351fa2 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -44,7 +44,7 @@ namespace llvm { explicit ScheduleDAGSDNodes(MachineFunction &mf); - virtual ~ScheduleDAGSDNodes() {} + ~ScheduleDAGSDNodes() override {} /// Run - perform scheduling. /// diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 418b58e..eee4a4b 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -76,7 +76,7 @@ public: HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this); } - ~ScheduleDAGVLIW() { + ~ScheduleDAGVLIW() override { delete HazardRec; delete AvailableQueue; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index f75d5f4..efd4bd9 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -49,6 +49,7 @@ #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cmath> +#include <utility> using namespace llvm; @@ -196,6 +197,22 @@ bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) { return true; } +/// \brief Return true if the specified node is a BUILD_VECTOR node of +/// all ConstantFPSDNode or undef. +bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) { + if (N->getOpcode() != ISD::BUILD_VECTOR) + return false; + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDValue Op = N->getOperand(i); + if (Op.getOpcode() == ISD::UNDEF) + continue; + if (!isa<ConstantFPSDNode>(Op)) + return false; + } + return true; +} + /// isScalarToVector - Return true if the specified node is a /// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low /// element is not an undef. @@ -499,8 +516,9 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { case ISD::SUB: case ISD::SHL: { const BinaryWithFlagsSDNode *BinNode = cast<BinaryWithFlagsSDNode>(N); - AddBinaryNodeIDCustom(ID, N->getOpcode(), BinNode->hasNoUnsignedWrap(), - BinNode->hasNoSignedWrap(), BinNode->isExact()); + AddBinaryNodeIDCustom( + ID, N->getOpcode(), BinNode->Flags.hasNoUnsignedWrap(), + BinNode->Flags.hasNoSignedWrap(), BinNode->Flags.hasExact()); break; } case ISD::ATOMIC_CMP_SWAP: @@ -860,7 +878,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op, FoldingSetNodeID ID; AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); AddNodeIDCustom(ID, N); - SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos); return Node; } @@ -878,7 +896,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, FoldingSetNodeID ID; AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); AddNodeIDCustom(ID, N); - SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos); return Node; } @@ -895,7 +913,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops, FoldingSetNodeID ID; AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); AddNodeIDCustom(ID, N); - SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos); return Node; } @@ -947,9 +965,9 @@ BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL, if (isBinOpWithFlags(Opcode)) { BinaryWithFlagsSDNode *FN = new (NodeAllocator) BinaryWithFlagsSDNode( Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2); - FN->setHasNoUnsignedWrap(nuw); - FN->setHasNoSignedWrap(nsw); - FN->setIsExact(exact); + FN->Flags.setNoUnsignedWrap(nuw); + FN->Flags.setNoSignedWrap(nsw); + FN->Flags.setExact(exact); return FN; } @@ -959,6 +977,40 @@ BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL, return N; } +SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID, + void *&InsertPos) { + SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + if (N) { + switch (N->getOpcode()) { + default: break; + case ISD::Constant: + case ISD::ConstantFP: + llvm_unreachable("Querying for Constant and ConstantFP nodes requires " + "debug location. Use another overload."); + } + } + return N; +} + +SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID, + DebugLoc DL, void *&InsertPos) { + SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + if (N) { + switch (N->getOpcode()) { + default: break; // Process only regular (non-target) constant nodes. + case ISD::Constant: + case ISD::ConstantFP: + // Erase debug location from the node if the node is used at several + // different places to do not propagate one location to all uses as it + // leads to incorrect debug info. + if (N->getDebugLoc() != DL) + N->setDebugLoc(DebugLoc()); + break; + } + } + return N; +} + void SelectionDAG::clear() { allnodes_clear(); OperandAllocator.Reset(); @@ -1014,7 +1066,7 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) { APInt Imm = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); return getNode(ISD::AND, DL, Op.getValueType(), Op, - getConstant(Imm, Op.getValueType())); + getConstant(Imm, DL, Op.getValueType())); } SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { @@ -1052,7 +1104,7 @@ SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) { EVT EltVT = VT.getScalarType(); SDValue NegOne = - getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); + getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, VT); return getNode(ISD::XOR, DL, VT, Val, NegOne); } @@ -1062,31 +1114,33 @@ SDValue SelectionDAG::getLogicalNOT(SDLoc DL, SDValue Val, EVT VT) { switch (TLI->getBooleanContents(VT)) { case TargetLowering::ZeroOrOneBooleanContent: case TargetLowering::UndefinedBooleanContent: - TrueValue = getConstant(1, VT); + TrueValue = getConstant(1, DL, VT); break; case TargetLowering::ZeroOrNegativeOneBooleanContent: - TrueValue = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), + TrueValue = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, VT); break; } return getNode(ISD::XOR, DL, VT, Val, TrueValue); } -SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT, bool isO) { +SDValue SelectionDAG::getConstant(uint64_t Val, SDLoc DL, EVT VT, bool isT, + bool isO) { EVT EltVT = VT.getScalarType(); assert((EltVT.getSizeInBits() >= 64 || (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) && "getConstant with a uint64_t value that doesn't fit in the type!"); - return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT, isO); + return getConstant(APInt(EltVT.getSizeInBits(), Val), DL, VT, isT, isO); } -SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT, bool isO) +SDValue SelectionDAG::getConstant(const APInt &Val, SDLoc DL, EVT VT, bool isT, + bool isO) { - return getConstant(*ConstantInt::get(*Context, Val), VT, isT, isO); + return getConstant(*ConstantInt::get(*Context, Val), DL, VT, isT, isO); } -SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT, - bool isO) { +SDValue SelectionDAG::getConstant(const ConstantInt &Val, SDLoc DL, EVT VT, + bool isT, bool isO) { assert(VT.isInteger() && "Cannot create FP integer constant!"); EVT EltVT = VT.getScalarType(); @@ -1125,7 +1179,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT, SmallVector<SDValue, 2> EltParts; for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) { EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits) - .trunc(ViaEltSizeInBits), + .trunc(ViaEltSizeInBits), DL, ViaEltVT, isT, isO)); } @@ -1160,12 +1214,13 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT, ID.AddBoolean(isO); void *IP = nullptr; SDNode *N = nullptr; - if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) + if ((N = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))) if (!VT.isVector()) return SDValue(N, 0); if (!N) { - N = new (NodeAllocator) ConstantSDNode(isT, isO, Elt, EltVT); + N = new (NodeAllocator) ConstantSDNode(isT, isO, Elt, DL.getDebugLoc(), + EltVT); CSEMap.InsertNode(N, IP); InsertNode(N); } @@ -1179,16 +1234,17 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT, return Result; } -SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) { - return getConstant(Val, TLI->getPointerTy(), isTarget); +SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, SDLoc DL, bool isTarget) { + return getConstant(Val, DL, TLI->getPointerTy(), isTarget); } - -SDValue SelectionDAG::getConstantFP(const APFloat& V, EVT VT, bool isTarget) { - return getConstantFP(*ConstantFP::get(*getContext(), V), VT, isTarget); +SDValue SelectionDAG::getConstantFP(const APFloat& V, SDLoc DL, EVT VT, + bool isTarget) { + return getConstantFP(*ConstantFP::get(*getContext(), V), DL, VT, isTarget); } -SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ +SDValue SelectionDAG::getConstantFP(const ConstantFP& V, SDLoc DL, EVT VT, + bool isTarget){ assert(VT.isFloatingPoint() && "Cannot create integer FP constant!"); EVT EltVT = VT.getScalarType(); @@ -1202,12 +1258,13 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ ID.AddPointer(&V); void *IP = nullptr; SDNode *N = nullptr; - if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) + if ((N = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))) if (!VT.isVector()) return SDValue(N, 0); if (!N) { - N = new (NodeAllocator) ConstantFPSDNode(isTarget, &V, EltVT); + N = new (NodeAllocator) ConstantFPSDNode(isTarget, &V, DL.getDebugLoc(), + EltVT); CSEMap.InsertNode(N, IP); InsertNode(N); } @@ -1216,25 +1273,25 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ if (VT.isVector()) { SmallVector<SDValue, 8> Ops; Ops.assign(VT.getVectorNumElements(), Result); - // FIXME SDLoc info might be appropriate here Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Ops); } return Result; } -SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { +SDValue SelectionDAG::getConstantFP(double Val, SDLoc DL, EVT VT, + bool isTarget) { EVT EltVT = VT.getScalarType(); if (EltVT==MVT::f32) - return getConstantFP(APFloat((float)Val), VT, isTarget); + return getConstantFP(APFloat((float)Val), DL, VT, isTarget); else if (EltVT==MVT::f64) - return getConstantFP(APFloat(Val), VT, isTarget); + return getConstantFP(APFloat(Val), DL, VT, isTarget); else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::ppcf128 || EltVT==MVT::f16) { bool ignored; APFloat apf = APFloat(Val); apf.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, &ignored); - return getConstantFP(apf, VT, isTarget); + return getConstantFP(apf, DL, VT, isTarget); } else llvm_unreachable("Unsupported type in getConstantFP"); } @@ -1264,7 +1321,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, ID.AddInteger(TargetFlags); ID.AddInteger(GV->getType()->getAddressSpace()); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, DL.getIROrder(), @@ -1281,7 +1338,7 @@ SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { AddNodeIDNode(ID, Opc, getVTList(VT), None); ID.AddInteger(FI); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) FrameIndexSDNode(FI, VT, isTarget); @@ -1300,7 +1357,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, ID.AddInteger(JTI); ID.AddInteger(TargetFlags); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) JumpTableSDNode(JTI, VT, isTarget, @@ -1326,7 +1383,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, ID.AddPointer(C); ID.AddInteger(TargetFlags); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset, @@ -1353,7 +1410,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, C->addSelectionDAGCSEId(ID); ID.AddInteger(TargetFlags); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset, @@ -1371,7 +1428,7 @@ SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, ID.AddInteger(Offset); ID.AddInteger(TargetFlags); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset, @@ -1386,7 +1443,7 @@ SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), None); ID.AddPointer(MBB); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) BasicBlockSDNode(MBB); @@ -1446,13 +1503,7 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) { // N2 to point at N1. static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) { std::swap(N1, N2); - int NElts = M.size(); - for (int i = 0; i != NElts; ++i) { - if (M[i] >= NElts) - M[i] -= NElts; - else if (M[i] >= 0) - M[i] += NElts; - } + ShuffleVectorSDNode::commuteMask(M); } SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, @@ -1484,6 +1535,34 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, if (N1.getOpcode() == ISD::UNDEF) commuteShuffle(N1, N2, MaskVec); + // If shuffling a splat, try to blend the splat instead. We do this here so + // that even when this arises during lowering we don't have to re-handle it. + auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) { + BitVector UndefElements; + SDValue Splat = BV->getSplatValue(&UndefElements); + if (!Splat) + return; + + for (int i = 0; i < (int)NElts; ++i) { + if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + (int)NElts)) + continue; + + // If this input comes from undef, mark it as such. + if (UndefElements[MaskVec[i] - Offset]) { + MaskVec[i] = -1; + continue; + } + + // If we can blend a non-undef lane, use that instead. + if (!UndefElements[i]) + MaskVec[i] = i + Offset; + } + }; + if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1)) + BlendSplat(N1BV, 0); + if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2)) + BlendSplat(N2BV, NElts); + // Canonicalize all index into lhs, -> shuffle lhs, undef // Canonicalize all index into rhs, -> shuffle rhs, undef bool AllLHS = true, AllRHS = true; @@ -1513,9 +1592,10 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, return getUNDEF(VT); // If Identity shuffle return that node. - bool Identity = true; + bool Identity = true, AllSame = true; for (unsigned i = 0; i != NElts; ++i) { if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false; + if (MaskVec[i] != MaskVec[0]) AllSame = false; } if (Identity && NElts) return N1; @@ -1537,18 +1617,35 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, if (Splat && Splat.getOpcode() == ISD::UNDEF) return getUNDEF(VT); + bool SameNumElts = + V.getValueType().getVectorNumElements() == VT.getVectorNumElements(); + // We only have a splat which can skip shuffles if there is a splatted // value and no undef lanes rearranged by the shuffle. if (Splat && UndefElements.none()) { // Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the // number of elements match or the value splatted is a zero constant. - if (V.getValueType().getVectorNumElements() == - VT.getVectorNumElements()) + if (SameNumElts) return N1; if (auto *C = dyn_cast<ConstantSDNode>(Splat)) if (C->isNullValue()) return N1; } + + // If the shuffle itself creates a splat, build the vector directly. + if (AllSame && SameNumElts) { + const SDValue &Splatted = BV->getOperand(MaskVec[0]); + SmallVector<SDValue, 8> Ops(NElts, Splatted); + + EVT BuildVT = BV->getValueType(0); + SDValue NewBV = getNode(ISD::BUILD_VECTOR, dl, BuildVT, Ops); + + // We may have jumped through bitcasts, so the type of the + // BUILD_VECTOR may not match the type of the shuffle. + if (BuildVT != VT) + NewBV = getNode(ISD::BITCAST, dl, VT, NewBV); + return NewBV; + } } } @@ -1559,7 +1656,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, ID.AddInteger(MaskVec[i]); void* IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) return SDValue(E, 0); // Allocate the mask array for the node out of the BumpPtrAllocator, since @@ -1579,19 +1676,8 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) { MVT VT = SV.getSimpleValueType(0); - unsigned NumElems = VT.getVectorNumElements(); - SmallVector<int, 8> MaskVec; - - for (unsigned i = 0; i != NumElems; ++i) { - int Idx = SV.getMaskElt(i); - if (Idx >= 0) { - if (Idx < (int)NumElems) - Idx += NumElems; - else - Idx -= NumElems; - } - MaskVec.push_back(Idx); - } + SmallVector<int, 8> MaskVec(SV.getMask().begin(), SV.getMask().end()); + ShuffleVectorSDNode::commuteMask(MaskVec); SDValue Op0 = SV.getOperand(0); SDValue Op1 = SV.getOperand(1); @@ -1612,7 +1698,7 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl, SDValue Ops[] = { Val, DTy, STy, Rnd, Sat }; AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), Ops); void* IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) return SDValue(E, 0); CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl.getIROrder(), @@ -1628,7 +1714,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { AddNodeIDNode(ID, ISD::Register, getVTList(VT), None); ID.AddInteger(RegNo); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) RegisterSDNode(RegNo, VT); @@ -1642,7 +1728,7 @@ SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), None); ID.AddPointer(RegMask); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) RegisterMaskSDNode(RegMask); @@ -1657,7 +1743,7 @@ SDValue SelectionDAG::getEHLabel(SDLoc dl, SDValue Root, MCSymbol *Label) { AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), Ops); ID.AddPointer(Label); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) EHLabelSDNode(dl.getIROrder(), @@ -1680,7 +1766,7 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, ID.AddInteger(Offset); ID.AddInteger(TargetFlags); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, Offset, @@ -1699,7 +1785,7 @@ SDValue SelectionDAG::getSrcValue(const Value *V) { ID.AddPointer(V); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) SrcValueSDNode(V); @@ -1715,7 +1801,7 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) { ID.AddPointer(MD); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) MDNodeSDNode(MD); @@ -1734,7 +1820,7 @@ SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr, ID.AddInteger(DestAS); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) AddrSpaceCastSDNode(dl.getIROrder(), @@ -1791,13 +1877,14 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, switch (Cond) { default: break; case ISD::SETFALSE: - case ISD::SETFALSE2: return getConstant(0, VT); + case ISD::SETFALSE2: return getConstant(0, dl, VT); case ISD::SETTRUE: case ISD::SETTRUE2: { TargetLowering::BooleanContent Cnt = TLI->getBooleanContents(N1->getValueType(0)); return getConstant( - Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT); + Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, dl, + VT); } case ISD::SETOEQ: @@ -1821,16 +1908,16 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, switch (Cond) { default: llvm_unreachable("Unknown integer setcc!"); - case ISD::SETEQ: return getConstant(C1 == C2, VT); - case ISD::SETNE: return getConstant(C1 != C2, VT); - case ISD::SETULT: return getConstant(C1.ult(C2), VT); - case ISD::SETUGT: return getConstant(C1.ugt(C2), VT); - case ISD::SETULE: return getConstant(C1.ule(C2), VT); - case ISD::SETUGE: return getConstant(C1.uge(C2), VT); - case ISD::SETLT: return getConstant(C1.slt(C2), VT); - case ISD::SETGT: return getConstant(C1.sgt(C2), VT); - case ISD::SETLE: return getConstant(C1.sle(C2), VT); - case ISD::SETGE: return getConstant(C1.sge(C2), VT); + case ISD::SETEQ: return getConstant(C1 == C2, dl, VT); + case ISD::SETNE: return getConstant(C1 != C2, dl, VT); + case ISD::SETULT: return getConstant(C1.ult(C2), dl, VT); + case ISD::SETUGT: return getConstant(C1.ugt(C2), dl, VT); + case ISD::SETULE: return getConstant(C1.ule(C2), dl, VT); + case ISD::SETUGE: return getConstant(C1.uge(C2), dl, VT); + case ISD::SETLT: return getConstant(C1.slt(C2), dl, VT); + case ISD::SETGT: return getConstant(C1.sgt(C2), dl, VT); + case ISD::SETLE: return getConstant(C1.sle(C2), dl, VT); + case ISD::SETGE: return getConstant(C1.sge(C2), dl, VT); } } } @@ -1842,41 +1929,41 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, case ISD::SETEQ: if (R==APFloat::cmpUnordered) return getUNDEF(VT); // fall through - case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, VT); + case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, dl, VT); case ISD::SETNE: if (R==APFloat::cmpUnordered) return getUNDEF(VT); // fall through case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan || - R==APFloat::cmpLessThan, VT); + R==APFloat::cmpLessThan, dl, VT); case ISD::SETLT: if (R==APFloat::cmpUnordered) return getUNDEF(VT); // fall through - case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, VT); + case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, dl, VT); case ISD::SETGT: if (R==APFloat::cmpUnordered) return getUNDEF(VT); // fall through - case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, VT); + case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, dl, VT); case ISD::SETLE: if (R==APFloat::cmpUnordered) return getUNDEF(VT); // fall through case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan || - R==APFloat::cmpEqual, VT); + R==APFloat::cmpEqual, dl, VT); case ISD::SETGE: if (R==APFloat::cmpUnordered) return getUNDEF(VT); // fall through case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan || - R==APFloat::cmpEqual, VT); - case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, VT); - case ISD::SETUO: return getConstant(R==APFloat::cmpUnordered, VT); + R==APFloat::cmpEqual, dl, VT); + case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, dl, VT); + case ISD::SETUO: return getConstant(R==APFloat::cmpUnordered, dl, VT); case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered || - R==APFloat::cmpEqual, VT); - case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, VT); + R==APFloat::cmpEqual, dl, VT); + case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, dl, VT); case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered || - R==APFloat::cmpLessThan, VT); + R==APFloat::cmpLessThan, dl, VT); case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan || - R==APFloat::cmpUnordered, VT); - case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, VT); - case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, VT); + R==APFloat::cmpUnordered, dl, VT); + case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, dl, VT); + case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, dl, VT); } } else { // Ensure that the constant occurs on the RHS. @@ -2323,6 +2410,21 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, KnownZero = APInt::getHighBitsSet(BitWidth, Leaders); break; } + case ISD::EXTRACT_ELEMENT: { + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + const unsigned Index = + cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + const unsigned BitWidth = Op.getValueType().getSizeInBits(); + + // Remove low part of known bits mask + KnownZero = KnownZero.getHiBits(KnownZero.getBitWidth() - Index * BitWidth); + KnownOne = KnownOne.getHiBits(KnownOne.getBitWidth() - Index * BitWidth); + + // Remove high part of known bit mask + KnownZero = KnownZero.trunc(BitWidth); + KnownOne = KnownOne.trunc(BitWidth); + break; + } case ISD::FrameIndex: case ISD::TargetFrameIndex: if (unsigned Align = InferPtrAlignment(Op)) { @@ -2522,6 +2624,21 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ // FIXME: it's tricky to do anything useful for this, but it is an important // case for targets like X86. break; + case ISD::EXTRACT_ELEMENT: { + const int KnownSign = ComputeNumSignBits(Op.getOperand(0), Depth+1); + const int BitWidth = Op.getValueType().getSizeInBits(); + const int Items = + Op.getOperand(0).getValueType().getSizeInBits() / BitWidth; + + // Get reverse index (starting from 1), Op1 value indexes elements from + // little end. Sign starts at big end. + const int rIndex = Items - 1 - + cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + + // If the sign portion ends in our element the substraction gives correct + // result. Otherwise it gives either negative or > bitwidth result + return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0); + } } // If we are looking at the loaded value of the SDNode. @@ -2643,7 +2760,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, getVTList(VT), None); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), @@ -2666,12 +2783,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, switch (Opcode) { default: break; case ISD::SIGN_EXTEND: - return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT, + return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT, C->isTargetOpcode(), C->isOpaque()); case ISD::ANY_EXTEND: case ISD::ZERO_EXTEND: case ISD::TRUNCATE: - return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT, + return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT, C->isTargetOpcode(), C->isOpaque()); case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: { @@ -2680,29 +2797,29 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, (void)apf.convertFromAPInt(Val, Opcode==ISD::SINT_TO_FP, APFloat::rmNearestTiesToEven); - return getConstantFP(apf, VT); + return getConstantFP(apf, DL, VT); } case ISD::BITCAST: if (VT == MVT::f16 && C->getValueType(0) == MVT::i16) - return getConstantFP(APFloat(APFloat::IEEEhalf, Val), VT); + return getConstantFP(APFloat(APFloat::IEEEhalf, Val), DL, VT); if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) - return getConstantFP(APFloat(APFloat::IEEEsingle, Val), VT); + return getConstantFP(APFloat(APFloat::IEEEsingle, Val), DL, VT); else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) - return getConstantFP(APFloat(APFloat::IEEEdouble, Val), VT); + return getConstantFP(APFloat(APFloat::IEEEdouble, Val), DL, VT); break; case ISD::BSWAP: - return getConstant(Val.byteSwap(), VT, C->isTargetOpcode(), + return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(), C->isOpaque()); case ISD::CTPOP: - return getConstant(Val.countPopulation(), VT, C->isTargetOpcode(), + return getConstant(Val.countPopulation(), DL, VT, C->isTargetOpcode(), C->isOpaque()); case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: - return getConstant(Val.countLeadingZeros(), VT, C->isTargetOpcode(), + return getConstant(Val.countLeadingZeros(), DL, VT, C->isTargetOpcode(), C->isOpaque()); case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: - return getConstant(Val.countTrailingZeros(), VT, C->isTargetOpcode(), + return getConstant(Val.countTrailingZeros(), DL, VT, C->isTargetOpcode(), C->isOpaque()); } } @@ -2713,26 +2830,26 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, switch (Opcode) { case ISD::FNEG: V.changeSign(); - return getConstantFP(V, VT); + return getConstantFP(V, DL, VT); case ISD::FABS: V.clearSign(); - return getConstantFP(V, VT); + return getConstantFP(V, DL, VT); case ISD::FCEIL: { APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive); if (fs == APFloat::opOK || fs == APFloat::opInexact) - return getConstantFP(V, VT); + return getConstantFP(V, DL, VT); break; } case ISD::FTRUNC: { APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero); if (fs == APFloat::opOK || fs == APFloat::opInexact) - return getConstantFP(V, VT); + return getConstantFP(V, DL, VT); break; } case ISD::FFLOOR: { APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative); if (fs == APFloat::opOK || fs == APFloat::opInexact) - return getConstantFP(V, VT); + return getConstantFP(V, DL, VT); break; } case ISD::FP_EXTEND: { @@ -2741,7 +2858,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, // FIXME need to be more flexible about rounding mode. (void)V.convert(EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, &ignored); - return getConstantFP(V, VT); + return getConstantFP(V, DL, VT); } case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: { @@ -2755,20 +2872,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual break; APInt api(VT.getSizeInBits(), x); - return getConstant(api, VT); + return getConstant(api, DL, VT); } case ISD::BITCAST: if (VT == MVT::i16 && C->getValueType(0) == MVT::f16) - return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), VT); + return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, VT); else if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) - return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT); + return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), DL, VT); else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) - return getConstant(V.bitcastToAPInt().getZExtValue(), VT); + return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT); break; } } - // Constant fold unary operations with a vector integer operand. + // Constant fold unary operations with a vector integer or float operand. if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand.getNode())) { if (BV->isConstant()) { switch (Opcode) { @@ -2776,18 +2893,55 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, // FIXME: Entirely reasonable to perform folding of other unary // operations here as the need arises. break; + case ISD::FNEG: + case ISD::FABS: + case ISD::FCEIL: + case ISD::FTRUNC: + case ISD::FFLOOR: + case ISD::FP_EXTEND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::TRUNCATE: case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: { + EVT SVT = VT.getScalarType(); + EVT InVT = BV->getValueType(0); + EVT InSVT = InVT.getScalarType(); + + // Find legal integer scalar type for constant promotion and + // ensure that its scalar size is at least as large as source. + EVT LegalSVT = SVT; + if (SVT.isInteger()) { + LegalSVT = TLI->getTypeToTransformTo(*getContext(), SVT); + if (LegalSVT.bitsLT(SVT)) break; + } + + // Let the above scalar folding handle the folding of each element. SmallVector<SDValue, 8> Ops; for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) { SDValue OpN = BV->getOperand(i); - // Let the above scalar folding handle the conversion of each - // element. - OpN = getNode(ISD::SINT_TO_FP, DL, VT.getVectorElementType(), - OpN); + EVT OpVT = OpN.getValueType(); + + // Build vector (integer) scalar operands may need implicit + // truncation - do this before constant folding. + if (OpVT.isInteger() && OpVT.bitsGT(InSVT)) + OpN = getNode(ISD::TRUNCATE, DL, InSVT, OpN); + + OpN = getNode(Opcode, DL, SVT, OpN); + + // Legalize the (integer) scalar constant if necessary. + if (LegalSVT != SVT) + OpN = getNode(ISD::ANY_EXTEND, DL, LegalSVT, OpN); + + if (OpN.getOpcode() != ISD::UNDEF && + OpN.getOpcode() != ISD::Constant && + OpN.getOpcode() != ISD::ConstantFP) + break; Ops.push_back(OpN); } - return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); + if (Ops.size() == VT.getVectorNumElements()) + return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); + break; } } } @@ -2825,7 +2979,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); else if (OpOpcode == ISD::UNDEF) // sext(undef) = 0, because the top bits will all be the same. - return getConstant(0, VT); + return getConstant(0, DL, VT); break; case ISD::ZERO_EXTEND: assert(VT.isInteger() && Operand.getValueType().isInteger() && @@ -2842,7 +2996,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, Operand.getNode()->getOperand(0)); else if (OpOpcode == ISD::UNDEF) // zext(undef) = 0, because the top bits will be zero. - return getConstant(0, VT); + return getConstant(0, DL, VT); break; case ISD::ANY_EXTEND: assert(VT.isInteger() && Operand.getValueType().isInteger() && @@ -2941,7 +3095,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDValue Ops[1] = { Operand }; AddNodeIDNode(ID, Opcode, VTs, Ops); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) return SDValue(E, 0); N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), @@ -2956,7 +3110,54 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, return SDValue(N, 0); } -SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT, +static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1, + const APInt &C2) { + switch (Opcode) { + case ISD::ADD: return std::make_pair(C1 + C2, true); + case ISD::SUB: return std::make_pair(C1 - C2, true); + case ISD::MUL: return std::make_pair(C1 * C2, true); + case ISD::AND: return std::make_pair(C1 & C2, true); + case ISD::OR: return std::make_pair(C1 | C2, true); + case ISD::XOR: return std::make_pair(C1 ^ C2, true); + case ISD::SHL: return std::make_pair(C1 << C2, true); + case ISD::SRL: return std::make_pair(C1.lshr(C2), true); + case ISD::SRA: return std::make_pair(C1.ashr(C2), true); + case ISD::ROTL: return std::make_pair(C1.rotl(C2), true); + case ISD::ROTR: return std::make_pair(C1.rotr(C2), true); + case ISD::UDIV: + if (!C2.getBoolValue()) + break; + return std::make_pair(C1.udiv(C2), true); + case ISD::UREM: + if (!C2.getBoolValue()) + break; + return std::make_pair(C1.urem(C2), true); + case ISD::SDIV: + if (!C2.getBoolValue()) + break; + return std::make_pair(C1.sdiv(C2), true); + case ISD::SREM: + if (!C2.getBoolValue()) + break; + return std::make_pair(C1.srem(C2), true); + } + return std::make_pair(APInt(1, 0), false); +} + +SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, + const ConstantSDNode *Cst1, + const ConstantSDNode *Cst2) { + if (Cst1->isOpaque() || Cst2->isOpaque()) + return SDValue(); + + std::pair<APInt, bool> Folded = FoldValue(Opcode, Cst1->getAPIntValue(), + Cst2->getAPIntValue()); + if (!Folded.second) + return SDValue(); + return getConstant(Folded.first, DL, VT); +} + +SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, SDNode *Cst1, SDNode *Cst2) { // If the opcode is a target-specific ISD node, there's nothing we can // do here and the operand rules may not line up with the below, so @@ -2964,116 +3165,59 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT, if (Opcode >= ISD::BUILTIN_OP_END) return SDValue(); - SmallVector<std::pair<ConstantSDNode *, ConstantSDNode *>, 4> Inputs; - SmallVector<SDValue, 4> Outputs; - EVT SVT = VT.getScalarType(); + // Handle the case of two scalars. + if (const ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1)) { + if (const ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2)) { + if (SDValue Folded = + FoldConstantArithmetic(Opcode, DL, VT, Scalar1, Scalar2)) { + if (!VT.isVector()) + return Folded; + SmallVector<SDValue, 4> Outputs; + // We may have a vector type but a scalar result. Create a splat. + Outputs.resize(VT.getVectorNumElements(), Outputs.back()); + // Build a big vector out of the scalar elements we generated. + return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs); + } else { + return SDValue(); + } + } + } - ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1); - ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2); - if (Scalar1 && Scalar2 && (Scalar1->isOpaque() || Scalar2->isOpaque())) + // For vectors extract each constant element into Inputs so we can constant + // fold them individually. + BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1); + BuildVectorSDNode *BV2 = dyn_cast<BuildVectorSDNode>(Cst2); + if (!BV1 || !BV2) return SDValue(); - if (Scalar1 && Scalar2) - // Scalar instruction. - Inputs.push_back(std::make_pair(Scalar1, Scalar2)); - else { - // For vectors extract each constant element into Inputs so we can constant - // fold them individually. - BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1); - BuildVectorSDNode *BV2 = dyn_cast<BuildVectorSDNode>(Cst2); - if (!BV1 || !BV2) - return SDValue(); - - assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!"); - - for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) { - ConstantSDNode *V1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I)); - ConstantSDNode *V2 = dyn_cast<ConstantSDNode>(BV2->getOperand(I)); - if (!V1 || !V2) // Not a constant, bail. - return SDValue(); + assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!"); - if (V1->isOpaque() || V2->isOpaque()) - return SDValue(); - - // Avoid BUILD_VECTOR nodes that perform implicit truncation. - // FIXME: This is valid and could be handled by truncating the APInts. - if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT) - return SDValue(); + EVT SVT = VT.getScalarType(); + SmallVector<SDValue, 4> Outputs; + for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) { + ConstantSDNode *V1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I)); + ConstantSDNode *V2 = dyn_cast<ConstantSDNode>(BV2->getOperand(I)); + if (!V1 || !V2) // Not a constant, bail. + return SDValue(); - Inputs.push_back(std::make_pair(V1, V2)); - } - } + if (V1->isOpaque() || V2->isOpaque()) + return SDValue(); - // We have a number of constant values, constant fold them element by element. - for (unsigned I = 0, E = Inputs.size(); I != E; ++I) { - const APInt &C1 = Inputs[I].first->getAPIntValue(); - const APInt &C2 = Inputs[I].second->getAPIntValue(); + // Avoid BUILD_VECTOR nodes that perform implicit truncation. + // FIXME: This is valid and could be handled by truncating the APInts. + if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT) + return SDValue(); - switch (Opcode) { - case ISD::ADD: - Outputs.push_back(getConstant(C1 + C2, SVT)); - break; - case ISD::SUB: - Outputs.push_back(getConstant(C1 - C2, SVT)); - break; - case ISD::MUL: - Outputs.push_back(getConstant(C1 * C2, SVT)); - break; - case ISD::UDIV: - if (!C2.getBoolValue()) - return SDValue(); - Outputs.push_back(getConstant(C1.udiv(C2), SVT)); - break; - case ISD::UREM: - if (!C2.getBoolValue()) - return SDValue(); - Outputs.push_back(getConstant(C1.urem(C2), SVT)); - break; - case ISD::SDIV: - if (!C2.getBoolValue()) - return SDValue(); - Outputs.push_back(getConstant(C1.sdiv(C2), SVT)); - break; - case ISD::SREM: - if (!C2.getBoolValue()) - return SDValue(); - Outputs.push_back(getConstant(C1.srem(C2), SVT)); - break; - case ISD::AND: - Outputs.push_back(getConstant(C1 & C2, SVT)); - break; - case ISD::OR: - Outputs.push_back(getConstant(C1 | C2, SVT)); - break; - case ISD::XOR: - Outputs.push_back(getConstant(C1 ^ C2, SVT)); - break; - case ISD::SHL: - Outputs.push_back(getConstant(C1 << C2, SVT)); - break; - case ISD::SRL: - Outputs.push_back(getConstant(C1.lshr(C2), SVT)); - break; - case ISD::SRA: - Outputs.push_back(getConstant(C1.ashr(C2), SVT)); - break; - case ISD::ROTL: - Outputs.push_back(getConstant(C1.rotl(C2), SVT)); - break; - case ISD::ROTR: - Outputs.push_back(getConstant(C1.rotr(C2), SVT)); - break; - default: + // Fold one vector element. + std::pair<APInt, bool> Folded = FoldValue(Opcode, V1->getAPIntValue(), + V2->getAPIntValue()); + if (!Folded.second) return SDValue(); - } + Outputs.push_back(getConstant(Folded.first, DL, SVT)); } - assert((Scalar1 && Scalar2) || (VT.getVectorNumElements() == Outputs.size() && - "Expected a scalar or vector!")); - - // Handle the scalar case first. - if (!VT.isVector()) - return Outputs.back(); + assert(VT.getVectorNumElements() == Outputs.size() && + "Vector size mismatch!"); // We may have a vector type but a scalar result. Create a splat. Outputs.resize(VT.getVectorNumElements(), Outputs.back()); @@ -3109,6 +3253,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end()); Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end()); + + // BUILD_VECTOR requires all inputs to be of the same type, find the + // maximum type and extend them all. + EVT SVT = VT.getScalarType(); + for (SDValue Op : Elts) + SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT); + if (SVT.bitsGT(VT.getScalarType())) + for (SDValue &Op : Elts) + Op = TLI->isZExtFree(Op.getValueType(), SVT) + ? getZExtOrTrunc(Op, DL, SVT) + : getSExtOrTrunc(Op, DL, SVT); + return getNode(ISD::BUILD_VECTOR, DL, VT, Elts); } break; @@ -3273,12 +3429,35 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, assert(EVT.bitsLE(VT) && "Not extending!"); if (EVT == VT) return N1; // Not actually extending + auto SignExtendInReg = [&](APInt Val) { + unsigned FromBits = EVT.getScalarType().getSizeInBits(); + Val <<= Val.getBitWidth() - FromBits; + Val = Val.ashr(Val.getBitWidth() - FromBits); + return getConstant(Val, DL, VT.getScalarType()); + }; + if (N1C) { APInt Val = N1C->getAPIntValue(); - unsigned FromBits = EVT.getScalarType().getSizeInBits(); - Val <<= Val.getBitWidth()-FromBits; - Val = Val.ashr(Val.getBitWidth()-FromBits); - return getConstant(Val, VT); + return SignExtendInReg(Val); + } + if (ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) { + SmallVector<SDValue, 8> Ops; + for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) { + SDValue Op = N1.getOperand(i); + if (Op.getValueType() != VT.getScalarType()) break; + if (Op.getOpcode() == ISD::UNDEF) { + Ops.push_back(Op); + continue; + } + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getNode())) { + APInt Val = C->getAPIntValue(); + Ops.push_back(SignExtendInReg(Val)); + continue; + } + break; + } + if (Ops.size() == VT.getVectorNumElements()) + return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); } break; } @@ -3287,6 +3466,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, if (N1.getOpcode() == ISD::UNDEF) return getUNDEF(VT); + // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF + if (N2C && N2C->getZExtValue() >= N1.getValueType().getVectorNumElements()) + return getUNDEF(VT); + // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is // expanding copies of large vectors from registers. if (N2C && @@ -3296,7 +3479,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, N1.getOperand(0).getValueType().getVectorNumElements(); return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(N2C->getZExtValue() / Factor), - getConstant(N2C->getZExtValue() % Factor, + getConstant(N2C->getZExtValue() % Factor, DL, N2.getValueType())); } @@ -3353,7 +3536,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, unsigned ElementSize = VT.getSizeInBits(); unsigned Shift = ElementSize * N2C->getZExtValue(); APInt ShiftedVal = C->getAPIntValue().lshr(Shift); - return getConstant(ShiftedVal.trunc(ElementSize), VT); + return getConstant(ShiftedVal.trunc(ElementSize), DL, VT); } break; case ISD::EXTRACT_SUBVECTOR: { @@ -3384,7 +3567,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, // Perform trivial constant folding. if (SDValue SV = - FoldConstantArithmetic(Opcode, VT, N1.getNode(), N2.getNode())) + FoldConstantArithmetic(Opcode, DL, VT, N1.getNode(), N2.getNode())) return SV; // Canonicalize constant to RHS if commutative. @@ -3409,35 +3592,35 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, case ISD::FADD: s = V1.add(V2, APFloat::rmNearestTiesToEven); if (!HasFPExceptions || s != APFloat::opInvalidOp) - return getConstantFP(V1, VT); + return getConstantFP(V1, DL, VT); break; case ISD::FSUB: s = V1.subtract(V2, APFloat::rmNearestTiesToEven); if (!HasFPExceptions || s!=APFloat::opInvalidOp) - return getConstantFP(V1, VT); + return getConstantFP(V1, DL, VT); break; case ISD::FMUL: s = V1.multiply(V2, APFloat::rmNearestTiesToEven); if (!HasFPExceptions || s!=APFloat::opInvalidOp) - return getConstantFP(V1, VT); + return getConstantFP(V1, DL, VT); break; case ISD::FDIV: s = V1.divide(V2, APFloat::rmNearestTiesToEven); if (!HasFPExceptions || (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)) { - return getConstantFP(V1, VT); + return getConstantFP(V1, DL, VT); } break; case ISD::FREM : s = V1.mod(V2, APFloat::rmNearestTiesToEven); if (!HasFPExceptions || (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)) { - return getConstantFP(V1, VT); + return getConstantFP(V1, DL, VT); } break; case ISD::FCOPYSIGN: V1.copySign(V2); - return getConstantFP(V1, VT); + return getConstantFP(V1, DL, VT); default: break; } } @@ -3449,7 +3632,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, // FIXME need to be more flexible about rounding mode. (void)V.convert(EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, &ignored); - return getConstantFP(V, VT); + return getConstantFP(V, DL, VT); } } @@ -3474,7 +3657,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, case ISD::SRL: case ISD::SHL: if (!VT.isVector()) - return getConstant(0, VT); // fold op(undef, arg2) -> 0 + return getConstant(0, DL, VT); // fold op(undef, arg2) -> 0 // For vectors, we can't easily build an all zero vector, just return // the LHS. return N2; @@ -3489,7 +3672,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, if (N1.getOpcode() == ISD::UNDEF) // Handle undef ^ undef -> 0 special case. This is a common // idiom (misuse). - return getConstant(0, VT); + return getConstant(0, DL, VT); // fallthrough case ISD::ADD: case ISD::ADDC: @@ -3513,13 +3696,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, case ISD::SRL: case ISD::SHL: if (!VT.isVector()) - return getConstant(0, VT); // fold op(arg1, undef) -> 0 + return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0 // For vectors, we can't easily build an all zero vector, just return // the LHS. return N1; case ISD::OR: if (!VT.isVector()) - return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); + return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT); // For vectors, we can't easily build an all one vector, just return // the LHS. return N1; @@ -3539,14 +3722,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, if (BinOpHasFlags) AddBinaryNodeIDCustom(ID, Opcode, nuw, nsw, exact); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) return SDValue(E, 0); N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact); CSEMap.InsertNode(N, IP); } else { - N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact); } @@ -3569,8 +3751,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, const APFloat &V3 = N3CFP->getValueAPF(); APFloat::opStatus s = V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven); - if (s != APFloat::opInvalidOp) - return getConstantFP(V1, VT); + if (!TLI->hasFloatingPointExceptions() || s != APFloat::opInvalidOp) + return getConstantFP(V1, DL, VT); } break; } @@ -3643,7 +3825,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) return SDValue(E, 0); N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), @@ -3705,16 +3887,32 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, assert(C->getAPIntValue().getBitWidth() == 8); APInt Val = APInt::getSplat(NumBits, C->getAPIntValue()); if (VT.isInteger()) - return DAG.getConstant(Val, VT); - return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), VT); + return DAG.getConstant(Val, dl, VT); + return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), dl, + VT); } - Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value); + assert(Value.getValueType() == MVT::i8 && "memset with non-byte fill value?"); + EVT IntVT = VT.getScalarType(); + if (!IntVT.isInteger()) + IntVT = EVT::getIntegerVT(*DAG.getContext(), IntVT.getSizeInBits()); + + Value = DAG.getNode(ISD::ZERO_EXTEND, dl, IntVT, Value); if (NumBits > 8) { // Use a multiplication with 0x010101... to extend the input to the // required length. APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01)); - Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT)); + Value = DAG.getNode(ISD::MUL, dl, IntVT, Value, + DAG.getConstant(Magic, dl, IntVT)); + } + + if (VT != Value.getValueType() && !VT.isInteger()) + Value = DAG.getNode(ISD::BITCAST, dl, VT.getScalarType(), Value); + if (VT != Value.getValueType()) { + assert(VT.getVectorElementType() == Value.getValueType() && + "value type should be one vector element here"); + SmallVector<SDValue, 8> BVOps(VT.getVectorNumElements(), Value); + Value = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, BVOps); } return Value; @@ -3728,15 +3926,16 @@ static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG, // Handle vector with all elements zero. if (Str.empty()) { if (VT.isInteger()) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, dl, VT); else if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128) - return DAG.getConstantFP(0.0, VT); + return DAG.getConstantFP(0.0, dl, VT); else if (VT.isVector()) { unsigned NumElts = VT.getVectorNumElements(); MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64; return DAG.getNode(ISD::BITCAST, dl, VT, - DAG.getConstant(0, EVT::getVectorVT(*DAG.getContext(), - EltVT, NumElts))); + DAG.getConstant(0, dl, + EVT::getVectorVT(*DAG.getContext(), + EltVT, NumElts))); } else llvm_unreachable("Expected type!"); } @@ -3759,7 +3958,7 @@ static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG, // of a load, then it is cost effective to turn the load into the immediate. Type *Ty = VT.getTypeForEVT(*DAG.getContext()); if (TLI.shouldConvertConstantLoadToIntImm(Val, Ty)) - return DAG.getConstant(Val, VT); + return DAG.getConstant(Val, dl, VT); return SDValue(nullptr, 0); } @@ -3769,7 +3968,7 @@ static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, SDLoc dl, SelectionDAG &DAG) { EVT VT = Base.getValueType(); return DAG.getNode(ISD::ADD, dl, - VT, Base, DAG.getConstant(Offset, VT)); + VT, Base, DAG.getConstant(Offset, dl, VT)); } /// isMemSrcFromString - Returns true if memcpy source is a string constant. @@ -3918,9 +4117,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = - MF.getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); + bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -4033,8 +4230,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); + bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -4128,8 +4324,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); + bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -4198,7 +4393,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, - MachinePointerInfo DstPtrInfo, + bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); @@ -4219,11 +4414,13 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, // Then check to see if we should lower the memcpy with target-specific // code. If the target chooses to do this, this is the next best. - SDValue Result = - TSI->EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align, - isVol, AlwaysInline, DstPtrInfo, SrcPtrInfo); - if (Result.getNode()) - return Result; + if (TSI) { + SDValue Result = TSI->EmitTargetCodeForMemcpy( + *this, dl, Chain, Dst, Src, Size, Align, isVol, AlwaysInline, + DstPtrInfo, SrcPtrInfo); + if (Result.getNode()) + return Result; + } // If we really need inline code and the target declined to provide it, // use a (potentially long) sequence of loads and stores. @@ -4254,15 +4451,16 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, Type::getVoidTy(*getContext()), getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY), TLI->getPointerTy()), std::move(Args), 0) - .setDiscardResult(); - std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); + .setDiscardResult() + .setTailCall(isTailCall); + std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, - unsigned Align, bool isVol, + unsigned Align, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); @@ -4285,10 +4483,12 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, // Then check to see if we should lower the memmove with target-specific // code. If the target chooses to do this, this is the next best. - SDValue Result = TSI->EmitTargetCodeForMemmove( - *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo); - if (Result.getNode()) - return Result; + if (TSI) { + SDValue Result = TSI->EmitTargetCodeForMemmove( + *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo); + if (Result.getNode()) + return Result; + } // FIXME: If the memmove is volatile, lowering it to plain libc memmove may // not be safe. See memcpy above for more details. @@ -4307,15 +4507,16 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, Type::getVoidTy(*getContext()), getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), TLI->getPointerTy()), std::move(Args), 0) - .setDiscardResult(); - std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); + .setDiscardResult() + .setTailCall(isTailCall); + std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, - unsigned Align, bool isVol, + unsigned Align, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo) { assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); @@ -4337,10 +4538,12 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, // Then check to see if we should lower the memset with target-specific // code. If the target chooses to do this, this is the next best. - SDValue Result = TSI->EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, - Size, Align, isVol, DstPtrInfo); - if (Result.getNode()) - return Result; + if (TSI) { + SDValue Result = TSI->EmitTargetCodeForMemset( + *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo); + if (Result.getNode()) + return Result; + } // Emit a library call. Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(*getContext()); @@ -4362,7 +4565,8 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, Type::getVoidTy(*getContext()), getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), TLI->getPointerTy()), std::move(Args), 0) - .setDiscardResult(); + .setDiscardResult() + .setTailCall(isTailCall); std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; @@ -4379,7 +4583,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, AddNodeIDNode(ID, Opcode, VTList, Ops); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void* IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { cast<AtomicSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } @@ -4584,7 +4788,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, AddNodeIDNode(ID, Opcode, VTList, Ops); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } @@ -4685,10 +4889,10 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, assert(VT.isInteger() == MemVT.isInteger() && "Cannot convert from FP to Int or Int -> FP!"); assert(VT.isVector() == MemVT.isVector() && - "Cannot use trunc store to convert to or from a vector!"); + "Cannot use an ext load to convert to or from a vector!"); assert((!VT.isVector() || VT.getVectorNumElements() == MemVT.getVectorNumElements()) && - "Cannot use trunc store to change the number of vector elements!"); + "Cannot use an ext load to change the number of vector elements!"); } bool Indexed = AM != ISD::UNINDEXED; @@ -4706,7 +4910,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { cast<LoadSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } @@ -4814,7 +5018,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, MMO->isNonTemporal(), MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } @@ -4883,7 +5087,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, MMO->isNonTemporal(), MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } @@ -4909,7 +5113,7 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base, ID.AddInteger(ST->getRawSubclassData()); ID.AddInteger(ST->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), @@ -4938,7 +5142,7 @@ SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain, MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { cast<MaskedLoadSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } @@ -4965,7 +5169,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val, MMO->isNonTemporal(), MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { cast<MaskedStoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } @@ -4977,11 +5181,60 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val, return SDValue(N, 0); } +SDValue +SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, SDLoc dl, + ArrayRef<SDValue> Ops, + MachineMemOperand *MMO) { + + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(ISD::NON_EXTLOAD, ISD::UNINDEXED, + MMO->isVolatile(), + MMO->isNonTemporal(), + MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + cast<MaskedGatherSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + MaskedGatherSDNode *N = + new (NodeAllocator) MaskedGatherSDNode(dl.getIROrder(), dl.getDebugLoc(), + Ops, VTs, VT, MMO); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, SDLoc dl, + ArrayRef<SDValue> Ops, + MachineMemOperand *MMO) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(), + MMO->isNonTemporal(), + MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + cast<MaskedScatterSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + SDNode *N = + new (NodeAllocator) MaskedScatterSDNode(dl.getIROrder(), dl.getDebugLoc(), + Ops, VTs, VT, MMO); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + SDValue SelectionDAG::getVAArg(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, SDValue SV, unsigned Align) { - SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, MVT::i32) }; + SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, dl, MVT::i32) }; return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops); } @@ -5041,7 +5294,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, AddNodeIDNode(ID, Opcode, VTs, Ops); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) return SDValue(E, 0); N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), @@ -5096,7 +5349,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) return SDValue(E, 0); if (NumOps == 1) { @@ -5340,17 +5593,9 @@ UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) { assert(N->getNumOperands() == NumOps && "Update with wrong number of operands"); - // Check to see if there is no change. - bool AnyChange = false; - for (unsigned i = 0; i != NumOps; ++i) { - if (Ops[i] != N->getOperand(i)) { - AnyChange = true; - break; - } - } - - // No operands changed, just return the input node. - if (!AnyChange) return N; + // If no operands changed just return the input node. + if (Ops.empty() || std::equal(Ops.begin(), Ops.end(), N->op_begin())) + return N; // See if the modified node already exists. void *InsertPos = nullptr; @@ -5498,8 +5743,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, /// For IROrder, we keep the smaller of the two SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, SDLoc OLoc) { DebugLoc NLoc = N->getDebugLoc(); - if (!(NLoc.isUnknown()) && (OptLevel == CodeGenOpt::None) && - (OLoc.getDebugLoc() != NLoc)) { + if (NLoc && OptLevel == CodeGenOpt::None && OLoc.getDebugLoc() != NLoc) { N->setDebugLoc(DebugLoc()); } unsigned Order = std::min(N->getIROrder(), OLoc.getIROrder()); @@ -5531,7 +5775,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, VTs, Ops); - if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *ON = FindNodeOrInsertPos(ID, N->getDebugLoc(), IP)) return UpdadeSDLocOnMergedSDNode(ON, SDLoc(N)); } @@ -5737,7 +5981,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs, FoldingSetNodeID ID; AddNodeIDNode(ID, ~Opcode, VTs, OpsArray); IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) { return cast<MachineSDNode>(UpdadeSDLocOnMergedSDNode(E, DL)); } } @@ -5769,7 +6013,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs, SDValue SelectionDAG::getTargetExtractSubreg(int SRIdx, SDLoc DL, EVT VT, SDValue Operand) { - SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32); + SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32); SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, Operand, SRIdxVal); return SDValue(Subreg, 0); @@ -5780,7 +6024,7 @@ SelectionDAG::getTargetExtractSubreg(int SRIdx, SDLoc DL, EVT VT, SDValue SelectionDAG::getTargetInsertSubreg(int SRIdx, SDLoc DL, EVT VT, SDValue Operand, SDValue Subreg) { - SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32); + SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32); SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT, Operand, Subreg, SRIdxVal); return SDValue(Result, 0); @@ -5797,7 +6041,7 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, if (isBinOpWithFlags(Opcode)) AddBinaryNodeIDCustom(ID, nuw, nsw, exact); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DebugLoc(), IP)) return E; } return nullptr; @@ -5809,21 +6053,28 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R, bool IsIndirect, uint64_t Off, DebugLoc DL, unsigned O) { - return new (Allocator) SDDbgValue(Var, Expr, N, R, IsIndirect, Off, DL, O); + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + return new (DbgInfo->getAlloc()) + SDDbgValue(Var, Expr, N, R, IsIndirect, Off, DL, O); } /// Constant SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr, const Value *C, uint64_t Off, DebugLoc DL, unsigned O) { - return new (Allocator) SDDbgValue(Var, Expr, C, Off, DL, O); + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, C, Off, DL, O); } /// FrameIndex SDDbgValue *SelectionDAG::getFrameIndexDbgValue(MDNode *Var, MDNode *Expr, unsigned FI, uint64_t Off, DebugLoc DL, unsigned O) { - return new (Allocator) SDDbgValue(Var, Expr, FI, Off, DL, O); + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, Off, DL, O); } namespace { @@ -6489,7 +6740,7 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand, - getConstant(i, TLI->getVectorIdxTy())); + getConstant(i, dl, TLI->getVectorIdxTy())); } else { // A scalar operand; just use it as is. Operands[j] = Operand; @@ -6595,8 +6846,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { unsigned PtrWidth = TLI->getPointerTypeSizeInBits(GV->getType()); APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); - llvm::computeKnownBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne, - TLI->getDataLayout()); + llvm::computeKnownBits(const_cast<GlobalValue *>(GV), KnownZero, KnownOne, + *TLI->getDataLayout()); unsigned AlignBits = KnownZero.countTrailingOnes(); unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; if (Align) @@ -6652,9 +6903,10 @@ SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, "More vector elements requested than available!"); SDValue Lo, Hi; Lo = getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N, - getConstant(0, TLI->getVectorIdxTy())); + getConstant(0, DL, TLI->getVectorIdxTy())); Hi = getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, N, - getConstant(LoVT.getVectorNumElements(), TLI->getVectorIdxTy())); + getConstant(LoVT.getVectorNumElements(), DL, + TLI->getVectorIdxTy())); return std::make_pair(Lo, Hi); } @@ -6670,7 +6922,7 @@ void SelectionDAG::ExtractVectorElements(SDValue Op, SDLoc SL(Op); for (unsigned i = Start, e = Start + Count; i != e; ++i) { Args.push_back(getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, - Op, getConstant(i, IdxTy))); + Op, getConstant(i, SL, IdxTy))); } } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index fe9e442..85303d2 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -20,8 +20,8 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" @@ -34,6 +34,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -57,7 +58,6 @@ #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" -#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetSelectionDAGInfo.h" @@ -161,7 +161,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi); Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi, - DAG.getConstant(Lo.getValueType().getSizeInBits(), + DAG.getConstant(Lo.getValueType().getSizeInBits(), DL, TLI.getPointerTy())); Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo); Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi); @@ -208,7 +208,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, // FP_ROUND's are always exact here. if (ValueVT.bitsLT(Val.getValueType())) return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val, - DAG.getTargetConstant(1, TLI.getPointerTy())); + DAG.getTargetConstant(1, DL, TLI.getPointerTy())); return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); } @@ -301,7 +301,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() && "Cannot narrow, it would be a lossy transformation"); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, DL, TLI.getVectorIdxTy())); } // Vector/Vector bitcast. @@ -425,7 +425,7 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, unsigned RoundBits = RoundParts * PartBits; unsigned OddParts = NumParts - RoundParts; SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val, - DAG.getIntPtrConstant(RoundBits)); + DAG.getIntPtrConstant(RoundBits, DL)); getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V); if (TLI.isBigEndian()) @@ -452,9 +452,9 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, SDValue &Part1 = Parts[i+StepSize/2]; Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, - ThisVT, Part0, DAG.getIntPtrConstant(1)); + ThisVT, Part0, DAG.getIntPtrConstant(1, DL)); Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, - ThisVT, Part0, DAG.getIntPtrConstant(0)); + ThisVT, Part0, DAG.getIntPtrConstant(0, DL)); if (ThisBits == PartBits && ThisVT != PartVT) { Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0); @@ -493,7 +493,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, SmallVector<SDValue, 16> Ops; for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - ElementVT, Val, DAG.getConstant(i, + ElementVT, Val, DAG.getConstant(i, DL, TLI.getVectorIdxTy()))); for (unsigned i = ValueVT.getVectorNumElements(), @@ -520,7 +520,8 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, assert(ValueVT.getVectorNumElements() == 1 && "Only trivial vector-to-scalar conversions should get here!"); Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - PartVT, Val, DAG.getConstant(0, TLI.getVectorIdxTy())); + PartVT, Val, + DAG.getConstant(0, DL, TLI.getVectorIdxTy())); bool Smaller = ValueVT.bitsLE(PartVT); Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), @@ -550,12 +551,12 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, if (IntermediateVT.isVector()) Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val, - DAG.getConstant(i * (NumElements / NumIntermediates), + DAG.getConstant(i * (NumElements / NumIntermediates), DL, TLI.getVectorIdxTy())); else Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val, - DAG.getConstant(i, TLI.getVectorIdxTy())); + DAG.getConstant(i, DL, TLI.getVectorIdxTy())); } // Split the intermediate operands into legal parts. @@ -576,93 +577,25 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, } } -namespace { - /// RegsForValue - This struct represents the registers (physical or virtual) - /// that a particular set of values is assigned, and the type information - /// about the value. The most common situation is to represent one value at a - /// time, but struct or array values are handled element-wise as multiple - /// values. The splitting of aggregates is performed recursively, so that we - /// never have aggregate-typed registers. The values at this point do not - /// necessarily have legal types, so each value may require one or more - /// registers of some legal type. - /// - struct RegsForValue { - /// ValueVTs - The value types of the values, which may not be legal, and - /// may need be promoted or synthesized from one or more registers. - /// - SmallVector<EVT, 4> ValueVTs; +RegsForValue::RegsForValue() {} - /// RegVTs - The value types of the registers. This is the same size as - /// ValueVTs and it records, for each value, what the type of the assigned - /// register or registers are. (Individual values are never synthesized - /// from more than one type of register.) - /// - /// With virtual registers, the contents of RegVTs is redundant with TLI's - /// getRegisterType member function, however when with physical registers - /// it is necessary to have a separate record of the types. - /// - SmallVector<MVT, 4> RegVTs; - - /// Regs - This list holds the registers assigned to the values. - /// Each legal or promoted value requires one register, and each - /// expanded value requires multiple registers. - /// - SmallVector<unsigned, 4> Regs; - - RegsForValue() {} - - RegsForValue(const SmallVector<unsigned, 4> ®s, - MVT regvt, EVT valuevt) - : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} - - RegsForValue(LLVMContext &Context, const TargetLowering &tli, - unsigned Reg, Type *Ty) { - ComputeValueVTs(tli, Ty, ValueVTs); - - for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT ValueVT = ValueVTs[Value]; - unsigned NumRegs = tli.getNumRegisters(Context, ValueVT); - MVT RegisterVT = tli.getRegisterType(Context, ValueVT); - for (unsigned i = 0; i != NumRegs; ++i) - Regs.push_back(Reg + i); - RegVTs.push_back(RegisterVT); - Reg += NumRegs; - } - } +RegsForValue::RegsForValue(const SmallVector<unsigned, 4> ®s, MVT regvt, + EVT valuevt) + : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} - /// append - Add the specified values to this one. - void append(const RegsForValue &RHS) { - ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); - RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end()); - Regs.append(RHS.Regs.begin(), RHS.Regs.end()); - } +RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &tli, + unsigned Reg, Type *Ty) { + ComputeValueVTs(tli, Ty, ValueVTs); - /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from - /// this value and returns the result as a ValueVTs value. This uses - /// Chain/Flag as the input and updates them for the output Chain/Flag. - /// If the Flag pointer is NULL, no flag is used. - SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, - SDLoc dl, - SDValue &Chain, SDValue *Flag, - const Value *V = nullptr) const; - - /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the - /// specified value into the registers specified by this object. This uses - /// Chain/Flag as the input and updates them for the output Chain/Flag. - /// If the Flag pointer is NULL, no flag is used. - void - getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, - SDValue *Flag, const Value *V, - ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const; - - /// AddInlineAsmOperands - Add this value to the specified inlineasm node - /// operand list. This adds the code marker, matching input operand index - /// (if applicable), and includes the number of values added into it. - void AddInlineAsmOperands(unsigned Kind, - bool HasMatching, unsigned MatchingIdx, - SelectionDAG &DAG, - std::vector<SDValue> &Ops) const; - }; + for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { + EVT ValueVT = ValueVTs[Value]; + unsigned NumRegs = tli.getNumRegisters(Context, ValueVT); + MVT RegisterVT = tli.getRegisterType(Context, ValueVT); + for (unsigned i = 0; i != NumRegs; ++i) + Regs.push_back(Reg + i); + RegVTs.push_back(RegisterVT); + Reg += NumRegs; + } } /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from @@ -721,7 +654,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, // The current value is a zero. // Explicitly express that as it would be easier for // optimizations to kick in. - Parts[i] = DAG.getConstant(0, RegisterVT); + Parts[i] = DAG.getConstant(0, dl, RegisterVT); continue; } @@ -823,7 +756,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, /// operand list. This adds the code marker and includes the number of /// values added into it. void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, - unsigned MatchingIdx, + unsigned MatchingIdx, SDLoc dl, SelectionDAG &DAG, std::vector<SDValue> &Ops) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -843,7 +776,7 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID()); } - SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); + SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32); Ops.push_back(Res); unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); @@ -869,7 +802,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, AA = &aa; GFI = gfi; LibInfo = li; - DL = DAG.getSubtarget().getDataLayout(); + DL = DAG.getTarget().getDataLayout(); Context = DAG.getContext(); LPadToCallSiteMap.clear(); } @@ -997,14 +930,16 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, const DbgValueInst *DI = DDI.getDI(); DebugLoc dl = DDI.getdl(); unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); - MDNode *Variable = DI->getVariable(); - MDNode *Expr = DI->getExpression(); + DILocalVariable *Variable = DI->getVariable(); + DIExpression *Expr = DI->getExpression(); + assert(Variable->isValidLocationForIntrinsic(dl) && + "Expected inlined-at fields to agree"); uint64_t Offset = DI->getOffset(); // A dbg.value for an alloca is always indirect. bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; SDDbgValue *SDV; if (Val.getNode()) { - if (!EmitFuncArgumentDbgValue(V, Variable, Expr, Offset, IsIndirect, + if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, IsIndirect, Val)) { SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(), IsIndirect, Offset, dl, DbgSDNodeOrder); @@ -1016,6 +951,24 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, } } +/// getCopyFromRegs - If there was virtual register allocated for the value V +/// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise. +SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) { + DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V); + SDValue Result; + + if (It != FuncInfo.ValueMap.end()) { + unsigned InReg = It->second; + RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), InReg, + Ty); + SDValue Chain = DAG.getEntryNode(); + Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); + resolveDanglingDebugInfo(V, Result); + } + + return Result; +} + /// getValue - Return an SDValue for the given Value. SDValue SelectionDAGBuilder::getValue(const Value *V) { // If we already have an SDValue for this value, use it. It's important @@ -1026,15 +979,9 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { // If there's a virtual register allocated and initialized for this // value, use it. - DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V); - if (It != FuncInfo.ValueMap.end()) { - unsigned InReg = It->second; - RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), InReg, - V->getType()); - SDValue Chain = DAG.getEntryNode(); - N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); - resolveDanglingDebugInfo(V, N); - return N; + SDValue copyFromReg = getCopyFromRegs(V, V->getType()); + if (copyFromReg.getNode()) { + return copyFromReg; } // Otherwise create a new SDValue and remember it. @@ -1044,6 +991,12 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { return Val; } +// Return true if SDValue exists for the given Value +bool SelectionDAGBuilder::findValue(const Value *V) const { + return (NodeMap.find(V) != NodeMap.end()) || + (FuncInfo.ValueMap.find(V) != FuncInfo.ValueMap.end()); +} + /// getNonRegisterValue - Return an SDValue for the given Value, but /// don't look in FuncInfo.ValueMap for a virtual register. SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { @@ -1067,18 +1020,18 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { EVT VT = TLI.getValueType(V->getType(), true); if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) - return DAG.getConstant(*CI, VT); + return DAG.getConstant(*CI, getCurSDLoc(), VT); if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) return DAG.getGlobalAddress(GV, getCurSDLoc(), VT); if (isa<ConstantPointerNull>(C)) { unsigned AS = V->getType()->getPointerAddressSpace(); - return DAG.getConstant(0, TLI.getPointerTy(AS)); + return DAG.getConstant(0, getCurSDLoc(), TLI.getPointerTy(AS)); } if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) - return DAG.getConstantFP(*CFP, VT); + return DAG.getConstantFP(*CFP, getCurSDLoc(), VT); if (isa<UndefValue>(C) && !V->getType()->isAggregateType()) return DAG.getUNDEF(VT); @@ -1138,9 +1091,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (isa<UndefValue>(C)) Constants[i] = DAG.getUNDEF(EltVT); else if (EltVT.isFloatingPoint()) - Constants[i] = DAG.getConstantFP(0, EltVT); + Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT); else - Constants[i] = DAG.getConstant(0, EltVT); + Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT); } return DAG.getMergeValues(Constants, getCurSDLoc()); @@ -1164,9 +1117,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { SDValue Op; if (EltVT.isFloatingPoint()) - Op = DAG.getConstantFP(0, EltVT); + Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT); else - Op = DAG.getConstant(0, EltVT); + Op = DAG.getConstant(0, getCurSDLoc(), EltVT); Ops.assign(NumElements, Op); } @@ -1223,7 +1176,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { for (unsigned i = 0; i != NumValues; ++i) { SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), RetPtr.getValueType(), RetPtr, - DAG.getIntPtrConstant(Offsets[i])); + DAG.getIntPtrConstant(Offsets[i], + getCurSDLoc())); Chains[i] = DAG.getStore(Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), @@ -1573,19 +1527,13 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // Update machine-CFG edges. MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; - // Figure out which block is immediately after the current one. - MachineBasicBlock *NextBlock = nullptr; - MachineFunction::iterator BBI = BrMBB; - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; - if (I.isUnconditional()) { // Update machine-CFG edges. BrMBB->addSuccessor(Succ0MBB); // If this is not a fall-through branch or optimizations are switched off, // emit the branch. - if (Succ0MBB != NextBlock || TM.getOptLevel() == CodeGenOpt::None) + if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None) DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(Succ0MBB))); @@ -1674,7 +1622,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, Cond = CondLHS; else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) && CB.CC == ISD::SETEQ) { - SDValue True = DAG.getConstant(1, CondLHS.getValueType()); + SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType()); Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True); } else Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC); @@ -1682,19 +1630,19 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now"); const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue(); - const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); + const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); SDValue CmpOp = getValue(CB.CmpMHS); EVT VT = CmpOp.getValueType(); if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) { - Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT), + Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT), ISD::SETLE); } else { SDValue SUB = DAG.getNode(ISD::SUB, dl, - VT, CmpOp, DAG.getConstant(Low, VT)); + VT, CmpOp, DAG.getConstant(Low, dl, VT)); Cond = DAG.getSetCC(dl, MVT::i1, SUB, - DAG.getConstant(High-Low, VT), ISD::SETULE); + DAG.getConstant(High-Low, dl, VT), ISD::SETULE); } } @@ -1705,18 +1653,11 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, if (CB.TrueBB != CB.FalseBB) addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight); - // Set NextBlock to be the MBB immediately after the current one, if any. - // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = nullptr; - MachineFunction::iterator BBI = SwitchBB; - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; - // If the lhs block is the next block, invert the condition so that we can // fall through to the lhs instead of the rhs block. - if (CB.TrueBB == NextBlock) { + if (CB.TrueBB == NextBlock(SwitchBB)) { std::swap(CB.TrueBB, CB.FalseBB); - SDValue True = DAG.getConstant(1, Cond.getValueType()); + SDValue True = DAG.getConstant(1, dl, Cond.getValueType()); Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True); } @@ -1752,13 +1693,15 @@ void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH, MachineBasicBlock *SwitchBB) { + SDLoc dl = getCurSDLoc(); + // Subtract the lowest switch case value from the value being switched on and // conditional branch to default mbb if the result is greater than the // difference between smallest and largest cases. SDValue SwitchOp = getValue(JTH.SValue); EVT VT = SwitchOp.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, SwitchOp, - DAG.getConstant(JTH.First, VT)); + SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp, + DAG.getConstant(JTH.First, dl, VT)); // The SDNode we just created, which holds the value being switched on minus // the smallest case value, needs to be copied to a virtual register so it @@ -1766,10 +1709,10 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // This value may be smaller or larger than the target's pointer type, and // therefore require extension or truncating. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SwitchOp = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), TLI.getPointerTy()); + SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy()); unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy()); - SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(), + SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, JumpTableReg, SwitchOp); JT.Reg = JumpTableReg; @@ -1777,24 +1720,18 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // for the switch statement if the value being switched on exceeds the largest // case in the switch. SDValue CMP = - DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), - Sub.getValueType()), - Sub, DAG.getConstant(JTH.Last - JTH.First, VT), ISD::SETUGT); + DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), + ISD::SETUGT); - // Set NextBlock to be the MBB immediately after the current one, if any. - // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = nullptr; - MachineFunction::iterator BBI = SwitchBB; - - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; - - SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(), + SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, MVT::Other, CopyTo, CMP, DAG.getBasicBlock(JT.Default)); - if (JT.MBB != NextBlock) - BrCond = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrCond, + // Avoid emitting unnecessary branches to the next block. + if (JT.MBB != NextBlock(SwitchBB)) + BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, DAG.getBasicBlock(JT.MBB)); DAG.setRoot(BrCond); @@ -1824,6 +1761,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, TLI.getDataLayout()->getPrefTypeAlignment(IRGuard->getType()); SDValue Guard; + SDLoc dl = getCurSDLoc(); // If GuardReg is set and useLoadStackGuardNode returns true, retrieve the // guard value from the virtual register holding the value. Otherwise, emit a @@ -1831,34 +1769,34 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, unsigned GuardReg = SPD.getGuardReg(); if (GuardReg && TLI.useLoadStackGuardNode()) - Guard = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), GuardReg, + Guard = DAG.getCopyFromReg(DAG.getEntryNode(), dl, GuardReg, PtrTy); else - Guard = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), + Guard = DAG.getLoad(PtrTy, dl, DAG.getEntryNode(), GuardPtr, MachinePointerInfo(IRGuard, 0), true, false, false, Align); - SDValue StackSlot = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), + SDValue StackSlot = DAG.getLoad(PtrTy, dl, DAG.getEntryNode(), StackSlotPtr, MachinePointerInfo::getFixedStack(FI), true, false, false, Align); // Perform the comparison via a subtract/getsetcc. EVT VT = Guard.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, Guard, StackSlot); + SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, StackSlot); SDValue Cmp = - DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), + DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), Sub.getValueType()), - Sub, DAG.getConstant(0, VT), ISD::SETNE); + Sub, DAG.getConstant(0, dl, VT), ISD::SETNE); // If the sub is not 0, then we know the guard/stackslot do not equal, so // branch to failure MBB. - SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(), + SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, MVT::Other, StackSlot.getOperand(0), Cmp, DAG.getBasicBlock(SPD.getFailureMBB())); // Otherwise branch to success MBB. - SDValue Br = DAG.getNode(ISD::BR, getCurSDLoc(), + SDValue Br = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, DAG.getBasicBlock(SPD.getSuccessMBB())); @@ -1886,18 +1824,20 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { /// suitable for "bit tests" void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB) { + SDLoc dl = getCurSDLoc(); + // Subtract the minimum value SDValue SwitchOp = getValue(B.SValue); EVT VT = SwitchOp.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, SwitchOp, - DAG.getConstant(B.First, VT)); + SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp, + DAG.getConstant(B.First, dl, VT)); // Check range const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue RangeCmp = - DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), - Sub.getValueType()), - Sub, DAG.getConstant(B.Range, VT), ISD::SETUGT); + DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT); // Determine the type of the test operands. bool UsePtrType = false; @@ -1914,32 +1854,25 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, } if (UsePtrType) { VT = TLI.getPointerTy(); - Sub = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), VT); + Sub = DAG.getZExtOrTrunc(Sub, dl, VT); } B.RegVT = VT.getSimpleVT(); B.Reg = FuncInfo.CreateReg(B.RegVT); - SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(), - B.Reg, Sub); - - // Set NextBlock to be the MBB immediately after the current one, if any. - // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = nullptr; - MachineFunction::iterator BBI = SwitchBB; - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; + SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub); MachineBasicBlock* MBB = B.Cases[0].ThisBB; addSuccessorWithWeight(SwitchBB, B.Default); addSuccessorWithWeight(SwitchBB, MBB); - SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurSDLoc(), + SDValue BrRange = DAG.getNode(ISD::BRCOND, dl, MVT::Other, CopyTo, RangeCmp, DAG.getBasicBlock(B.Default)); - if (MBB != NextBlock) - BrRange = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, CopyTo, + // Avoid emitting unnecessary branches to the next block. + if (MBB != NextBlock(SwitchBB)) + BrRange = DAG.getNode(ISD::BR, dl, MVT::Other, BrRange, DAG.getBasicBlock(MBB)); DAG.setRoot(BrRange); @@ -1952,34 +1885,33 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB) { + SDLoc dl = getCurSDLoc(); MVT VT = BB.RegVT; - SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), - Reg, VT); + SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT); SDValue Cmp; - unsigned PopCount = CountPopulation_64(B.Mask); + unsigned PopCount = countPopulation(B.Mask); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (PopCount == 1) { // Testing for a single bit; just compare the shift count with what it // would need to be to shift a 1 bit in that position. Cmp = DAG.getSetCC( - getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, - DAG.getConstant(countTrailingZeros(B.Mask), VT), ISD::SETEQ); + dl, TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, + DAG.getConstant(countTrailingZeros(B.Mask), dl, VT), ISD::SETEQ); } else if (PopCount == BB.Range) { // There is only one zero bit in the range, test for it directly. Cmp = DAG.getSetCC( - getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, - DAG.getConstant(CountTrailingOnes_64(B.Mask), VT), ISD::SETNE); + dl, TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, + DAG.getConstant(countTrailingOnes(B.Mask), dl, VT), ISD::SETNE); } else { // Make desired shift - SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurSDLoc(), VT, - DAG.getConstant(1, VT), ShiftOp); + SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT, + DAG.getConstant(1, dl, VT), ShiftOp); // Emit bit tests and jumps - SDValue AndOp = DAG.getNode(ISD::AND, getCurSDLoc(), - VT, SwitchVal, DAG.getConstant(B.Mask, VT)); - Cmp = DAG.getSetCC(getCurSDLoc(), - TLI.getSetCCResultType(*DAG.getContext(), VT), AndOp, - DAG.getConstant(0, VT), ISD::SETNE); + SDValue AndOp = DAG.getNode(ISD::AND, dl, + VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT)); + Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), VT), AndOp, + DAG.getConstant(0, dl, VT), ISD::SETNE); } // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight. @@ -1987,19 +1919,13 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, // The branch weight from SwitchBB to NextMBB is BranchWeightToNext. addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext); - SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurSDLoc(), + SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl, MVT::Other, getControlRoot(), Cmp, DAG.getBasicBlock(B.TargetBB)); - // Set NextBlock to be the MBB immediately after the current one, if any. - // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = nullptr; - MachineFunction::iterator BBI = SwitchBB; - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; - - if (NextMBB != NextBlock) - BrAnd = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrAnd, + // Avoid emitting unnecessary branches to the next block. + if (NextMBB != NextBlock(SwitchBB)) + BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd, DAG.getBasicBlock(NextMBB)); DAG.setRoot(BrAnd); @@ -2027,13 +1953,20 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { case Intrinsic::experimental_patchpoint_i64: visitPatchpoint(&I, LandingPad); break; + case Intrinsic::experimental_gc_statepoint: + LowerStatepoint(ImmutableStatepoint(&I), LandingPad); + break; } } else LowerCallTo(&I, getValue(Callee), false, LandingPad); // If the value of the invoke is used outside of its defining block, make it // available as a virtual register. - CopyToExportRegsIfNeeded(&I); + // We already took care of the exported value for the statepoint instruction + // during call to the LowerStatepoint. + if (!isStatepoint(I)) { + CopyToExportRegsIfNeeded(&I); + } // Update successor info addSuccessorWithWeight(InvokeMBB, Return); @@ -2065,622 +1998,86 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { return; SmallVector<EVT, 2> ValueVTs; + SDLoc dl = getCurSDLoc(); ComputeValueVTs(TLI, LP.getType(), ValueVTs); assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported"); // Get the two live-in registers as SDValues. The physregs have already been // copied into virtual registers. SDValue Ops[2]; - Ops[0] = DAG.getZExtOrTrunc( - DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), - FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()), - getCurSDLoc(), ValueVTs[0]); + if (FuncInfo.ExceptionPointerVirtReg) { + Ops[0] = DAG.getZExtOrTrunc( + DAG.getCopyFromReg(DAG.getEntryNode(), dl, + FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()), + dl, ValueVTs[0]); + } else { + Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy()); + } Ops[1] = DAG.getZExtOrTrunc( - DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), + DAG.getCopyFromReg(DAG.getEntryNode(), dl, FuncInfo.ExceptionSelectorVirtReg, TLI.getPointerTy()), - getCurSDLoc(), ValueVTs[1]); + dl, ValueVTs[1]); // Merge into one. - SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), + SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Ops); setValue(&LP, Res); } -/// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for -/// small case ranges). -bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, - CaseRecVector& WorkList, - const Value* SV, - MachineBasicBlock *Default, - MachineBasicBlock *SwitchBB) { - // Size is the number of Cases represented by this range. - size_t Size = CR.Range.second - CR.Range.first; - if (Size > 3) - return false; - - // Get the MachineFunction which holds the current MBB. This is used when - // inserting any additional MBBs necessary to represent the switch. - MachineFunction *CurMF = FuncInfo.MF; - - // Figure out which block is immediately after the current one. - MachineBasicBlock *NextBlock = nullptr; - MachineFunction::iterator BBI = CR.CaseBB; - - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; - - BranchProbabilityInfo *BPI = FuncInfo.BPI; - // If any two of the cases has the same destination, and if one value - // is the same as the other, but has one bit unset that the other has set, - // use bit manipulation to do two compares at once. For example: - // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)" - // TODO: This could be extended to merge any 2 cases in switches with 3 cases. - // TODO: Handle cases where CR.CaseBB != SwitchBB. - if (Size == 2 && CR.CaseBB == SwitchBB) { - Case &Small = *CR.Range.first; - Case &Big = *(CR.Range.second-1); - - if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) { - const APInt& SmallValue = cast<ConstantInt>(Small.Low)->getValue(); - const APInt& BigValue = cast<ConstantInt>(Big.Low)->getValue(); - - // Check that there is only one bit different. - if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 && - (SmallValue | BigValue) == BigValue) { - // Isolate the common bit. - APInt CommonBit = BigValue & ~SmallValue; - assert((SmallValue | CommonBit) == BigValue && - CommonBit.countPopulation() == 1 && "Not a common bit?"); - - SDValue CondLHS = getValue(SV); - EVT VT = CondLHS.getValueType(); - SDLoc DL = getCurSDLoc(); - - SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS, - DAG.getConstant(CommonBit, VT)); - SDValue Cond = DAG.getSetCC(DL, MVT::i1, - Or, DAG.getConstant(BigValue, VT), - ISD::SETEQ); - - // Update successor info. - // Both Small and Big will jump to Small.BB, so we sum up the weights. - addSuccessorWithWeight(SwitchBB, Small.BB, - Small.ExtraWeight + Big.ExtraWeight); - addSuccessorWithWeight(SwitchBB, Default, - // The default destination is the first successor in IR. - BPI ? BPI->getEdgeWeight(SwitchBB->getBasicBlock(), (unsigned)0) : 0); - - // Insert the true branch. - SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other, - getControlRoot(), Cond, - DAG.getBasicBlock(Small.BB)); - - // Insert the false branch. - BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond, - DAG.getBasicBlock(Default)); - - DAG.setRoot(BrCond); - return true; - } - } - } - - // Order cases by weight so the most likely case will be checked first. - uint32_t UnhandledWeights = 0; - if (BPI) { - for (CaseItr I = CR.Range.first, IE = CR.Range.second; I != IE; ++I) { - uint32_t IWeight = I->ExtraWeight; - UnhandledWeights += IWeight; - for (CaseItr J = CR.Range.first; J < I; ++J) { - uint32_t JWeight = J->ExtraWeight; - if (IWeight > JWeight) - std::swap(*I, *J); - } - } - } - // Rearrange the case blocks so that the last one falls through if possible. - Case &BackCase = *(CR.Range.second-1); - if (Size > 1 && - NextBlock && Default != NextBlock && BackCase.BB != NextBlock) { - // The last case block won't fall through into 'NextBlock' if we emit the - // branches in this order. See if rearranging a case value would help. - // We start at the bottom as it's the case with the least weight. - for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I) - if (I->BB == NextBlock) { - std::swap(*I, BackCase); - break; - } - } - - // Create a CaseBlock record representing a conditional branch to - // the Case's target mbb if the value being switched on SV is equal - // to C. - MachineBasicBlock *CurBlock = CR.CaseBB; - for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { - MachineBasicBlock *FallThrough; - if (I != E-1) { - FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock()); - CurMF->insert(BBI, FallThrough); - - // Put SV in a virtual register to make it available from the new blocks. - ExportFromCurrentBlock(SV); - } else { - // If the last case doesn't match, go to the default block. - FallThrough = Default; - } - - const Value *RHS, *LHS, *MHS; - ISD::CondCode CC; - if (I->High == I->Low) { - // This is just small small case range :) containing exactly 1 case - CC = ISD::SETEQ; - LHS = SV; RHS = I->High; MHS = nullptr; - } else { - CC = ISD::SETLE; - LHS = I->Low; MHS = SV; RHS = I->High; - } - - // The false weight should be sum of all un-handled cases. - UnhandledWeights -= I->ExtraWeight; - CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough, - /* me */ CurBlock, - /* trueweight */ I->ExtraWeight, - /* falseweight */ UnhandledWeights); - - // If emitting the first comparison, just call visitSwitchCase to emit the - // code into the current block. Otherwise, push the CaseBlock onto the - // vector to be later processed by SDISel, and insert the node's MBB - // before the next MBB. - if (CurBlock == SwitchBB) - visitSwitchCase(CB, SwitchBB); - else - SwitchCases.push_back(CB); - - CurBlock = FallThrough; - } - - return true; -} - -static inline bool areJTsAllowed(const TargetLowering &TLI) { - return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || - TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other); -} - -static APInt ComputeRange(const APInt &First, const APInt &Last) { - uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1; - APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth); - return (LastExt - FirstExt + 1ULL); -} - -/// handleJTSwitchCase - Emit jumptable for current switch case range -bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, - CaseRecVector &WorkList, - const Value *SV, - MachineBasicBlock *Default, - MachineBasicBlock *SwitchBB) { - Case& FrontCase = *CR.Range.first; - Case& BackCase = *(CR.Range.second-1); - - const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue(); - const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue(); - - APInt TSize(First.getBitWidth(), 0); - for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) - TSize += I->size(); +unsigned +SelectionDAGBuilder::visitLandingPadClauseBB(GlobalValue *ClauseGV, + MachineBasicBlock *LPadBB) { + SDValue Chain = getControlRoot(); + SDLoc dl = getCurSDLoc(); + // Get the typeid that we will dispatch on later. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries())) - return false; - - APInt Range = ComputeRange(First, Last); - // The density is TSize / Range. Require at least 40%. - // It should not be possible for IntTSize to saturate for sane code, but make - // sure we handle Range saturation correctly. - uint64_t IntRange = Range.getLimitedValue(UINT64_MAX/10); - uint64_t IntTSize = TSize.getLimitedValue(UINT64_MAX/10); - if (IntTSize * 10 < IntRange * 4) - return false; - - DEBUG(dbgs() << "Lowering jump table\n" - << "First entry: " << First << ". Last entry: " << Last << '\n' - << "Range: " << Range << ". Size: " << TSize << ".\n\n"); - - // Get the MachineFunction which holds the current MBB. This is used when - // inserting any additional MBBs necessary to represent the switch. - MachineFunction *CurMF = FuncInfo.MF; - - // Figure out which block is immediately after the current one. - MachineFunction::iterator BBI = CR.CaseBB; - ++BBI; + const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy()); + unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); + unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(ClauseGV); + SDValue Sel = DAG.getConstant(TypeID, dl, TLI.getPointerTy()); + Chain = DAG.getCopyToReg(Chain, dl, VReg, Sel); + + // Branch to the main landing pad block. + MachineBasicBlock *ClauseMBB = FuncInfo.MBB; + ClauseMBB->addSuccessor(LPadBB); + DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, Chain, + DAG.getBasicBlock(LPadBB))); + return VReg; +} + +void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) { +#ifndef NDEBUG + for (const CaseCluster &CC : Clusters) + assert(CC.Low == CC.High && "Input clusters must be single-case"); +#endif - const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); - - // Create a new basic block to hold the code for loading the address - // of the jump table, and jumping to it. Update successor information; - // we will either branch to the default case for the switch, or the jump - // table. - MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB); - CurMF->insert(BBI, JumpTableBB); - - addSuccessorWithWeight(CR.CaseBB, Default); - addSuccessorWithWeight(CR.CaseBB, JumpTableBB); - - // Build a vector of destination BBs, corresponding to each target - // of the jump table. If the value of the jump table slot corresponds to - // a case statement, push the case's BB onto the vector, otherwise, push - // the default BB. - std::vector<MachineBasicBlock*> DestBBs; - APInt TEI = First; - for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) { - const APInt &Low = cast<ConstantInt>(I->Low)->getValue(); - const APInt &High = cast<ConstantInt>(I->High)->getValue(); - - if (Low.sle(TEI) && TEI.sle(High)) { - DestBBs.push_back(I->BB); - if (TEI==High) - ++I; + std::sort(Clusters.begin(), Clusters.end(), + [](const CaseCluster &a, const CaseCluster &b) { + return a.Low->getValue().slt(b.Low->getValue()); + }); + + // Merge adjacent clusters with the same destination. + const unsigned N = Clusters.size(); + unsigned DstIndex = 0; + for (unsigned SrcIndex = 0; SrcIndex < N; ++SrcIndex) { + CaseCluster &CC = Clusters[SrcIndex]; + const ConstantInt *CaseVal = CC.Low; + MachineBasicBlock *Succ = CC.MBB; + + if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == Succ && + (CaseVal->getValue() - Clusters[DstIndex - 1].High->getValue()) == 1) { + // If this case has the same successor and is a neighbour, merge it into + // the previous cluster. + Clusters[DstIndex - 1].High = CaseVal; + Clusters[DstIndex - 1].Weight += CC.Weight; + assert(Clusters[DstIndex - 1].Weight >= CC.Weight && "Weight overflow!"); } else { - DestBBs.push_back(Default); + std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex], + sizeof(Clusters[SrcIndex])); } } - - // Calculate weight for each unique destination in CR. - DenseMap<MachineBasicBlock*, uint32_t> DestWeights; - if (FuncInfo.BPI) - for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { - DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr = - DestWeights.find(I->BB); - if (Itr != DestWeights.end()) - Itr->second += I->ExtraWeight; - else - DestWeights[I->BB] = I->ExtraWeight; - } - - // Update successor info. Add one edge to each unique successor. - BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs()); - for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(), - E = DestBBs.end(); I != E; ++I) { - if (!SuccsHandled[(*I)->getNumber()]) { - SuccsHandled[(*I)->getNumber()] = true; - DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr = - DestWeights.find(*I); - addSuccessorWithWeight(JumpTableBB, *I, - Itr != DestWeights.end() ? Itr->second : 0); - } - } - - // Create a jump table index for this jump table. - unsigned JTEncoding = TLI.getJumpTableEncoding(); - unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding) - ->createJumpTableIndex(DestBBs); - - // Set the jump table information so that we can codegen it as a second - // MachineBasicBlock - JumpTable JT(-1U, JTI, JumpTableBB, Default); - JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == SwitchBB)); - if (CR.CaseBB == SwitchBB) - visitJumpTableHeader(JT, JTH, SwitchBB); - - JTCases.push_back(JumpTableBlock(JTH, JT)); - return true; -} - -/// handleBTSplitSwitchCase - emit comparison and split binary search tree into -/// 2 subtrees. -bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, - CaseRecVector& WorkList, - const Value* SV, - MachineBasicBlock* SwitchBB) { - // Get the MachineFunction which holds the current MBB. This is used when - // inserting any additional MBBs necessary to represent the switch. - MachineFunction *CurMF = FuncInfo.MF; - - // Figure out which block is immediately after the current one. - MachineFunction::iterator BBI = CR.CaseBB; - ++BBI; - - Case& FrontCase = *CR.Range.first; - Case& BackCase = *(CR.Range.second-1); - const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); - - // Size is the number of Cases represented by this range. - unsigned Size = CR.Range.second - CR.Range.first; - - const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue(); - const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue(); - double FMetric = 0; - CaseItr Pivot = CR.Range.first + Size/2; - - // Select optimal pivot, maximizing sum density of LHS and RHS. This will - // (heuristically) allow us to emit JumpTable's later. - APInt TSize(First.getBitWidth(), 0); - for (CaseItr I = CR.Range.first, E = CR.Range.second; - I!=E; ++I) - TSize += I->size(); - - APInt LSize = FrontCase.size(); - APInt RSize = TSize-LSize; - DEBUG(dbgs() << "Selecting best pivot: \n" - << "First: " << First << ", Last: " << Last <<'\n' - << "LSize: " << LSize << ", RSize: " << RSize << '\n'); - for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second; - J!=E; ++I, ++J) { - const APInt &LEnd = cast<ConstantInt>(I->High)->getValue(); - const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue(); - APInt Range = ComputeRange(LEnd, RBegin); - assert((Range - 2ULL).isNonNegative() && - "Invalid case distance"); - // Use volatile double here to avoid excess precision issues on some hosts, - // e.g. that use 80-bit X87 registers. - volatile double LDensity = - (double)LSize.roundToDouble() / - (LEnd - First + 1ULL).roundToDouble(); - volatile double RDensity = - (double)RSize.roundToDouble() / - (Last - RBegin + 1ULL).roundToDouble(); - volatile double Metric = Range.logBase2()*(LDensity+RDensity); - // Should always split in some non-trivial place - DEBUG(dbgs() <<"=>Step\n" - << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n' - << "LDensity: " << LDensity - << ", RDensity: " << RDensity << '\n' - << "Metric: " << Metric << '\n'); - if (FMetric < Metric) { - Pivot = J; - FMetric = Metric; - DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n'); - } - - LSize += J->size(); - RSize -= J->size(); - } - - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (areJTsAllowed(TLI)) { - // If our case is dense we *really* should handle it earlier! - assert((FMetric > 0) && "Should handle dense range earlier!"); - } else { - Pivot = CR.Range.first + Size/2; - } - - CaseRange LHSR(CR.Range.first, Pivot); - CaseRange RHSR(Pivot, CR.Range.second); - const Constant *C = Pivot->Low; - MachineBasicBlock *FalseBB = nullptr, *TrueBB = nullptr; - - // We know that we branch to the LHS if the Value being switched on is - // less than the Pivot value, C. We use this to optimize our binary - // tree a bit, by recognizing that if SV is greater than or equal to the - // LHS's Case Value, and that Case Value is exactly one less than the - // Pivot's Value, then we can branch directly to the LHS's Target, - // rather than creating a leaf node for it. - if ((LHSR.second - LHSR.first) == 1 && - LHSR.first->High == CR.GE && - cast<ConstantInt>(C)->getValue() == - (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) { - TrueBB = LHSR.first->BB; - } else { - TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB); - CurMF->insert(BBI, TrueBB); - WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR)); - - // Put SV in a virtual register to make it available from the new blocks. - ExportFromCurrentBlock(SV); - } - - // Similar to the optimization above, if the Value being switched on is - // known to be less than the Constant CR.LT, and the current Case Value - // is CR.LT - 1, then we can branch directly to the target block for - // the current Case Value, rather than emitting a RHS leaf node for it. - if ((RHSR.second - RHSR.first) == 1 && CR.LT && - cast<ConstantInt>(RHSR.first->Low)->getValue() == - (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) { - FalseBB = RHSR.first->BB; - } else { - FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB); - CurMF->insert(BBI, FalseBB); - WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR)); - - // Put SV in a virtual register to make it available from the new blocks. - ExportFromCurrentBlock(SV); - } - - // Create a CaseBlock record representing a conditional branch to - // the LHS node if the value being switched on SV is less than C. - // Otherwise, branch to LHS. - CaseBlock CB(ISD::SETLT, SV, C, nullptr, TrueBB, FalseBB, CR.CaseBB); - - if (CR.CaseBB == SwitchBB) - visitSwitchCase(CB, SwitchBB); - else - SwitchCases.push_back(CB); - - return true; -} - -/// handleBitTestsSwitchCase - if current case range has few destination and -/// range span less, than machine word bitwidth, encode case range into series -/// of masks and emit bit tests with these masks. -bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, - CaseRecVector& WorkList, - const Value* SV, - MachineBasicBlock* Default, - MachineBasicBlock* SwitchBB) { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT PTy = TLI.getPointerTy(); - unsigned IntPtrBits = PTy.getSizeInBits(); - - Case& FrontCase = *CR.Range.first; - Case& BackCase = *(CR.Range.second-1); - - // Get the MachineFunction which holds the current MBB. This is used when - // inserting any additional MBBs necessary to represent the switch. - MachineFunction *CurMF = FuncInfo.MF; - - // If target does not have legal shift left, do not emit bit tests at all. - if (!TLI.isOperationLegal(ISD::SHL, PTy)) - return false; - - size_t numCmps = 0; - for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { - // Single case counts one, case range - two. - numCmps += (I->Low == I->High ? 1 : 2); - } - - // Count unique destinations - SmallSet<MachineBasicBlock*, 4> Dests; - for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { - Dests.insert(I->BB); - if (Dests.size() > 3) - // Don't bother the code below, if there are too much unique destinations - return false; - } - DEBUG(dbgs() << "Total number of unique destinations: " - << Dests.size() << '\n' - << "Total number of comparisons: " << numCmps << '\n'); - - // Compute span of values. - const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue(); - const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue(); - APInt cmpRange = maxValue - minValue; - - DEBUG(dbgs() << "Compare range: " << cmpRange << '\n' - << "Low bound: " << minValue << '\n' - << "High bound: " << maxValue << '\n'); - - if (cmpRange.uge(IntPtrBits) || - (!(Dests.size() == 1 && numCmps >= 3) && - !(Dests.size() == 2 && numCmps >= 5) && - !(Dests.size() >= 3 && numCmps >= 6))) - return false; - - DEBUG(dbgs() << "Emitting bit tests\n"); - APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth()); - - // Optimize the case where all the case values fit in a - // word without having to subtract minValue. In this case, - // we can optimize away the subtraction. - if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) { - cmpRange = maxValue; - } else { - lowBound = minValue; - } - - CaseBitsVector CasesBits; - unsigned i, count = 0; - - for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) { - MachineBasicBlock* Dest = I->BB; - for (i = 0; i < count; ++i) - if (Dest == CasesBits[i].BB) - break; - - if (i == count) { - assert((count < 3) && "Too much destinations to test!"); - CasesBits.push_back(CaseBits(0, Dest, 0, 0/*Weight*/)); - count++; - } - - const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue(); - const APInt& highValue = cast<ConstantInt>(I->High)->getValue(); - - uint64_t lo = (lowValue - lowBound).getZExtValue(); - uint64_t hi = (highValue - lowBound).getZExtValue(); - CasesBits[i].ExtraWeight += I->ExtraWeight; - - for (uint64_t j = lo; j <= hi; j++) { - CasesBits[i].Mask |= 1ULL << j; - CasesBits[i].Bits++; - } - - } - std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp()); - - BitTestInfo BTC; - - // Figure out which block is immediately after the current one. - MachineFunction::iterator BBI = CR.CaseBB; - ++BBI; - - const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); - - DEBUG(dbgs() << "Cases:\n"); - for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) { - DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask - << ", Bits: " << CasesBits[i].Bits - << ", BB: " << CasesBits[i].BB << '\n'); - - MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB); - CurMF->insert(BBI, CaseBB); - BTC.push_back(BitTestCase(CasesBits[i].Mask, - CaseBB, - CasesBits[i].BB, CasesBits[i].ExtraWeight)); - - // Put SV in a virtual register to make it available from the new blocks. - ExportFromCurrentBlock(SV); - } - - BitTestBlock BTB(lowBound, cmpRange, SV, - -1U, MVT::Other, (CR.CaseBB == SwitchBB), - CR.CaseBB, Default, std::move(BTC)); - - if (CR.CaseBB == SwitchBB) - visitBitTestHeader(BTB, SwitchBB); - - BitTestCases.push_back(std::move(BTB)); - - return true; -} - -/// Clusterify - Transform simple list of Cases into list of CaseRange's -void SelectionDAGBuilder::Clusterify(CaseVector& Cases, - const SwitchInst& SI) { - BranchProbabilityInfo *BPI = FuncInfo.BPI; - // Start with "simple" cases. - for (SwitchInst::ConstCaseIt i : SI.cases()) { - const BasicBlock *SuccBB = i.getCaseSuccessor(); - MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; - - uint32_t ExtraWeight = - BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0; - - Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(), - SMBB, ExtraWeight)); - } - std::sort(Cases.begin(), Cases.end(), CaseCmp()); - - // Merge case into clusters - if (Cases.size() >= 2) - // Must recompute end() each iteration because it may be - // invalidated by erase if we hold on to it - for (CaseItr I = Cases.begin(), J = std::next(Cases.begin()); - J != Cases.end(); ) { - const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue(); - const APInt& currentValue = cast<ConstantInt>(I->High)->getValue(); - MachineBasicBlock* nextBB = J->BB; - MachineBasicBlock* currentBB = I->BB; - - // If the two neighboring cases go to the same destination, merge them - // into a single case. - if ((nextValue - currentValue == 1) && (currentBB == nextBB)) { - I->High = J->High; - I->ExtraWeight += J->ExtraWeight; - J = Cases.erase(J); - } else { - I = J++; - } - } - - DEBUG({ - size_t numCmps = 0; - for (auto &I : Cases) - // A range counts double, since it requires two compares. - numCmps += I.Low != I.High ? 2 : 1; - - dbgs() << "Clusterify finished. Total clusters: " << Cases.size() - << ". Total compares: " << numCmps << '\n'; - }); + Clusters.resize(DstIndex); } void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, @@ -2696,96 +2093,6 @@ void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, BitTestCases[i].Parent = Last; } -void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { - MachineBasicBlock *SwitchMBB = FuncInfo.MBB; - - // Figure out which block is immediately after the current one. - MachineBasicBlock *NextBlock = nullptr; - if (SwitchMBB + 1 != FuncInfo.MF->end()) - NextBlock = SwitchMBB + 1; - - - // Create a vector of Cases, sorted so that we can efficiently create a binary - // search tree from them. - CaseVector Cases; - Clusterify(Cases, SI); - - // Get the default destination MBB. - MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()]; - - if (isa<UnreachableInst>(SI.getDefaultDest()->getFirstNonPHIOrDbg()) && - !Cases.empty()) { - // Replace an unreachable default destination with the most popular case - // destination. - DenseMap<const BasicBlock *, unsigned> Popularity; - unsigned MaxPop = 0; - const BasicBlock *MaxBB = nullptr; - for (auto I : SI.cases()) { - const BasicBlock *BB = I.getCaseSuccessor(); - if (++Popularity[BB] > MaxPop) { - MaxPop = Popularity[BB]; - MaxBB = BB; - } - } - - // Set new default. - assert(MaxPop > 0); - assert(MaxBB); - Default = FuncInfo.MBBMap[MaxBB]; - - // Remove cases that were pointing to the destination that is now the default. - Cases.erase(std::remove_if(Cases.begin(), Cases.end(), - [&](const Case &C) { return C.BB == Default; }), - Cases.end()); - } - - // If there is only the default destination, go there directly. - if (Cases.empty()) { - // Update machine-CFG edges. - SwitchMBB->addSuccessor(Default); - - // If this is not a fall-through branch, emit the branch. - if (Default != NextBlock) { - DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, - getControlRoot(), DAG.getBasicBlock(Default))); - } - return; - } - - // Get the Value to be switched on. - const Value *SV = SI.getCondition(); - - // Push the initial CaseRec onto the worklist - CaseRecVector WorkList; - WorkList.push_back(CaseRec(SwitchMBB,nullptr,nullptr, - CaseRange(Cases.begin(),Cases.end()))); - - while (!WorkList.empty()) { - // Grab a record representing a case range to process off the worklist - CaseRec CR = WorkList.back(); - WorkList.pop_back(); - - if (handleBitTestsSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) - continue; - - // If the range has few cases (two or less) emit a series of specific - // tests. - if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB)) - continue; - - // If the switch has more than N blocks, and is at least 40% dense, and the - // target supports indirect branches, then emit a jump table rather than - // lowering the switch to a binary tree of conditional branches. - // N defaults to 4 and is controlled via TLS.getMinimumJumpTableEntries(). - if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) - continue; - - // Emit binary tree. We need to pick a pivot, and push left and right ranges - // onto the worklist. Leafs are handled via handleSmallSwitchRange() call. - handleBTSplitSwitchCase(CR, WorkList, SV, SwitchMBB); - } -} - void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB; @@ -2950,19 +2257,47 @@ void SelectionDAGBuilder::visitSelect(const User &I) { SmallVector<SDValue, 4> Values(NumValues); SDValue Cond = getValue(I.getOperand(0)); - SDValue TrueVal = getValue(I.getOperand(1)); - SDValue FalseVal = getValue(I.getOperand(2)); + SDValue LHSVal = getValue(I.getOperand(1)); + SDValue RHSVal = getValue(I.getOperand(2)); + auto BaseOps = {Cond}; ISD::NodeType OpCode = Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT; - for (unsigned i = 0; i != NumValues; ++i) + // Min/max matching is only viable if all output VTs are the same. + if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) { + Value *LHS, *RHS; + SelectPatternFlavor SPF = matchSelectPattern(const_cast<User*>(&I), LHS, RHS); + ISD::NodeType Opc = ISD::DELETED_NODE; + switch (SPF) { + case SPF_UMAX: Opc = ISD::UMAX; break; + case SPF_UMIN: Opc = ISD::UMIN; break; + case SPF_SMAX: Opc = ISD::SMAX; break; + case SPF_SMIN: Opc = ISD::SMIN; break; + default: break; + } + + EVT VT = ValueVTs[0]; + LLVMContext &Ctx = *DAG.getContext(); + auto &TLI = DAG.getTargetLoweringInfo(); + while (TLI.getTypeAction(Ctx, VT) == TargetLoweringBase::TypeSplitVector) + VT = TLI.getTypeToTransformTo(Ctx, VT); + + if (Opc != ISD::DELETED_NODE && TLI.isOperationLegalOrCustom(Opc, VT)) { + OpCode = Opc; + LHSVal = getValue(LHS); + RHSVal = getValue(RHS); + BaseOps = {}; + } + } + + for (unsigned i = 0; i != NumValues; ++i) { + SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end()); + Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i)); + Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i)); Values[i] = DAG.getNode(OpCode, getCurSDLoc(), - TrueVal.getNode()->getValueType(TrueVal.getResNo()+i), - Cond, - SDValue(TrueVal.getNode(), - TrueVal.getResNo() + i), - SDValue(FalseVal.getNode(), - FalseVal.getResNo() + i)); + LHSVal.getNode()->getValueType(LHSVal.getResNo()+i), + Ops); + } setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(ValueVTs), Values)); @@ -2994,10 +2329,11 @@ void SelectionDAGBuilder::visitSExt(const User &I) { void SelectionDAGBuilder::visitFPTrunc(const User &I) { // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); + SDLoc dl = getCurSDLoc(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurSDLoc(), DestVT, N, - DAG.getTargetConstant(0, TLI.getPointerTy()))); + setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N, + DAG.getTargetConstant(0, dl, TLI.getPointerTy()))); } void SelectionDAGBuilder::visitFPExt(const User &I) { @@ -3053,19 +2389,20 @@ void SelectionDAGBuilder::visitIntToPtr(const User &I) { void SelectionDAGBuilder::visitBitCast(const User &I) { SDValue N = getValue(I.getOperand(0)); + SDLoc dl = getCurSDLoc(); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); // BitCast assures us that source and destination are the same size so this is // either a BITCAST or a no-op. if (DestVT != N.getValueType()) - setValue(&I, DAG.getNode(ISD::BITCAST, getCurSDLoc(), + setValue(&I, DAG.getNode(ISD::BITCAST, dl, DestVT, N)); // convert types. // Check if the original LLVM IR Operand was a ConstantInt, because getValue() // might fold any kind of constant expression to an integer constant and that // is not what we are looking for. Only regcognize a bitcast of a genuine // constant integer as an opaque constant. else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0))) - setValue(&I, DAG.getConstant(C->getValue(), DestVT, /*isTarget=*/false, + setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false, /*isOpaque*/true)); else setValue(&I, N); // noop cast. @@ -3243,10 +2580,12 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDValue &Src = Input == 0 ? Src1 : Src2; if (RangeUse[Input] == 0) Src = DAG.getUNDEF(VT); - else + else { + SDLoc dl = getCurSDLoc(); Src = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, getCurSDLoc(), VT, Src, - DAG.getConstant(StartIdx[Input], TLI.getVectorIdxTy())); + ISD::EXTRACT_SUBVECTOR, dl, VT, Src, + DAG.getConstant(StartIdx[Input], dl, TLI.getVectorIdxTy())); + } } // Calculate new mask. @@ -3273,6 +2612,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // to insert and build vector. EVT EltVT = VT.getVectorElementType(); EVT IdxVT = TLI.getVectorIdxTy(); + SDLoc dl = getCurSDLoc(); SmallVector<SDValue,8> Ops; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; @@ -3284,14 +2624,14 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2; if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; - Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), - EltVT, Src, DAG.getConstant(Idx, IdxVT)); + Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + EltVT, Src, DAG.getConstant(Idx, dl, IdxVT)); } Ops.push_back(Res); } - setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops)); + setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops)); } void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { @@ -3383,6 +2723,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { Type *Ty = Op0->getType()->getScalarType(); unsigned AS = Ty->getPointerAddressSpace(); SDValue N = getValue(Op0); + SDLoc dl = getCurSDLoc(); for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); OI != E; ++OI) { @@ -3392,8 +2733,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (Field) { // N = N + Offset uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field); - N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, - DAG.getConstant(Offset, N.getValueType())); + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, + DAG.getConstant(Offset, dl, N.getValueType())); } Ty = StTy->getElementType(Field); @@ -3408,8 +2749,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (CI->isZero()) continue; APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize); - SDValue OffsVal = DAG.getConstant(Offs, PtrTy); - N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, OffsVal); + SDValue OffsVal = DAG.getConstant(Offs, dl, PtrTy); + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal); continue; } @@ -3418,24 +2759,24 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // If the index is smaller or larger than intptr_t, truncate or extend // it. - IdxN = DAG.getSExtOrTrunc(IdxN, getCurSDLoc(), N.getValueType()); + IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType()); // If this is a multiply by a power of two, turn it into a shl // immediately. This is a very common case. if (ElementSize != 1) { if (ElementSize.isPowerOf2()) { unsigned Amt = ElementSize.logBase2(); - IdxN = DAG.getNode(ISD::SHL, getCurSDLoc(), + IdxN = DAG.getNode(ISD::SHL, dl, N.getValueType(), IdxN, - DAG.getConstant(Amt, IdxN.getValueType())); + DAG.getConstant(Amt, dl, IdxN.getValueType())); } else { - SDValue Scale = DAG.getConstant(ElementSize, IdxN.getValueType()); - IdxN = DAG.getNode(ISD::MUL, getCurSDLoc(), + SDValue Scale = DAG.getConstant(ElementSize, dl, IdxN.getValueType()); + IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, Scale); } } - N = DAG.getNode(ISD::ADD, getCurSDLoc(), + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, IdxN); } } @@ -3449,6 +2790,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { if (FuncInfo.StaticAllocaMap.count(&I)) return; // getValue will auto-populate this. + SDLoc dl = getCurSDLoc(); Type *Ty = I.getAllocatedType(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); @@ -3460,11 +2802,11 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { EVT IntPtr = TLI.getPointerTy(); if (AllocSize.getValueType() != IntPtr) - AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurSDLoc(), IntPtr); + AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr); - AllocSize = DAG.getNode(ISD::MUL, getCurSDLoc(), IntPtr, + AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize, - DAG.getConstant(TySize, IntPtr)); + DAG.getConstant(TySize, dl, IntPtr)); // Handle alignment. If the requested alignment is less than or equal to // the stack alignment, ignore it. If the size is greater than or equal to @@ -3476,18 +2818,19 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // Round the size of the allocation up to the stack alignment size // by add SA-1 to the size. - AllocSize = DAG.getNode(ISD::ADD, getCurSDLoc(), + AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize, - DAG.getIntPtrConstant(StackAlign-1)); + DAG.getIntPtrConstant(StackAlign - 1, dl)); // Mask out the low bits for alignment purposes. - AllocSize = DAG.getNode(ISD::AND, getCurSDLoc(), + AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize, - DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1))); + DAG.getIntPtrConstant(~(uint64_t)(StackAlign - 1), + dl)); - SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) }; + SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align, dl) }; SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); - SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurSDLoc(), VTs, Ops); + SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops); setValue(&I, DSA); DAG.setRoot(DSA.getValue(1)); @@ -3535,8 +2878,10 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Root = DAG.getRoot(); } + SDLoc dl = getCurSDLoc(); + if (isVolatile) - Root = TLI.prepareVolatileOrAtomicLoad(Root, getCurSDLoc(), DAG); + Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG); SmallVector<SDValue, 4> Values(NumValues); SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains), @@ -3552,15 +2897,15 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { // (MaxParallelChains should always remain as failsafe). if (ChainI == MaxParallelChains) { assert(PendingLoads.empty() && "PendingLoads must be serialized first"); - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(Chains.data(), ChainI)); Root = Chain; ChainI = 0; } - SDValue A = DAG.getNode(ISD::ADD, getCurSDLoc(), + SDValue A = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, - DAG.getConstant(Offsets[i], PtrVT)); - SDValue L = DAG.getLoad(ValueVTs[i], getCurSDLoc(), Root, + DAG.getConstant(Offsets[i], dl, PtrVT)); + SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A, MachinePointerInfo(SV, Offsets[i]), isVolatile, isNonTemporal, isInvariant, Alignment, AAInfo, Ranges); @@ -3570,7 +2915,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { } if (!ConstantMemory) { - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(Chains.data(), ChainI)); if (isVolatile) DAG.setRoot(Chain); @@ -3578,7 +2923,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { PendingLoads.push_back(Chain); } - setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values)); } @@ -3610,6 +2955,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; unsigned Alignment = I.getAlignment(); + SDLoc dl = getCurSDLoc(); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); @@ -3618,21 +2964,21 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { // See visitLoad comments. if (ChainI == MaxParallelChains) { - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(Chains.data(), ChainI)); Root = Chain; ChainI = 0; } - SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), PtrVT, Ptr, - DAG.getConstant(Offsets[i], PtrVT)); - SDValue St = DAG.getStore(Root, getCurSDLoc(), + SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, + DAG.getConstant(Offsets[i], dl, PtrVT)); + SDValue St = DAG.getStore(Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add, MachinePointerInfo(PtrV, Offsets[i]), isVolatile, isNonTemporal, Alignment, AAInfo); Chains[ChainI] = St; } - SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(Chains.data(), ChainI)); DAG.setRoot(StoreNode); } @@ -3664,6 +3010,94 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { setValue(&I, StoreNode); } +// Gather/scatter receive a vector of pointers. +// This vector of pointers may be represented as a base pointer + vector of +// indices, it depends on GEP and instruction preceeding GEP +// that calculates indices +static bool getUniformBase(Value *& Ptr, SDValue& Base, SDValue& Index, + SelectionDAGBuilder* SDB) { + + assert (Ptr->getType()->isVectorTy() && "Uexpected pointer type"); + GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr); + if (!Gep || Gep->getNumOperands() > 2) + return false; + ShuffleVectorInst *ShuffleInst = + dyn_cast<ShuffleVectorInst>(Gep->getPointerOperand()); + if (!ShuffleInst || !ShuffleInst->getMask()->isNullValue() || + cast<Instruction>(ShuffleInst->getOperand(0))->getOpcode() != + Instruction::InsertElement) + return false; + + Ptr = cast<InsertElementInst>(ShuffleInst->getOperand(0))->getOperand(1); + + SelectionDAG& DAG = SDB->DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + // Check is the Ptr is inside current basic block + // If not, look for the shuffle instruction + if (SDB->findValue(Ptr)) + Base = SDB->getValue(Ptr); + else if (SDB->findValue(ShuffleInst)) { + SDValue ShuffleNode = SDB->getValue(ShuffleInst); + SDLoc sdl = ShuffleNode; + Base = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, sdl, + ShuffleNode.getValueType().getScalarType(), ShuffleNode, + DAG.getConstant(0, sdl, TLI.getVectorIdxTy())); + SDB->setValue(Ptr, Base); + } + else + return false; + + Value *IndexVal = Gep->getOperand(1); + if (SDB->findValue(IndexVal)) { + Index = SDB->getValue(IndexVal); + + if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) { + IndexVal = Sext->getOperand(0); + if (SDB->findValue(IndexVal)) + Index = SDB->getValue(IndexVal); + } + return true; + } + return false; +} + +void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { + SDLoc sdl = getCurSDLoc(); + + // llvm.masked.scatter.*(Src0, Ptrs, alignemt, Mask) + Value *Ptr = I.getArgOperand(1); + SDValue Src0 = getValue(I.getArgOperand(0)); + SDValue Mask = getValue(I.getArgOperand(3)); + EVT VT = Src0.getValueType(); + unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue(); + if (!Alignment) + Alignment = DAG.getEVTAlignment(VT); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + + SDValue Base; + SDValue Index; + Value *BasePtr = Ptr; + bool UniformBase = getUniformBase(BasePtr, Base, Index, this); + + Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr; + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MachinePointerInfo(MemOpBasePtr), + MachineMemOperand::MOStore, VT.getStoreSize(), + Alignment, AAInfo); + if (!UniformBase) { + Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy()); + Index = getValue(Ptr); + } + SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index }; + SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl, + Ops, MMO); + DAG.setRoot(Scatter); + setValue(&I, Scatter); +} + void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { SDLoc sdl = getCurSDLoc(); @@ -3705,6 +3139,59 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { setValue(&I, Load); } +void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { + SDLoc sdl = getCurSDLoc(); + + // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0) + Value *Ptr = I.getArgOperand(0); + SDValue Src0 = getValue(I.getArgOperand(3)); + SDValue Mask = getValue(I.getArgOperand(2)); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(I.getType()); + unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue(); + if (!Alignment) + Alignment = DAG.getEVTAlignment(VT); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); + + SDValue Root = DAG.getRoot(); + SDValue Base; + SDValue Index; + Value *BasePtr = Ptr; + bool UniformBase = getUniformBase(BasePtr, Base, Index, this); + bool ConstantMemory = false; + if (UniformBase && AA->pointsToConstantMemory( + AliasAnalysis::Location(BasePtr, + AA->getTypeStoreSize(I.getType()), + AAInfo))) { + // Do not serialize (non-volatile) loads of constant memory with anything. + Root = DAG.getEntryNode(); + ConstantMemory = true; + } + + MachineMemOperand *MMO = + DAG.getMachineFunction(). + getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr), + MachineMemOperand::MOLoad, VT.getStoreSize(), + Alignment, AAInfo, Ranges); + + if (!UniformBase) { + Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy()); + Index = getValue(Ptr); + } + SDValue Ops[] = { Root, Src0, Mask, Base, Index }; + SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl, + Ops, MMO); + + SDValue OutChain = Gather.getValue(1); + if (!ConstantMemory) + PendingLoads.push_back(OutChain); + setValue(&I, Gather); +} + void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering SuccessOrder = I.getSuccessOrdering(); @@ -3769,8 +3256,8 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Ops[3]; Ops[0] = getRoot(); - Ops[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy()); - Ops[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy()); + Ops[1] = DAG.getConstant(I.getOrdering(), dl, TLI.getPointerTy()); + Ops[2] = DAG.getConstant(I.getSynchScope(), dl, TLI.getPointerTy()); DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); } @@ -3859,7 +3346,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Add the intrinsic ID as an integer operand if it's not a target intrinsic. if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || Info.opc == ISD::INTRINSIC_W_CHAIN) - Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy())); + Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(), + TLI.getPointerTy())); // Add all operands of the call to the operand list. for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { @@ -3919,9 +3407,9 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, SDLoc dl) { SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, - DAG.getConstant(0x007fffff, MVT::i32)); + DAG.getConstant(0x007fffff, dl, MVT::i32)); SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1, - DAG.getConstant(0x3f800000, MVT::i32)); + DAG.getConstant(0x3f800000, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2); } @@ -3934,21 +3422,108 @@ static SDValue GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, SDLoc dl) { SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, - DAG.getConstant(0x7f800000, MVT::i32)); + DAG.getConstant(0x7f800000, dl, MVT::i32)); SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0, - DAG.getConstant(23, TLI.getPointerTy())); + DAG.getConstant(23, dl, TLI.getPointerTy())); SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1, - DAG.getConstant(127, MVT::i32)); + DAG.getConstant(127, dl, MVT::i32)); return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2); } /// getF32Constant - Get 32-bit floating point constant. static SDValue -getF32Constant(SelectionDAG &DAG, unsigned Flt) { - return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), +getF32Constant(SelectionDAG &DAG, unsigned Flt, SDLoc dl) { + return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), dl, MVT::f32); } +static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl, + SelectionDAG &DAG) { + // IntegerPartOfX = ((int32_t)(t0); + SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); + + // FractionalPartOfX = t0 - (float)IntegerPartOfX; + SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); + SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); + + // IntegerPartOfX <<= 23; + IntegerPartOfX = DAG.getNode( + ISD::SHL, dl, MVT::i32, IntegerPartOfX, + DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy())); + + SDValue TwoToFractionalPartOfX; + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // TwoToFractionalPartOfX = + // 0.997535578f + + // (0.735607626f + 0.252464424f * x) * x; + // + // error 0.0144103317, which is 6 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3e814304, dl)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f3c50c8, dl)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f7f5e7e, dl)); + } else if (LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // TwoToFractionalPartOfX = + // 0.999892986f + + // (0.696457318f + + // (0.224338339f + 0.792043434e-1f * x) * x) * x; + // + // error 0.000107046256, which is 13 to 14 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3da235e3, dl)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3e65b8f3, dl)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f324b07, dl)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3f7ff8fd, dl)); + } else { // LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // TwoToFractionalPartOfX = + // 0.999999982f + + // (0.693148872f + + // (0.240227044f + + // (0.554906021e-1f + + // (0.961591928e-2f + + // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; + // error 2.47208000*10^(-7), which is better than 18 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3924b03e, dl)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3ab24b87, dl)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3c1d8c17, dl)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3d634a1d, dl)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3e75fe14, dl)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, + getF32Constant(DAG, 0x3f317234, dl)); + SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); + TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, + getF32Constant(DAG, 0x3f800000, dl)); + } + + // Add the exponent into the result in integer domain. + SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX); + return DAG.getNode(ISD::BITCAST, dl, MVT::f32, + DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX)); +} + /// expandExp - Lower an exp intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, @@ -3960,92 +3535,10 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, // final result: // // #define LOG2OFe 1.4426950f - // IntegerPartOfX = ((int32_t)(X * LOG2OFe)); + // t0 = Op * LOG2OFe SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, - getF32Constant(DAG, 0x3fb8aa3b)); - SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); - - // FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX; - SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); - SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); - - // IntegerPartOfX <<= 23; - IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, - DAG.getConstant(23, TLI.getPointerTy())); - - SDValue TwoToFracPartOfX; - if (LimitFloatPrecision <= 6) { - // For floating-point precision of 6: - // - // TwoToFractionalPartOfX = - // 0.997535578f + - // (0.735607626f + 0.252464424f * x) * x; - // - // error 0.0144103317, which is 6 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3e814304)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f3c50c8)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f7f5e7e)); - } else if (LimitFloatPrecision <= 12) { - // For floating-point precision of 12: - // - // TwoToFractionalPartOfX = - // 0.999892986f + - // (0.696457318f + - // (0.224338339f + 0.792043434e-1f * x) * x) * x; - // - // 0.000107046256 error, which is 13 to 14 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3da235e3)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3e65b8f3)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f324b07)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3f7ff8fd)); - } else { // LimitFloatPrecision <= 18 - // For floating-point precision of 18: - // - // TwoToFractionalPartOfX = - // 0.999999982f + - // (0.693148872f + - // (0.240227044f + - // (0.554906021e-1f + - // (0.961591928e-2f + - // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; - // - // error 2.47208000*10^(-7), which is better than 18 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3924b03e)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3ab24b87)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3c1d8c17)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3d634a1d)); - SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); - SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, - getF32Constant(DAG, 0x3e75fe14)); - SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); - SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, - getF32Constant(DAG, 0x3f317234)); - SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); - TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, - getF32Constant(DAG, 0x3f800000)); - } - - // Add the exponent into the result in integer domain. - SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFracPartOfX); - return DAG.getNode(ISD::BITCAST, dl, MVT::f32, - DAG.getNode(ISD::ADD, dl, MVT::i32, - t13, IntegerPartOfX)); + getF32Constant(DAG, 0x3fb8aa3b, dl)); + return getLimitedPrecisionExp2(t0, dl, DAG); } // No special expansion. @@ -4063,7 +3556,7 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, // Scale the exponent by log(2) [0.69314718f]. SDValue Exp = GetExponent(DAG, Op1, TLI, dl); SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, - getF32Constant(DAG, 0x3f317218)); + getF32Constant(DAG, 0x3f317218, dl)); // Get the significand and build it into a floating-point number with // exponent of 1. @@ -4079,12 +3572,12 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0034276066, which is better than 8 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbe74c456)); + getF32Constant(DAG, 0xbe74c456, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3fb3a2b1)); + getF32Constant(DAG, 0x3fb3a2b1, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f949a29)); + getF32Constant(DAG, 0x3f949a29, dl)); } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -4096,18 +3589,18 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.000061011436, which is 14 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbd67b6d6)); + getF32Constant(DAG, 0xbd67b6d6, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3ee4f4b8)); + getF32Constant(DAG, 0x3ee4f4b8, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3fbc278b)); + getF32Constant(DAG, 0x3fbc278b, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x40348e95)); + getF32Constant(DAG, 0x40348e95, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3fdef31a)); + getF32Constant(DAG, 0x3fdef31a, dl)); } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -4121,24 +3614,24 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0000023660568, which is better than 18 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbc91e5ac)); + getF32Constant(DAG, 0xbc91e5ac, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3e4350aa)); + getF32Constant(DAG, 0x3e4350aa, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f60d3e3)); + getF32Constant(DAG, 0x3f60d3e3, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x4011cdf0)); + getF32Constant(DAG, 0x4011cdf0, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, - getF32Constant(DAG, 0x406cfd1c)); + getF32Constant(DAG, 0x406cfd1c, dl)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, - getF32Constant(DAG, 0x408797cb)); + getF32Constant(DAG, 0x408797cb, dl)); SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, - getF32Constant(DAG, 0x4006dcab)); + getF32Constant(DAG, 0x4006dcab, dl)); } return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa); @@ -4173,12 +3666,12 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0049451742, which is more than 7 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbeb08fe0)); + getF32Constant(DAG, 0xbeb08fe0, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x40019463)); + getF32Constant(DAG, 0x40019463, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3fd6633d)); + getF32Constant(DAG, 0x3fd6633d, dl)); } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -4190,18 +3683,18 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0000876136000, which is better than 13 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbda7262e)); + getF32Constant(DAG, 0xbda7262e, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3f25280b)); + getF32Constant(DAG, 0x3f25280b, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x4007b923)); + getF32Constant(DAG, 0x4007b923, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x40823e2f)); + getF32Constant(DAG, 0x40823e2f, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, - getF32Constant(DAG, 0x4020d29c)); + getF32Constant(DAG, 0x4020d29c, dl)); } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -4216,24 +3709,24 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0000018516, which is better than 18 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbcd2769e)); + getF32Constant(DAG, 0xbcd2769e, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3e8ce0b9)); + getF32Constant(DAG, 0x3e8ce0b9, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3fa22ae7)); + getF32Constant(DAG, 0x3fa22ae7, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x40525723)); + getF32Constant(DAG, 0x40525723, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, - getF32Constant(DAG, 0x40aaf200)); + getF32Constant(DAG, 0x40aaf200, dl)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, - getF32Constant(DAG, 0x40c39dad)); + getF32Constant(DAG, 0x40c39dad, dl)); SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, - getF32Constant(DAG, 0x4042902c)); + getF32Constant(DAG, 0x4042902c, dl)); } return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa); @@ -4254,7 +3747,7 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, // Scale the exponent by log10(2) [0.30102999f]. SDValue Exp = GetExponent(DAG, Op1, TLI, dl); SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, - getF32Constant(DAG, 0x3e9a209a)); + getF32Constant(DAG, 0x3e9a209a, dl)); // Get the significand and build it into a floating-point number with // exponent of 1. @@ -4270,12 +3763,12 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0014886165, which is 6 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbdd49a13)); + getF32Constant(DAG, 0xbdd49a13, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3f1c0789)); + getF32Constant(DAG, 0x3f1c0789, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f011300)); + getF32Constant(DAG, 0x3f011300, dl)); } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -4286,15 +3779,15 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.00019228036, which is better than 12 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3d431f31)); + getF32Constant(DAG, 0x3d431f31, dl)); SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3ea21fb2)); + getF32Constant(DAG, 0x3ea21fb2, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f6ae232)); + getF32Constant(DAG, 0x3f6ae232, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f25f7c3)); + getF32Constant(DAG, 0x3f25f7c3, dl)); } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -4307,21 +3800,21 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0000037995730, which is better than 18 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3c5d51ce)); + getF32Constant(DAG, 0x3c5d51ce, dl)); SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3e00685a)); + getF32Constant(DAG, 0x3e00685a, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3efb6798)); + getF32Constant(DAG, 0x3efb6798, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f88d192)); + getF32Constant(DAG, 0x3f88d192, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3fc4316c)); + getF32Constant(DAG, 0x3fc4316c, dl)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8, - getF32Constant(DAG, 0x3f57ce70)); + getF32Constant(DAG, 0x3f57ce70, dl)); } return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa); @@ -4336,91 +3829,8 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { if (Op.getValueType() == MVT::f32 && - LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op); - - // FractionalPartOfX = x - (float)IntegerPartOfX; - SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); - SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1); - - // IntegerPartOfX <<= 23; - IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, - DAG.getConstant(23, TLI.getPointerTy())); - - SDValue TwoToFractionalPartOfX; - if (LimitFloatPrecision <= 6) { - // For floating-point precision of 6: - // - // TwoToFractionalPartOfX = - // 0.997535578f + - // (0.735607626f + 0.252464424f * x) * x; - // - // error 0.0144103317, which is 6 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3e814304)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f3c50c8)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f7f5e7e)); - } else if (LimitFloatPrecision <= 12) { - // For floating-point precision of 12: - // - // TwoToFractionalPartOfX = - // 0.999892986f + - // (0.696457318f + - // (0.224338339f + 0.792043434e-1f * x) * x) * x; - // - // error 0.000107046256, which is 13 to 14 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3da235e3)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3e65b8f3)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f324b07)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3f7ff8fd)); - } else { // LimitFloatPrecision <= 18 - // For floating-point precision of 18: - // - // TwoToFractionalPartOfX = - // 0.999999982f + - // (0.693148872f + - // (0.240227044f + - // (0.554906021e-1f + - // (0.961591928e-2f + - // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; - // error 2.47208000*10^(-7), which is better than 18 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3924b03e)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3ab24b87)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3c1d8c17)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3d634a1d)); - SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); - SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, - getF32Constant(DAG, 0x3e75fe14)); - SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); - SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, - getF32Constant(DAG, 0x3f317234)); - SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); - TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, - getF32Constant(DAG, 0x3f800000)); - } - - // Add the exponent into the result in integer domain. - SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, - TwoToFractionalPartOfX); - return DAG.getNode(ISD::BITCAST, dl, MVT::f32, - DAG.getNode(ISD::ADD, dl, MVT::i32, - t13, IntegerPartOfX)); - } + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) + return getLimitedPrecisionExp2(Op, dl, DAG); // No special expansion. return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op); @@ -4444,90 +3854,10 @@ static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS, // final result: // // #define LOG2OF10 3.3219281f - // IntegerPartOfX = (int32_t)(x * LOG2OF10); + // t0 = Op * LOG2OF10; SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS, - getF32Constant(DAG, 0x40549a78)); - SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); - - // FractionalPartOfX = x - (float)IntegerPartOfX; - SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); - SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); - - // IntegerPartOfX <<= 23; - IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, - DAG.getConstant(23, TLI.getPointerTy())); - - SDValue TwoToFractionalPartOfX; - if (LimitFloatPrecision <= 6) { - // For floating-point precision of 6: - // - // twoToFractionalPartOfX = - // 0.997535578f + - // (0.735607626f + 0.252464424f * x) * x; - // - // error 0.0144103317, which is 6 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3e814304)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f3c50c8)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f7f5e7e)); - } else if (LimitFloatPrecision <= 12) { - // For floating-point precision of 12: - // - // TwoToFractionalPartOfX = - // 0.999892986f + - // (0.696457318f + - // (0.224338339f + 0.792043434e-1f * x) * x) * x; - // - // error 0.000107046256, which is 13 to 14 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3da235e3)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3e65b8f3)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f324b07)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3f7ff8fd)); - } else { // LimitFloatPrecision <= 18 - // For floating-point precision of 18: - // - // TwoToFractionalPartOfX = - // 0.999999982f + - // (0.693148872f + - // (0.240227044f + - // (0.554906021e-1f + - // (0.961591928e-2f + - // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; - // error 2.47208000*10^(-7), which is better than 18 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3924b03e)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3ab24b87)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3c1d8c17)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3d634a1d)); - SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); - SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, - getF32Constant(DAG, 0x3e75fe14)); - SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); - SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, - getF32Constant(DAG, 0x3f317234)); - SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); - TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, - getF32Constant(DAG, 0x3f800000)); - } - - SDValue t13 = DAG.getNode(ISD::BITCAST, dl,MVT::i32,TwoToFractionalPartOfX); - return DAG.getNode(ISD::BITCAST, dl, MVT::f32, - DAG.getNode(ISD::ADD, dl, MVT::i32, - t13, IntegerPartOfX)); + getF32Constant(DAG, 0x40549a78, dl)); + return getLimitedPrecisionExp2(t0, dl, DAG); } // No special expansion. @@ -4549,14 +3879,13 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, // powi(x, 0) -> 1.0 if (Val == 0) - return DAG.getConstantFP(1.0, LHS.getValueType()); + return DAG.getConstantFP(1.0, DL, LHS.getValueType()); const Function *F = DAG.getMachineFunction().getFunction(); - if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::OptimizeForSize) || + if (!F->hasFnAttribute(Attribute::OptimizeForSize) || // If optimizing for size, don't insert too many multiplies. This // inserts up to 5 multiplies. - CountPopulation_32(Val)+Log2_32(Val) < 7) { + countPopulation(Val) + Log2_32(Val) < 7) { // We use the simple binary decomposition method to generate the multiply // sequence. There are more optimal ways to do this (for example, // powi(x,15) generates one more multiply than it should), but this has @@ -4579,7 +3908,7 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, // If the original was negative, invert the result, producing 1/(x*x*x). if (RHSC->getSExtValue() < 0) Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(), - DAG.getConstantFP(1.0, LHS.getValueType()), Res); + DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res); return Res; } } @@ -4609,11 +3938,9 @@ static unsigned getTruncatedArgReg(const SDValue &N) { /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function /// argument, create the corresponding DBG_VALUE machine instruction for it now. /// At the end of instruction selection, they will be inserted to the entry BB. -bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, - MDNode *Variable, - MDNode *Expr, int64_t Offset, - bool IsIndirect, - const SDValue &N) { +bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( + const Value *V, DILocalVariable *Variable, DIExpression *Expr, + DILocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N) { const Argument *Arg = dyn_cast<Argument>(V); if (!Arg) return false; @@ -4622,8 +3949,9 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); // Ignore inlined function arguments here. - DIVariable DV(Variable); - if (DV.isInlinedFnArgument(MF.getFunction())) + // + // FIXME: Should we be checking DL->inlinedAt() to determine this? + if (!Variable->getScope()->getSubprogram()->describes(MF.getFunction())) return false; Optional<MachineOperand> Op; @@ -4664,13 +3992,15 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, if (!Op) return false; + assert(Variable->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); if (Op->isReg()) FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE), - IsIndirect, Op->getReg(), Offset, Variable, Expr)); + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, + Op->getReg(), Offset, Variable, Expr)); else FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE)) + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE)) .addOperand(*Op) .addImm(Offset) .addMetadata(Variable) @@ -4715,16 +4045,20 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; case Intrinsic::read_register: { Value *Reg = I.getArgOperand(0); + SDValue Chain = getRoot(); SDValue RegName = DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata())); EVT VT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::READ_REGISTER, sdl, VT, RegName)); + Res = DAG.getNode(ISD::READ_REGISTER, sdl, + DAG.getVTList(VT, MVT::Other), Chain, RegName); + setValue(&I, Res); + DAG.setRoot(Res.getValue(1)); return nullptr; } case Intrinsic::write_register: { Value *Reg = I.getArgOperand(0); Value *RegValue = I.getArgOperand(1); - SDValue Chain = getValue(RegValue).getOperand(0); + SDValue Chain = getRoot(); SDValue RegName = DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata())); DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain, @@ -4736,6 +4070,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::longjmp: return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; case Intrinsic::memcpy: { + // FIXME: this definition of "user defined address space" is x86-specific // Assert for address < 256 since we support only user defined address // spaces. assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() @@ -4750,12 +4085,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!Align) Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment. bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); - DAG.setRoot(DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, false, - MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)))); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + false, isTC, + MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1))); + updateDAGForMaybeTailCall(MC); return nullptr; } case Intrinsic::memset: { + // FIXME: this definition of "user defined address space" is x86-specific // Assert for address < 256 since we support only user defined address // spaces. assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() @@ -4768,11 +4107,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!Align) Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment. bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); - DAG.setRoot(DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, - MachinePointerInfo(I.getArgOperand(0)))); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + isTC, MachinePointerInfo(I.getArgOperand(0))); + updateDAGForMaybeTailCall(MS); return nullptr; } case Intrinsic::memmove: { + // FIXME: this definition of "user defined address space" is x86-specific // Assert for address < 256 since we support only user defined address // spaces. assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() @@ -4787,20 +4129,20 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!Align) Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment. bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); - DAG.setRoot(DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, - MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)))); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + isTC, MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1))); + updateDAGForMaybeTailCall(MM); return nullptr; } case Intrinsic::dbg_declare: { const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); - MDNode *Variable = DI.getVariable(); - MDNode *Expression = DI.getExpression(); + DILocalVariable *Variable = DI.getVariable(); + DIExpression *Expression = DI.getExpression(); const Value *Address = DI.getAddress(); - DIVariable DIVar(Variable); - assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgDeclareInst should be either null or a DIVariable."); - if (!Address || !DIVar) { + assert(Variable && "Missing variable"); + if (!Address) { DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return nullptr; } @@ -4821,9 +4163,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) Address = BCI->getOperand(0); // Parameters are handled specially. - bool isParameter = - (DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable || - isa<Argument>(Address)); + bool isParameter = Variable->getTag() == dwarf::DW_TAG_arg_variable || + isa<Argument>(Address); const AllocaInst *AI = dyn_cast<AllocaInst>(Address); @@ -4836,7 +4177,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { else { // Address is an argument, so try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - EmitFuncArgumentDbgValue(Address, Variable, Expression, 0, false, N); + EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, + N); return nullptr; } } else if (AI) @@ -4853,7 +4195,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } else { // If Address is an argument then try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, 0, false, + if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, N)) { // If variable is pinned by a alloca in dominating bb then // use StaticAllocaMap. @@ -4876,14 +4218,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::dbg_value: { const DbgValueInst &DI = cast<DbgValueInst>(I); - DIVariable DIVar(DI.getVariable()); - assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgValueInst should be either null or a DIVariable."); - if (!DIVar) - return nullptr; + assert(DI.getVariable() && "Missing variable"); - MDNode *Variable = DI.getVariable(); - MDNode *Expression = DI.getExpression(); + DILocalVariable *Variable = DI.getVariable(); + DIExpression *Expression = DI.getExpression(); uint64_t Offset = DI.getOffset(); const Value *V = DI.getValue(); if (!V) @@ -4904,7 +4242,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (N.getNode()) { // A dbg.value for an alloca is always indirect. bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; - if (!EmitFuncArgumentDbgValue(V, Variable, Expression, Offset, + if (!EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset, IsIndirect, N)) { SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), IsIndirect, Offset, dl, SDNodeOrder); @@ -4943,7 +4281,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Find the type id for the given typeinfo. GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0)); unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV); - Res = DAG.getConstant(TypeID, MVT::i32); + Res = DAG.getConstant(TypeID, sdl, MVT::i32); setValue(&I, Res); return nullptr; } @@ -4969,7 +4307,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { CfaArg.getValueType()), CfaArg); SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(), - DAG.getConstant(0, TLI.getPointerTy())); + DAG.getConstant(0, sdl, TLI.getPointerTy())); setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(), FA, Offset)); return nullptr; @@ -5008,9 +4346,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } + case Intrinsic::masked_gather: + visitMaskedGather(I); + return nullptr; case Intrinsic::masked_load: visitMaskedLoad(I); return nullptr; + case Intrinsic::masked_scatter: + visitMaskedScatter(I); + return nullptr; case Intrinsic::masked_store: visitMaskedStore(I); return nullptr; @@ -5063,44 +4407,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // We must do this early because v2i32 is not a legal type. SDValue ShOps[2]; ShOps[0] = ShAmt; - ShOps[1] = DAG.getConstant(0, MVT::i32); + ShOps[1] = DAG.getConstant(0, sdl, MVT::i32); ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps); EVT DestVT = TLI.getValueType(I.getType()); ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, - DAG.getConstant(NewIntrinsic, MVT::i32), + DAG.getConstant(NewIntrinsic, sdl, MVT::i32), getValue(I.getArgOperand(0)), ShAmt); setValue(&I, Res); return nullptr; } - case Intrinsic::x86_avx_vinsertf128_pd_256: - case Intrinsic::x86_avx_vinsertf128_ps_256: - case Intrinsic::x86_avx_vinsertf128_si_256: - case Intrinsic::x86_avx2_vinserti128: { - EVT DestVT = TLI.getValueType(I.getType()); - EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType()); - uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) * - ElVT.getVectorNumElements(); - Res = - DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, DestVT, - getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), - DAG.getConstant(Idx, TLI.getVectorIdxTy())); - setValue(&I, Res); - return nullptr; - } - case Intrinsic::x86_avx_vextractf128_pd_256: - case Intrinsic::x86_avx_vextractf128_ps_256: - case Intrinsic::x86_avx_vextractf128_si_256: - case Intrinsic::x86_avx2_vextracti128: { - EVT DestVT = TLI.getValueType(I.getType()); - uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) * - DestVT.getVectorNumElements(); - Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, DestVT, - getValue(I.getArgOperand(0)), - DAG.getConstant(Idx, TLI.getVectorIdxTy())); - setValue(&I, Res); - return nullptr; - } case Intrinsic::convertff: case Intrinsic::convertfsi: case Intrinsic::convertfui: @@ -5238,7 +4554,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16, DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16, getValue(I.getArgOperand(0)), - DAG.getTargetConstant(0, MVT::i32)))); + DAG.getTargetConstant(0, sdl, + MVT::i32)))); return nullptr; case Intrinsic::convert_from_fp16: setValue(&I, @@ -5366,9 +4683,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { EVT Ty = Arg.getValueType(); if (CI->isZero()) - Res = DAG.getConstant(-1ULL, Ty); + Res = DAG.getConstant(-1ULL, sdl, Ty); else - Res = DAG.getConstant(0, Ty); + Res = DAG.getConstant(0, sdl, Ty); setValue(&I, Res); return nullptr; @@ -5498,7 +4815,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; SmallVector<Value *, 4> Allocas; - GetUnderlyingObjects(I.getArgOperand(1), Allocas, DL); + GetUnderlyingObjects(I.getArgOperand(1), Allocas, *DL); for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(), E = Allocas.end(); Object != E; ++Object) { @@ -5547,6 +4864,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::clear_cache: return TLI.getClearCacheBuiltinName(); + case Intrinsic::eh_actions: + setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); + return nullptr; case Intrinsic::donothing: // ignore return nullptr; @@ -5565,7 +4885,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::experimental_gc_result_int: case Intrinsic::experimental_gc_result_float: - case Intrinsic::experimental_gc_result_ptr: { + case Intrinsic::experimental_gc_result_ptr: + case Intrinsic::experimental_gc_result: { visitGCResult(I); return nullptr; } @@ -5576,45 +4897,49 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::instrprof_increment: llvm_unreachable("instrprof failed to lower an increment"); - case Intrinsic::frameallocate: { + case Intrinsic::frameescape: { MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); - // Do the allocation and map it as a normal value. - // FIXME: Maybe we should add this to the alloca map so that we don't have - // to register allocate it? - uint64_t Size = cast<ConstantInt>(I.getArgOperand(0))->getZExtValue(); - int Alloc = MF.getFrameInfo()->CreateFrameAllocation(Size); - MVT PtrVT = TLI.getPointerTy(0); - SDValue FIVal = DAG.getFrameIndex(Alloc, PtrVT); - setValue(&I, FIVal); - - // Directly emit a FRAME_ALLOC machine instr. Label assignment emission is - // the same on all targets. - MCSymbol *FrameAllocSym = - MF.getMMI().getContext().getOrCreateFrameAllocSymbol(MF.getName()); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, - TII->get(TargetOpcode::FRAME_ALLOC)) - .addSym(FrameAllocSym) - .addFrameIndex(Alloc); + // Directly emit some FRAME_ALLOC machine instrs. Label assignment emission + // is the same on all targets. + for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) { + Value *Arg = I.getArgOperand(Idx)->stripPointerCasts(); + if (isa<ConstantPointerNull>(Arg)) + continue; // Skip null pointers. They represent a hole in index space. + AllocaInst *Slot = cast<AllocaInst>(Arg); + assert(FuncInfo.StaticAllocaMap.count(Slot) && + "can only escape static allocas"); + int FI = FuncInfo.StaticAllocaMap[Slot]; + MCSymbol *FrameAllocSym = + MF.getMMI().getContext().getOrCreateFrameAllocSymbol( + GlobalValue::getRealLinkageName(MF.getName()), Idx); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, + TII->get(TargetOpcode::FRAME_ALLOC)) + .addSym(FrameAllocSym) + .addFrameIndex(FI); + } return nullptr; } case Intrinsic::framerecover: { - // i8* @llvm.framerecover(i8* %fn, i8* %fp) + // i8* @llvm.framerecover(i8* %fn, i8* %fp, i32 %idx) MachineFunction &MF = DAG.getMachineFunction(); MVT PtrVT = TLI.getPointerTy(0); // Get the symbol that defines the frame offset. - Function *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts()); + auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts()); + auto *Idx = cast<ConstantInt>(I.getArgOperand(2)); + unsigned IdxVal = unsigned(Idx->getLimitedValue(INT_MAX)); MCSymbol *FrameAllocSym = - MF.getMMI().getContext().getOrCreateFrameAllocSymbol(Fn->getName()); + MF.getMMI().getContext().getOrCreateFrameAllocSymbol( + GlobalValue::getRealLinkageName(Fn->getName()), IdxVal); // Create a TargetExternalSymbol for the label to avoid any target lowering // that would make this PC relative. StringRef Name = FrameAllocSym->getName(); - assert(Name.size() == strlen(Name.data()) && "not null terminated"); + assert(Name.data()[Name.size()] == '\0' && "not null terminated"); SDValue OffsetSym = DAG.getTargetExternalSymbol(Name.data(), PtrVT); SDValue OffsetVal = DAG.getNode(ISD::FRAME_ALLOC_RECOVER, sdl, PtrVT, OffsetSym); @@ -5627,6 +4952,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } + case Intrinsic::eh_begincatch: + case Intrinsic::eh_endcatch: + llvm_unreachable("begin/end catch intrinsics not lowered in codegen"); + case Intrinsic::eh_exceptioncode: { + unsigned Reg = TLI.getExceptionPointerRegister(); + assert(Reg && "cannot get exception code on this platform"); + MVT PtrVT = TLI.getPointerTy(); + const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT); + unsigned VReg = FuncInfo.MBB->addLiveIn(Reg, PtrRC); + SDValue N = + DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT); + N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32); + setValue(&I, N); + return nullptr; + } } } @@ -5639,7 +4979,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, if (LandingPad) { // Insert a label before the invoke call to mark the try range. This can be // used to detect deletion of the invoke via the MachineModuleInfo. - BeginLabel = MMI.getContext().CreateTempSymbol(); + BeginLabel = MMI.getContext().createTempSymbol(); // For SjLj, keep track of which landing pads go with which invokes // so as to maintain the ordering of pads in the LSDA. @@ -5659,9 +4999,8 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, CLI.setChain(getRoot()); } - - const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering(); - std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); assert((CLI.IsTailCall || Result.second.getNode()) && "Non-null chain expected with non-tail call!"); @@ -5683,7 +5022,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, if (LandingPad) { // Insert a label at the end of the invoke call to mark the try range. This // can be used to detect deletion of the invoke via the MachineModuleInfo. - MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol(); + MCSymbol *EndLabel = MMI.getContext().createTempSymbol(); DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel)); // Inform MachineModuleInfo of range. @@ -5766,9 +5105,8 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput), PointerType::getUnqual(LoadTy)); - if (const Constant *LoadCst = - ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput), - Builder.DL)) + if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr( + const_cast<Constant *>(LoadInput), *Builder.DL)) return Builder.getValue(LoadCst); } @@ -5829,7 +5167,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { const ConstantInt *CSize = dyn_cast<ConstantInt>(Size); if (CSize && CSize->getZExtValue() == 0) { EVT CallVT = DAG.getTargetLoweringInfo().getValueType(I.getType(), true); - setValue(&I, DAG.getConstant(0, CallVT)); + setValue(&I, DAG.getConstant(0, getCurSDLoc(), CallVT)); return true; } @@ -6112,7 +5450,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { return; } } - if (unsigned IID = F->getIntrinsicID()) { + if (Intrinsic::ID IID = F->getIntrinsicID()) { RenameFn = visitIntrinsicCall(I, IID); if (!RenameFn) return; @@ -6364,9 +5702,10 @@ static void GetRegistersForValue(SelectionDAG &DAG, // If this is a constraint for a single physreg, or a constraint for a // register class, find it. - std::pair<unsigned, const TargetRegisterClass*> PhysReg = - TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, - OpInfo.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass *> PhysReg = + TLI.getRegForInlineAsmConstraint(MF.getSubtarget().getRegisterInfo(), + OpInfo.ConstraintCode, + OpInfo.ConstraintVT); unsigned NumRegs = 1; if (OpInfo.ConstraintVT != MVT::Other) { @@ -6462,8 +5801,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDISelAsmOperandInfoVector ConstraintOperands; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - TargetLowering::AsmOperandInfoVector - TargetConstraints = TLI.ParseConstraints(CS); + TargetLowering::AsmOperandInfoVector TargetConstraints = + TLI.ParseConstraints(DAG.getSubtarget().getRegisterInfo(), CS); bool hasMemory = false; @@ -6555,12 +5894,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; if (OpInfo.ConstraintVT != Input.ConstraintVT) { - std::pair<unsigned, const TargetRegisterClass*> MatchRC = - TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, - OpInfo.ConstraintVT); - std::pair<unsigned, const TargetRegisterClass*> InputRC = - TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, - Input.ConstraintVT); + const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); + std::pair<unsigned, const TargetRegisterClass *> MatchRC = + TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode, + OpInfo.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass *> InputRC = + TLI.getRegForInlineAsmConstraint(TRI, Input.ConstraintCode, + Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || (MatchRC.second != InputRC.second)) { @@ -6687,7 +6027,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } } - AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, + AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, getCurSDLoc(), TLI.getPointerTy())); // Loop over all of the inputs, copying the operand values into the @@ -6707,10 +6047,16 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Memory output, or 'other' output (e.g. 'X' constraint). assert(OpInfo.isIndirect && "Memory output must be indirect operand"); + unsigned ConstraintID = + TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); + assert(ConstraintID != InlineAsm::Constraint_Unknown && + "Failed to convert memory constraint code to constraint id."); + // Add information to the INLINEASM node to know about this output. unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); - AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, - TLI.getPointerTy())); + OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID); + AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(), + MVT::i32)); AsmNodeOperands.push_back(OpInfo.CallOperand); break; } @@ -6745,7 +6091,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { .AddInlineAsmOperands(OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber : InlineAsm::Kind_RegDef, - false, 0, DAG, AsmNodeOperands); + false, 0, getCurSDLoc(), DAG, AsmNodeOperands); break; } case InlineAsm::isInput: { @@ -6800,11 +6146,12 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { return; } } + SDLoc dl = getCurSDLoc(); // Use the produced MatchedRegs object to - MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurSDLoc(), + MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, CS.getInstruction()); MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, - true, OpInfo.getMatchedOperand(), + true, OpInfo.getMatchedOperand(), dl, DAG, AsmNodeOperands); break; } @@ -6814,9 +6161,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { "Unexpected number of operands"); // Add information to the INLINEASM node to know about this input. // See InlineAsm.h isUseOperandTiedToDef. + OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag); OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, OpInfo.getMatchedOperand()); - AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, + AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, getCurSDLoc(), TLI.getPointerTy())); AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); break; @@ -6843,6 +6191,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, + getCurSDLoc(), TLI.getPointerTy())); AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); break; @@ -6853,10 +6202,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { assert(InOperandVal.getValueType() == TLI.getPointerTy() && "Memory operands expect pointer values"); + unsigned ConstraintID = + TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); + assert(ConstraintID != InlineAsm::Constraint_Unknown && + "Failed to convert memory constraint code to constraint id."); + // Add information to the INLINEASM node to know about this input. unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); + ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, - TLI.getPointerTy())); + getCurSDLoc(), + MVT::i32)); AsmNodeOperands.push_back(InOperandVal); break; } @@ -6884,11 +6240,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { return; } - OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurSDLoc(), + SDLoc dl = getCurSDLoc(); + + OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, CS.getInstruction()); OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, - DAG, AsmNodeOperands); + dl, DAG, AsmNodeOperands); break; } case InlineAsm::isClobber: { @@ -6896,7 +6254,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // allocator is aware that the physreg got clobbered. if (!OpInfo.AssignedRegs.Regs.empty()) OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber, - false, 0, DAG, + false, 0, getCurSDLoc(), DAG, AsmNodeOperands); break; } @@ -7020,7 +6378,7 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, - bool UseVoidTy, + Type *ReturnTy, MachineBasicBlock *LandingPad, bool IsPatchPoint) { TargetLowering::ArgListTy Args; @@ -7041,10 +6399,9 @@ SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, Args.push_back(Entry); } - Type *retTy = UseVoidTy ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) - .setCallee(CS.getCallingConv(), retTy, Callee, std::move(Args), NumArgs) + .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args), NumArgs) .setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint); return lowerInvokable(CLI, LandingPad); @@ -7068,15 +6425,15 @@ SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, /// only available in a register, then the runtime would need to trap when /// execution reaches the StackMap in order to read the alloca's location. static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx, - SmallVectorImpl<SDValue> &Ops, + SDLoc DL, SmallVectorImpl<SDValue> &Ops, SelectionDAGBuilder &Builder) { for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i) { SDValue OpVal = Builder.getValue(CS.getArgument(i)); if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) { Ops.push_back( - Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); + Builder.DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); Ops.push_back( - Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); + Builder.DAG.getTargetConstant(C->getSExtValue(), DL, MVT::i64)); } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) { const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); Ops.push_back( @@ -7098,7 +6455,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { SDLoc DL = getCurSDLoc(); Callee = getValue(CI.getCalledValue()); - NullPtr = DAG.getIntPtrConstant(0, true); + NullPtr = DAG.getIntPtrConstant(0, DL, true); // The stackmap intrinsic only records the live variables (the arguemnts // passed to it) and emits NOPS (if requested). Unlike the patchpoint @@ -7116,13 +6473,14 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { // Add the <id> and <numBytes> constants. SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos)); Ops.push_back(DAG.getTargetConstant( - cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64)); + cast<ConstantSDNode>(IDVal)->getZExtValue(), DL, MVT::i64)); SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos)); Ops.push_back(DAG.getTargetConstant( - cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32)); + cast<ConstantSDNode>(NBytesVal)->getZExtValue(), DL, + MVT::i32)); // Push live variables for the stack map. - addStackMapLiveVars(&CI, 2, Ops, *this); + addStackMapLiveVars(&CI, 2, DL, Ops, *this); // We are not pushing any register mask info here on the operands list, // because the stackmap doesn't clobber anything. @@ -7161,7 +6519,17 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, CallingConv::ID CC = CS.getCallingConv(); bool IsAnyRegCC = CC == CallingConv::AnyReg; bool HasDef = !CS->getType()->isVoidTy(); - SDValue Callee = getValue(CS->getOperand(2)); // <target> + SDLoc dl = getCurSDLoc(); + SDValue Callee = getValue(CS->getOperand(PatchPointOpers::TargetPos)); + + // Handle immediate and symbolic callees. + if (auto* ConstCallee = dyn_cast<ConstantSDNode>(Callee)) + Callee = DAG.getIntPtrConstant(ConstCallee->getZExtValue(), dl, + /*isTarget=*/true); + else if (auto* SymbolicCallee = dyn_cast<GlobalAddressSDNode>(Callee)) + Callee = DAG.getTargetGlobalAddress(SymbolicCallee->getGlobal(), + SDLoc(SymbolicCallee), + SymbolicCallee->getValueType(0)); // Get the real number of arguments participating in the call <numArgs> SDValue NArgVal = getValue(CS.getArgument(PatchPointOpers::NArgPos)); @@ -7175,8 +6543,10 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // For AnyRegCC the arguments are lowered later on manually. unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; + Type *ReturnTy = + IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); std::pair<SDValue, SDValue> Result = - lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC, + lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, LandingPad, true); SDNode *CallEnd = Result.second.getNode(); @@ -7196,26 +6566,24 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // Add the <id> and <numBytes> constants. SDValue IDVal = getValue(CS->getOperand(PatchPointOpers::IDPos)); Ops.push_back(DAG.getTargetConstant( - cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64)); + cast<ConstantSDNode>(IDVal)->getZExtValue(), dl, MVT::i64)); SDValue NBytesVal = getValue(CS->getOperand(PatchPointOpers::NBytesPos)); Ops.push_back(DAG.getTargetConstant( - cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32)); + cast<ConstantSDNode>(NBytesVal)->getZExtValue(), dl, + MVT::i32)); - // Assume that the Callee is a constant address. - // FIXME: handle function symbols in the future. - Ops.push_back( - DAG.getIntPtrConstant(cast<ConstantSDNode>(Callee)->getZExtValue(), - /*isTarget=*/true)); + // Add the callee. + Ops.push_back(Callee); // Adjust <numArgs> to account for any arguments that have been passed on the // stack instead. // Call Node: Chain, Target, {Args}, RegMask, [Glue] unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3); NumCallRegArgs = IsAnyRegCC ? NumArgs : NumCallRegArgs; - Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32)); + Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, dl, MVT::i32)); // Add the calling convention - Ops.push_back(DAG.getTargetConstant((unsigned)CC, MVT::i32)); + Ops.push_back(DAG.getTargetConstant((unsigned)CC, dl, MVT::i32)); // Add the arguments we omitted previously. The register allocator should // place these in any free register. @@ -7225,11 +6593,10 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // Push the arguments from the call instruction up to the register mask. SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1; - for (SDNode::op_iterator i = Call->op_begin()+2; i != e; ++i) - Ops.push_back(*i); + Ops.append(Call->op_begin() + 2, e); // Push live variables for the stack map. - addStackMapLiveVars(CS, NumMetaOpers + NumArgs, Ops, *this); + addStackMapLiveVars(CS, NumMetaOpers + NumArgs, dl, Ops, *this); // Push the register mask info. if (HasGlue) @@ -7262,7 +6629,7 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // Replace the target specific call node with a PATCHPOINT node. MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT, - getCurSDLoc(), NodeTys, Ops); + dl, NodeTys, Ops); // Update the NodeMap. if (HasDef) { @@ -7529,7 +6896,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { for (unsigned i = 0; i < NumValues; ++i) { SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, - CLI.DAG.getConstant(Offsets[i], PtrVT)); + CLI.DAG.getConstant(Offsets[i], CLI.DL, + PtrVT)); SDValue L = CLI.DAG.getLoad( RetTys[i], CLI.DL, CLI.Chain, Add, MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false, @@ -7849,7 +7217,6 @@ void SelectionDAGISel::LowerArguments(const Function &F) { assert(i == InVals.size() && "Argument register count mismatch!"); // Finally, if the target has anything special to do, allow it to do so. - // FIXME: this should insert code into the DAG! EmitFunctionEntryCode(); } @@ -7866,8 +7233,8 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; - // Check successor nodes' PHI nodes that expect a constant to be available - // from this block. + // Check PHI nodes in successors that expect a value to be available from this + // block. for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { const BasicBlock *SuccBB = TI->getSuccessor(succ); if (!isa<PHINode>(SuccBB->begin())) continue; @@ -7954,3 +7321,816 @@ AddSuccessorMBB(const BasicBlock *BB, SuccMBB, BranchProbabilityInfo::getBranchWeightStackProtector(IsLikely)); return SuccMBB; } + +MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) { + MachineFunction::iterator I = MBB; + if (++I == FuncInfo.MF->end()) + return nullptr; + return I; +} + +/// During lowering new call nodes can be created (such as memset, etc.). +/// Those will become new roots of the current DAG, but complications arise +/// when they are tail calls. In such cases, the call lowering will update +/// the root, but the builder still needs to know that a tail call has been +/// lowered in order to avoid generating an additional return. +void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) { + // If the node is null, we do have a tail call. + if (MaybeTC.getNode() != nullptr) + DAG.setRoot(MaybeTC); + else + HasTailCall = true; +} + +bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters, + unsigned *TotalCases, unsigned First, + unsigned Last) { + assert(Last >= First); + assert(TotalCases[Last] >= TotalCases[First]); + + APInt LowCase = Clusters[First].Low->getValue(); + APInt HighCase = Clusters[Last].High->getValue(); + assert(LowCase.getBitWidth() == HighCase.getBitWidth()); + + // FIXME: A range of consecutive cases has 100% density, but only requires one + // comparison to lower. We should discriminate against such consecutive ranges + // in jump tables. + + uint64_t Diff = (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100); + uint64_t Range = Diff + 1; + + uint64_t NumCases = + TotalCases[Last] - (First == 0 ? 0 : TotalCases[First - 1]); + + assert(NumCases < UINT64_MAX / 100); + assert(Range >= NumCases); + + return NumCases * 100 >= Range * MinJumpTableDensity; +} + +static inline bool areJTsAllowed(const TargetLowering &TLI) { + return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || + TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other); +} + +bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, + unsigned First, unsigned Last, + const SwitchInst *SI, + MachineBasicBlock *DefaultMBB, + CaseCluster &JTCluster) { + assert(First <= Last); + + uint32_t Weight = 0; + unsigned NumCmps = 0; + std::vector<MachineBasicBlock*> Table; + DenseMap<MachineBasicBlock*, uint32_t> JTWeights; + for (unsigned I = First; I <= Last; ++I) { + assert(Clusters[I].Kind == CC_Range); + Weight += Clusters[I].Weight; + assert(Weight >= Clusters[I].Weight && "Weight overflow!"); + APInt Low = Clusters[I].Low->getValue(); + APInt High = Clusters[I].High->getValue(); + NumCmps += (Low == High) ? 1 : 2; + if (I != First) { + // Fill the gap between this and the previous cluster. + APInt PreviousHigh = Clusters[I - 1].High->getValue(); + assert(PreviousHigh.slt(Low)); + uint64_t Gap = (Low - PreviousHigh).getLimitedValue() - 1; + for (uint64_t J = 0; J < Gap; J++) + Table.push_back(DefaultMBB); + } + uint64_t ClusterSize = (High - Low).getLimitedValue() + 1; + for (uint64_t J = 0; J < ClusterSize; ++J) + Table.push_back(Clusters[I].MBB); + JTWeights[Clusters[I].MBB] += Clusters[I].Weight; + } + + unsigned NumDests = JTWeights.size(); + if (isSuitableForBitTests(NumDests, NumCmps, + Clusters[First].Low->getValue(), + Clusters[Last].High->getValue())) { + // Clusters[First..Last] should be lowered as bit tests instead. + return false; + } + + // Create the MBB that will load from and jump through the table. + // Note: We create it here, but it's not inserted into the function yet. + MachineFunction *CurMF = FuncInfo.MF; + MachineBasicBlock *JumpTableMBB = + CurMF->CreateMachineBasicBlock(SI->getParent()); + + // Add successors. Note: use table order for determinism. + SmallPtrSet<MachineBasicBlock *, 8> Done; + for (MachineBasicBlock *Succ : Table) { + if (Done.count(Succ)) + continue; + addSuccessorWithWeight(JumpTableMBB, Succ, JTWeights[Succ]); + Done.insert(Succ); + } + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding()) + ->createJumpTableIndex(Table); + + // Set up the jump table info. + JumpTable JT(-1U, JTI, JumpTableMBB, nullptr); + JumpTableHeader JTH(Clusters[First].Low->getValue(), + Clusters[Last].High->getValue(), SI->getCondition(), + nullptr, false); + JTCases.push_back(JumpTableBlock(JTH, JT)); + + JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High, + JTCases.size() - 1, Weight); + return true; +} + +void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, + const SwitchInst *SI, + MachineBasicBlock *DefaultMBB) { +#ifndef NDEBUG + // Clusters must be non-empty, sorted, and only contain Range clusters. + assert(!Clusters.empty()); + for (CaseCluster &C : Clusters) + assert(C.Kind == CC_Range); + for (unsigned i = 1, e = Clusters.size(); i < e; ++i) + assert(Clusters[i - 1].High->getValue().slt(Clusters[i].Low->getValue())); +#endif + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!areJTsAllowed(TLI)) + return; + + const int64_t N = Clusters.size(); + const unsigned MinJumpTableSize = TLI.getMinimumJumpTableEntries(); + + // Split Clusters into minimum number of dense partitions. The algorithm uses + // the same idea as Kannan & Proebsting "Correction to 'Producing Good Code + // for the Case Statement'" (1994), but builds the MinPartitions array in + // reverse order to make it easier to reconstruct the partitions in ascending + // order. In the choice between two optimal partitionings, it picks the one + // which yields more jump tables. + + // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1]. + SmallVector<unsigned, 8> MinPartitions(N); + // LastElement[i] is the last element of the partition starting at i. + SmallVector<unsigned, 8> LastElement(N); + // NumTables[i]: nbr of >= MinJumpTableSize partitions from Clusters[i..N-1]. + SmallVector<unsigned, 8> NumTables(N); + // TotalCases[i]: Total nbr of cases in Clusters[0..i]. + SmallVector<unsigned, 8> TotalCases(N); + + for (unsigned i = 0; i < N; ++i) { + APInt Hi = Clusters[i].High->getValue(); + APInt Lo = Clusters[i].Low->getValue(); + TotalCases[i] = (Hi - Lo).getLimitedValue() + 1; + if (i != 0) + TotalCases[i] += TotalCases[i - 1]; + } + + // Base case: There is only one way to partition Clusters[N-1]. + MinPartitions[N - 1] = 1; + LastElement[N - 1] = N - 1; + assert(MinJumpTableSize > 1); + NumTables[N - 1] = 0; + + // Note: loop indexes are signed to avoid underflow. + for (int64_t i = N - 2; i >= 0; i--) { + // Find optimal partitioning of Clusters[i..N-1]. + // Baseline: Put Clusters[i] into a partition on its own. + MinPartitions[i] = MinPartitions[i + 1] + 1; + LastElement[i] = i; + NumTables[i] = NumTables[i + 1]; + + // Search for a solution that results in fewer partitions. + for (int64_t j = N - 1; j > i; j--) { + // Try building a partition from Clusters[i..j]. + if (isDense(Clusters, &TotalCases[0], i, j)) { + unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); + bool IsTable = j - i + 1 >= MinJumpTableSize; + unsigned Tables = IsTable + (j == N - 1 ? 0 : NumTables[j + 1]); + + // If this j leads to fewer partitions, or same number of partitions + // with more lookup tables, it is a better partitioning. + if (NumPartitions < MinPartitions[i] || + (NumPartitions == MinPartitions[i] && Tables > NumTables[i])) { + MinPartitions[i] = NumPartitions; + LastElement[i] = j; + NumTables[i] = Tables; + } + } + } + } + + // Iterate over the partitions, replacing some with jump tables in-place. + unsigned DstIndex = 0; + for (unsigned First = 0, Last; First < N; First = Last + 1) { + Last = LastElement[First]; + assert(Last >= First); + assert(DstIndex <= First); + unsigned NumClusters = Last - First + 1; + + CaseCluster JTCluster; + if (NumClusters >= MinJumpTableSize && + buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) { + Clusters[DstIndex++] = JTCluster; + } else { + for (unsigned I = First; I <= Last; ++I) + std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I])); + } + } + Clusters.resize(DstIndex); +} + +bool SelectionDAGBuilder::rangeFitsInWord(const APInt &Low, const APInt &High) { + // FIXME: Using the pointer type doesn't seem ideal. + uint64_t BW = DAG.getTargetLoweringInfo().getPointerTy().getSizeInBits(); + uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1; + return Range <= BW; +} + +bool SelectionDAGBuilder::isSuitableForBitTests(unsigned NumDests, + unsigned NumCmps, + const APInt &Low, + const APInt &High) { + // FIXME: I don't think NumCmps is the correct metric: a single case and a + // range of cases both require only one branch to lower. Just looking at the + // number of clusters and destinations should be enough to decide whether to + // build bit tests. + + // To lower a range with bit tests, the range must fit the bitwidth of a + // machine word. + if (!rangeFitsInWord(Low, High)) + return false; + + // Decide whether it's profitable to lower this range with bit tests. Each + // destination requires a bit test and branch, and there is an overall range + // check branch. For a small number of clusters, separate comparisons might be + // cheaper, and for many destinations, splitting the range might be better. + return (NumDests == 1 && NumCmps >= 3) || + (NumDests == 2 && NumCmps >= 5) || + (NumDests == 3 && NumCmps >= 6); +} + +bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, + unsigned First, unsigned Last, + const SwitchInst *SI, + CaseCluster &BTCluster) { + assert(First <= Last); + if (First == Last) + return false; + + BitVector Dests(FuncInfo.MF->getNumBlockIDs()); + unsigned NumCmps = 0; + for (int64_t I = First; I <= Last; ++I) { + assert(Clusters[I].Kind == CC_Range); + Dests.set(Clusters[I].MBB->getNumber()); + NumCmps += (Clusters[I].Low == Clusters[I].High) ? 1 : 2; + } + unsigned NumDests = Dests.count(); + + APInt Low = Clusters[First].Low->getValue(); + APInt High = Clusters[Last].High->getValue(); + assert(Low.slt(High)); + + if (!isSuitableForBitTests(NumDests, NumCmps, Low, High)) + return false; + + APInt LowBound; + APInt CmpRange; + + const int BitWidth = + DAG.getTargetLoweringInfo().getPointerTy().getSizeInBits(); + assert((High - Low + 1).sle(BitWidth) && "Case range must fit in bit mask!"); + + if (Low.isNonNegative() && High.slt(BitWidth)) { + // Optimize the case where all the case values fit in a + // word without having to subtract minValue. In this case, + // we can optimize away the subtraction. + LowBound = APInt::getNullValue(Low.getBitWidth()); + CmpRange = High; + } else { + LowBound = Low; + CmpRange = High - Low; + } + + CaseBitsVector CBV; + uint32_t TotalWeight = 0; + for (unsigned i = First; i <= Last; ++i) { + // Find the CaseBits for this destination. + unsigned j; + for (j = 0; j < CBV.size(); ++j) + if (CBV[j].BB == Clusters[i].MBB) + break; + if (j == CBV.size()) + CBV.push_back(CaseBits(0, Clusters[i].MBB, 0, 0)); + CaseBits *CB = &CBV[j]; + + // Update Mask, Bits and ExtraWeight. + uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue(); + uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue(); + for (uint64_t j = Lo; j <= Hi; ++j) { + CB->Mask |= 1ULL << j; + CB->Bits++; + } + CB->ExtraWeight += Clusters[i].Weight; + TotalWeight += Clusters[i].Weight; + assert(TotalWeight >= Clusters[i].Weight && "Weight overflow!"); + } + + BitTestInfo BTI; + std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) { + // Sort by weight first, number of bits second. + if (a.ExtraWeight != b.ExtraWeight) + return a.ExtraWeight > b.ExtraWeight; + return a.Bits > b.Bits; + }); + + for (auto &CB : CBV) { + MachineBasicBlock *BitTestBB = + FuncInfo.MF->CreateMachineBasicBlock(SI->getParent()); + BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraWeight)); + } + BitTestCases.push_back(BitTestBlock(LowBound, CmpRange, SI->getCondition(), + -1U, MVT::Other, false, nullptr, + nullptr, std::move(BTI))); + + BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High, + BitTestCases.size() - 1, TotalWeight); + return true; +} + +void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters, + const SwitchInst *SI) { +// Partition Clusters into as few subsets as possible, where each subset has a +// range that fits in a machine word and has <= 3 unique destinations. + +#ifndef NDEBUG + // Clusters must be sorted and contain Range or JumpTable clusters. + assert(!Clusters.empty()); + assert(Clusters[0].Kind == CC_Range || Clusters[0].Kind == CC_JumpTable); + for (const CaseCluster &C : Clusters) + assert(C.Kind == CC_Range || C.Kind == CC_JumpTable); + for (unsigned i = 1; i < Clusters.size(); ++i) + assert(Clusters[i-1].High->getValue().slt(Clusters[i].Low->getValue())); +#endif + + // If target does not have legal shift left, do not emit bit tests at all. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT PTy = TLI.getPointerTy(); + if (!TLI.isOperationLegal(ISD::SHL, PTy)) + return; + + int BitWidth = PTy.getSizeInBits(); + const int64_t N = Clusters.size(); + + // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1]. + SmallVector<unsigned, 8> MinPartitions(N); + // LastElement[i] is the last element of the partition starting at i. + SmallVector<unsigned, 8> LastElement(N); + + // FIXME: This might not be the best algorithm for finding bit test clusters. + + // Base case: There is only one way to partition Clusters[N-1]. + MinPartitions[N - 1] = 1; + LastElement[N - 1] = N - 1; + + // Note: loop indexes are signed to avoid underflow. + for (int64_t i = N - 2; i >= 0; --i) { + // Find optimal partitioning of Clusters[i..N-1]. + // Baseline: Put Clusters[i] into a partition on its own. + MinPartitions[i] = MinPartitions[i + 1] + 1; + LastElement[i] = i; + + // Search for a solution that results in fewer partitions. + // Note: the search is limited by BitWidth, reducing time complexity. + for (int64_t j = std::min(N - 1, i + BitWidth - 1); j > i; --j) { + // Try building a partition from Clusters[i..j]. + + // Check the range. + if (!rangeFitsInWord(Clusters[i].Low->getValue(), + Clusters[j].High->getValue())) + continue; + + // Check nbr of destinations and cluster types. + // FIXME: This works, but doesn't seem very efficient. + bool RangesOnly = true; + BitVector Dests(FuncInfo.MF->getNumBlockIDs()); + for (int64_t k = i; k <= j; k++) { + if (Clusters[k].Kind != CC_Range) { + RangesOnly = false; + break; + } + Dests.set(Clusters[k].MBB->getNumber()); + } + if (!RangesOnly || Dests.count() > 3) + break; + + // Check if it's a better partition. + unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); + if (NumPartitions < MinPartitions[i]) { + // Found a better partition. + MinPartitions[i] = NumPartitions; + LastElement[i] = j; + } + } + } + + // Iterate over the partitions, replacing with bit-test clusters in-place. + unsigned DstIndex = 0; + for (unsigned First = 0, Last; First < N; First = Last + 1) { + Last = LastElement[First]; + assert(First <= Last); + assert(DstIndex <= First); + + CaseCluster BitTestCluster; + if (buildBitTests(Clusters, First, Last, SI, BitTestCluster)) { + Clusters[DstIndex++] = BitTestCluster; + } else { + for (unsigned I = First; I <= Last; ++I) + std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I])); + } + } + Clusters.resize(DstIndex); +} + +void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, + MachineBasicBlock *SwitchMBB, + MachineBasicBlock *DefaultMBB) { + MachineFunction *CurMF = FuncInfo.MF; + MachineBasicBlock *NextMBB = nullptr; + MachineFunction::iterator BBI = W.MBB; + if (++BBI != FuncInfo.MF->end()) + NextMBB = BBI; + + unsigned Size = W.LastCluster - W.FirstCluster + 1; + + BranchProbabilityInfo *BPI = FuncInfo.BPI; + + if (Size == 2 && W.MBB == SwitchMBB) { + // If any two of the cases has the same destination, and if one value + // is the same as the other, but has one bit unset that the other has set, + // use bit manipulation to do two compares at once. For example: + // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)" + // TODO: This could be extended to merge any 2 cases in switches with 3 + // cases. + // TODO: Handle cases where W.CaseBB != SwitchBB. + CaseCluster &Small = *W.FirstCluster; + CaseCluster &Big = *W.LastCluster; + + if (Small.Low == Small.High && Big.Low == Big.High && + Small.MBB == Big.MBB) { + const APInt &SmallValue = Small.Low->getValue(); + const APInt &BigValue = Big.Low->getValue(); + + // Check that there is only one bit different. + if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 && + (SmallValue | BigValue) == BigValue) { + // Isolate the common bit. + APInt CommonBit = BigValue & ~SmallValue; + assert((SmallValue | CommonBit) == BigValue && + CommonBit.countPopulation() == 1 && "Not a common bit?"); + + SDValue CondLHS = getValue(Cond); + EVT VT = CondLHS.getValueType(); + SDLoc DL = getCurSDLoc(); + + SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS, + DAG.getConstant(CommonBit, DL, VT)); + SDValue Cond = DAG.getSetCC(DL, MVT::i1, Or, + DAG.getConstant(BigValue, DL, VT), + ISD::SETEQ); + + // Update successor info. + // Both Small and Big will jump to Small.BB, so we sum up the weights. + addSuccessorWithWeight(SwitchMBB, Small.MBB, Small.Weight + Big.Weight); + addSuccessorWithWeight( + SwitchMBB, DefaultMBB, + // The default destination is the first successor in IR. + BPI ? BPI->getEdgeWeight(SwitchMBB->getBasicBlock(), (unsigned)0) + : 0); + + // Insert the true branch. + SDValue BrCond = + DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond, + DAG.getBasicBlock(Small.MBB)); + // Insert the false branch. + BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond, + DAG.getBasicBlock(DefaultMBB)); + + DAG.setRoot(BrCond); + return; + } + } + } + + if (TM.getOptLevel() != CodeGenOpt::None) { + // Order cases by weight so the most likely case will be checked first. + std::sort(W.FirstCluster, W.LastCluster + 1, + [](const CaseCluster &a, const CaseCluster &b) { + return a.Weight > b.Weight; + }); + + // Rearrange the case blocks so that the last one falls through if possible + // without without changing the order of weights. + for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) { + --I; + if (I->Weight > W.LastCluster->Weight) + break; + if (I->Kind == CC_Range && I->MBB == NextMBB) { + std::swap(*I, *W.LastCluster); + break; + } + } + } + + // Compute total weight. + uint32_t UnhandledWeights = 0; + for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I) { + UnhandledWeights += I->Weight; + assert(UnhandledWeights >= I->Weight && "Weight overflow!"); + } + + MachineBasicBlock *CurMBB = W.MBB; + for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) { + MachineBasicBlock *Fallthrough; + if (I == W.LastCluster) { + // For the last cluster, fall through to the default destination. + Fallthrough = DefaultMBB; + } else { + Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock()); + CurMF->insert(BBI, Fallthrough); + // Put Cond in a virtual register to make it available from the new blocks. + ExportFromCurrentBlock(Cond); + } + + switch (I->Kind) { + case CC_JumpTable: { + // FIXME: Optimize away range check based on pivot comparisons. + JumpTableHeader *JTH = &JTCases[I->JTCasesIndex].first; + JumpTable *JT = &JTCases[I->JTCasesIndex].second; + + // The jump block hasn't been inserted yet; insert it here. + MachineBasicBlock *JumpMBB = JT->MBB; + CurMF->insert(BBI, JumpMBB); + addSuccessorWithWeight(CurMBB, Fallthrough); + addSuccessorWithWeight(CurMBB, JumpMBB); + + // The jump table header will be inserted in our current block, do the + // range check, and fall through to our fallthrough block. + JTH->HeaderBB = CurMBB; + JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader. + + // If we're in the right place, emit the jump table header right now. + if (CurMBB == SwitchMBB) { + visitJumpTableHeader(*JT, *JTH, SwitchMBB); + JTH->Emitted = true; + } + break; + } + case CC_BitTests: { + // FIXME: Optimize away range check based on pivot comparisons. + BitTestBlock *BTB = &BitTestCases[I->BTCasesIndex]; + + // The bit test blocks haven't been inserted yet; insert them here. + for (BitTestCase &BTC : BTB->Cases) + CurMF->insert(BBI, BTC.ThisBB); + + // Fill in fields of the BitTestBlock. + BTB->Parent = CurMBB; + BTB->Default = Fallthrough; + + // If we're in the right place, emit the bit test header header right now. + if (CurMBB ==SwitchMBB) { + visitBitTestHeader(*BTB, SwitchMBB); + BTB->Emitted = true; + } + break; + } + case CC_Range: { + const Value *RHS, *LHS, *MHS; + ISD::CondCode CC; + if (I->Low == I->High) { + // Check Cond == I->Low. + CC = ISD::SETEQ; + LHS = Cond; + RHS=I->Low; + MHS = nullptr; + } else { + // Check I->Low <= Cond <= I->High. + CC = ISD::SETLE; + LHS = I->Low; + MHS = Cond; + RHS = I->High; + } + + // The false weight is the sum of all unhandled cases. + UnhandledWeights -= I->Weight; + CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Weight, + UnhandledWeights); + + if (CurMBB == SwitchMBB) + visitSwitchCase(CB, SwitchMBB); + else + SwitchCases.push_back(CB); + + break; + } + } + CurMBB = Fallthrough; + } +} + +void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, + const SwitchWorkListItem &W, + Value *Cond, + MachineBasicBlock *SwitchMBB) { + assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) && + "Clusters not sorted?"); + + assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!"); + + // Balance the tree based on branch weights to create a near-optimal (in terms + // of search time given key frequency) binary search tree. See e.g. Kurt + // Mehlhorn "Nearly Optimal Binary Search Trees" (1975). + CaseClusterIt LastLeft = W.FirstCluster; + CaseClusterIt FirstRight = W.LastCluster; + uint32_t LeftWeight = LastLeft->Weight; + uint32_t RightWeight = FirstRight->Weight; + + // Move LastLeft and FirstRight towards each other from opposite directions to + // find a partitioning of the clusters which balances the weight on both + // sides. If LeftWeight and RightWeight are equal, alternate which side is + // taken to ensure 0-weight nodes are distributed evenly. + unsigned I = 0; + while (LastLeft + 1 < FirstRight) { + if (LeftWeight < RightWeight || (LeftWeight == RightWeight && (I & 1))) + LeftWeight += (++LastLeft)->Weight; + else + RightWeight += (--FirstRight)->Weight; + I++; + } + assert(LastLeft + 1 == FirstRight); + assert(LastLeft >= W.FirstCluster); + assert(FirstRight <= W.LastCluster); + + // Use the first element on the right as pivot since we will make less-than + // comparisons against it. + CaseClusterIt PivotCluster = FirstRight; + assert(PivotCluster > W.FirstCluster); + assert(PivotCluster <= W.LastCluster); + + CaseClusterIt FirstLeft = W.FirstCluster; + CaseClusterIt LastRight = W.LastCluster; + + const ConstantInt *Pivot = PivotCluster->Low; + + // New blocks will be inserted immediately after the current one. + MachineFunction::iterator BBI = W.MBB; + ++BBI; + + // We will branch to the LHS if Value < Pivot. If LHS is a single cluster, + // we can branch to its destination directly if it's squeezed exactly in + // between the known lower bound and Pivot - 1. + MachineBasicBlock *LeftMBB; + if (FirstLeft == LastLeft && FirstLeft->Kind == CC_Range && + FirstLeft->Low == W.GE && + (FirstLeft->High->getValue() + 1LL) == Pivot->getValue()) { + LeftMBB = FirstLeft->MBB; + } else { + LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); + FuncInfo.MF->insert(BBI, LeftMBB); + WorkList.push_back({LeftMBB, FirstLeft, LastLeft, W.GE, Pivot}); + // Put Cond in a virtual register to make it available from the new blocks. + ExportFromCurrentBlock(Cond); + } + + // Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a + // single cluster, RHS.Low == Pivot, and we can branch to its destination + // directly if RHS.High equals the current upper bound. + MachineBasicBlock *RightMBB; + if (FirstRight == LastRight && FirstRight->Kind == CC_Range && + W.LT && (FirstRight->High->getValue() + 1ULL) == W.LT->getValue()) { + RightMBB = FirstRight->MBB; + } else { + RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); + FuncInfo.MF->insert(BBI, RightMBB); + WorkList.push_back({RightMBB, FirstRight, LastRight, Pivot, W.LT}); + // Put Cond in a virtual register to make it available from the new blocks. + ExportFromCurrentBlock(Cond); + } + + // Create the CaseBlock record that will be used to lower the branch. + CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB, + LeftWeight, RightWeight); + + if (W.MBB == SwitchMBB) + visitSwitchCase(CB, SwitchMBB); + else + SwitchCases.push_back(CB); +} + +void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { + // Extract cases from the switch. + BranchProbabilityInfo *BPI = FuncInfo.BPI; + CaseClusterVector Clusters; + Clusters.reserve(SI.getNumCases()); + for (auto I : SI.cases()) { + MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()]; + const ConstantInt *CaseVal = I.getCaseValue(); + uint32_t Weight = + BPI ? BPI->getEdgeWeight(SI.getParent(), I.getSuccessorIndex()) : 0; + Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Weight)); + } + + MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()]; + + // Cluster adjacent cases with the same destination. We do this at all + // optimization levels because it's cheap to do and will make codegen faster + // if there are many clusters. + sortAndRangeify(Clusters); + + if (TM.getOptLevel() != CodeGenOpt::None) { + // Replace an unreachable default with the most popular destination. + // FIXME: Exploit unreachable default more aggressively. + bool UnreachableDefault = + isa<UnreachableInst>(SI.getDefaultDest()->getFirstNonPHIOrDbg()); + if (UnreachableDefault && !Clusters.empty()) { + DenseMap<const BasicBlock *, unsigned> Popularity; + unsigned MaxPop = 0; + const BasicBlock *MaxBB = nullptr; + for (auto I : SI.cases()) { + const BasicBlock *BB = I.getCaseSuccessor(); + if (++Popularity[BB] > MaxPop) { + MaxPop = Popularity[BB]; + MaxBB = BB; + } + } + // Set new default. + assert(MaxPop > 0 && MaxBB); + DefaultMBB = FuncInfo.MBBMap[MaxBB]; + + // Remove cases that were pointing to the destination that is now the + // default. + CaseClusterVector New; + New.reserve(Clusters.size()); + for (CaseCluster &CC : Clusters) { + if (CC.MBB != DefaultMBB) + New.push_back(CC); + } + Clusters = std::move(New); + } + } + + // If there is only the default destination, jump there directly. + MachineBasicBlock *SwitchMBB = FuncInfo.MBB; + if (Clusters.empty()) { + SwitchMBB->addSuccessor(DefaultMBB); + if (DefaultMBB != NextBlock(SwitchMBB)) { + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, + getControlRoot(), DAG.getBasicBlock(DefaultMBB))); + } + return; + } + + if (TM.getOptLevel() != CodeGenOpt::None) { + findJumpTables(Clusters, &SI, DefaultMBB); + findBitTestClusters(Clusters, &SI); + } + + + DEBUG({ + dbgs() << "Case clusters: "; + for (const CaseCluster &C : Clusters) { + if (C.Kind == CC_JumpTable) dbgs() << "JT:"; + if (C.Kind == CC_BitTests) dbgs() << "BT:"; + + C.Low->getValue().print(dbgs(), true); + if (C.Low != C.High) { + dbgs() << '-'; + C.High->getValue().print(dbgs(), true); + } + dbgs() << ' '; + } + dbgs() << '\n'; + }); + + assert(!Clusters.empty()); + SwitchWorkList WorkList; + CaseClusterIt First = Clusters.begin(); + CaseClusterIt Last = Clusters.end() - 1; + WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr}); + + while (!WorkList.empty()) { + SwitchWorkListItem W = WorkList.back(); + WorkList.pop_back(); + unsigned NumClusters = W.LastCluster - W.FirstCluster + 1; + + if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None) { + // For optimized builds, lower large range as a balanced binary tree. + splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB); + continue; + } + + lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB); + } +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 9070091..f0c03af 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -17,9 +17,11 @@ #include "StatepointLowering.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/Statepoint.h" #include "llvm/IR/Constants.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetLowering.h" @@ -133,26 +135,65 @@ private: /// SDNodes we create. unsigned SDNodeOrder; - /// Case - A struct to record the Value for a switch case, and the - /// case's target basic block. - struct Case { - const Constant *Low; - const Constant *High; - MachineBasicBlock* BB; - uint32_t ExtraWeight; + enum CaseClusterKind { + /// A cluster of adjacent case labels with the same destination, or just one + /// case. + CC_Range, + /// A cluster of cases suitable for jump table lowering. + CC_JumpTable, + /// A cluster of cases suitable for bit test lowering. + CC_BitTests + }; - Case() : Low(nullptr), High(nullptr), BB(nullptr), ExtraWeight(0) { } - Case(const Constant *low, const Constant *high, MachineBasicBlock *bb, - uint32_t extraweight) : Low(low), High(high), BB(bb), - ExtraWeight(extraweight) { } + /// A cluster of case labels. + struct CaseCluster { + CaseClusterKind Kind; + const ConstantInt *Low, *High; + union { + MachineBasicBlock *MBB; + unsigned JTCasesIndex; + unsigned BTCasesIndex; + }; + uint32_t Weight; + + static CaseCluster range(const ConstantInt *Low, const ConstantInt *High, + MachineBasicBlock *MBB, uint32_t Weight) { + CaseCluster C; + C.Kind = CC_Range; + C.Low = Low; + C.High = High; + C.MBB = MBB; + C.Weight = Weight; + return C; + } - APInt size() const { - const APInt &rHigh = cast<ConstantInt>(High)->getValue(); - const APInt &rLow = cast<ConstantInt>(Low)->getValue(); - return (rHigh - rLow + 1ULL); + static CaseCluster jumpTable(const ConstantInt *Low, + const ConstantInt *High, unsigned JTCasesIndex, + uint32_t Weight) { + CaseCluster C; + C.Kind = CC_JumpTable; + C.Low = Low; + C.High = High; + C.JTCasesIndex = JTCasesIndex; + C.Weight = Weight; + return C; + } + + static CaseCluster bitTests(const ConstantInt *Low, const ConstantInt *High, + unsigned BTCasesIndex, uint32_t Weight) { + CaseCluster C; + C.Kind = CC_BitTests; + C.Low = Low; + C.High = High; + C.BTCasesIndex = BTCasesIndex; + C.Weight = Weight; + return C; } }; + typedef std::vector<CaseCluster> CaseClusterVector; + typedef CaseClusterVector::iterator CaseClusterIt; + struct CaseBits { uint64_t Mask; MachineBasicBlock* BB; @@ -162,51 +203,14 @@ private: CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits, uint32_t Weight): Mask(mask), BB(bb), Bits(bits), ExtraWeight(Weight) { } - }; - - typedef std::vector<Case> CaseVector; - typedef std::vector<CaseBits> CaseBitsVector; - typedef CaseVector::iterator CaseItr; - typedef std::pair<CaseItr, CaseItr> CaseRange; - - /// CaseRec - A struct with ctor used in lowering switches to a binary tree - /// of conditional branches. - struct CaseRec { - CaseRec(MachineBasicBlock *bb, const Constant *lt, const Constant *ge, - CaseRange r) : - CaseBB(bb), LT(lt), GE(ge), Range(r) {} - - /// CaseBB - The MBB in which to emit the compare and branch - MachineBasicBlock *CaseBB; - /// LT, GE - If nonzero, we know the current case value must be less-than or - /// greater-than-or-equal-to these Constants. - const Constant *LT; - const Constant *GE; - /// Range - A pair of iterators representing the range of case values to be - /// processed at this point in the binary search tree. - CaseRange Range; - }; - typedef std::vector<CaseRec> CaseRecVector; - - /// The comparison function for sorting the switch case values in the vector. - /// WARNING: Case ranges should be disjoint! - struct CaseCmp { - bool operator()(const Case &C1, const Case &C2) { - assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High)); - const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low); - const ConstantInt* CI2 = cast<const ConstantInt>(C2.High); - return CI1->getValue().slt(CI2->getValue()); - } + CaseBits() : Mask(0), BB(nullptr), Bits(0), ExtraWeight(0) {} }; - struct CaseBitsCmp { - bool operator()(const CaseBits &C1, const CaseBits &C2) { - return C1.Bits > C2.Bits; - } - }; + typedef std::vector<CaseBits> CaseBitsVector; - void Clusterify(CaseVector &Cases, const SwitchInst &SI); + /// Sort Clusters and merge adjacent cases. + void sortAndRangeify(CaseClusterVector &Clusters); /// CaseBlock - This structure is used to communicate between /// SelectionDAGBuilder and SDISel for the code generation of additional basic @@ -296,6 +300,58 @@ private: BitTestInfo Cases; }; + /// Minimum jump table density, in percent. + enum { MinJumpTableDensity = 40 }; + + /// Check whether a range of clusters is dense enough for a jump table. + bool isDense(const CaseClusterVector &Clusters, unsigned *TotalCases, + unsigned First, unsigned Last); + + /// Build a jump table cluster from Clusters[First..Last]. Returns false if it + /// decides it's not a good idea. + bool buildJumpTable(CaseClusterVector &Clusters, unsigned First, + unsigned Last, const SwitchInst *SI, + MachineBasicBlock *DefaultMBB, CaseCluster &JTCluster); + + /// Find clusters of cases suitable for jump table lowering. + void findJumpTables(CaseClusterVector &Clusters, const SwitchInst *SI, + MachineBasicBlock *DefaultMBB); + + /// Check whether the range [Low,High] fits in a machine word. + bool rangeFitsInWord(const APInt &Low, const APInt &High); + + /// Check whether these clusters are suitable for lowering with bit tests based + /// on the number of destinations, comparison metric, and range. + bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps, + const APInt &Low, const APInt &High); + + /// Build a bit test cluster from Clusters[First..Last]. Returns false if it + /// decides it's not a good idea. + bool buildBitTests(CaseClusterVector &Clusters, unsigned First, unsigned Last, + const SwitchInst *SI, CaseCluster &BTCluster); + + /// Find clusters of cases suitable for bit test lowering. + void findBitTestClusters(CaseClusterVector &Clusters, const SwitchInst *SI); + + struct SwitchWorkListItem { + MachineBasicBlock *MBB; + CaseClusterIt FirstCluster; + CaseClusterIt LastCluster; + const ConstantInt *GE; + const ConstantInt *LT; + }; + typedef SmallVector<SwitchWorkListItem, 4> SwitchWorkList; + + /// Emit comparison and split W into two subtrees. + void splitWorkItem(SwitchWorkList &WorkList, const SwitchWorkListItem &W, + Value *Cond, MachineBasicBlock *SwitchMBB); + + /// Lower W. + void lowerWorkItem(SwitchWorkListItem W, Value *Cond, + MachineBasicBlock *SwitchMBB, + MachineBasicBlock *DefaultMBB); + + /// A class which encapsulates all of the information needed to generate a /// stack protector check and signals to isel via its state being initialized /// that a stack protector needs to be generated. @@ -405,7 +461,6 @@ private: StackProtectorDescriptor() : ParentMBB(nullptr), SuccessMBB(nullptr), FailureMBB(nullptr), Guard(nullptr), GuardReg(0) { } - ~StackProtectorDescriptor() { } /// Returns true if all fields of the stack protector descriptor are /// initialized implying that we should/are ready to emit a stack protector. @@ -605,10 +660,16 @@ public: void visit(unsigned Opcode, const User &I); + /// getCopyFromRegs - If there was virtual register allocated for the value V + /// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise. + SDValue getCopyFromRegs(const Value *V, Type *Ty); + // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V, // generate the debug data structures now that we've seen its definition. void resolveDanglingDebugInfo(const Value *V, SDValue Val); SDValue getValue(const Value *V); + bool findValue(const Value *V) const; + SDValue getNonRegisterValue(const Value *V); SDValue getValueImpl(const Value *V); @@ -618,13 +679,6 @@ public: N = NewN; } - void removeValue(const Value *V) { - // This is to support hack in lowerCallFromStatepoint - // Should be removed when hack is resolved - if (NodeMap.count(V)) - NodeMap.erase(V); - } - void setUnusedArgValue(const Value *V, SDValue NewN) { SDValue &N = UnusedArgNodeMap[V]; assert(!N.getNode() && "Already set a value for this node!"); @@ -652,7 +706,7 @@ public: unsigned ArgIdx, unsigned NumArgs, SDValue Callee, - bool UseVoidTy = false, + Type *ReturnTy, MachineBasicBlock *LandingPad = nullptr, bool IsPatchPoint = false); @@ -660,6 +714,10 @@ public: /// references that need to refer to the last resulting block. void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last); + // This function is responsible for the whole statepoint lowering process. + // It uniformly handles invoke and call statepoints. + void LowerStatepoint(ImmutableStatepoint Statepoint, + MachineBasicBlock *LandingPad = nullptr); private: std::pair<SDValue, SDValue> lowerInvokable( TargetLowering::CallLoweringInfo &CLI, @@ -672,27 +730,6 @@ private: void visitIndirectBr(const IndirectBrInst &I); void visitUnreachable(const UnreachableInst &I); - // Helpers for visitSwitch - bool handleSmallSwitchRange(CaseRec& CR, - CaseRecVector& WorkList, - const Value* SV, - MachineBasicBlock* Default, - MachineBasicBlock *SwitchBB); - bool handleJTSwitchCase(CaseRec& CR, - CaseRecVector& WorkList, - const Value* SV, - MachineBasicBlock* Default, - MachineBasicBlock *SwitchBB); - bool handleBTSplitSwitchCase(CaseRec& CR, - CaseRecVector& WorkList, - const Value* SV, - MachineBasicBlock *SwitchBB); - bool handleBitTestsSwitchCase(CaseRec& CR, - CaseRecVector& WorkList, - const Value* SV, - MachineBasicBlock* Default, - MachineBasicBlock *SwitchBB); - uint32_t getEdgeWeight(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const; void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst, @@ -713,6 +750,8 @@ public: void visitJumpTable(JumpTable &JT); void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH, MachineBasicBlock *SwitchBB); + unsigned visitLandingPadClauseBB(GlobalValue *ClauseGV, + MachineBasicBlock *LPadMBB); private: // These all get lowered before this pass. @@ -772,6 +811,8 @@ private: void visitStore(const StoreInst &I); void visitMaskedLoad(const CallInst &I); void visitMaskedStore(const CallInst &I); + void visitMaskedGather(const CallInst &I); + void visitMaskedScatter(const CallInst &I); void visitAtomicCmpXchg(const AtomicCmpXchgInst &I); void visitAtomicRMW(const AtomicRMWInst &I); void visitFence(const FenceInst &I); @@ -820,9 +861,91 @@ private: /// EmitFuncArgumentDbgValue - If V is an function argument then create /// corresponding DBG_VALUE machine instruction for it now. At the end of /// instruction selection, they will be inserted to the entry BB. - bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, MDNode *Expr, + bool EmitFuncArgumentDbgValue(const Value *V, DILocalVariable *Variable, + DIExpression *Expr, DILocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N); + + /// Return the next block after MBB, or nullptr if there is none. + MachineBasicBlock *NextBlock(MachineBasicBlock *MBB); + + /// Update the DAG and DAG builder with the relevant information after + /// a new root node has been created which could be a tail call. + void updateDAGForMaybeTailCall(SDValue MaybeTC); +}; + +/// RegsForValue - This struct represents the registers (physical or virtual) +/// that a particular set of values is assigned, and the type information about +/// the value. The most common situation is to represent one value at a time, +/// but struct or array values are handled element-wise as multiple values. The +/// splitting of aggregates is performed recursively, so that we never have +/// aggregate-typed registers. The values at this point do not necessarily have +/// legal types, so each value may require one or more registers of some legal +/// type. +/// +struct RegsForValue { + /// ValueVTs - The value types of the values, which may not be legal, and + /// may need be promoted or synthesized from one or more registers. + /// + SmallVector<EVT, 4> ValueVTs; + + /// RegVTs - The value types of the registers. This is the same size as + /// ValueVTs and it records, for each value, what the type of the assigned + /// register or registers are. (Individual values are never synthesized + /// from more than one type of register.) + /// + /// With virtual registers, the contents of RegVTs is redundant with TLI's + /// getRegisterType member function, however when with physical registers + /// it is necessary to have a separate record of the types. + /// + SmallVector<MVT, 4> RegVTs; + + /// Regs - This list holds the registers assigned to the values. + /// Each legal or promoted value requires one register, and each + /// expanded value requires multiple registers. + /// + SmallVector<unsigned, 4> Regs; + + RegsForValue(); + + RegsForValue(const SmallVector<unsigned, 4> ®s, MVT regvt, EVT valuevt); + + RegsForValue(LLVMContext &Context, const TargetLowering &tli, unsigned Reg, + Type *Ty); + + /// append - Add the specified values to this one. + void append(const RegsForValue &RHS) { + ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); + RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end()); + Regs.append(RHS.Regs.begin(), RHS.Regs.end()); + } + + /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from + /// this value and returns the result as a ValueVTs value. This uses + /// Chain/Flag as the input and updates them for the output Chain/Flag. + /// If the Flag pointer is NULL, no flag is used. + SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, + SDLoc dl, + SDValue &Chain, SDValue *Flag, + const Value *V = nullptr) const; + + /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the specified + /// value into the registers specified by this object. This uses Chain/Flag + /// as the input and updates them for the output Chain/Flag. If the Flag + /// pointer is nullptr, no flag is used. If V is not nullptr, then it is used + /// in printing better diagnostic messages on error. + void + getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, + SDValue *Flag, const Value *V = nullptr, + ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const; + + /// AddInlineAsmOperands - Add this value to the specified inlineasm node + /// operand list. This adds the code marker, matching input operand index + /// (if applicable), and includes the number of values added into it. + void AddInlineAsmOperands(unsigned Kind, + bool HasMatching, unsigned MatchingIdx, SDLoc dl, + SelectionDAG &DAG, + std::vector<SDValue> &Ops) const; }; } // end namespace llvm diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index e8577d8..96ee899 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -95,6 +95,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; case ISD::RETURNADDR: return "RETURNADDR"; case ISD::FRAMEADDR: return "FRAMEADDR"; + case ISD::FRAME_ALLOC_RECOVER: return "FRAME_ALLOC_RECOVER"; case ISD::READ_REGISTER: return "READ_REGISTER"; case ISD::WRITE_REGISTER: return "WRITE_REGISTER"; case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; @@ -187,10 +188,15 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FMUL: return "fmul"; case ISD::FDIV: return "fdiv"; case ISD::FMA: return "fma"; + case ISD::FMAD: return "fmad"; case ISD::FREM: return "frem"; case ISD::FCOPYSIGN: return "fcopysign"; case ISD::FGETSIGN: return "fgetsign"; case ISD::FPOW: return "fpow"; + case ISD::SMIN: return "smin"; + case ISD::SMAX: return "smax"; + case ISD::UMIN: return "umin"; + case ISD::UMAX: return "umax"; case ISD::FPOWI: return "fpowi"; case ISD::SETCC: return "setcc"; @@ -271,6 +277,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::STORE: return "store"; case ISD::MLOAD: return "masked_load"; case ISD::MSTORE: return "masked_store"; + case ISD::MGATHER: return "masked_gather"; + case ISD::MSCATTER: return "masked_scatter"; case ISD::VAARG: return "vaarg"; case ISD::VACOPY: return "vacopy"; case ISD::VAEND: return "vaend"; @@ -284,6 +292,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::DEBUGTRAP: return "debugtrap"; case ISD::LIFETIME_START: return "lifetime.start"; case ISD::LIFETIME_END: return "lifetime.end"; + case ISD::GC_TRANSITION_START: return "gc_transition.start"; + case ISD::GC_TRANSITION_END: return "gc_transition.end"; // Bit manipulation case ISD::BSWAP: return "bswap"; @@ -518,22 +528,20 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (getNodeId() != -1) OS << " [ID=" << getNodeId() << ']'; - DebugLoc dl = getDebugLoc(); - if (G && !dl.isUnknown()) { - DIScope - Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext())); - OS << " dbg:"; - assert((!Scope || Scope.isScope()) && - "Scope of a DebugLoc should be null or a DIScope."); - // Omit the directory, since it's usually long and uninteresting. - if (Scope) - OS << Scope.getFilename(); - else - OS << "<unknown>"; - OS << ':' << dl.getLine(); - if (dl.getCol() != 0) - OS << ':' << dl.getCol(); - } + if (!G) + return; + + DILocation *L = getDebugLoc(); + if (!L) + return; + + if (auto *Scope = L->getScope()) + OS << Scope->getFilename(); + else + OS << "<unknown>"; + OS << ':' << L->getLine(); + if (unsigned C = L->getColumn()) + OS << ':' << C; } static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index ef54525..22f592a 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/GCStrategy.h" #include "ScheduleDAGSDNodes.h" #include "SelectionDAGBuilder.h" #include "llvm/ADT/PostOrderIterator.h" @@ -19,10 +19,11 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" -#include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -31,6 +32,8 @@ #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" @@ -40,6 +43,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -47,7 +51,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" -#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -166,14 +169,13 @@ static cl::opt<bool> EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, cl::desc("Enable verbose messages in the \"fast\" " "instruction selector")); -static cl::opt<bool> -EnableFastISelAbort("fast-isel-abort", cl::Hidden, - cl::desc("Enable abort calls when \"fast\" instruction selection " - "fails to lower an instruction")); -static cl::opt<bool> -EnableFastISelAbortArgs("fast-isel-abort-args", cl::Hidden, - cl::desc("Enable abort calls when \"fast\" instruction selection " - "fails to lower a formal argument")); +static cl::opt<int> EnableFastISelAbort( + "fast-isel-abort", cl::Hidden, + cl::desc("Enable abort calls when \"fast\" instruction selection " + "fails to lower an instruction: 0 disable the abort, 1 will " + "abort but for args, calls and terminators, 2 will also " + "abort for argument lowering, and 3 will never fallback " + "to SelectionDAG.")); static cl::opt<bool> UseMBPI("use-mbpi", @@ -291,7 +293,8 @@ namespace llvm { const TargetLowering *TLI = IS->TLI; const TargetSubtargetInfo &ST = IS->MF->getSubtarget(); - if (OptLevel == CodeGenOpt::None || ST.useMachineScheduler() || + if (OptLevel == CodeGenOpt::None || + (ST.enableMachineScheduler() && ST.enableMachineSchedDefaultSched()) || TLI->getSchedulingPreference() == Sched::Source) return createSourceListDAGScheduler(IS, OptLevel); if (TLI->getSchedulingPreference() == Sched::RegPressure) @@ -349,7 +352,8 @@ SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry()); initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry()); - initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry()); + initializeTargetLibraryInfoWrapperPassPass( + *PassRegistry::getPassRegistry()); } SelectionDAGISel::~SelectionDAGISel() { @@ -363,7 +367,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<AliasAnalysis>(); AU.addRequired<GCModuleInfo>(); AU.addPreserved<GCModuleInfo>(); - AU.addRequired<TargetLibraryInfo>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); if (UseMBPI && OptLevel != CodeGenOpt::None) AU.addRequired<BranchProbabilityInfo>(); MachineFunctionPass::getAnalysisUsage(AU); @@ -376,7 +380,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { /// /// This is required for correctness, so it must be done at -O0. /// -static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) { +static void SplitCriticalSideEffectEdges(Function &Fn, AliasAnalysis *AA) { // Loop for blocks with phi nodes. for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { PHINode *PN = dyn_cast<PHINode>(BB->begin()); @@ -400,8 +404,9 @@ static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) { continue; // Okay, we have to split this edge. - SplitCriticalEdge(Pred->getTerminator(), - GetSuccessorNumber(Pred, BB), SDISel, true); + SplitCriticalEdge( + Pred->getTerminator(), GetSuccessorNumber(Pred, BB), + CriticalEdgeSplittingOptions(AA).setMergeIdenticalEdges()); goto ReprocessBlock; } } @@ -412,7 +417,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) && "-fast-isel-verbose requires -fast-isel"); assert((!EnableFastISelAbort || TM.Options.EnableFastISel) && - "-fast-isel-abort requires -fast-isel"); + "-fast-isel-abort > 0 requires -fast-isel"); const Function &Fn = *mf.getFunction(); MF = &mf; @@ -433,12 +438,12 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); AA = &getAnalysis<AliasAnalysis>(); - LibInfo = &getAnalysis<TargetLibraryInfo>(); + LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr; DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); - SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this); + SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), AA); CurDAG->init(*MF); FuncInfo->set(Fn, *MF, CurDAG); @@ -496,12 +501,14 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { MachineBasicBlock::iterator InsertPos = Def; const MDNode *Variable = MI->getDebugVariable(); const MDNode *Expr = MI->getDebugExpression(); + DebugLoc DL = MI->getDebugLoc(); bool IsIndirect = MI->isIndirectDebugValue(); unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; + assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); // Def is never a terminator here, so it is ok to increment InsertPos. - BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(), - TII->get(TargetOpcode::DBG_VALUE), IsIndirect, LDI->second, Offset, - Variable, Expr); + BuildMI(*EntryMBB, ++InsertPos, DL, TII->get(TargetOpcode::DBG_VALUE), + IsIndirect, LDI->second, Offset, Variable, Expr); // If this vreg is directly copied into an exported register then // that COPY instructions also need DBG_VALUE, if it is the only @@ -519,9 +526,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { CopyUseMI = nullptr; break; } if (CopyUseMI) { + // Use MI's debug location, which describes where Variable was + // declared, rather than whatever is attached to CopyUseMI. MachineInstr *NewMI = - BuildMI(*MF, CopyUseMI->getDebugLoc(), - TII->get(TargetOpcode::DBG_VALUE), IsIndirect, + BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, CopyUseMI->getOperand(0).getReg(), Offset, Variable, Expr); MachineBasicBlock::iterator Pos = CopyUseMI; EntryMBB->insertAfter(Pos, NewMI); @@ -570,6 +578,13 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { TargetRegisterInfo::isVirtualRegister(To)) MRI.constrainRegClass(To, MRI.getRegClass(From)); // Replace it. + + + // Replacing one register with another won't touch the kill flags. + // We need to conservatively clear the kill flags as a kill on the old + // register might dominate existing uses of the new register. + if (!MRI.use_empty(To)) + MRI.clearKillFlags(From); MRI.replaceRegWith(From, To); } @@ -591,9 +606,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, BasicBlock::const_iterator End, bool &HadTailCall) { - // Lower all of the non-terminator instructions. If a call is emitted - // as a tail call, cease emitting nodes for this block. Terminators - // are handled below. + // Lower the instructions. If a call is emitted as a tail call, cease emitting + // nodes for this block. for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I) SDB->visit(*I); @@ -656,7 +670,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { (void)BlockNumber; bool MatchFilterBB = false; (void)MatchFilterBB; #ifndef NDEBUG - MatchFilterBB = (!FilterDAGBasicBlockName.empty() && + MatchFilterBB = (FilterDAGBasicBlockName.empty() || FilterDAGBasicBlockName == FuncInfo->MBB->getBasicBlock()->getName().str()); #endif @@ -667,8 +681,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #endif { BlockNumber = FuncInfo->MBB->getNumber(); - BlockName = MF->getName().str() + ":" + - FuncInfo->MBB->getBasicBlock()->getName().str(); + BlockName = + (MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str(); } DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); @@ -904,9 +918,11 @@ void SelectionDAGISel::DoInstructionSelection() { /// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and /// do other setup for EH landing-pad blocks. -void SelectionDAGISel::PrepareEHLandingPad() { +bool SelectionDAGISel::PrepareEHLandingPad() { MachineBasicBlock *MBB = FuncInfo->MBB; + const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy()); + // Add a label to mark the beginning of the landing pad. Deletion of the // landing pad can thus be detected via the MachineModuleInfo. MCSymbol *Label = MF->getMMI().addLandingPad(MBB); @@ -918,14 +934,53 @@ void SelectionDAGISel::PrepareEHLandingPad() { BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) .addSym(Label); + // If this is an MSVC-style personality function, we need to split the landing + // pad into several BBs. + const BasicBlock *LLVMBB = MBB->getBasicBlock(); + const LandingPadInst *LPadInst = LLVMBB->getLandingPadInst(); + MF->getMMI().addPersonality( + MBB, cast<Function>(LPadInst->getPersonalityFn()->stripPointerCasts())); + EHPersonality Personality = MF->getMMI().getPersonalityType(); + + if (isMSVCEHPersonality(Personality)) { + SmallVector<MachineBasicBlock *, 4> ClauseBBs; + const IntrinsicInst *ActionsCall = + dyn_cast<IntrinsicInst>(LLVMBB->getFirstInsertionPt()); + // Get all invoke BBs that unwind to this landingpad. + SmallVector<MachineBasicBlock *, 4> InvokeBBs(MBB->pred_begin(), + MBB->pred_end()); + if (ActionsCall && ActionsCall->getIntrinsicID() == Intrinsic::eh_actions) { + // If this is a call to llvm.eh.actions followed by indirectbr, then we've + // run WinEHPrepare, and we should remove this block from the machine CFG. + // Mark the targets of the indirectbr as landingpads instead. + for (const BasicBlock *LLVMSucc : successors(LLVMBB)) { + MachineBasicBlock *ClauseBB = FuncInfo->MBBMap[LLVMSucc]; + // Add the edge from the invoke to the clause. + for (MachineBasicBlock *InvokeBB : InvokeBBs) + InvokeBB->addSuccessor(ClauseBB); + + // Mark the clause as a landing pad or MI passes will delete it. + ClauseBB->setIsLandingPad(); + } + } + + // Remove the edge from the invoke to the lpad. + for (MachineBasicBlock *InvokeBB : InvokeBBs) + InvokeBB->removeSuccessor(MBB); + + // Don't select instructions for the landingpad. + return false; + } + // Mark exception register as live in. - const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy()); if (unsigned Reg = TLI->getExceptionPointerRegister()) FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC); // Mark exception selector register as live in. if (unsigned Reg = TLI->getExceptionSelectorRegister()) FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC); + + return true; } /// isFoldedOrDeadInstruction - Return true if the specified instruction is @@ -1095,8 +1150,9 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Setup an EH landing-pad block. FuncInfo->ExceptionPointerVirtReg = 0; FuncInfo->ExceptionSelectorVirtReg = 0; - if (FuncInfo->MBB->isLandingPad()) - PrepareEHLandingPad(); + if (LLVMBB->isLandingPad()) + if (!PrepareEHLandingPad()) + continue; // Before doing SelectionDAG ISel, see if FastISel has been requested. if (FastIS) { @@ -1111,8 +1167,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { if (!FastIS->lowerArguments()) { // Fast isel failed to lower these arguments ++NumFastIselFailLowerArguments; - if (EnableFastISelAbortArgs) - llvm_unreachable("FastISel didn't lower all arguments"); + if (EnableFastISelAbort > 1) + report_fatal_error("FastISel didn't lower all arguments"); // Use SelectionDAG argument lowering LowerArguments(Fn); @@ -1181,6 +1237,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { dbgs() << "FastISel missed call: "; Inst->dump(); } + if (EnableFastISelAbort > 2) + // FastISel selector couldn't handle something and bailed. + // For the purpose of debugging, just abort. + report_fatal_error("FastISel didn't select the entire block"); if (!Inst->getType()->isVoidTy() && !Inst->use_empty()) { unsigned &R = FuncInfo->ValueMap[Inst]; @@ -1208,24 +1268,24 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { continue; } - if (isa<TerminatorInst>(Inst) && !isa<BranchInst>(Inst)) { - // Don't abort, and use a different message for terminator misses. - NumFastIselFailures += NumFastIselRemaining; - if (EnableFastISelVerbose || EnableFastISelAbort) { + bool ShouldAbort = EnableFastISelAbort; + if (EnableFastISelVerbose || EnableFastISelAbort) { + if (isa<TerminatorInst>(Inst)) { + // Use a different message for terminator misses. dbgs() << "FastISel missed terminator: "; - Inst->dump(); - } - } else { - NumFastIselFailures += NumFastIselRemaining; - if (EnableFastISelVerbose || EnableFastISelAbort) { + // Don't abort unless for terminator unless the level is really high + ShouldAbort = (EnableFastISelAbort > 2); + } else { dbgs() << "FastISel miss: "; - Inst->dump(); } - if (EnableFastISelAbort) - // The "fast" selector couldn't handle something and bailed. - // For the purpose of debugging, just abort. - llvm_unreachable("FastISel didn't select the entire block"); + Inst->dump(); } + if (ShouldAbort) + // FastISel selector couldn't handle something and bailed. + // For the purpose of debugging, just abort. + report_fatal_error("FastISel didn't select the entire block"); + + NumFastIselFailures += NumFastIselRemaining; break; } @@ -1354,21 +1414,15 @@ SelectionDAGISel::FinishBasicBlock() { << FuncInfo->PHINodesToUpdate[i].first << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n"); - const bool MustUpdatePHINodes = SDB->SwitchCases.empty() && - SDB->JTCases.empty() && - SDB->BitTestCases.empty(); - // Next, now that we know what the last MBB the LLVM BB expanded is, update // PHI nodes in successors. - if (MustUpdatePHINodes) { - for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) { - MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first); - assert(PHI->isPHI() && - "This is not a machine PHI node that we are updating!"); - if (!FuncInfo->MBB->isSuccessor(PHI->getParent())) - continue; - PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB); - } + for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) { + MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first); + assert(PHI->isPHI() && + "This is not a machine PHI node that we are updating!"); + if (!FuncInfo->MBB->isSuccessor(PHI->getParent())) + continue; + PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB); } // Handle stack protector. @@ -1413,10 +1467,6 @@ SelectionDAGISel::FinishBasicBlock() { SDB->SPDescriptor.resetPerBBState(); } - // If we updated PHI Nodes, return early. - if (MustUpdatePHINodes) - return; - for (unsigned i = 0, e = SDB->BitTestCases.size(); i != e; ++i) { // Lower header first, if it wasn't already lowered if (!SDB->BitTestCases[i].Emitted) { @@ -1530,16 +1580,6 @@ SelectionDAGISel::FinishBasicBlock() { } SDB->JTCases.clear(); - // If the switch block involved a branch to one of the actual successors, we - // need to update PHI nodes in that block. - for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) { - MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first); - assert(PHI->isPHI() && - "This is not a machine PHI node that we are updating!"); - if (FuncInfo->MBB->isSuccessor(PHI->getParent())) - PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB); - } - // If we generated any switch lowering information, build and codegen any // additional DAGs necessary. for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) { @@ -1677,11 +1717,10 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, return false; } - /// SelectInlineAsmMemoryOperands - Calls to this are automatically generated /// by tblgen. Others should not call it. void SelectionDAGISel:: -SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) { +SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops, SDLoc DL) { std::vector<SDValue> InOps; std::swap(InOps, Ops); @@ -1704,16 +1743,30 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) { } else { assert(InlineAsm::getNumOperandRegisters(Flags) == 1 && "Memory operand with multiple values?"); + + unsigned TiedToOperand; + if (InlineAsm::isUseOperandTiedToDef(Flags, TiedToOperand)) { + // We need the constraint ID from the operand this is tied to. + unsigned CurOp = InlineAsm::Op_FirstOperand; + Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue(); + for (; TiedToOperand; --TiedToOperand) { + CurOp += InlineAsm::getNumOperandRegisters(Flags)+1; + Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue(); + } + } + // Otherwise, this is a memory operand. Ask the target to select it. std::vector<SDValue> SelOps; - if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps)) + if (SelectInlineAsmMemoryOperand(InOps[i+1], + InlineAsm::getMemoryConstraintID(Flags), + SelOps)) report_fatal_error("Could not match memory address. Inline asm" " failure!"); // Add this to the output node. unsigned NewFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size()); - Ops.push_back(CurDAG->getTargetConstant(NewFlags, MVT::i32)); + Ops.push_back(CurDAG->getTargetConstant(NewFlags, DL, MVT::i32)); Ops.insert(Ops.end(), SelOps.begin(), SelOps.end()); i += 2; } @@ -1859,11 +1912,13 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, } SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { + SDLoc DL(N); + std::vector<SDValue> Ops(N->op_begin(), N->op_end()); - SelectInlineAsmMemoryOperands(Ops); + SelectInlineAsmMemoryOperands(Ops, DL); - EVT VTs[] = { MVT::Other, MVT::Glue }; - SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), VTs, Ops); + const EVT VTs[] = {MVT::Other, MVT::Glue}; + SDValue New = CurDAG->getNode(ISD::INLINEASM, DL, VTs, Ops); New->setNodeId(-1); return New.getNode(); } @@ -1871,12 +1926,12 @@ SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { SDNode *SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) { SDLoc dl(Op); - MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(0)); + MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); unsigned Reg = TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0)); SDValue New = CurDAG->getCopyFromReg( - CurDAG->getEntryNode(), dl, Reg, Op->getValueType(0)); + Op->getOperand(0), dl, Reg, Op->getValueType(0)); New->setNodeId(-1); return New.getNode(); } @@ -1889,7 +1944,7 @@ SDNode unsigned Reg = TLI->getRegisterByName(RegStr->getString().data(), Op->getOperand(2).getValueType()); SDValue New = CurDAG->getCopyToReg( - CurDAG->getEntryNode(), dl, Reg, Op->getOperand(2)); + Op->getOperand(0), dl, Reg, Op->getOperand(2)); New->setNodeId(-1); return New.getNode(); } @@ -2464,7 +2519,7 @@ public: SelectionDAG::DAGUpdateListener(DAG), RecordedNodes(RN), MatchScopes(MS) { } - void NodeDeleted(SDNode *N, SDNode *E) { + void NodeDeleted(SDNode *N, SDNode *E) override { // Some early-returns here to avoid the search if we deleted the node or // if the update comes from MorphNodeTo (MorphNodeTo is the last thing we // do, so it's unnecessary to update matching state at that point). @@ -2885,7 +2940,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (Val & 128) Val = GetVBR(Val, MatcherTable, MatcherIndex); RecordedNodes.push_back(std::pair<SDValue, SDNode*>( - CurDAG->getTargetConstant(Val, VT), nullptr)); + CurDAG->getTargetConstant(Val, SDLoc(NodeToMatch), + VT), nullptr)); continue; } case OPC_EmitRegister: { @@ -2917,10 +2973,12 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (Imm->getOpcode() == ISD::Constant) { const ConstantInt *Val=cast<ConstantSDNode>(Imm)->getConstantIntValue(); - Imm = CurDAG->getConstant(*Val, Imm.getValueType(), true); + Imm = CurDAG->getConstant(*Val, SDLoc(NodeToMatch), Imm.getValueType(), + true); } else if (Imm->getOpcode() == ISD::ConstantFP) { const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue(); - Imm = CurDAG->getConstantFP(*Val, Imm.getValueType(), true); + Imm = CurDAG->getConstantFP(*Val, SDLoc(NodeToMatch), + Imm.getValueType(), true); } RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second)); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 33c20d3..2d4ab6c 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/StackMaps.h" @@ -37,13 +38,19 @@ STATISTIC(NumOfStatepoints, "Number of statepoint nodes encountered"); STATISTIC(StatepointMaxSlotsRequired, "Maximum number of stack slots required for a singe statepoint"); -void -StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) { +static void pushStackMapConstant(SmallVectorImpl<SDValue>& Ops, + SelectionDAGBuilder &Builder, uint64_t Value) { + SDLoc L = Builder.getCurSDLoc(); + Ops.push_back(Builder.DAG.getTargetConstant(StackMaps::ConstantOp, L, + MVT::i64)); + Ops.push_back(Builder.DAG.getTargetConstant(Value, L, MVT::i64)); +} + +void StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) { // Consistency check assert(PendingGCRelocateCalls.empty() && "Trying to visit statepoint before finished processing previous one"); Locations.clear(); - RelocLocations.clear(); NextSlotToAllocate = 0; // Need to resize this on each safepoint - we need the two to stay in // sync and the clear patterns of a SelectionDAGBuilder have no relation @@ -53,9 +60,9 @@ StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) { AllocatedStackSlots[i] = false; } } + void StatepointLoweringState::clear() { Locations.clear(); - RelocLocations.clear(); AllocatedStackSlots.clear(); assert(PendingGCRelocateCalls.empty() && "cleared before statepoint sequence completed"); @@ -222,75 +229,94 @@ static void removeDuplicatesGCPtrs(SmallVectorImpl<const Value *> &Bases, /// Extract call from statepoint, lower it and return pointer to the /// call node. Also update NodeMap so that getValue(statepoint) will /// reference lowered call result -static SDNode *lowerCallFromStatepoint(const CallInst &CI, - SelectionDAGBuilder &Builder) { - - assert(Intrinsic::experimental_gc_statepoint == - dyn_cast<IntrinsicInst>(&CI)->getIntrinsicID() && - "function called must be the statepoint function"); +static SDNode * +lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad, + SelectionDAGBuilder &Builder, + SmallVectorImpl<SDValue> &PendingExports) { + + ImmutableCallSite CS(ISP.getCallSite()); + + SDValue ActualCallee = Builder.getValue(ISP.getActualCallee()); + + // Handle immediate and symbolic callees. + if (auto *ConstCallee = dyn_cast<ConstantSDNode>(ActualCallee.getNode())) + ActualCallee = Builder.DAG.getIntPtrConstant(ConstCallee->getZExtValue(), + Builder.getCurSDLoc(), + /*isTarget=*/true); + else if (auto *SymbolicCallee = + dyn_cast<GlobalAddressSDNode>(ActualCallee.getNode())) + ActualCallee = Builder.DAG.getTargetGlobalAddress( + SymbolicCallee->getGlobal(), SDLoc(SymbolicCallee), + SymbolicCallee->getValueType(0)); + + assert(CS.getCallingConv() != CallingConv::AnyReg && + "anyregcc is not supported on statepoints!"); + + Type *DefTy = ISP.getActualReturnType(); + bool HasDef = !DefTy->isVoidTy(); + + SDValue ReturnValue, CallEndVal; + std::tie(ReturnValue, CallEndVal) = Builder.lowerCallOperands( + ISP.getCallSite(), ImmutableStatepoint::CallArgsBeginPos, + ISP.getNumCallArgs(), ActualCallee, DefTy, LandingPad, + false /* IsPatchPoint */); + + SDNode *CallEnd = CallEndVal.getNode(); + + // Get a call instruction from the call sequence chain. Tail calls are not + // allowed. The following code is essentially reverse engineering X86's + // LowerCallTo. + // + // We are expecting DAG to have the following form: + // + // ch = eh_label (only in case of invoke statepoint) + // ch, glue = callseq_start ch + // ch, glue = X86::Call ch, glue + // ch, glue = callseq_end ch, glue + // get_return_value ch, glue + // + // get_return_value can either be a CopyFromReg to grab the return value from + // %RAX, or it can be a LOAD to load a value returned by reference via a stack + // slot. + + if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg || + CallEnd->getOpcode() == ISD::LOAD)) + CallEnd = CallEnd->getOperand(0).getNode(); + + assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && "expected!"); - ImmutableStatepoint StatepointOperands(&CI); - - // Lower the actual call itself - This is a bit of a hack, but we want to - // avoid modifying the actual lowering code. This is similiar in intent to - // the LowerCallOperands mechanism used by PATCHPOINT, but is structured - // differently. Hopefully, this is slightly more robust w.r.t. calling - // convention, return values, and other function attributes. - Value *ActualCallee = const_cast<Value *>(StatepointOperands.actualCallee()); - - std::vector<Value *> Args; - CallInst::const_op_iterator arg_begin = StatepointOperands.call_args_begin(); - CallInst::const_op_iterator arg_end = StatepointOperands.call_args_end(); - Args.insert(Args.end(), arg_begin, arg_end); - // TODO: remove the creation of a new instruction! We should not be - // modifying the IR (even temporarily) at this point. - CallInst *Tmp = CallInst::Create(ActualCallee, Args); - Tmp->setTailCall(CI.isTailCall()); - Tmp->setCallingConv(CI.getCallingConv()); - Tmp->setAttributes(CI.getAttributes()); - Builder.LowerCallTo(Tmp, Builder.getValue(ActualCallee), false); - - // Handle the return value of the call iff any. - const bool HasDef = !Tmp->getType()->isVoidTy(); if (HasDef) { - // The value of the statepoint itself will be the value of call itself. - // We'll replace the actually call node shortly. gc_result will grab - // this value. - Builder.setValue(&CI, Builder.getValue(Tmp)); + if (CS.isInvoke()) { + // Result value will be used in different basic block for invokes + // so we need to export it now. But statepoint call has a different type + // than the actuall call. It means that standart exporting mechanism will + // create register of the wrong type. So instead we need to create + // register with correct type and save value into it manually. + // TODO: To eliminate this problem we can remove gc.result intrinsics + // completelly and make statepoint call to return a tuple. + unsigned Reg = Builder.FuncInfo.CreateRegs(ISP.getActualReturnType()); + RegsForValue RFV(*Builder.DAG.getContext(), + Builder.DAG.getTargetLoweringInfo(), Reg, + ISP.getActualReturnType()); + SDValue Chain = Builder.DAG.getEntryNode(); + + RFV.getCopyToRegs(ReturnValue, Builder.DAG, Builder.getCurSDLoc(), Chain, + nullptr); + PendingExports.push_back(Chain); + Builder.FuncInfo.ValueMap[CS.getInstruction()] = Reg; + } else { + // The value of the statepoint itself will be the value of call itself. + // We'll replace the actually call node shortly. gc_result will grab + // this value. + Builder.setValue(CS.getInstruction(), ReturnValue); + } } else { // The token value is never used from here on, just generate a poison value - Builder.setValue(&CI, Builder.DAG.getIntPtrConstant(-1)); - } - // Remove the fake entry we created so we don't have a hanging reference - // after we delete this node. - Builder.removeValue(Tmp); - delete Tmp; - Tmp = nullptr; - - // Search for the call node - // The following code is essentially reverse engineering X86's - // LowerCallTo. - SDNode *CallNode = nullptr; - - // We just emitted a call, so it should be last thing generated - SDValue Chain = Builder.DAG.getRoot(); - - // Find closest CALLSEQ_END walking back through lowered nodes if needed - SDNode *CallEnd = Chain.getNode(); - int Sanity = 0; - while (CallEnd->getOpcode() != ISD::CALLSEQ_END) { - CallEnd = CallEnd->getGluedNode(); - assert(CallEnd && "Can not find call node"); - assert(Sanity < 20 && "should have found call end already"); - Sanity++; + Builder.setValue(CS.getInstruction(), + Builder.DAG.getIntPtrConstant(-1, Builder.getCurSDLoc())); } - assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && - "Expected a callseq node."); - assert(CallEnd->getGluedNode()); - // Step back inside the CALLSEQ - CallNode = CallEnd->getGluedNode(); - return CallNode; + return CallEnd->getOperand(0).getNode(); } /// Callect all gc pointers coming into statepoint intrinsic, clean them up, @@ -300,24 +326,15 @@ static SDNode *lowerCallFromStatepoint(const CallInst &CI, /// Relocs - the gc_relocate corresponding to each base/ptr pair /// Elements of this arrays should be in one-to-one correspondence with each /// other i.e Bases[i], Ptrs[i] are from the same gcrelocate call -static void -getIncomingStatepointGCValues(SmallVectorImpl<const Value *> &Bases, - SmallVectorImpl<const Value *> &Ptrs, - SmallVectorImpl<const Value *> &Relocs, - ImmutableCallSite Statepoint, - SelectionDAGBuilder &Builder) { - // Search for relocated pointers. Note that working backwards from the - // gc_relocates ensures that we only get pairs which are actually relocated - // and used after the statepoint. - // TODO: This logic should probably become a utility function in Statepoint.h - for (const User *U : cast<CallInst>(Statepoint.getInstruction())->users()) { - if (!isGCRelocate(U)) { - continue; - } - GCRelocateOperands relocateOpers(U); - Relocs.push_back(cast<Value>(U)); - Bases.push_back(relocateOpers.basePtr()); - Ptrs.push_back(relocateOpers.derivedPtr()); +static void getIncomingStatepointGCValues( + SmallVectorImpl<const Value *> &Bases, SmallVectorImpl<const Value *> &Ptrs, + SmallVectorImpl<const Value *> &Relocs, ImmutableStatepoint StatepointSite, + SelectionDAGBuilder &Builder) { + for (GCRelocateOperands relocateOpers : + StatepointSite.getRelocates(StatepointSite)) { + Relocs.push_back(relocateOpers.getUnderlyingCallSite().getInstruction()); + Bases.push_back(relocateOpers.getBasePtr()); + Ptrs.push_back(relocateOpers.getDerivedPtr()); } // Remove any redundant llvm::Values which map to the same SDValue as another @@ -376,14 +393,13 @@ static void lowerIncomingStatepointValue(SDValue Incoming, // such in the stackmap. This is required so that the consumer can // parse any internal format to the deopt state. It also handles null // pointers and other constant pointers in GC states - Ops.push_back( - Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); - Ops.push_back(Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); + pushStackMapConstant(Ops, Builder, C->getSExtValue()); } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) { - // This handles allocas as arguments to the statepoint - const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); - Ops.push_back( - Builder.DAG.getTargetFrameIndex(FI->getIndex(), TLI.getPointerTy())); + // This handles allocas as arguments to the statepoint (this is only + // really meaningful for a deopt value. For GC, we'd be trying to + // relocate the address of the alloca itself?) + Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(), + Incoming.getValueType())); } else { // Otherwise, locate a spill slot and explicitly spill it so it // can be found by the runtime later. We currently do not support @@ -408,15 +424,15 @@ static void lowerIncomingStatepointValue(SDValue Incoming, /// statepoint. The chain nodes will have already been created and the DAG root /// will be set to the last value spilled (if any were). static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, - ImmutableStatepoint Statepoint, + ImmutableStatepoint StatepointSite, SelectionDAGBuilder &Builder) { // Lower the deopt and gc arguments for this statepoint. Layout will // be: deopt argument length, deopt arguments.., gc arguments... SmallVector<const Value *, 64> Bases, Ptrs, Relocations; - getIncomingStatepointGCValues(Bases, Ptrs, Relocations, - Statepoint.getCallSite(), Builder); + getIncomingStatepointGCValues(Bases, Ptrs, Relocations, StatepointSite, + Builder); #ifndef NDEBUG // Check that each of the gc pointer and bases we've gotten out of the @@ -424,61 +440,54 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // heap. This is basically just here to help catch errors during statepoint // insertion. TODO: This should actually be in the Verifier, but we can't get // to the GCStrategy from there (yet). - if (Builder.GFI) { - GCStrategy &S = Builder.GFI->getStrategy(); - for (const Value *V : Bases) { - auto Opt = S.isGCManagedPointer(V); - if (Opt.hasValue()) { - assert(Opt.getValue() && - "non gc managed base pointer found in statepoint"); - } + GCStrategy &S = Builder.GFI->getStrategy(); + for (const Value *V : Bases) { + auto Opt = S.isGCManagedPointer(V); + if (Opt.hasValue()) { + assert(Opt.getValue() && + "non gc managed base pointer found in statepoint"); } - for (const Value *V : Ptrs) { - auto Opt = S.isGCManagedPointer(V); - if (Opt.hasValue()) { - assert(Opt.getValue() && - "non gc managed derived pointer found in statepoint"); - } + } + for (const Value *V : Ptrs) { + auto Opt = S.isGCManagedPointer(V); + if (Opt.hasValue()) { + assert(Opt.getValue() && + "non gc managed derived pointer found in statepoint"); } - for (const Value *V : Relocations) { - auto Opt = S.isGCManagedPointer(V); - if (Opt.hasValue()) { - assert(Opt.getValue() && "non gc managed pointer relocated"); - } + } + for (const Value *V : Relocations) { + auto Opt = S.isGCManagedPointer(V); + if (Opt.hasValue()) { + assert(Opt.getValue() && "non gc managed pointer relocated"); } } #endif - - // Before we actually start lowering (and allocating spill slots for values), // reserve any stack slots which we judge to be profitable to reuse for a // particular value. This is purely an optimization over the code below and // doesn't change semantics at all. It is important for performance that we // reserve slots for both deopt and gc values before lowering either. - for (auto I = Statepoint.vm_state_begin() + 1, E = Statepoint.vm_state_end(); - I != E; ++I) { - Value *V = *I; + for (const Value *V : StatepointSite.vm_state_args()) { SDValue Incoming = Builder.getValue(V); reservePreviousStackSlotForValue(Incoming, Builder); } - for (unsigned i = 0; i < Bases.size() * 2; ++i) { - // Even elements will contain base, odd elements - derived ptr - const Value *V = i % 2 ? Bases[i / 2] : Ptrs[i / 2]; - SDValue Incoming = Builder.getValue(V); - reservePreviousStackSlotForValue(Incoming, Builder); + for (unsigned i = 0; i < Bases.size(); ++i) { + const Value *Base = Bases[i]; + reservePreviousStackSlotForValue(Builder.getValue(Base), Builder); + + const Value *Ptr = Ptrs[i]; + reservePreviousStackSlotForValue(Builder.getValue(Ptr), Builder); } // First, prefix the list with the number of unique values to be // lowered. Note that this is the number of *Values* not the // number of SDValues required to lower them. - const int NumVMSArgs = Statepoint.numTotalVMSArgs(); - Ops.push_back( - Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); - Ops.push_back(Builder.DAG.getTargetConstant(NumVMSArgs, MVT::i64)); + const int NumVMSArgs = StatepointSite.getNumTotalVMSArgs(); + pushStackMapConstant(Ops, Builder, NumVMSArgs); - assert(NumVMSArgs + 1 == std::distance(Statepoint.vm_state_begin(), - Statepoint.vm_state_end())); + assert(NumVMSArgs == std::distance(StatepointSite.vm_state_begin(), + StatepointSite.vm_state_end())); // The vm state arguments are lowered in an opaque manner. We do // not know what type of values are contained within. We skip the @@ -486,9 +495,7 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // explicitly just above. We could have left it in the loop and // not done it explicitly, but it's far easier to understand this // way. - for (auto I = Statepoint.vm_state_begin() + 1, E = Statepoint.vm_state_end(); - I != E; ++I) { - const Value *V = *I; + for (const Value *V : StatepointSite.vm_state_args()) { SDValue Incoming = Builder.getValue(V); lowerIncomingStatepointValue(Incoming, Ops, Builder); } @@ -498,35 +505,96 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // arrays interwoven with each (lowered) base pointer immediately followed by // it's (lowered) derived pointer. i.e // (base[0], ptr[0], base[1], ptr[1], ...) - for (unsigned i = 0; i < Bases.size() * 2; ++i) { - // Even elements will contain base, odd elements - derived ptr - const Value *V = i % 2 ? Bases[i / 2] : Ptrs[i / 2]; + for (unsigned i = 0; i < Bases.size(); ++i) { + const Value *Base = Bases[i]; + lowerIncomingStatepointValue(Builder.getValue(Base), Ops, Builder); + + const Value *Ptr = Ptrs[i]; + lowerIncomingStatepointValue(Builder.getValue(Ptr), Ops, Builder); + } + + // If there are any explicit spill slots passed to the statepoint, record + // them, but otherwise do not do anything special. These are user provided + // allocas and give control over placement to the consumer. In this case, + // it is the contents of the slot which may get updated, not the pointer to + // the alloca + for (Value *V : StatepointSite.gc_args()) { SDValue Incoming = Builder.getValue(V); - lowerIncomingStatepointValue(Incoming, Ops, Builder); + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) { + // This handles allocas as arguments to the statepoint + Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(), + Incoming.getValueType())); + } + } + + // Record computed locations for all lowered values. + // This can not be embedded in lowering loops as we need to record *all* + // values, while previous loops account only values with unique SDValues. + const Instruction *StatepointInstr = + StatepointSite.getCallSite().getInstruction(); + FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap = + Builder.FuncInfo.StatepointRelocatedValues[StatepointInstr]; + + for (GCRelocateOperands RelocateOpers : + StatepointSite.getRelocates(StatepointSite)) { + const Value *V = RelocateOpers.getDerivedPtr(); + SDValue SDV = Builder.getValue(V); + SDValue Loc = Builder.StatepointLowering.getLocation(SDV); + + if (Loc.getNode()) { + SpillMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex(); + } else { + // Record value as visited, but not spilled. This is case for allocas + // and constants. For this values we can avoid emiting spill load while + // visiting corresponding gc_relocate. + // Actually we do not need to record them in this map at all. + // We do this only to check that we are not relocating any unvisited value. + SpillMap[V] = None; + + // Default llvm mechanisms for exporting values which are used in + // different basic blocks does not work for gc relocates. + // Note that it would be incorrect to teach llvm that all relocates are + // uses of the corresponging values so that it would automatically + // export them. Relocates of the spilled values does not use original + // value. + if (StatepointSite.getCallSite().isInvoke()) + Builder.ExportFromCurrentBlock(V); + } } } + void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) { + // Check some preconditions for sanity + assert(isStatepoint(&CI) && + "function called must be the statepoint function"); + + LowerStatepoint(ImmutableStatepoint(&CI)); +} + +void SelectionDAGBuilder::LowerStatepoint( + ImmutableStatepoint ISP, MachineBasicBlock *LandingPad /*=nullptr*/) { // The basic scheme here is that information about both the original call and // the safepoint is encoded in the CallInst. We create a temporary call and // lower it, then reverse engineer the calling sequence. - // Check some preconditions for sanity - assert(isStatepoint(&CI) && - "function called must be the statepoint function"); NumOfStatepoints++; // Clear state StatepointLowering.startNewStatepoint(*this); + ImmutableCallSite CS(ISP.getCallSite()); + #ifndef NDEBUG - // Consistency check - for (const User *U : CI.users()) { - const CallInst *Call = cast<CallInst>(U); - if (isGCRelocate(Call)) - StatepointLowering.scheduleRelocCall(*Call); + // Consistency check. Don't do this for invokes. It would be too + // expensive to preserve this information across different basic blocks + if (!CS.isInvoke()) { + for (const User *U : CS->users()) { + const CallInst *Call = cast<CallInst>(U); + if (isGCRelocate(Call)) + StatepointLowering.scheduleRelocCall(*Call); + } } #endif - ImmutableStatepoint ISP(&CI); #ifndef NDEBUG // If this is a malformed statepoint, report it early to simplify debugging. // This should catch any IR level mistake that's made when constructing or @@ -534,42 +602,82 @@ void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) { ISP.verify(); // Check that the associated GCStrategy expects to encounter statepoints. - // TODO: This if should become an assert. For now, we allow the GCStrategy - // to be optional for backwards compatibility. This will only last a short - // period (i.e. a couple of weeks). - if (GFI) { - assert(GFI->getStrategy().useStatepoints() && - "GCStrategy does not expect to encounter statepoints"); - } + assert(GFI->getStrategy().useStatepoints() && + "GCStrategy does not expect to encounter statepoints"); #endif - // Lower statepoint vmstate and gcstate arguments - SmallVector<SDValue, 10> LoweredArgs; - lowerStatepointMetaArgs(LoweredArgs, ISP, *this); + SmallVector<SDValue, 10> LoweredMetaArgs; + lowerStatepointMetaArgs(LoweredMetaArgs, ISP, *this); // Get call node, we will replace it later with statepoint - SDNode *CallNode = lowerCallFromStatepoint(CI, *this); + SDNode *CallNode = + lowerCallFromStatepoint(ISP, LandingPad, *this, PendingExports); - // Construct the actual STATEPOINT node with all the appropriate arguments - // and return values. + // Construct the actual GC_TRANSITION_START, STATEPOINT, and GC_TRANSITION_END + // nodes with all the appropriate arguments and return values. + + // Call Node: Chain, Target, {Args}, RegMask, [Glue] + SDValue Chain = CallNode->getOperand(0); + + SDValue Glue; + bool CallHasIncomingGlue = CallNode->getGluedNode(); + if (CallHasIncomingGlue) { + // Glue is always last operand + Glue = CallNode->getOperand(CallNode->getNumOperands() - 1); + } + + // Build the GC_TRANSITION_START node if necessary. + // + // The operands to the GC_TRANSITION_{START,END} nodes are laid out in the + // order in which they appear in the call to the statepoint intrinsic. If + // any of the operands is a pointer-typed, that operand is immediately + // followed by a SRCVALUE for the pointer that may be used during lowering + // (e.g. to form MachinePointerInfo values for loads/stores). + const bool IsGCTransition = + (ISP.getFlags() & (uint64_t)StatepointFlags::GCTransition) == + (uint64_t)StatepointFlags::GCTransition; + if (IsGCTransition) { + SmallVector<SDValue, 8> TSOps; + + // Add chain + TSOps.push_back(Chain); + + // Add GC transition arguments + for (const Value *V : ISP.gc_transition_args()) { + TSOps.push_back(getValue(V)); + if (V->getType()->isPointerTy()) + TSOps.push_back(DAG.getSrcValue(V)); + } + + // Add glue if necessary + if (CallHasIncomingGlue) + TSOps.push_back(Glue); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + SDValue GCTransitionStart = + DAG.getNode(ISD::GC_TRANSITION_START, getCurSDLoc(), NodeTys, TSOps); + + Chain = GCTransitionStart.getValue(0); + Glue = GCTransitionStart.getValue(1); + } // TODO: Currently, all of these operands are being marked as read/write in // PrologEpilougeInserter.cpp, we should special case the VMState arguments // and flags to be read-only. SmallVector<SDValue, 40> Ops; + // Add the <id> and <numBytes> constants. + Ops.push_back(DAG.getTargetConstant(ISP.getID(), getCurSDLoc(), MVT::i64)); + Ops.push_back( + DAG.getTargetConstant(ISP.getNumPatchBytes(), getCurSDLoc(), MVT::i32)); + // Calculate and push starting position of vmstate arguments - // Call Node: Chain, Target, {Args}, RegMask, [Glue] - SDValue Glue; - if (CallNode->getGluedNode()) { - // Glue is always last operand - Glue = CallNode->getOperand(CallNode->getNumOperands() - 1); - } // Get number of arguments incoming directly into call node unsigned NumCallRegArgs = - CallNode->getNumOperands() - (Glue.getNode() ? 4 : 3); - Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32)); + CallNode->getNumOperands() - (CallHasIncomingGlue ? 4 : 3); + Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, getCurSDLoc(), MVT::i32)); // Add call target SDValue CallTarget = SDValue(CallNode->getOperand(1).getNode(), 0); @@ -578,47 +686,74 @@ void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) { // Add call arguments // Get position of register mask in the call SDNode::op_iterator RegMaskIt; - if (Glue.getNode()) + if (CallHasIncomingGlue) RegMaskIt = CallNode->op_end() - 2; else RegMaskIt = CallNode->op_end() - 1; Ops.insert(Ops.end(), CallNode->op_begin() + 2, RegMaskIt); - // Add a leading constant argument with the Flags and the calling convention - // masked together - CallingConv::ID CallConv = CI.getCallingConv(); - int Flags = dyn_cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue(); - assert(Flags == 0 && "not expected to be used"); - Ops.push_back(DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); - Ops.push_back( - DAG.getTargetConstant(Flags | ((unsigned)CallConv << 1), MVT::i64)); + // Add a constant argument for the calling convention + pushStackMapConstant(Ops, *this, CS.getCallingConv()); + + // Add a constant argument for the flags + uint64_t Flags = ISP.getFlags(); + assert( + ((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0) + && "unknown flag used"); + pushStackMapConstant(Ops, *this, Flags); // Insert all vmstate and gcstate arguments - Ops.insert(Ops.end(), LoweredArgs.begin(), LoweredArgs.end()); + Ops.insert(Ops.end(), LoweredMetaArgs.begin(), LoweredMetaArgs.end()); // Add register mask from call node Ops.push_back(*RegMaskIt); // Add chain - Ops.push_back(CallNode->getOperand(0)); + Ops.push_back(Chain); // Same for the glue, but we add it only if original call had it if (Glue.getNode()) Ops.push_back(Glue); - // Compute return values - SmallVector<EVT, 21> ValueVTs; - ValueVTs.push_back(MVT::Other); - ValueVTs.push_back(MVT::Glue); // provide a glue output since we consume one - // as input. This allows someone else to chain - // off us as needed. - SDVTList NodeTys = DAG.getVTList(ValueVTs); + // Compute return values. Provide a glue output since we consume one as + // input. This allows someone else to chain off us as needed. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + SDNode *StatepointMCNode = + DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops); + + SDNode *SinkNode = StatepointMCNode; - SDNode *StatepointMCNode = DAG.getMachineNode(TargetOpcode::STATEPOINT, - getCurSDLoc(), NodeTys, Ops); + // Build the GC_TRANSITION_END node if necessary. + // + // See the comment above regarding GC_TRANSITION_START for the layout of + // the operands to the GC_TRANSITION_END node. + if (IsGCTransition) { + SmallVector<SDValue, 8> TEOps; + + // Add chain + TEOps.push_back(SDValue(StatepointMCNode, 0)); + + // Add GC transition arguments + for (const Value *V : ISP.gc_transition_args()) { + TEOps.push_back(getValue(V)); + if (V->getType()->isPointerTy()) + TEOps.push_back(DAG.getSrcValue(V)); + } + + // Add glue + TEOps.push_back(SDValue(StatepointMCNode, 1)); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + SDValue GCTransitionStart = + DAG.getNode(ISD::GC_TRANSITION_END, getCurSDLoc(), NodeTys, TEOps); + + SinkNode = GCTransitionStart.getNode(); + } // Replace original call - DAG.ReplaceAllUsesWith(CallNode, StatepointMCNode); // This may update Root + DAG.ReplaceAllUsesWith(CallNode, SinkNode); // This may update Root // Remove originall call node DAG.DeleteNode(CallNode); @@ -636,49 +771,72 @@ void SelectionDAGBuilder::visitGCResult(const CallInst &CI) { // The result value of the gc_result is simply the result of the actual // call. We've already emitted this, so just grab the value. Instruction *I = cast<Instruction>(CI.getArgOperand(0)); - assert(isStatepoint(I) && - "first argument must be a statepoint token"); - - setValue(&CI, getValue(I)); + assert(isStatepoint(I) && "first argument must be a statepoint token"); + + if (isa<InvokeInst>(I)) { + // For invokes we should have stored call result in a virtual register. + // We can not use default getValue() functionality to copy value from this + // register because statepoint and actuall call return types can be + // different, and getValue() will use CopyFromReg of the wrong type, + // which is always i32 in our case. + PointerType *CalleeType = + cast<PointerType>(ImmutableStatepoint(I).getActualCallee()->getType()); + Type *RetTy = + cast<FunctionType>(CalleeType->getElementType())->getReturnType(); + SDValue CopyFromReg = getCopyFromRegs(I, RetTy); + + assert(CopyFromReg.getNode()); + setValue(&CI, CopyFromReg); + } else { + setValue(&CI, getValue(I)); + } } void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) { + GCRelocateOperands RelocateOpers(&CI); + #ifndef NDEBUG // Consistency check - StatepointLowering.relocCallVisited(CI); + // We skip this check for invoke statepoints. It would be too expensive to + // preserve validation info through different basic blocks. + if (!RelocateOpers.isTiedToInvoke()) { + StatepointLowering.relocCallVisited(CI); + } #endif - GCRelocateOperands relocateOpers(&CI); - SDValue SD = getValue(relocateOpers.derivedPtr()); + const Value *DerivedPtr = RelocateOpers.getDerivedPtr(); + SDValue SD = getValue(DerivedPtr); + + FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap = + FuncInfo.StatepointRelocatedValues[RelocateOpers.getStatepoint()]; - if (isa<ConstantSDNode>(SD) || isa<FrameIndexSDNode>(SD)) { - // We didn't need to spill these special cases (constants and allocas). - // See the handling in spillIncomingValueForStatepoint for detail. + // We should have recorded location for this pointer + assert(SpillMap.count(DerivedPtr) && "Relocating not lowered gc value"); + Optional<int> DerivedPtrLocation = SpillMap[DerivedPtr]; + + // We didn't need to spill these special cases (constants and allocas). + // See the handling in spillIncomingValueForStatepoint for detail. + if (!DerivedPtrLocation) { setValue(&CI, SD); return; } - SDValue Loc = StatepointLowering.getRelocLocation(SD); - // Emit new load if we did not emit it before - if (!Loc.getNode()) { - SDValue SpillSlot = StatepointLowering.getLocation(SD); - int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); + SDValue SpillSlot = DAG.getTargetFrameIndex(*DerivedPtrLocation, + SD.getValueType()); - // Be conservative: flush all pending loads - // TODO: Probably we can be less restrictive on this, - // it may allow more scheduling opprtunities - SDValue Chain = getRoot(); + // Be conservative: flush all pending loads + // TODO: Probably we can be less restrictive on this, + // it may allow more scheduling opprtunities + SDValue Chain = getRoot(); - Loc = DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, - SpillSlot, MachinePointerInfo::getFixedStack(FI), false, - false, false, 0); + SDValue SpillLoad = + DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot, + MachinePointerInfo::getFixedStack(*DerivedPtrLocation), + false, false, false, 0); - StatepointLowering.setRelocLocation(SD, Loc); + // Again, be conservative, don't emit pending loads + DAG.setRoot(SpillLoad.getValue(1)); - // Again, be conservative, don't emit pending loads - DAG.setRoot(Loc.getValue(1)); - } - - assert(Loc.getNode()); - setValue(&CI, Loc); + assert(SpillLoad.getNode()); + setValue(&CI, SpillLoad); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h index 673112c..82d0c62 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h @@ -30,8 +30,7 @@ class SelectionDAGBuilder; /// works in concert with information in FunctionLoweringInfo. class StatepointLoweringState { public: - StatepointLoweringState() : NextSlotToAllocate(0) { - } + StatepointLoweringState() : NextSlotToAllocate(0) {} /// Reset all state tracking for a newly encountered safepoint. Also /// performs some consistency checking. @@ -57,25 +56,6 @@ public: Locations[val] = Location; } - /// Returns the relocated value for a given input pointer. Will - /// return SDValue() if this value hasn't yet been reloaded from - /// it's stack slot after the statepoint. Otherwise, the value - /// has already been reloaded and the SDValue of that reload will - /// be returned. Note that VMState values are spilled but not - /// reloaded (since they don't change at the safepoint unless - /// also listed in the GC pointer section) and will thus never - /// be in this map - SDValue getRelocLocation(SDValue val) { - if (!RelocLocations.count(val)) - return SDValue(); - return RelocLocations[val]; - } - void setRelocLocation(SDValue val, SDValue Location) { - assert(!RelocLocations.count(val) && - "Trying to allocate already allocated location"); - RelocLocations[val] = Location; - } - /// Record the fact that we expect to encounter a given gc_relocate /// before the next statepoint. If we don't see it, we'll report /// an assertion. @@ -118,8 +98,6 @@ private: /// Maps pre-relocation value (gc pointer directly incoming into statepoint) /// into it's location (currently only stack slots) DenseMap<SDValue, SDValue> Locations; - /// Map pre-relocated value into it's new relocated location - DenseMap<SDValue, SDValue> RelocLocations; /// A boolean indicator for each slot listed in the FunctionInfo as to /// whether it has been used in the current statepoint. Since we try to diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index f12c035..833da4b 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -100,6 +100,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, Entry.isZExt = !shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned); Args.push_back(Entry); } + if (LC == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported library call operation!"); SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy()); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); @@ -201,7 +203,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue Ops[2] = { NewLHS, NewRHS }; NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl).first; - NewRHS = DAG.getConstant(0, RetVT); + NewRHS = DAG.getConstant(0, dl, RetVT); CCCode = getCmpLibcallCC(LC1); if (LC2 != RTLIB::UNKNOWN_LIBCALL) { SDValue Tmp = DAG.getNode(ISD::SETCC, dl, @@ -303,7 +305,7 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0), DAG.getConstant(Demanded & C->getAPIntValue(), - VT)); + dl, VT)); return CombineTo(Op, New); } @@ -447,7 +449,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return TLO.CombineTo(Op, Op.getOperand(1)); // If all of the demanded bits in the inputs are known zeros, return zero. if ((NewMask & (KnownZero|KnownZero2)) == NewMask) - return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType())); + return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, Op.getValueType())); // If the RHS is a constant, see if we can simplify it. if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask)) return true; @@ -535,7 +537,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known on one side if (KnownOne == KnownOne2) { // set bits are the same on both sides EVT VT = Op.getValueType(); - SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT); + SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, dl, VT); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), ANDC)); } @@ -551,7 +553,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (Expanded != C->getAPIntValue()) { EVT VT = Op.getValueType(); SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl,VT, Op.getOperand(0), - TLO.DAG.getConstant(Expanded, VT)); + TLO.DAG.getConstant(Expanded, dl, VT)); return TLO.CombineTo(Op, New); } // if it already has all the bits set, nothing to change @@ -624,7 +626,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } SDValue NewSA = - TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType()); + TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType()); EVT VT = Op.getValueType(); return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, InOp.getOperand(0), NewSA)); @@ -648,7 +650,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, ShTy = InnerVT; SDValue NarrowShl = TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp, - TLO.DAG.getConstant(ShAmt, ShTy)); + TLO.DAG.getConstant(ShAmt, dl, ShTy)); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), @@ -670,7 +672,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, NewMask.lshr(InnerBits - InnerShAmt + ShAmt) == 0 && NewMask.trunc(ShAmt) == 0) { SDValue NewSA = - TLO.DAG.getConstant(ShAmt - InnerShAmt, + TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, Op.getOperand(1).getValueType()); EVT VT = Op.getValueType(); SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, @@ -713,7 +715,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } SDValue NewSA = - TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType()); + TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType()); return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, InOp.getOperand(0), NewSA)); } @@ -778,7 +780,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (Log2 >= 0) { // The bit must come from the sign. SDValue NewSA = - TLO.DAG.getConstant(BitWidth - 1 - Log2, + TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, Op.getOperand(1).getValueType()); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0), NewSA)); @@ -794,19 +796,27 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1); // If we only care about the highest bit, don't bother shifting right. - if (MsbMask == DemandedMask) { + if (MsbMask == NewMask) { unsigned ShAmt = ExVT.getScalarType().getSizeInBits(); SDValue InOp = Op.getOperand(0); - - // Compute the correct shift amount type, which must be getShiftAmountTy - // for scalar types after legalization. - EVT ShiftAmtTy = Op.getValueType(); - if (TLO.LegalTypes() && !ShiftAmtTy.isVector()) - ShiftAmtTy = getShiftAmountTy(ShiftAmtTy); - - SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, - Op.getValueType(), InOp, ShiftAmt)); + unsigned VTBits = Op->getValueType(0).getScalarType().getSizeInBits(); + bool AlreadySignExtended = + TLO.DAG.ComputeNumSignBits(InOp) >= VTBits-ShAmt+1; + // However if the input is already sign extended we expect the sign + // extension to be dropped altogether later and do not simplify. + if (!AlreadySignExtended) { + // Compute the correct shift amount type, which must be getShiftAmountTy + // for scalar types after legalization. + EVT ShiftAmtTy = Op.getValueType(); + if (TLO.LegalTypes() && !ShiftAmtTy.isVector()) + ShiftAmtTy = getShiftAmountTy(ShiftAmtTy); + + SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, dl, + ShiftAmtTy); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, + Op.getValueType(), InOp, + ShiftAmt)); + } } // Sign extension. Compute the demanded bits in the result that are not @@ -985,7 +995,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (TLO.LegalTypes()) { uint64_t ShVal = ShAmt->getZExtValue(); Shift = - TLO.DAG.getConstant(ShVal, getShiftAmountTy(Op.getValueType())); + TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(Op.getValueType())); } APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, @@ -1043,7 +1053,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (!OpVTLegal && OpVTSizeInBits > 32) Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign); unsigned ShVal = Op.getValueType().getSizeInBits()-1; - SDValue ShAmt = TLO.DAG.getConstant(ShVal, Op.getValueType()); + SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, Op.getValueType()); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, Op.getValueType(), Sign, ShAmt)); @@ -1076,8 +1086,19 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If we know the value of all of the demanded bits, return this as a // constant. - if ((NewMask & (KnownZero|KnownOne)) == NewMask) - return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType())); + if ((NewMask & (KnownZero|KnownOne)) == NewMask) { + // Avoid folding to a constant if any OpaqueConstant is involved. + const SDNode *N = Op.getNode(); + for (SDNodeIterator I = SDNodeIterator::begin(N), + E = SDNodeIterator::end(N); I != E; ++I) { + SDNode *Op = *I; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) + if (C->isOpaque()) + return false; + } + return TLO.CombineTo(Op, + TLO.DAG.getConstant(KnownOne, dl, Op.getValueType())); + } return false; } @@ -1213,13 +1234,14 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, switch (Cond) { default: break; case ISD::SETFALSE: - case ISD::SETFALSE2: return DAG.getConstant(0, VT); + case ISD::SETFALSE2: return DAG.getConstant(0, dl, VT); case ISD::SETTRUE: case ISD::SETTRUE2: { TargetLowering::BooleanContent Cnt = getBooleanContents(N0->getValueType(0)); return DAG.getConstant( - Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT); + Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, dl, + VT); } } @@ -1253,7 +1275,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // (srl (ctlz x), 5) == 1 -> X == 0 Cond = ISD::SETEQ; } - SDValue Zero = DAG.getConstant(0, N0.getValueType()); + SDValue Zero = DAG.getConstant(0, dl, N0.getValueType()); return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero, Cond); } @@ -1274,10 +1296,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // (ctpop x) u> 1 -> (x & x-1) != 0 if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){ SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp, - DAG.getConstant(1, CTVT)); + DAG.getConstant(1, dl, CTVT)); SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub); ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE; - return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, CTVT), CC); + return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC); } // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal. @@ -1331,7 +1353,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (isTypeDesirableForOp(ISD::SETCC, MinVT)) { // Will get folded away. SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt); - SDValue C = DAG.getConstant(C1.trunc(MinBits), MinVT); + SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT); return DAG.getSetCC(dl, VT, Trunc, C, Cond); } } @@ -1382,7 +1404,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, SDValue Ptr = Lod->getBasePtr(); if (bestOffset != 0) Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(), - DAG.getConstant(bestOffset, PtrType)); + DAG.getConstant(bestOffset, dl, PtrType)); unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, Lod->getPointerInfo().getWithOffset(bestOffset), @@ -1390,8 +1412,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, DAG.getNode(ISD::AND, dl, newVT, NewLoad, DAG.getConstant(bestMask.trunc(bestWidth), - newVT)), - DAG.getConstant(0LL, newVT), Cond); + dl, newVT)), + DAG.getConstant(0LL, dl, newVT), Cond); } } } @@ -1407,18 +1429,18 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, switch (Cond) { case ISD::SETUGT: case ISD::SETUGE: - case ISD::SETEQ: return DAG.getConstant(0, VT); + case ISD::SETEQ: return DAG.getConstant(0, dl, VT); case ISD::SETULT: case ISD::SETULE: - case ISD::SETNE: return DAG.getConstant(1, VT); + case ISD::SETNE: return DAG.getConstant(1, dl, VT); case ISD::SETGT: case ISD::SETGE: // True if the sign bit of C1 is set. - return DAG.getConstant(C1.isNegative(), VT); + return DAG.getConstant(C1.isNegative(), dl, VT); case ISD::SETLT: case ISD::SETLE: // True if the sign bit of C1 isn't set. - return DAG.getConstant(C1.isNonNegative(), VT); + return DAG.getConstant(C1.isNonNegative(), dl, VT); default: break; } @@ -1437,7 +1459,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, (isOperationLegal(ISD::SETCC, newVT) && getCondCodeAction(Cond, newVT.getSimpleVT()) == Legal)) { EVT NewSetCCVT = getSetCCResultType(*DAG.getContext(), newVT); - SDValue NewConst = DAG.getConstant(C1.trunc(InSize), newVT); + SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT); SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0), NewConst, Cond); @@ -1458,7 +1480,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // If the constant doesn't fit into the number of bits for the source of // the sign extension, it is impossible for both sides to be equal. if (C1.getMinSignedBits() > ExtSrcTyBits) - return DAG.getConstant(Cond == ISD::SETNE, VT); + return DAG.getConstant(Cond == ISD::SETNE, dl, VT); SDValue ZextOp; EVT Op0Ty = N0.getOperand(0).getValueType(); @@ -1467,7 +1489,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } else { APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits); ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0), - DAG.getConstant(Imm, Op0Ty)); + DAG.getConstant(Imm, dl, Op0Ty)); } if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(ZextOp.getNode()); @@ -1476,7 +1498,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, DAG.getConstant(C1 & APInt::getLowBitsSet( ExtDstTyBits, ExtSrcTyBits), - ExtDstTy), + dl, ExtDstTy), Cond); } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { @@ -1546,20 +1568,20 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (Op0.getValueType().bitsGT(VT)) Op0 = DAG.getNode(ISD::AND, dl, VT, DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)), - DAG.getConstant(1, VT)); + DAG.getConstant(1, dl, VT)); else if (Op0.getValueType().bitsLT(VT)) Op0 = DAG.getNode(ISD::AND, dl, VT, DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)), - DAG.getConstant(1, VT)); + DAG.getConstant(1, dl, VT)); return DAG.getSetCC(dl, VT, Op0, - DAG.getConstant(0, Op0.getValueType()), + DAG.getConstant(0, dl, Op0.getValueType()), Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); } if (Op0.getOpcode() == ISD::AssertZext && cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1) return DAG.getSetCC(dl, VT, Op0, - DAG.getConstant(0, Op0.getValueType()), + DAG.getConstant(0, dl, Op0.getValueType()), Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); } } @@ -1576,7 +1598,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Canonicalize GE/LE comparisons to use GT/LT comparisons. if (Cond == ISD::SETGE || Cond == ISD::SETUGE) { - if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true + if (C1 == MinVal) return DAG.getConstant(1, dl, VT); // X >= MIN --> true // X >= C0 --> X > (C0 - 1) APInt C = C1 - 1; ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT; @@ -1585,13 +1607,13 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 && isLegalICmpImmediate(C.getSExtValue())))) { return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(C, N1.getValueType()), + DAG.getConstant(C, dl, N1.getValueType()), NewCC); } } if (Cond == ISD::SETLE || Cond == ISD::SETULE) { - if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true + if (C1 == MaxVal) return DAG.getConstant(1, dl, VT); // X <= MAX --> true // X <= C0 --> X < (C0 + 1) APInt C = C1 + 1; ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT; @@ -1600,19 +1622,19 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 && isLegalICmpImmediate(C.getSExtValue())))) { return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(C, N1.getValueType()), + DAG.getConstant(C, dl, N1.getValueType()), NewCC); } } if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal) - return DAG.getConstant(0, VT); // X < MIN --> false + return DAG.getConstant(0, dl, VT); // X < MIN --> false if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal) - return DAG.getConstant(1, VT); // X >= MIN --> true + return DAG.getConstant(1, dl, VT); // X >= MIN --> true if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal) - return DAG.getConstant(0, VT); // X > MAX --> false + return DAG.getConstant(0, dl, VT); // X > MAX --> false if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal) - return DAG.getConstant(1, VT); // X <= MAX --> true + return DAG.getConstant(1, dl, VT); // X <= MAX --> true // Canonicalize setgt X, Min --> setne X, Min if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal) @@ -1624,12 +1646,12 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // If we have setult X, 1, turn it into seteq X, 0 if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1) return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(MinVal, N0.getValueType()), + DAG.getConstant(MinVal, dl, N0.getValueType()), ISD::SETEQ); // If we have setugt X, Max-1, turn it into seteq X, Max if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1) return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(MaxVal, N0.getValueType()), + DAG.getConstant(MaxVal, dl, N0.getValueType()), ISD::SETEQ); // If we have "setcc X, C0", check to see if we can shrink the immediate @@ -1639,14 +1661,14 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (Cond == ISD::SETUGT && C1 == APInt::getSignedMaxValue(OperandBitSize)) return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(0, N1.getValueType()), + DAG.getConstant(0, dl, N1.getValueType()), ISD::SETLT); // SETULT X, SINTMIN -> SETGT X, -1 if (Cond == ISD::SETULT && C1 == APInt::getSignedMinValue(OperandBitSize)) { SDValue ConstMinusOne = - DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), + DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl, N1.getValueType()); return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT); } @@ -1665,7 +1687,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (AndRHS->getAPIntValue().isPowerOf2()) { return DAG.getNode(ISD::TRUNCATE, dl, VT, DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0, - DAG.getConstant(AndRHS->getAPIntValue().logBase2(), ShiftTy))); + DAG.getConstant(AndRHS->getAPIntValue().logBase2(), dl, + ShiftTy))); } } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) { // (X & 8) == 8 --> (X & 8) >> 3 @@ -1673,7 +1696,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (C1.isPowerOf2()) { return DAG.getNode(ISD::TRUNCATE, dl, VT, DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0, - DAG.getConstant(C1.logBase2(), ShiftTy))); + DAG.getConstant(C1.logBase2(), dl, + ShiftTy))); } } } @@ -1692,8 +1716,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, getPointerTy() : getShiftAmountTy(N0.getValueType()); EVT CmpTy = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0), - DAG.getConstant(ShiftBits, ShiftTy)); - SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), CmpTy); + DAG.getConstant(ShiftBits, dl, + ShiftTy)); + SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, CmpTy); return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond); } } @@ -1715,13 +1740,14 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ShiftBits = C1.countTrailingZeros(); } NewC = NewC.lshr(ShiftBits); - if (ShiftBits && isLegalICmpImmediate(NewC.getSExtValue())) { + if (ShiftBits && NewC.getMinSignedBits() <= 64 && + isLegalICmpImmediate(NewC.getSExtValue())) { EVT ShiftTy = DCI.isBeforeLegalize() ? getPointerTy() : getShiftAmountTy(N0.getValueType()); EVT CmpTy = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0, - DAG.getConstant(ShiftBits, ShiftTy)); - SDValue CmpRHS = DAG.getConstant(NewC, CmpTy); + DAG.getConstant(ShiftBits, dl, ShiftTy)); + SDValue CmpRHS = DAG.getConstant(NewC, dl, CmpTy); return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond); } } @@ -1740,9 +1766,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, switch (ISD::getUnorderedFlavor(Cond)) { default: llvm_unreachable("Unknown flavor!"); case 0: // Known false. - return DAG.getConstant(0, VT); + return DAG.getConstant(0, dl, VT); case 1: // Known true. - return DAG.getConstant(1, VT); + return DAG.getConstant(1, dl, VT); case 2: // Undefined. return DAG.getUNDEF(VT); } @@ -1809,13 +1835,13 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // We can always fold X == X for integer setcc's. if (N0.getValueType().isInteger()) { - return DAG.getConstant(EqVal, VT); + return DAG.getConstant(EqVal, dl, VT); } unsigned UOF = ISD::getUnorderedFlavor(Cond); if (UOF == 2) // FP operators that are undefined on NaNs. - return DAG.getConstant(EqVal, VT); + return DAG.getConstant(EqVal, dl, VT); if (UOF == unsigned(ISD::isTrueWhenEqual(Cond))) - return DAG.getConstant(EqVal, VT); + return DAG.getConstant(EqVal, dl, VT); // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO // if it is not already. ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO; @@ -1856,7 +1882,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, N0.getOperand(0), DAG.getConstant(RHSC->getAPIntValue()- LHSR->getAPIntValue(), - N0.getValueType()), Cond); + dl, N0.getValueType()), Cond); } // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0. @@ -1868,7 +1894,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, DAG.getSetCC(dl, VT, N0.getOperand(0), DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(), - N0.getValueType()), + dl, N0.getValueType()), Cond); } @@ -1879,7 +1905,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, DAG.getSetCC(dl, VT, N0.getOperand(1), DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(), - N0.getValueType()), + dl, N0.getValueType()), Cond); } } @@ -1896,16 +1922,18 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (!LegalRHSImm || N0.getNode()->hasOneUse()) { if (N0.getOperand(0) == N1) return DAG.getSetCC(dl, VT, N0.getOperand(1), - DAG.getConstant(0, N0.getValueType()), Cond); + DAG.getConstant(0, dl, N0.getValueType()), Cond); if (N0.getOperand(1) == N1) { if (DAG.isCommutativeBinOp(N0.getOpcode())) return DAG.getSetCC(dl, VT, N0.getOperand(0), - DAG.getConstant(0, N0.getValueType()), Cond); + DAG.getConstant(0, dl, N0.getValueType()), + Cond); if (N0.getNode()->hasOneUse()) { assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!"); // (Z-X) == X --> Z == X<<1 SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N1, - DAG.getConstant(1, getShiftAmountTy(N1.getValueType()))); + DAG.getConstant(1, dl, + getShiftAmountTy(N1.getValueType()))); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(SH.getNode()); return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond); @@ -1919,16 +1947,17 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Simplify X == (X+Z) --> Z == 0 if (N1.getOperand(0) == N0) return DAG.getSetCC(dl, VT, N1.getOperand(1), - DAG.getConstant(0, N1.getValueType()), Cond); + DAG.getConstant(0, dl, N1.getValueType()), Cond); if (N1.getOperand(1) == N0) { if (DAG.isCommutativeBinOp(N1.getOpcode())) return DAG.getSetCC(dl, VT, N1.getOperand(0), - DAG.getConstant(0, N1.getValueType()), Cond); + DAG.getConstant(0, dl, N1.getValueType()), Cond); if (N1.getNode()->hasOneUse()) { assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!"); // X == (Z-X) --> X<<1 == Z SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0, - DAG.getConstant(1, getShiftAmountTy(N0.getValueType()))); + DAG.getConstant(1, dl, + getShiftAmountTy(N0.getValueType()))); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(SH.getNode()); return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond); @@ -1946,7 +1975,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); if (DCI.isBeforeLegalizeOps() || isCondCodeLegal(Cond, N0.getSimpleValueType())) { - SDValue Zero = DAG.getConstant(0, N1.getValueType()); + SDValue Zero = DAG.getConstant(0, dl, N1.getValueType()); return DAG.getSetCC(dl, VT, N0, Zero, Cond); } } @@ -1957,7 +1986,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); if (DCI.isBeforeLegalizeOps() || isCondCodeLegal(Cond, N1.getSimpleValueType())) { - SDValue Zero = DAG.getConstant(0, N0.getValueType()); + SDValue Zero = DAG.getConstant(0, dl, N0.getValueType()); return DAG.getSetCC(dl, VT, N1, Zero, Cond); } } @@ -2172,7 +2201,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, // now; without this it would get ZExt'd later in // ScheduleDAGSDNodes::EmitNode, which is very generic. Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(), - MVT::i64)); + SDLoc(C), MVT::i64)); return; } } @@ -2181,9 +2210,10 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, } } -std::pair<unsigned, const TargetRegisterClass*> TargetLowering:: -getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const { +std::pair<unsigned, const TargetRegisterClass *> +TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI, + const std::string &Constraint, + MVT VT) const { if (Constraint.empty() || Constraint[0] != '{') return std::make_pair(0u, static_cast<TargetRegisterClass*>(nullptr)); assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?"); @@ -2195,8 +2225,6 @@ getRegForInlineAsmConstraint(const std::string &Constraint, std::make_pair(0u, static_cast<const TargetRegisterClass*>(nullptr)); // Figure out which register class contains this reg. - const TargetRegisterInfo *RI = - getTargetMachine().getSubtargetImpl()->getRegisterInfo(); for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(), E = RI->regclass_end(); RCI != E; ++RCI) { const TargetRegisterClass *RC = *RCI; @@ -2249,8 +2277,9 @@ unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const { /// and also tie in the associated operand values. /// If this returns an empty vector, and if the constraint string itself /// isn't empty, there was an error parsing. -TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( - ImmutableCallSite CS) const { +TargetLowering::AsmOperandInfoVector +TargetLowering::ParseConstraints(const TargetRegisterInfo *TRI, + ImmutableCallSite CS) const { /// ConstraintOperands - Information about all of the constraints. AsmOperandInfoVector ConstraintOperands; const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); @@ -2341,7 +2370,7 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( } // If we have multiple alternative constraints, select the best alternative. - if (ConstraintOperands.size()) { + if (!ConstraintOperands.empty()) { if (maCount) { unsigned bestMAIndex = 0; int bestWeight = -1; @@ -2412,12 +2441,12 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; if (OpInfo.ConstraintVT != Input.ConstraintVT) { - std::pair<unsigned, const TargetRegisterClass*> MatchRC = - getRegForInlineAsmConstraint(OpInfo.ConstraintCode, - OpInfo.ConstraintVT); - std::pair<unsigned, const TargetRegisterClass*> InputRC = - getRegForInlineAsmConstraint(Input.ConstraintCode, - Input.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass *> MatchRC = + getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode, + OpInfo.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass *> InputRC = + getRegForInlineAsmConstraint(TRI, Input.ConstraintCode, + Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || (MatchRC.second != InputRC.second)) { @@ -2640,7 +2669,8 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl, unsigned ShAmt = d.countTrailingZeros(); if (ShAmt) { // TODO: For UDIV use SRL instead of SRA. - SDValue Amt = DAG.getConstant(ShAmt, getShiftAmountTy(Op1.getValueType())); + SDValue Amt = + DAG.getConstant(ShAmt, dl, getShiftAmountTy(Op1.getValueType())); Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, false, false, true); d = d.ashr(ShAmt); @@ -2651,7 +2681,7 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl, while ((t = d*xn) != 1) xn *= APInt(d.getBitWidth(), 2) - t; - Op2 = DAG.getConstant(xn, Op1.getValueType()); + Op2 = DAG.getConstant(xn, dl, Op1.getValueType()); return DAG.getNode(ISD::MUL, dl, Op1.getValueType(), Op1, Op2); } @@ -2680,12 +2710,12 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT) : isOperationLegalOrCustom(ISD::MULHS, VT)) Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0), - DAG.getConstant(magics.m, VT)); + DAG.getConstant(magics.m, dl, VT)); else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT) : isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N->getOperand(0), - DAG.getConstant(magics.m, VT)).getNode(), 1); + DAG.getConstant(magics.m, dl, VT)).getNode(), 1); else return SDValue(); // No mulhs or equvialent // If d > 0 and m < 0, add the numerator @@ -2701,12 +2731,13 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, // Shift right algebraic if shift value is nonzero if (magics.s > 0) { Q = DAG.getNode(ISD::SRA, dl, VT, Q, - DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType()))); + DAG.getConstant(magics.s, dl, + getShiftAmountTy(Q.getValueType()))); Created->push_back(Q.getNode()); } // Extract the sign bit and add it to the quotient SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, - DAG.getConstant(VT.getScalarSizeInBits() - 1, + DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, getShiftAmountTy(Q.getValueType()))); Created->push_back(T.getNode()); return DAG.getNode(ISD::ADD, dl, VT, Q, T); @@ -2740,7 +2771,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, if (magics.a != 0 && !Divisor[0]) { unsigned Shift = Divisor.countTrailingZeros(); Q = DAG.getNode(ISD::SRL, dl, VT, Q, - DAG.getConstant(Shift, getShiftAmountTy(Q.getValueType()))); + DAG.getConstant(Shift, dl, + getShiftAmountTy(Q.getValueType()))); Created->push_back(Q.getNode()); // Get magic number for the shifted divisor. @@ -2752,11 +2784,11 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, // FIXME: We should support doing a MUL in a wider type if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT) : isOperationLegalOrCustom(ISD::MULHU, VT)) - Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, VT)); + Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, dl, VT)); else if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT) : isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q, - DAG.getConstant(magics.m, VT)).getNode(), 1); + DAG.getConstant(magics.m, dl, VT)).getNode(), 1); else return SDValue(); // No mulhu or equvialent @@ -2766,17 +2798,20 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, assert(magics.s < Divisor.getBitWidth() && "We shouldn't generate an undefined shift!"); return DAG.getNode(ISD::SRL, dl, VT, Q, - DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType()))); + DAG.getConstant(magics.s, dl, + getShiftAmountTy(Q.getValueType()))); } else { SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q); Created->push_back(NPQ.getNode()); NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, - DAG.getConstant(1, getShiftAmountTy(NPQ.getValueType()))); + DAG.getConstant(1, dl, + getShiftAmountTy(NPQ.getValueType()))); Created->push_back(NPQ.getNode()); NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q); Created->push_back(NPQ.getNode()); return DAG.getNode(ISD::SRL, dl, VT, NPQ, - DAG.getConstant(magics.s-1, getShiftAmountTy(NPQ.getValueType()))); + DAG.getConstant(magics.s - 1, dl, + getShiftAmountTy(NPQ.getValueType()))); } } @@ -2863,7 +2898,7 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, isOperationLegalOrCustom(ISD::SRL, VT) && isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) { unsigned ShiftAmt = VT.getSizeInBits() - HiLoVT.getSizeInBits(); - SDValue Shift = DAG.getConstant(ShiftAmt, getShiftAmountTy(VT)); + SDValue Shift = DAG.getConstant(ShiftAmt, dl, getShiftAmountTy(VT)); LH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(0), Shift); LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH); RH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(1), Shift); @@ -2913,13 +2948,13 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); - SDValue ExponentMask = DAG.getConstant(0x7F800000, IntVT); - SDValue ExponentLoBit = DAG.getConstant(23, IntVT); - SDValue Bias = DAG.getConstant(127, IntVT); - SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()), + SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT); + SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT); + SDValue Bias = DAG.getConstant(127, dl, IntVT); + SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()), dl, IntVT); - SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, IntVT); - SDValue MantissaMask = DAG.getConstant(0x007FFFFF, IntVT); + SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, dl, IntVT); + SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT); SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0)); @@ -2935,7 +2970,7 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, SDValue R = DAG.getNode(ISD::OR, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask), - DAG.getConstant(0x00800000, IntVT)); + DAG.getConstant(0x00800000, dl, IntVT)); R = DAG.getZExtOrTrunc(R, dl, NVT); @@ -2955,7 +2990,7 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, DAG.getNode(ISD::XOR, dl, NVT, R, Sign), Sign); - Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, IntVT), - DAG.getConstant(0, NVT), Ret, ISD::SETLT); + Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT), + DAG.getConstant(0, dl, NVT), Ret, ISD::SETLT); return true; } |