diff options
Diffstat (limited to 'lib/CodeGen/SelectionDAG')
19 files changed, 1196 insertions, 512 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e843f5f..4f0d2ca 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -138,6 +138,10 @@ namespace { SDValue PromoteExtend(SDValue Op); bool PromoteLoad(SDValue Op); + void ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs, + SDValue Trunc, SDValue ExtLoad, DebugLoc DL, + ISD::NodeType ExtType); + /// combine - call the node-specific routine that knows how to fold each /// particular type of node. If that doesn't do anything, try the /// target-specific DAG combines. @@ -234,6 +238,9 @@ namespace { SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); SDValue BuildUDIV(SDNode *N); + SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, + bool DemandHighBits = true); + SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL); SDValue ReduceLoadWidth(SDNode *N); SDValue ReduceLoadOpStoreWidth(SDNode *N); @@ -994,7 +1001,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { dbgs() << "\nWith: "; RV.getNode()->dump(&DAG); dbgs() << '\n'); - + // Transfer debug value. DAG.TransferDbgValues(SDValue(N, 0), RV); WorkListRemover DeadNodes(*this); @@ -1303,16 +1310,6 @@ SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1, return SDValue(); } -/// isCarryMaterialization - Returns true if V is an ADDE node that is known to -/// return 0 or 1 depending on the carry flag. -static bool isCarryMaterialization(SDValue V) { - if (V.getOpcode() != ISD::ADDE) - return false; - - ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(0)); - return C && C->isNullValue() && V.getOperand(0) == V.getOperand(1); -} - SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -1476,18 +1473,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) { return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); } - // add (adde 0, 0, glue), X -> adde X, 0, glue - if (N0->hasOneUse() && isCarryMaterialization(N0)) - return DAG.getNode(ISD::ADDE, N->getDebugLoc(), - DAG.getVTList(VT, MVT::Glue), N1, N0.getOperand(0), - N0.getOperand(2)); - - // add X, (adde 0, 0, glue) -> adde X, 0, glue - if (N1->hasOneUse() && isCarryMaterialization(N1)) - return DAG.getNode(ISD::ADDE, N->getDebugLoc(), - DAG.getVTList(VT, MVT::Glue), N0, N1.getOperand(0), - N1.getOperand(2)); - return SDValue(); } @@ -1531,16 +1516,6 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { N->getDebugLoc(), MVT::Glue)); } - // addc (adde 0, 0, glue), X -> adde X, 0, glue - if (N0->hasOneUse() && isCarryMaterialization(N0)) - return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N1, - DAG.getConstant(0, VT), N0.getOperand(2)); - - // addc X, (adde 0, 0, glue) -> adde X, 0, glue - if (N1->hasOneUse() && isCarryMaterialization(N1)) - return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N0, - DAG.getConstant(0, VT), N1.getOperand(2)); - return SDValue(); } @@ -1591,6 +1566,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? 0 : + dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode()); EVT VT = N0.getValueType(); // fold vector ops @@ -1622,6 +1599,12 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // fold (A+B)-B -> A if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) return N0.getOperand(0); + // fold C2-(A+C1) -> (C2-C1)-A + if (N1.getOpcode() == ISD::ADD && N0C && N1C1) { + SDValue NewC = DAG.getConstant((N0C->getAPIntValue() - N1C1->getAPIntValue()), VT); + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC, + N1.getOperand(0)); + } // fold ((A+(B+or-C))-B) -> A+or-C if (N0.getOpcode() == ISD::ADD && (N0.getOperand(1).getOpcode() == ISD::SUB || @@ -2508,6 +2491,244 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return SDValue(); } +/// MatchBSwapHWord - Match (a >> 8) | (a << 8) as (bswap a) >> 16 +/// +SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, + bool DemandHighBits) { + if (!LegalOperations) + return SDValue(); + + EVT VT = N->getValueType(0); + if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16) + return SDValue(); + if (!TLI.isOperationLegal(ISD::BSWAP, VT)) + return SDValue(); + + // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00) + bool LookPassAnd0 = false; + bool LookPassAnd1 = false; + if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL) + std::swap(N0, N1); + if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL) + std::swap(N0, N1); + if (N0.getOpcode() == ISD::AND) { + if (!N0.getNode()->hasOneUse()) + return SDValue(); + ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (!N01C || N01C->getZExtValue() != 0xFF00) + return SDValue(); + N0 = N0.getOperand(0); + LookPassAnd0 = true; + } + + if (N1.getOpcode() == ISD::AND) { + if (!N1.getNode()->hasOneUse()) + return SDValue(); + ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); + if (!N11C || N11C->getZExtValue() != 0xFF) + return SDValue(); + N1 = N1.getOperand(0); + LookPassAnd1 = true; + } + + if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL) + std::swap(N0, N1); + if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) + return SDValue(); + if (!N0.getNode()->hasOneUse() || + !N1.getNode()->hasOneUse()) + return SDValue(); + + ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); + if (!N01C || !N11C) + return SDValue(); + if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8) + return SDValue(); + + // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8) + SDValue N00 = N0->getOperand(0); + if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) { + if (!N00.getNode()->hasOneUse()) + return SDValue(); + ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1)); + if (!N001C || N001C->getZExtValue() != 0xFF) + return SDValue(); + N00 = N00.getOperand(0); + LookPassAnd0 = true; + } + + SDValue N10 = N1->getOperand(0); + if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) { + if (!N10.getNode()->hasOneUse()) + return SDValue(); + ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1)); + if (!N101C || N101C->getZExtValue() != 0xFF00) + return SDValue(); + N10 = N10.getOperand(0); + LookPassAnd1 = true; + } + + if (N00 != N10) + return SDValue(); + + // Make sure everything beyond the low halfword is zero since the SRL 16 + // will clear the top bits. + unsigned OpSizeInBits = VT.getSizeInBits(); + if (DemandHighBits && OpSizeInBits > 16 && + (!LookPassAnd0 || !LookPassAnd1) && + !DAG.MaskedValueIsZero(N10, APInt::getHighBitsSet(OpSizeInBits, 16))) + return SDValue(); + + SDValue Res = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT, N00); + if (OpSizeInBits > 16) + Res = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Res, + DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT))); + return Res; +} + +/// isBSwapHWordElement - Return true if the specified node is an element +/// that makes up a 32-bit packed halfword byteswap. i.e. +/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8) +static bool isBSwapHWordElement(SDValue N, SmallVector<SDNode*,4> &Parts) { + if (!N.getNode()->hasOneUse()) + return false; + + unsigned Opc = N.getOpcode(); + if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL) + return false; + + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (!N1C) + return false; + + unsigned Num; + switch (N1C->getZExtValue()) { + default: + return false; + case 0xFF: Num = 0; break; + case 0xFF00: Num = 1; break; + case 0xFF0000: Num = 2; break; + case 0xFF000000: Num = 3; break; + } + + // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00). + SDValue N0 = N.getOperand(0); + if (Opc == ISD::AND) { + if (Num == 0 || Num == 2) { + // (x >> 8) & 0xff + // (x >> 8) & 0xff0000 + if (N0.getOpcode() != ISD::SRL) + return false; + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (!C || C->getZExtValue() != 8) + return false; + } else { + // (x << 8) & 0xff00 + // (x << 8) & 0xff000000 + if (N0.getOpcode() != ISD::SHL) + return false; + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (!C || C->getZExtValue() != 8) + return false; + } + } else if (Opc == ISD::SHL) { + // (x & 0xff) << 8 + // (x & 0xff0000) << 8 + if (Num != 0 && Num != 2) + return false; + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (!C || C->getZExtValue() != 8) + return false; + } else { // Opc == ISD::SRL + // (x & 0xff00) >> 8 + // (x & 0xff000000) >> 8 + if (Num != 1 && Num != 3) + return false; + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (!C || C->getZExtValue() != 8) + return false; + } + + if (Parts[Num]) + return false; + + Parts[Num] = N0.getOperand(0).getNode(); + return true; +} + +/// MatchBSwapHWord - Match a 32-bit packed halfword bswap. That is +/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8) +/// => (rotl (bswap x), 16) +SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { + if (!LegalOperations) + return SDValue(); + + EVT VT = N->getValueType(0); + if (VT != MVT::i32) + return SDValue(); + if (!TLI.isOperationLegal(ISD::BSWAP, VT)) + return SDValue(); + + SmallVector<SDNode*,4> Parts(4, (SDNode*)0); + // Look for either + // (or (or (and), (and)), (or (and), (and))) + // (or (or (or (and), (and)), (and)), (and)) + if (N0.getOpcode() != ISD::OR) + return SDValue(); + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + + if (N1.getOpcode() == ISD::OR) { + // (or (or (and), (and)), (or (and), (and))) + SDValue N000 = N00.getOperand(0); + if (!isBSwapHWordElement(N000, Parts)) + return SDValue(); + + SDValue N001 = N00.getOperand(1); + if (!isBSwapHWordElement(N001, Parts)) + return SDValue(); + SDValue N010 = N01.getOperand(0); + if (!isBSwapHWordElement(N010, Parts)) + return SDValue(); + SDValue N011 = N01.getOperand(1); + if (!isBSwapHWordElement(N011, Parts)) + return SDValue(); + } else { + // (or (or (or (and), (and)), (and)), (and)) + if (!isBSwapHWordElement(N1, Parts)) + return SDValue(); + if (!isBSwapHWordElement(N01, Parts)) + return SDValue(); + if (N00.getOpcode() != ISD::OR) + return SDValue(); + SDValue N000 = N00.getOperand(0); + if (!isBSwapHWordElement(N000, Parts)) + return SDValue(); + SDValue N001 = N00.getOperand(1); + if (!isBSwapHWordElement(N001, Parts)) + return SDValue(); + } + + // Make sure the parts are all coming from the same node. + if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) + return SDValue(); + + SDValue BSwap = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT, + SDValue(Parts[0],0)); + + // Result of the bswap should be rotated by 16. If it's not legal, than + // do (x << 16) | (x >> 16). + SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT)); + if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) + return DAG.getNode(ISD::ROTL, N->getDebugLoc(), VT, BSwap, ShAmt); + else if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) + return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, BSwap, ShAmt); + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, + DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, BSwap, ShAmt), + DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, BSwap, ShAmt)); +} + SDValue DAGCombiner::visitOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -2543,6 +2764,15 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // fold (or x, c) -> c iff (x & ~c) == 0 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) return N1; + + // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) + SDValue BSwap = MatchBSwapHWord(N, N0, N1); + if (BSwap.getNode() != 0) + return BSwap; + BSwap = MatchBSwapHWordLow(N, N0, N1); + if (BSwap.getNode() != 0) + return BSwap; + // reassociate or SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1); if (ROR.getNode() != 0) @@ -3030,6 +3260,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // fold (shl x, 0) -> x if (N1C && N1C->isNullValue()) return N0; + // fold (shl undef, x) -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); // if (shl x, c) is known to be zero, return 0 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnesValue(OpSizeInBits))) @@ -3696,6 +3929,28 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, return true; } +void DAGCombiner::ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs, + SDValue Trunc, SDValue ExtLoad, DebugLoc DL, + ISD::NodeType ExtType) { + // Extend SetCC uses if necessary. + for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { + SDNode *SetCC = SetCCs[i]; + SmallVector<SDValue, 4> Ops; + + for (unsigned j = 0; j != 2; ++j) { + SDValue SOp = SetCC->getOperand(j); + if (SOp == Trunc) + Ops.push_back(ExtLoad); + else + Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp)); + } + + Ops.push_back(SetCC->getOperand(2)); + CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), + &Ops[0], Ops.size())); + } +} + SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -3784,27 +4039,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), ExtLoad); CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); - - // Extend SetCC uses if necessary. - for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { - SDNode *SetCC = SetCCs[i]; - SmallVector<SDValue, 4> Ops; - - for (unsigned j = 0; j != 2; ++j) { - SDValue SOp = SetCC->getOperand(j); - if (SOp == Trunc) - Ops.push_back(ExtLoad); - else - Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND, - N->getDebugLoc(), VT, SOp)); - } - - Ops.push_back(SetCC->getOperand(2)); - CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(), - SetCC->getValueType(0), - &Ops[0], Ops.size())); - } - + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ISD::SIGN_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } @@ -3832,6 +4068,45 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } } + // fold (sext (and/or/xor (load x), cst)) -> + // (and/or/xor (sextload x), (sext cst)) + if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || + N0.getOpcode() == ISD::XOR) && + isa<LoadSDNode>(N0.getOperand(0)) && + N0.getOperand(1).getOpcode() == ISD::Constant && + TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) && + (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); + if (LN0->getExtensionType() != ISD::ZEXTLOAD) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND, + SetCCs, TLI); + if (DoXform) { + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, LN0->getDebugLoc(), VT, + LN0->getChain(), LN0->getBasePtr(), + LN0->getPointerInfo(), + LN0->getMemoryVT(), + LN0->isVolatile(), + LN0->isNonTemporal(), + LN0->getAlignment()); + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask = Mask.sext(VT.getSizeInBits()); + SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + ExtLoad, DAG.getConstant(Mask, VT)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, + N0.getOperand(0).getDebugLoc(), + N0.getOperand(0).getValueType(), ExtLoad); + CombineTo(N, And); + CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ISD::SIGN_EXTEND); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + } + if (N0.getOpcode() == ISD::SETCC) { // sext(setcc) -> sext_in_reg(vsetcc) for vectors. // Only do this before legalize for now. @@ -3990,27 +4265,48 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { N0.getValueType(), ExtLoad); CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); - // Extend SetCC uses if necessary. - for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { - SDNode *SetCC = SetCCs[i]; - SmallVector<SDValue, 4> Ops; - - for (unsigned j = 0; j != 2; ++j) { - SDValue SOp = SetCC->getOperand(j); - if (SOp == Trunc) - Ops.push_back(ExtLoad); - else - Ops.push_back(DAG.getNode(ISD::ZERO_EXTEND, - N->getDebugLoc(), VT, SOp)); - } + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ISD::ZERO_EXTEND); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } - Ops.push_back(SetCC->getOperand(2)); - CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(), - SetCC->getValueType(0), - &Ops[0], Ops.size())); + // fold (zext (and/or/xor (load x), cst)) -> + // (and/or/xor (zextload x), (zext cst)) + if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || + N0.getOpcode() == ISD::XOR) && + isa<LoadSDNode>(N0.getOperand(0)) && + N0.getOperand(1).getOpcode() == ISD::Constant && + TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) && + (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); + if (LN0->getExtensionType() != ISD::SEXTLOAD) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND, + SetCCs, TLI); + if (DoXform) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, + LN0->getChain(), LN0->getBasePtr(), + LN0->getPointerInfo(), + LN0->getMemoryVT(), + LN0->isVolatile(), + LN0->isNonTemporal(), + LN0->getAlignment()); + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask = Mask.zext(VT.getSizeInBits()); + SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + ExtLoad, DAG.getConstant(Mask, VT)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, + N0.getOperand(0).getDebugLoc(), + N0.getOperand(0).getValueType(), ExtLoad); + CombineTo(N, And); + CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ISD::ZERO_EXTEND); + return SDValue(N, 0); // Return N so it doesn't get rechecked! } - - return SDValue(N, 0); // Return N so it doesn't get rechecked! } } @@ -4198,27 +4494,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), ExtLoad); CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); - - // Extend SetCC uses if necessary. - for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { - SDNode *SetCC = SetCCs[i]; - SmallVector<SDValue, 4> Ops; - - for (unsigned j = 0; j != 2; ++j) { - SDValue SOp = SetCC->getOperand(j); - if (SOp == Trunc) - Ops.push_back(ExtLoad); - else - Ops.push_back(DAG.getNode(ISD::ANY_EXTEND, - N->getDebugLoc(), VT, SOp)); - } - - Ops.push_back(SetCC->getOperand(2)); - CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(), - SetCC->getValueType(0), - &Ops[0], Ops.size())); - } - + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ISD::ANY_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } @@ -4555,6 +4832,16 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } + + // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16)) + if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) { + SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), + N0.getOperand(1), false); + if (BSwap.getNode() != 0) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, + BSwap, N1); + } + return SDValue(); } @@ -5180,7 +5467,8 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { // fold (sint_to_fp c1) -> c1fp if (N0C && OpVT != MVT::ppcf128 && // ...but only if the target supports immediate floating-point values - (Level == llvm::Unrestricted || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) + (Level == llvm::Unrestricted || + TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); // If the input is a legal type, and SINT_TO_FP is not legal on this target, @@ -5204,7 +5492,8 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { // fold (uint_to_fp c1) -> c1fp if (N0C && OpVT != MVT::ppcf128 && // ...but only if the target supports immediate floating-point values - (Level == llvm::Unrestricted || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) + (Level == llvm::Unrestricted || + TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); // If the input is a legal type, and UINT_TO_FP is not legal on this target, @@ -5648,12 +5937,17 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // Now check for #3 and #4. bool RealUse = false; + + // Caches for hasPredecessorHelper + SmallPtrSet<const SDNode *, 32> Visited; + SmallVector<const SDNode *, 16> Worklist; + for (SDNode::use_iterator I = Ptr.getNode()->use_begin(), E = Ptr.getNode()->use_end(); I != E; ++I) { SDNode *Use = *I; if (Use == N) continue; - if (Use->isPredecessorOf(N)) + if (N->hasPredecessorHelper(Use, Visited, Worklist)) return false; if (!((Use->getOpcode() == ISD::LOAD && @@ -6431,8 +6725,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" SDValue Shorter = GetDemandedBits(Value, - APInt::getLowBitsSet(Value.getValueSizeInBits(), - ST->getMemoryVT().getSizeInBits())); + APInt::getLowBitsSet( + Value.getValueType().getScalarType().getSizeInBits(), + ST->getMemoryVT().getScalarType().getSizeInBits())); AddToWorkList(Value.getNode()); if (Shorter.getNode()) return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter, @@ -7156,7 +7451,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, const TargetData &TD = *TLI.getTargetData(); // Create a ConstantArray of the two constants. - Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts, 2); + Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts); SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(), TD.getPrefTypeAlignment(FPTy)); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 797f174..54a7d43 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -547,7 +547,7 @@ bool FastISel::SelectCall(const User *I) { case Intrinsic::dbg_value: { // This form of DBG_VALUE is target-independent. const DbgValueInst *DI = cast<DbgValueInst>(Call); - const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); + const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); const Value *V = DI->getValue(); if (!V) { // Currently the optimizer can produce this; insert an undef to @@ -556,9 +556,14 @@ bool FastISel::SelectCall(const User *I) { .addReg(0U).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) - .addImm(CI->getZExtValue()).addImm(DI->getOffset()) - .addMetadata(DI->getVariable()); + if (CI->getBitWidth() > 64) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addCImm(CI).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); + else + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addImm(CI->getZExtValue()).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addFPImm(CF).addImm(DI->getOffset()) @@ -847,7 +852,7 @@ FastISel::SelectExtractValue(const User *U) { return false; // fast-isel can't handle aggregate constants at the moment // Get the actual result register, which is an offset from the base register. - unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->idx_begin(), EVI->idx_end()); + unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->getIndices()); SmallVector<EVT, 4> AggValueVTs; ComputeValueVTs(TLI, AggTy, AggValueVTs); @@ -1085,7 +1090,7 @@ unsigned FastISel::createResultReg(const TargetRegisterClass* RC) { unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode, const TargetRegisterClass* RC) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg); return ResultReg; @@ -1095,7 +1100,7 @@ unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -1115,7 +1120,7 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -1137,7 +1142,7 @@ unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, unsigned Op1, bool Op1IsKill, unsigned Op2, bool Op2IsKill) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -1160,7 +1165,7 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, unsigned Op0, bool Op0IsKill, uint64_t Imm) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -1181,7 +1186,7 @@ unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode, unsigned Op0, bool Op0IsKill, uint64_t Imm1, uint64_t Imm2) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -1204,7 +1209,7 @@ unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, unsigned Op0, bool Op0IsKill, const ConstantFP *FPImm) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -1226,7 +1231,7 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, unsigned Op1, bool Op1IsKill, uint64_t Imm) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -1248,7 +1253,7 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm); @@ -1264,7 +1269,7 @@ unsigned FastISel::FastEmitInst_ii(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm1, uint64_t Imm2) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index cb49a80..f0f4743 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -76,6 +76,12 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, // the CopyToReg'd destination register instead of creating a new vreg. bool MatchReg = true; const TargetRegisterClass *UseRC = NULL; + EVT VT = Node->getValueType(ResNo); + + // Stick to the preferred register classes for legal types. + if (TLI->isTypeLegal(VT)) + UseRC = TLI->getRegClassFor(VT); + if (!IsClone && !IsCloned) for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); UI != E; ++UI) { @@ -100,10 +106,10 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, continue; Match = false; if (User->isMachineOpcode()) { - const TargetInstrDesc &II = TII->get(User->getMachineOpcode()); + const MCInstrDesc &II = TII->get(User->getMachineOpcode()); const TargetRegisterClass *RC = 0; if (i+II.getNumDefs() < II.getNumOperands()) - RC = II.OpInfo[i+II.getNumDefs()].getRegClass(TRI); + RC = TII->getRegClass(II, i+II.getNumDefs(), TRI); if (!UseRC) UseRC = RC; else if (RC) { @@ -121,10 +127,9 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, break; } - EVT VT = Node->getValueType(ResNo); const TargetRegisterClass *SrcRC = 0, *DstRC = 0; SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT); - + // Figure out the register class to create for the destreg. if (VRBase) { DstRC = MRI->getRegClass(VRBase); @@ -173,7 +178,7 @@ unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node, } void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, - const TargetInstrDesc &II, + const MCInstrDesc &II, bool IsClone, bool IsCloned, DenseMap<SDValue, unsigned> &VRBaseMap) { assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF && @@ -184,7 +189,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, // is a vreg in the same register class, use the CopyToReg'd destination // register instead of creating a new vreg. unsigned VRBase = 0; - const TargetRegisterClass *RC = II.OpInfo[i].getRegClass(TRI); + const TargetRegisterClass *RC = TII->getRegClass(II, i, TRI); if (II.OpInfo[i].isOptionalDef()) { // Optional def must be a physical register. unsigned NumResults = CountResults(Node); @@ -237,7 +242,7 @@ unsigned InstrEmitter::getVR(SDValue Op, Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { // Add an IMPLICIT_DEF instruction before every use. unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo()); - // IMPLICIT_DEF can produce any type of result so its TargetInstrDesc + // IMPLICIT_DEF can produce any type of result so its MCInstrDesc // does not include operand register class info. if (!VReg) { const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType()); @@ -260,7 +265,7 @@ unsigned InstrEmitter::getVR(SDValue Op, void InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, - const TargetInstrDesc *II, + const MCInstrDesc *II, DenseMap<SDValue, unsigned> &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned) { assert(Op.getValueType() != MVT::Other && @@ -270,9 +275,9 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, unsigned VReg = getVR(Op, VRBaseMap); assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?"); - const TargetInstrDesc &TID = MI->getDesc(); - bool isOptDef = IIOpNum < TID.getNumOperands() && - TID.OpInfo[IIOpNum].isOptionalDef(); + const MCInstrDesc &MCID = MI->getDesc(); + bool isOptDef = IIOpNum < MCID.getNumOperands() && + MCID.OpInfo[IIOpNum].isOptionalDef(); // If the instruction requires a register in a different class, create // a new virtual register and copy the value into it. @@ -280,8 +285,8 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, const TargetRegisterClass *SrcRC = MRI->getRegClass(VReg); const TargetRegisterClass *DstRC = 0; if (IIOpNum < II->getNumOperands()) - DstRC = II->OpInfo[IIOpNum].getRegClass(TRI); - assert((DstRC || (TID.isVariadic() && IIOpNum >= TID.getNumOperands())) && + DstRC = TII->getRegClass(*II, IIOpNum, TRI); + assert((DstRC || (MCID.isVariadic() && IIOpNum >= MCID.getNumOperands())) && "Don't have operand info for this instruction!"); if (DstRC && !SrcRC->hasSuperClassEq(DstRC)) { unsigned NewVReg = MRI->createVirtualRegister(DstRC); @@ -307,7 +312,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, while (Idx > 0 && MI->getOperand(Idx-1).isReg() && MI->getOperand(Idx-1).isImplicit()) --Idx; - bool isTied = MI->getDesc().getOperandConstraint(Idx, TOI::TIED_TO) != -1; + bool isTied = MI->getDesc().getOperandConstraint(Idx, MCOI::TIED_TO) != -1; if (isTied) isKill = false; } @@ -325,7 +330,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, /// assertions only. void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, - const TargetInstrDesc *II, + const MCInstrDesc *II, DenseMap<SDValue, unsigned> &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned) { if (Op.isMachineOpcode()) { @@ -543,17 +548,18 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, void InstrEmitter::EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap, bool IsClone, bool IsCloned) { - const TargetRegisterClass *RC = TLI->getRegClassFor(Node->getValueType(0)); + unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); + const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); unsigned NewVReg = MRI->createVirtualRegister(RC); MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), TII->get(TargetOpcode::REG_SEQUENCE), NewVReg); unsigned NumOps = Node->getNumOperands(); - assert((NumOps & 1) == 0 && - "REG_SEQUENCE must have an even number of operands!"); - const TargetInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE); - for (unsigned i = 0; i != NumOps; ++i) { + assert((NumOps & 1) == 1 && + "REG_SEQUENCE must have an odd number of operands!"); + const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE); + for (unsigned i = 1; i != NumOps; ++i) { SDValue Op = Node->getOperand(i); - if (i & 1) { + if ((i & 1) == 0) { unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue(); unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); @@ -591,7 +597,7 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, return TII->emitFrameIndexDebugValue(*MF, FrameIx, Offset, MDPtr, DL); } // Otherwise, we're going to create an instruction here. - const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); + const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); MachineInstrBuilder MIB = BuildMI(*MF, DL, II); if (SD->getKind() == SDDbgValue::SDNODE) { SDNode *Node = SD->getSDNode(); @@ -610,12 +616,8 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, } else if (SD->getKind() == SDDbgValue::CONST) { const Value *V = SD->getConst(); if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { - // FIXME: SDDbgValue constants aren't updated with legalization, so it's - // possible to have i128 constants in them at this point. Dwarf writer - // does not handle i128 constants at the moment so, as a crude workaround, - // just drop the debug info if this happens. - if (!CI->getValue().isSignedIntN(64)) - MIB.addReg(0U); + if (CI->getBitWidth() > 64) + MIB.addCImm(CI); else MIB.addImm(CI->getSExtValue()); } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { @@ -666,7 +668,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // We want a unique VR for each IMPLICIT_DEF use. return; - const TargetInstrDesc &II = TII->get(Opc); + const MCInstrDesc &II = TII->get(Opc); unsigned NumResults = CountResults(Node); unsigned NodeOperands = CountOperands(Node); bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0; @@ -695,9 +697,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg()); else { // Collect declared implicit uses. - const TargetInstrDesc &TID = TII->get(F->getMachineOpcode()); - UsedRegs.append(TID.getImplicitUses(), - TID.getImplicitUses() + TID.getNumImplicitUses()); + const MCInstrDesc &MCID = TII->get(F->getMachineOpcode()); + UsedRegs.append(MCID.getImplicitUses(), + MCID.getImplicitUses() + MCID.getNumImplicitUses()); // In addition to declared implicit uses, we must also check for // direct RegisterSDNode operands. for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) @@ -849,6 +851,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, } break; case InlineAsm::Kind_RegDefEarlyClobber: + case InlineAsm::Kind_Clobber: for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); MI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/ true, diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h index 02c044c..19fc044 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -22,7 +22,7 @@ namespace llvm { -class TargetInstrDesc; +class MCInstrDesc; class SDDbgValue; class InstrEmitter { @@ -49,7 +49,7 @@ class InstrEmitter { unsigned ResNo) const; void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, - const TargetInstrDesc &II, + const MCInstrDesc &II, bool IsClone, bool IsCloned, DenseMap<SDValue, unsigned> &VRBaseMap); @@ -63,7 +63,7 @@ class InstrEmitter { /// not in the required register class. void AddRegisterOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, - const TargetInstrDesc *II, + const MCInstrDesc *II, DenseMap<SDValue, unsigned> &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned); @@ -73,7 +73,7 @@ class InstrEmitter { /// assertions only. void AddOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, - const TargetInstrDesc *II, + const MCInstrDesc *II, DenseMap<SDValue, unsigned> &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned); diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 62d777c..d06e2bd 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -58,17 +58,6 @@ class SelectionDAGLegalize { /// against each other, including inserted libcalls. SmallVector<SDValue, 8> LastCALLSEQ; - enum LegalizeAction { - Legal, // The target natively supports this operation. - Promote, // This operation should be executed in a larger type. - Expand // Try to expand this to other ops, otherwise use a libcall. - }; - - /// ValueTypeActions - This is a bitvector that contains two bits for each - /// value type, where the two bits correspond to the LegalizeAction enum. - /// This can be queried with "getTypeAction(VT)". - TargetLowering::ValueTypeActionImpl ValueTypeActions; - /// LegalizedNodes - For nodes that are of legal width, and that have more /// than one use, this map indicates what regularized operand to use. This /// allows us to avoid legalizing the same thing more than once. @@ -87,25 +76,11 @@ class SelectionDAGLegalize { public: explicit SelectionDAGLegalize(SelectionDAG &DAG); - /// getTypeAction - Return how we should legalize values of this type, either - /// it is already legal or we need to expand it into multiple registers of - /// smaller integer type, or we need to promote it to a larger type. - LegalizeAction getTypeAction(EVT VT) const { - return (LegalizeAction)TLI.getTypeAction(*DAG.getContext(), VT); - } - - /// isTypeLegal - Return true if this type is legal on this target. - /// - bool isTypeLegal(EVT VT) const { - return getTypeAction(VT) == Legal; - } - void LegalizeDAG(); private: - /// LegalizeOp - We know that the specified value has a legal type. - /// Recursively ensure that the operands have legal types, then return the - /// result. + /// LegalizeOp - Return a legal replacement for the given operation, with + /// all legal operands. SDValue LegalizeOp(SDValue O); SDValue OptimizeFloatStore(StoreSDNode *ST); @@ -220,10 +195,7 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag) : TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), - DAG(dag), - ValueTypeActions(TLI.getValueTypeActions()) { - assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE && - "Too many value types for ValueTypeActions to hold!"); + DAG(dag) { } void SelectionDAGLegalize::LegalizeDAG() { @@ -753,7 +725,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { DebugLoc dl = ST->getDebugLoc(); if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) { if (CFP->getValueType(0) == MVT::f32 && - getTypeAction(MVT::i32) == Legal) { + TLI.isTypeLegal(MVT::i32)) { Tmp3 = DAG.getConstant(CFP->getValueAPF(). bitcastToAPInt().zextOrTrunc(32), MVT::i32); @@ -763,14 +735,14 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { if (CFP->getValueType(0) == MVT::f64) { // If this target supports 64-bit registers, do a single 64-bit store. - if (getTypeAction(MVT::i64) == Legal) { + if (TLI.isTypeLegal(MVT::i64)) { Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). zextOrTrunc(64), MVT::i64); return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment); } - if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) { + if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) { // Otherwise, if the target supports 32-bit registers, use 2 32-bit // stores. If the target supports neither 32- nor 64-bits, this // xform is certainly not worth it. @@ -794,10 +766,8 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { return SDValue(0, 0); } -/// LegalizeOp - We know that the specified value has a legal type, and -/// that its operands are legal. Now ensure that the operation itself -/// is legal, recursively ensuring that the operands' operations remain -/// legal. +/// LegalizeOp - Return a legal replacement for the given operation, with +/// all legal operands. SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. return Op; @@ -806,11 +776,14 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { DebugLoc dl = Node->getDebugLoc(); for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - assert(getTypeAction(Node->getValueType(i)) == Legal && + assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == + TargetLowering::TypeLegal && "Unexpected illegal type!"); for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) - assert((isTypeLegal(Node->getOperand(i).getValueType()) || + assert((TLI.getTypeAction(*DAG.getContext(), + Node->getOperand(i).getValueType()) == + TargetLowering::TypeLegal || Node->getOperand(i).getOpcode() == ISD::TargetConstant) && "Unexpected illegal type!"); @@ -1354,7 +1327,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } break; case TargetLowering::Expand: - if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && isTypeLegal(SrcVT)) { + if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) { SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), @@ -1374,6 +1347,91 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp2 = LegalizeOp(Load.getValue(1)); break; } + + // If this is a promoted vector load, and the vector element types are + // legal, then scalarize it. + if (ExtType == ISD::EXTLOAD && SrcVT.isVector() && + TLI.isTypeLegal(Node->getValueType(0).getScalarType())) { + SmallVector<SDValue, 8> LoadVals; + SmallVector<SDValue, 8> LoadChains; + unsigned NumElem = SrcVT.getVectorNumElements(); + unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; + + for (unsigned Idx=0; Idx<NumElem; Idx++) { + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(Stride)); + SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, + Node->getValueType(0).getScalarType(), + Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride), + SrcVT.getScalarType(), + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); + + LoadVals.push_back(ScalarLoad.getValue(0)); + LoadChains.push_back(ScalarLoad.getValue(1)); + } + Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &LoadChains[0], LoadChains.size()); + SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl, + Node->getValueType(0), &LoadVals[0], LoadVals.size()); + + Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. + Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes. + break; + } + + // If this is a promoted vector load, and the vector element types are + // illegal, create the promoted vector from bitcasted segments. + if (ExtType == ISD::EXTLOAD && SrcVT.isVector()) { + EVT MemElemTy = Node->getValueType(0).getScalarType(); + EVT SrcSclrTy = SrcVT.getScalarType(); + unsigned SizeRatio = + (MemElemTy.getSizeInBits() / SrcSclrTy.getSizeInBits()); + + SmallVector<SDValue, 8> LoadVals; + SmallVector<SDValue, 8> LoadChains; + unsigned NumElem = SrcVT.getVectorNumElements(); + unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; + + for (unsigned Idx=0; Idx<NumElem; Idx++) { + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(Stride)); + SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, + SrcVT.getScalarType(), + Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride), + SrcVT.getScalarType(), + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); + if (TLI.isBigEndian()) { + // MSB (which is garbage, comes first) + LoadVals.push_back(ScalarLoad.getValue(0)); + for (unsigned i = 0; i<SizeRatio-1; ++i) + LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType())); + } else { + // LSB (which is data, comes first) + for (unsigned i = 0; i<SizeRatio-1; ++i) + LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType())); + LoadVals.push_back(ScalarLoad.getValue(0)); + } + LoadChains.push_back(ScalarLoad.getValue(1)); + } + + Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &LoadChains[0], LoadChains.size()); + EVT TempWideVector = EVT::getVectorVT(*DAG.getContext(), + SrcVT.getScalarType(), NumElem*SizeRatio); + SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl, + TempWideVector, &LoadVals[0], LoadVals.size()); + + // Cast to the correct type + ValRes = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), ValRes); + + Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. + Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes. + break; + + } + // FIXME: This does not work for vectors on most targets. Sign- and // zero-extend operations are currently folded into extending loads, // whether they are legal or not, and then we end up here without any @@ -1548,9 +1606,91 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case TargetLowering::Custom: Result = TLI.LowerOperation(Result, DAG); break; - case Expand: + case TargetLowering::Expand: + + EVT WideScalarVT = Tmp3.getValueType().getScalarType(); + EVT NarrowScalarVT = StVT.getScalarType(); + + // The Store type is illegal, must scalarize the vector store. + SmallVector<SDValue, 8> Stores; + bool ScalarLegal = TLI.isTypeLegal(WideScalarVT); + if (!TLI.isTypeLegal(StVT) && StVT.isVector() && ScalarLegal) { + unsigned NumElem = StVT.getVectorNumElements(); + + unsigned ScalarSize = StVT.getScalarType().getSizeInBits(); + // Round odd types to the next pow of two. + if (!isPowerOf2_32(ScalarSize)) + ScalarSize = NextPowerOf2(ScalarSize); + // Types smaller than 8 bits are promoted to 8 bits. + ScalarSize = std::max<unsigned>(ScalarSize, 8); + // Store stride + unsigned Stride = ScalarSize/8; + assert(isPowerOf2_32(Stride) && "Stride must be a power of two"); + + for (unsigned Idx=0; Idx<NumElem; Idx++) { + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + WideScalarVT, Tmp3, DAG.getIntPtrConstant(Idx)); + + + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), ScalarSize); + + Ex = DAG.getNode(ISD::TRUNCATE, dl, NVT, Ex); + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(Stride)); + SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2, + ST->getPointerInfo().getWithOffset(Idx*Stride), + isVolatile, isNonTemporal, Alignment); + Stores.push_back(Store); + } + Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &Stores[0], Stores.size()); + break; + } + + // The Store type is illegal, must scalarize the vector store. + // However, the scalar type is illegal. Must bitcast the result + // and store it in smaller parts. + if (!TLI.isTypeLegal(StVT) && StVT.isVector()) { + unsigned WideNumElem = StVT.getVectorNumElements(); + unsigned Stride = NarrowScalarVT.getSizeInBits()/8; + + unsigned SizeRatio = + (WideScalarVT.getSizeInBits() / NarrowScalarVT.getSizeInBits()); + + EVT CastValueVT = EVT::getVectorVT(*DAG.getContext(), NarrowScalarVT, + SizeRatio*WideNumElem); + + // Cast the wide elem vector to wider vec with smaller elem type. + // Example <2 x i64> -> <4 x i32> + Tmp3 = DAG.getNode(ISD::BITCAST, dl, CastValueVT, Tmp3); + + for (unsigned Idx=0; Idx<WideNumElem*SizeRatio; Idx++) { + // Extract elment i + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + NarrowScalarVT, Tmp3, DAG.getIntPtrConstant(Idx)); + // bump pointer. + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(Stride)); + + // Store if, this element is: + // - First element on big endian, or + // - Last element on little endian + if (( TLI.isBigEndian() && (Idx%SizeRatio == 0)) || + ((!TLI.isBigEndian() && (Idx%SizeRatio == SizeRatio-1)))) { + SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2, + ST->getPointerInfo().getWithOffset(Idx*Stride), + isVolatile, isNonTemporal, Alignment); + Stores.push_back(Store); + } + } + Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &Stores[0], Stores.size()); + break; + } + + // TRUNCSTORE:i16 i32 -> STORE i16 - assert(isTypeLegal(StVT) && "Do not know how to expand this store!"); + assert(TLI.isTypeLegal(StVT) && "Do not know how to expand this store!"); Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3); Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment); @@ -1709,7 +1849,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { SDValue SignBit; EVT FloatVT = Tmp2.getValueType(); EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits()); - if (isTypeLegal(IVT)) { + if (TLI.isTypeLegal(IVT)) { // Convert to an integer with the same sign bit. SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2); } else { @@ -3031,7 +3171,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, EVT VT = Node->getValueType(0); EVT EltVT = VT.getVectorElementType(); - if (getTypeAction(EltVT) == Promote) + if (!TLI.isTypeLegal(EltVT)) EltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT); unsigned NumElems = VT.getVectorNumElements(); SmallVector<SDValue, 8> Ops; @@ -3184,6 +3324,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, RTLIB::REM_F80, RTLIB::REM_PPCF128)); break; + case ISD::FMA: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64, + RTLIB::FMA_F80, RTLIB::FMA_PPCF128)); + break; case ISD::FP16_TO_FP32: Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); break; diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 27a466b..e6835d8 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -74,6 +74,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break; case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break; case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break; + case ISD::FMA: R = SoftenFloatRes_FMA(N); break; case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break; case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; @@ -294,6 +295,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { NVT, &Op, 1, false, N->getDebugLoc()); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)), + GetSoftenedFloat(N->getOperand(2)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::FMA_F32, + RTLIB::FMA_F64, + RTLIB::FMA_F80, + RTLIB::FMA_PPCF128), + NVT, Ops, 3, false, N->getDebugLoc()); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), @@ -837,6 +851,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::FLOG: ExpandFloatRes_FLOG(N, Lo, Hi); break; case ISD::FLOG2: ExpandFloatRes_FLOG2(N, Lo, Hi); break; case ISD::FLOG10: ExpandFloatRes_FLOG10(N, Lo, Hi); break; + case ISD::FMA: ExpandFloatRes_FMA(N, Lo, Hi); break; case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break; case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break; case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break; @@ -989,6 +1004,19 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N, GetPairElements(Call, Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; + SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::FMA_F32, + RTLIB::FMA_F64, + RTLIB::FMA_F80, + RTLIB::FMA_PPCF128), + N->getValueType(0), Ops, 3, false, + N->getDebugLoc()); + GetPairElements(Call, Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index b8da57f..e7c77dd 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -19,6 +19,7 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" +#include "llvm/DerivedTypes.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -191,10 +192,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { if (NOutVT.bitsEq(NInVT)) // The input promotes to the same size. Convert the promoted value. return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp)); - if (NInVT.isVector()) - // Promote vector element via memory load/store. - return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, - CreateStackStoreLoad(InOp, OutVT)); break; case TargetLowering::TypeSoftenFloat: // Promote the integer operand by hand. @@ -204,8 +201,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { break; case TargetLowering::TypeScalarizeVector: // Convert the element to an integer and promote it by hand. - return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, - BitConvertToInteger(GetScalarizedVector(InOp))); + if (!NOutVT.isVector()) + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, + BitConvertToInteger(GetScalarizedVector(InOp))); + break; case TargetLowering::TypeSplitVector: { // For example, i32 = BITCAST v2i16 on alpha. Convert the split // pieces of the input into integers and reassemble in the final type. @@ -339,8 +338,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { // (eg: because the value being converted is too big), then the result of the // original operation was undefined anyway, so the assert is still correct. return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? - ISD::AssertZext : ISD::AssertSext, dl, - NVT, Res, DAG.getValueType(N->getValueType(0))); + ISD::AssertZext : ISD::AssertSext, dl, NVT, Res, + DAG.getValueType(N->getValueType(0).getScalarType())); } SDValue DAGTypeLegalizer::PromoteIntRes_FP32_TO_FP16(SDNode *N) { @@ -370,7 +369,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res, DAG.getValueType(N->getOperand(0).getValueType())); if (N->getOpcode() == ISD::ZERO_EXTEND) - return DAG.getZeroExtendInReg(Res, dl, N->getOperand(0).getValueType()); + return DAG.getZeroExtendInReg(Res, dl, + N->getOperand(0).getValueType().getScalarType()); assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!"); return Res; } @@ -520,20 +520,44 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Res; + SDValue InOp = N->getOperand(0); + DebugLoc dl = N->getDebugLoc(); - switch (getTypeAction(N->getOperand(0).getValueType())) { + switch (getTypeAction(InOp.getValueType())) { default: llvm_unreachable("Unknown type action!"); case TargetLowering::TypeLegal: case TargetLowering::TypeExpandInteger: - Res = N->getOperand(0); + Res = InOp; break; case TargetLowering::TypePromoteInteger: - Res = GetPromotedInteger(N->getOperand(0)); + Res = GetPromotedInteger(InOp); break; + case TargetLowering::TypeSplitVector: + EVT InVT = InOp.getValueType(); + assert(InVT.isVector() && "Cannot split scalar types"); + unsigned NumElts = InVT.getVectorNumElements(); + assert(NumElts == NVT.getVectorNumElements() && + "Dst and Src must have the same number of elements"); + EVT EltVT = InVT.getScalarType(); + assert(isPowerOf2_32(NumElts) && + "Promoted vector type must be a power of two"); + + EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts/2); + EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(), + NumElts/2); + + SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, InOp, + DAG.getIntPtrConstant(0)); + SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, InOp, + DAG.getIntPtrConstant(NumElts/2)); + EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1); + EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, EOp1, EOp2); } // Truncate to NVT instead of VT - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Res); + return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res); } SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) { @@ -970,7 +994,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { DebugLoc dl = N->getDebugLoc(); SDValue Op = GetPromotedInteger(N->getOperand(0)); Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op); - return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType()); + return DAG.getZeroExtendInReg(Op, dl, + N->getOperand(0).getValueType().getScalarType()); } @@ -1069,6 +1094,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break; case ISD::UADDO: case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break; + case ISD::UMULO: + case ISD::SMULO: ExpandIntRes_XMULO(N, Lo, Hi); break; } // If Lo/Hi is null, the sub-method took care of registering results etc. @@ -2146,6 +2173,86 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, ReplaceValueWith(SDValue(N, 1), Ofl); } +void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + const Type *RetTy = VT.getTypeForEVT(*DAG.getContext()); + EVT PtrVT = TLI.getPointerTy(); + const Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext()); + DebugLoc dl = N->getDebugLoc(); + + // A divide for UMULO should be faster than a function call. + if (N->getOpcode() == ISD::UMULO) { + SDValue LHS = N->getOperand(0), RHS = N->getOperand(1); + DebugLoc DL = N->getDebugLoc(); + + SDValue MUL = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS); + SplitInteger(MUL, Lo, Hi); + + // A divide for UMULO will be faster than a function call. Select to + // make sure we aren't using 0. + SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), + RHS, DAG.getConstant(0, VT), ISD::SETNE); + SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero, + DAG.getConstant(1, VT), RHS); + SDValue DIV = DAG.getNode(ISD::UDIV, DL, LHS.getValueType(), MUL, NotZero); + SDValue Overflow; + Overflow = DAG.getSetCC(DL, N->getValueType(1), DIV, LHS, ISD::SETNE); + ReplaceValueWith(SDValue(N, 1), Overflow); + return; + } + + // Replace this with a libcall that will check overflow. + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i32) + LC = RTLIB::MULO_I32; + else if (VT == MVT::i64) + LC = RTLIB::MULO_I64; + else if (VT == MVT::i128) + LC = RTLIB::MULO_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XMULO!"); + + SDValue Temp = DAG.CreateStackTemporary(PtrVT); + // Temporary for the overflow value, default it to zero. + SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, + DAG.getConstant(0, PtrVT), Temp, + MachinePointerInfo(), false, false, 0); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + EVT ArgVT = N->getOperand(i).getValueType(); + const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = N->getOperand(i); + Entry.Ty = ArgTy; + Entry.isSExt = true; + Entry.isZExt = false; + Args.push_back(Entry); + } + + // Also pass the address of the overflow check. + Entry.Node = Temp; + Entry.Ty = PtrTy->getPointerTo(); + Entry.isSExt = true; + Entry.isZExt = false; + Args.push_back(Entry); + + SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT); + std::pair<SDValue, SDValue> CallInfo = + TLI.LowerCallTo(Chain, RetTy, true, false, false, false, + 0, TLI.getLibcallCallingConv(LC), false, + true, Func, Args, DAG, dl); + + SplitInteger(CallInfo.first, Lo, Hi); + SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp, + MachinePointerInfo(), false, false, 0); + SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2, + DAG.getConstant(0, PtrVT), + ISD::SETNE); + // Use the overflow from the libcall everywhere. + ReplaceValueWith(SDValue(N, 1), Ofl); +} + void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); @@ -2638,18 +2745,18 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { SDValue InOp0 = N->getOperand(0); EVT InVT = InOp0.getValueType(); - EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); assert(NOutVT.isVector() && "This type must be promoted to a vector type"); - unsigned OutNumElems = N->getValueType(0).getVectorNumElements(); + unsigned OutNumElems = OutVT.getVectorNumElements(); EVT NOutVTElem = NOutVT.getVectorElementType(); DebugLoc dl = N->getDebugLoc(); SDValue BaseIdx = N->getOperand(1); SmallVector<SDValue, 8> Ops; + Ops.reserve(OutNumElems); for (unsigned i = 0; i != OutNumElems; ++i) { // Extract the element from the original vector. @@ -2681,18 +2788,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) { SDValue V0 = GetPromotedInteger(N->getOperand(0)); SDValue V1 = GetPromotedInteger(N->getOperand(1)); - EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + EVT OutVT = V0.getValueType(); - return DAG.getVectorShuffle(OutVT, dl, V0,V1, &NewMask[0]); + return DAG.getVectorShuffle(OutVT, dl, V0, V1, &NewMask[0]); } SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { - - SDValue InOp0 = N->getOperand(0); - EVT InVT = InOp0.getValueType(); - EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); - EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); assert(NOutVT.isVector() && "This type must be promoted to a vector type"); @@ -2702,6 +2804,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { DebugLoc dl = N->getDebugLoc(); SmallVector<SDValue, 8> Ops; + Ops.reserve(NumElems); for (unsigned i = 0; i != NumElems; ++i) { SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i)); Ops.push_back(Op); @@ -2714,10 +2817,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { DebugLoc dl = N->getDebugLoc(); - SDValue InOp0 = N->getOperand(0); - EVT InVT = InOp0.getValueType(); - EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); - assert(!InVT.isVector() && "Input must not be a scalar"); + assert(!N->getOperand(0).getValueType().isVector() && + "Input must be a scalar"); EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); @@ -2730,12 +2831,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) { - - SDValue InOp0 = N->getOperand(0); - EVT InVT = InOp0.getValueType(); - EVT InElVT = InVT.getVectorElementType(); - EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); - EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); assert(NOutVT.isVector() && "This type must be promoted to a vector type"); @@ -2744,7 +2839,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) { DebugLoc dl = N->getDebugLoc(); - SDValue ConvertedVector = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, InOp0); + SDValue ConvertedVector = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, + N->getOperand(0)); SDValue ConvElem = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(1)); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index b2f966b..952797d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -201,7 +201,7 @@ private: EVT OldVT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); Op = GetPromotedInteger(Op); - return DAG.getZeroExtendInReg(Op, dl, OldVT); + return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType()); } // Integer Result Promotion. @@ -318,6 +318,7 @@ private: void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue &Lo, SDValue &Hi); @@ -377,6 +378,7 @@ private: SDValue SoftenFloatRes_FLOG(SDNode *N); SDValue SoftenFloatRes_FLOG2(SDNode *N); SDValue SoftenFloatRes_FLOG10(SDNode *N); + SDValue SoftenFloatRes_FMA(SDNode *N); SDValue SoftenFloatRes_FMUL(SDNode *N); SDValue SoftenFloatRes_FNEARBYINT(SDNode *N); SDValue SoftenFloatRes_FNEG(SDNode *N); @@ -441,6 +443,7 @@ private: void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FLOG10 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMA (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FMUL (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FNEG (SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 5d0f923..ffff10c 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -182,9 +182,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FRINT: case ISD::FNEARBYINT: case ISD::FFLOOR: + case ISD::SIGN_EXTEND_INREG: QueryType = Node->getValueType(0); break; - case ISD::SIGN_EXTEND_INREG: case ISD::FP_ROUND_INREG: QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT(); break; diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 9595f69..b5698f9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2164,6 +2164,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, if (MemVT.getSizeInBits() <= WidenEltWidth) break; if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 && + isPowerOf2_32(WidenWidth / MemVTWidth) && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { RetVT = MemVT; @@ -2179,6 +2180,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, unsigned MemVTWidth = MemVT.getSizeInBits(); if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() && (WidenWidth % MemVTWidth) == 0 && + isPowerOf2_32(WidenWidth / MemVTWidth) && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT) diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 7b560d1..b275c63 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -249,14 +249,14 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NewSU->NodeNum); - const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); - for (unsigned i = 0; i != TID.getNumOperands(); ++i) { - if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { + if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { NewSU->isTwoAddress = true; break; } } - if (TID.isCommutable()) + if (MCID.isCommutable()) NewSU->isCommutable = true; // LoadNode may already exist. This can happen when there is another @@ -422,10 +422,10 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, /// FIXME: Move to SelectionDAG? static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const TargetInstrInfo *TII) { - const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); - assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); - unsigned NumRes = TID.getNumDefs(); - for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + unsigned NumRes = MCID.getNumDefs(); + for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -490,7 +490,8 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, ++i; // Skip the ID value. if (InlineAsm::isRegDefKind(Flags) || - InlineAsm::isRegDefEarlyClobberKind(Flags)) { + InlineAsm::isRegDefEarlyClobberKind(Flags) || + InlineAsm::isClobberKind(Flags)) { // Check for def of register or earlyclobber register. for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); @@ -504,10 +505,10 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, } if (!Node->isMachineOpcode()) continue; - const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); - if (!TID.ImplicitDefs) + const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); + if (!MCID.ImplicitDefs) continue; - for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) { + for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg) { CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); } } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 8d61a89..12b1838 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -276,6 +276,43 @@ private: }; } // end anonymous namespace +/// GetCostForDef - Looks up the register class and cost for a given definition. +/// Typically this just means looking up the representative register class, +/// but for untyped values (MVT::untyped) it means inspecting the node's +/// opcode to determine what register class is being generated. +static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, + const TargetLowering *TLI, + const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI, + unsigned &RegClass, unsigned &Cost) { + EVT VT = RegDefPos.GetValue(); + + // Special handling for untyped values. These values can only come from + // the expansion of custom DAG-to-DAG patterns. + if (VT == MVT::untyped) { + const SDNode *Node = RegDefPos.GetNode(); + unsigned Opcode = Node->getMachineOpcode(); + + if (Opcode == TargetOpcode::REG_SEQUENCE) { + unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); + const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); + RegClass = RC->getID(); + Cost = 1; + return; + } + + unsigned Idx = RegDefPos.GetIdx(); + const MCInstrDesc Desc = TII->get(Opcode); + const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI); + RegClass = RC->getID(); + // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a + // better way to determine it. + Cost = 1; + } else { + RegClass = TLI->getRepRegClassFor(VT)->getID(); + Cost = TLI->getRepRegClassCostFor(VT); + } +} /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGRRList::Schedule() { @@ -800,14 +837,14 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NewSU->NodeNum); - const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); - for (unsigned i = 0; i != TID.getNumOperands(); ++i) { - if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { + if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { NewSU->isTwoAddress = true; break; } } - if (TID.isCommutable()) + if (MCID.isCommutable()) NewSU->isCommutable = true; InitNumRegDefsLeft(NewSU); @@ -987,10 +1024,10 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, /// FIXME: Move to SelectionDAG? static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const TargetInstrInfo *TII) { - const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); - assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); - unsigned NumRes = TID.getNumDefs(); - for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + unsigned NumRes = MCID.getNumDefs(); + for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -1055,7 +1092,8 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) { ++i; // Skip the ID value. if (InlineAsm::isRegDefKind(Flags) || - InlineAsm::isRegDefEarlyClobberKind(Flags)) { + InlineAsm::isRegDefEarlyClobberKind(Flags) || + InlineAsm::isClobberKind(Flags)) { // Check for def of register or earlyclobber register. for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); @@ -1070,10 +1108,10 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) { if (!Node->isMachineOpcode()) continue; - const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); - if (!TID.ImplicitDefs) + const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); + if (!MCID.ImplicitDefs) continue; - for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) + for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg) CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); } @@ -1369,6 +1407,21 @@ struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> { bool isReady(SUnit* SU, unsigned CurCycle) const { return true; } }; +#ifndef NDEBUG +template<class SF> +struct reverse_sort : public queue_sort { + SF &SortFunc; + reverse_sort(SF &sf) : SortFunc(sf) {} + reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {} + + bool operator()(SUnit* left, SUnit* right) const { + // reverse left/right rather than simply !SortFunc(left, right) + // to expose different paths in the comparison logic. + return SortFunc(right, left); + } +}; +#endif // NDEBUG + /// bu_ls_rr_sort - Priority function for bottom up register pressure // reduction scheduler. struct bu_ls_rr_sort : public queue_sort { @@ -1569,20 +1622,33 @@ protected: }; template<class SF> -class RegReductionPriorityQueue : public RegReductionPQBase { - static SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker) { - std::vector<SUnit *>::iterator Best = Q.begin(); - for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()), - E = Q.end(); I != E; ++I) - if (Picker(*Best, *I)) - Best = I; - SUnit *V = *Best; - if (Best != prior(Q.end())) - std::swap(*Best, Q.back()); - Q.pop_back(); - return V; +static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) { + std::vector<SUnit *>::iterator Best = Q.begin(); + for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()), + E = Q.end(); I != E; ++I) + if (Picker(*Best, *I)) + Best = I; + SUnit *V = *Best; + if (Best != prior(Q.end())) + std::swap(*Best, Q.back()); + Q.pop_back(); + return V; +} + +template<class SF> +SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) { +#ifndef NDEBUG + if (DAG->StressSched) { + reverse_sort<SF> RPicker(Picker); + return popFromQueueImpl(Q, RPicker); } +#endif + (void)DAG; + return popFromQueueImpl(Q, Picker); +} +template<class SF> +class RegReductionPriorityQueue : public RegReductionPQBase { SF Picker; public: @@ -1603,7 +1669,7 @@ public: SUnit *pop() { if (Queue.empty()) return NULL; - SUnit *V = popFromQueue(Queue, Picker); + SUnit *V = popFromQueue(Queue, Picker, scheduleDAG); V->NodeQueueId = 0; return V; } @@ -1613,7 +1679,7 @@ public: std::vector<SUnit*> DumpQueue = Queue; SF DumpPicker = Picker; while (!DumpQueue.empty()) { - SUnit *SU = popFromQueue(DumpQueue, DumpPicker); + SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG); if (isBottomUp()) dbgs() << "Height " << SU->getHeight() << ": "; else @@ -1778,9 +1844,9 @@ bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { } for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); RegDefPos.IsValid(); RegDefPos.Advance()) { - EVT VT = RegDefPos.GetValue(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - unsigned Cost = TLI->getRepRegClassCostFor(VT); + unsigned RCId, Cost; + GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost); + if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) return true; } @@ -1891,9 +1957,10 @@ void RegReductionPQBase::ScheduledNode(SUnit *SU) { RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { if (SkipRegDefs) continue; - EVT VT = RegDefPos.GetValue(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + + unsigned RCId, Cost; + GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost); + RegPressure[RCId] += Cost; break; } } @@ -1906,16 +1973,16 @@ void RegReductionPQBase::ScheduledNode(SUnit *SU) { RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { if (SkipRegDefs > 0) continue; - EVT VT = RegDefPos.GetValue(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) { + unsigned RCId, Cost; + GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost); + if (RegPressure[RCId] < Cost) { // Register pressure tracking is imprecise. This can happen. But we try // hard not to let it happen because it likely results in poor scheduling. DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n"); RegPressure[RCId] = 0; } else { - RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); + RegPressure[RCId] -= Cost; } } dumpRegPressure(); @@ -1962,13 +2029,9 @@ void RegReductionPQBase::UnscheduledNode(SUnit *SU) { unsigned POpc = PN->getMachineOpcode(); if (POpc == TargetOpcode::IMPLICIT_DEF) continue; - if (POpc == TargetOpcode::EXTRACT_SUBREG) { - EVT VT = PN->getOperand(0).getValueType(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); - continue; - } else if (POpc == TargetOpcode::INSERT_SUBREG || - POpc == TargetOpcode::SUBREG_TO_REG) { + if (POpc == TargetOpcode::EXTRACT_SUBREG || + POpc == TargetOpcode::INSERT_SUBREG || + POpc == TargetOpcode::SUBREG_TO_REG) { EVT VT = PN->getValueType(0); unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); @@ -2543,11 +2606,11 @@ void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) { if (SU->isTwoAddress) { unsigned Opc = SU->getNode()->getMachineOpcode(); - const TargetInstrDesc &TID = TII->get(Opc); - unsigned NumRes = TID.getNumDefs(); - unsigned NumOps = TID.getNumOperands() - NumRes; + const MCInstrDesc &MCID = TII->get(Opc); + unsigned NumRes = MCID.getNumDefs(); + unsigned NumOps = MCID.getNumOperands() - NumRes; for (unsigned i = 0; i != NumOps; ++i) { - if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) { + if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) { SDNode *DU = SU->getNode()->getOperand(i).getNode(); if (DU->getNodeId() != -1 && Op->OrigNode == &(*SUnits)[DU->getNodeId()]) @@ -2727,11 +2790,11 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() { bool isLiveOut = hasOnlyLiveOutUses(SU); unsigned Opc = Node->getMachineOpcode(); - const TargetInstrDesc &TID = TII->get(Opc); - unsigned NumRes = TID.getNumDefs(); - unsigned NumOps = TID.getNumOperands() - NumRes; + const MCInstrDesc &MCID = TII->get(Opc); + unsigned NumRes = MCID.getNumDefs(); + unsigned NumOps = MCID.getNumOperands() - NumRes; for (unsigned j = 0; j != NumOps; ++j) { - if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1) + if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1) continue; SDNode *DU = SU->getNode()->getOperand(j).getNode(); if (DU->getNodeId() == -1) diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 9f2f012..71f07d6 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -17,11 +17,12 @@ #include "ScheduleDAGSDNodes.h" #include "InstrEmitter.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtarget.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -111,7 +112,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, unsigned ResNo = User->getOperand(2).getResNo(); if (Def->isMachineOpcode()) { - const TargetInstrDesc &II = TII->get(Def->getMachineOpcode()); + const MCInstrDesc &II = TII->get(Def->getMachineOpcode()); if (ResNo >= II.getNumDefs() && II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) { PhysReg = Reg; @@ -255,8 +256,8 @@ void ScheduleDAGSDNodes::ClusterNodes() { continue; unsigned Opc = Node->getMachineOpcode(); - const TargetInstrDesc &TID = TII->get(Opc); - if (TID.mayLoad()) + const MCInstrDesc &MCID = TII->get(Opc); + if (MCID.mayLoad()) // Cluster loads from "near" addresses into combined SUnits. ClusterNeighboringLoads(Node); } @@ -378,7 +379,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { } void ScheduleDAGSDNodes::AddSchedEdges() { - const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>(); + const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); // Check to see if the scheduler cares about latencies. bool UnitLatencies = ForceUnitLatencies(); @@ -390,14 +391,14 @@ void ScheduleDAGSDNodes::AddSchedEdges() { if (MainNode->isMachineOpcode()) { unsigned Opc = MainNode->getMachineOpcode(); - const TargetInstrDesc &TID = TII->get(Opc); - for (unsigned i = 0; i != TID.getNumOperands(); ++i) { - if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { + const MCInstrDesc &MCID = TII->get(Opc); + for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { + if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { SU->isTwoAddress = true; break; } } - if (TID.isCommutable()) + if (MCID.isCommutable()) SU->isCommutable = true; } @@ -435,7 +436,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { // it requires a cross class copy (cost < 0). That means we are only // treating "expensive to copy" register dependency as physical register // dependency. This may change in the future though. - if (Cost >= 0) + if (Cost >= 0 && !StressSched) PhysReg = 0; // If this is a ctrl dep, latency is 1. @@ -520,14 +521,7 @@ void ScheduleDAGSDNodes::RegDefIter::Advance() { for (;DefIdx < NodeNumDefs; ++DefIdx) { if (!Node->hasAnyUseOfValue(DefIdx)) continue; - if (Node->isMachineOpcode() && - Node->getMachineOpcode() == TargetOpcode::EXTRACT_SUBREG) { - // Propagate the incoming (full-register) type. I doubt it's needed. - ValueType = Node->getOperand(0).getValueType(); - } - else { - ValueType = Node->getValueType(DefIdx); - } + ValueType = Node->getValueType(DefIdx); ++DefIdx; return; // Found a normal regdef. } @@ -649,7 +643,7 @@ static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, // order number right after the N. MachineBasicBlock *BB = Emitter.getBlock(); MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos(); - SmallVector<SDDbgValue*,2> &DVs = DAG->GetDbgValues(N); + ArrayRef<SDDbgValue*> DVs = DAG->GetDbgValues(N); for (unsigned i = 0, e = DVs.size(); i != e; ++i) { if (DVs[i]->isInvalidated()) continue; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index b5f68f3..9c27b2e 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -135,6 +135,14 @@ namespace llvm { return ValueType; } + const SDNode *GetNode() const { + return Node; + } + + unsigned GetIdx() const { + return DefIdx-1; + } + void Advance(); private: void InitNodeNumDefs(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 68eeb60..35ea0bb 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -598,7 +598,7 @@ void SelectionDAG::DeallocateNode(SDNode *N) { Ordering->remove(N); // If any of the SDDbgValue nodes refer to this SDNode, invalidate them. - SmallVector<SDDbgValue*, 2> &DbgVals = DbgInfo->getSDDbgValues(N); + ArrayRef<SDDbgValue*> DbgVals = DbgInfo->getSDDbgValues(N); for (unsigned i = 0, e = DbgVals.size(); i != e; ++i) DbgVals[i]->setIsInvalidated(); } @@ -3326,13 +3326,13 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, const TargetLowering &TLI) { assert((SrcAlign == 0 || SrcAlign >= DstAlign) && "Expecting memcpy / memset source to meet alignment requirement!"); - // If 'SrcAlign' is zero, that means the memory operation does not need load - // the value, i.e. memset or memcpy from constant string. Otherwise, it's - // the inferred alignment of the source. 'DstAlign', on the other hand, is the - // specified alignment of the memory operation. If it is zero, that means - // it's possible to change the alignment of the destination. 'MemcpyStrSrc' - // indicates whether the memcpy source is constant so it does not need to be - // loaded. + // If 'SrcAlign' is zero, that means the memory operation does not need to + // load the value, i.e. memset or memcpy from constant string. Otherwise, + // it's the inferred alignment of the source. 'DstAlign', on the other hand, + // is the specified alignment of the memory operation. If it is zero, that + // means it's possible to change the alignment of the destination. + // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does + // not need to be loaded. EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, NonScalarIntSafe, MemcpyStrSrc, DAG.getMachineFunction()); @@ -4037,6 +4037,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, unsigned Alignment, const MDNode *TBAAInfo) { + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(VT); @@ -4142,6 +4144,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, unsigned Alignment, const MDNode *TBAAInfo) { + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(Val.getValueType()); @@ -4165,6 +4169,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, MachineMemOperand *MMO) { + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); EVT VT = Val.getValueType(); SDVTList VTs = getVTList(MVT::Other); SDValue Undef = getUNDEF(Ptr.getValueType()); @@ -4191,6 +4197,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, EVT SVT,bool isVolatile, bool isNonTemporal, unsigned Alignment, const MDNode *TBAAInfo) { + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(SVT); @@ -4216,6 +4224,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, MachineMemOperand *MMO) { EVT VT = Val.getValueType(); + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); if (VT == SVT) return getStore(Chain, dl, Val, Ptr, MMO); @@ -5508,9 +5518,9 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { return; SDNode *FromNode = From.getNode(); SDNode *ToNode = To.getNode(); - SmallVector<SDDbgValue *, 2> &DVs = GetDbgValues(FromNode); + ArrayRef<SDDbgValue *> DVs = GetDbgValues(FromNode); SmallVector<SDDbgValue *, 2> ClonedDVs; - for (SmallVector<SDDbgValue *, 2>::iterator I = DVs.begin(), E = DVs.end(); + for (ArrayRef<SDDbgValue *>::iterator I = DVs.begin(), E = DVs.end(); I != E; ++I) { SDDbgValue *Dbg = *I; if (Dbg->getKind() == SDDbgValue::SDNODE) { @@ -5691,24 +5701,39 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, return false; } -/// isPredecessorOf - Return true if this node is a predecessor of N. This node -/// is either an operand of N or it can be reached by traversing up the operands. -/// NOTE: this is an expensive method. Use it carefully. -bool SDNode::isPredecessorOf(SDNode *N) const { - SmallPtrSet<SDNode *, 32> Visited; - SmallVector<SDNode *, 16> Worklist; - Worklist.push_back(N); +/// hasPredecessor - Return true if N is a predecessor of this node. +/// N is either an operand of this node, or can be reached by recursively +/// traversing up the operands. +/// NOTE: This is an expensive method. Use it carefully. +bool SDNode::hasPredecessor(const SDNode *N) const { + SmallPtrSet<const SDNode *, 32> Visited; + SmallVector<const SDNode *, 16> Worklist; + return hasPredecessorHelper(N, Visited, Worklist); +} - do { - N = Worklist.pop_back_val(); - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - SDNode *Op = N->getOperand(i).getNode(); - if (Op == this) - return true; +bool SDNode::hasPredecessorHelper(const SDNode *N, + SmallPtrSet<const SDNode *, 32> &Visited, + SmallVector<const SDNode *, 16> &Worklist) const { + if (Visited.empty()) { + Worklist.push_back(this); + } else { + // Take a look in the visited set. If we've already encountered this node + // we needn't search further. + if (Visited.count(N)) + return true; + } + + // Haven't visited N yet. Continue the search. + while (!Worklist.empty()) { + const SDNode *M = Worklist.pop_back_val(); + for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { + SDNode *Op = M->getOperand(i).getNode(); if (Visited.insert(Op)) Worklist.push_back(Op); + if (Op == N) + return true; } - } while (!Worklist.empty()); + } return false; } @@ -5863,6 +5888,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FSUB: return "fsub"; case ISD::FMUL: return "fmul"; case ISD::FDIV: return "fdiv"; + case ISD::FMA: return "fma"; case ISD::FREM: return "frem"; case ISD::FCOPYSIGN: return "fcopysign"; case ISD::FGETSIGN: return "fgetsign"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 7a8a975..81b03ee 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -286,22 +286,10 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, assert(PartVT.getVectorNumElements() == ValueVT.getVectorNumElements() && "Cannot handle this kind of promotion"); // Promoted vector extract - unsigned NumElts = ValueVT.getVectorNumElements(); - SmallVector<SDValue, 8> NewOps; - for (unsigned i = 0; i < NumElts; ++i) { - SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - PartVT.getScalarType(), Val ,DAG.getIntPtrConstant(i)); - SDValue Cast; + bool Smaller = ValueVT.bitsLE(PartVT); + return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), + DL, ValueVT, Val); - bool Smaller = ValueVT.bitsLE(PartVT); - - Cast = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), - DL, ValueVT.getScalarType(), Ext); - - NewOps.push_back(Cast); - } - return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, - &NewOps[0], NewOps.size()); } // Trivial bitcast if the types are the same size and the destination @@ -310,9 +298,17 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, TLI.isTypeLegal(ValueVT)) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); - assert(ValueVT.getVectorElementType() == PartVT && - ValueVT.getVectorNumElements() == 1 && + // Handle cases such as i8 -> <1 x i1> + assert(ValueVT.getVectorNumElements() == 1 && "Only trivial scalar-to-vector conversions should get here!"); + + if (ValueVT.getVectorNumElements() == 1 && + ValueVT.getVectorElementType() != PartVT) { + bool Smaller = ValueVT.bitsLE(PartVT); + Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), + DL, ValueVT.getScalarType(), Val); + } + return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); } @@ -453,7 +449,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, // Bitconvert vector->vector case. Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } else if (PartVT.isVector() && - PartVT.getVectorElementType() == ValueVT.getVectorElementType()&& + PartVT.getVectorElementType() == ValueVT.getVectorElementType() && PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) { EVT ElementVT = PartVT.getVectorElementType(); // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in @@ -475,28 +471,23 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts); } else if (PartVT.isVector() && PartVT.getVectorElementType().bitsGE( - ValueVT.getVectorElementType())&& + ValueVT.getVectorElementType()) && PartVT.getVectorNumElements() == ValueVT.getVectorNumElements()) { // Promoted vector extract - unsigned NumElts = ValueVT.getVectorNumElements(); - SmallVector<SDValue, 8> NewOps; - for (unsigned i = 0; i < NumElts; ++i) { - SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - ValueVT.getScalarType(), Val ,DAG.getIntPtrConstant(i)); - SDValue Cast = DAG.getNode(ISD::ANY_EXTEND, - DL, PartVT.getScalarType(), Ext); - NewOps.push_back(Cast); - } - Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, - &NewOps[0], NewOps.size()); + bool Smaller = PartVT.bitsLE(ValueVT); + Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), + DL, PartVT, Val); } else{ // Vector -> scalar conversion. - assert(ValueVT.getVectorElementType() == PartVT && - ValueVT.getVectorNumElements() == 1 && + assert(ValueVT.getVectorNumElements() == 1 && "Only trivial vector-to-scalar conversions should get here!"); Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, DAG.getIntPtrConstant(0)); + + bool Smaller = ValueVT.bitsLE(PartVT); + Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), + DL, PartVT, Val); } Parts[0] = Val; @@ -1280,6 +1271,24 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, return true; } +/// Return branch probability calculated by BranchProbabilityInfo for IR blocks. +uint32_t SelectionDAGBuilder::getEdgeWeight(MachineBasicBlock *Src, + MachineBasicBlock *Dst) { + BranchProbabilityInfo *BPI = FuncInfo.BPI; + if (!BPI) + return 0; + BasicBlock *SrcBB = const_cast<BasicBlock*>(Src->getBasicBlock()); + BasicBlock *DstBB = const_cast<BasicBlock*>(Dst->getBasicBlock()); + return BPI->getEdgeWeight(SrcBB, DstBB); +} + +void SelectionDAGBuilder::addSuccessorWithWeight(MachineBasicBlock *Src, + MachineBasicBlock *Dst) { + uint32_t weight = getEdgeWeight(Src, Dst); + Src->addSuccessor(Dst, weight); +} + + static bool InBlock(const Value *V, const BasicBlock *BB) { if (const Instruction *I = dyn_cast<Instruction>(V)) return I->getParent() == BB; @@ -1549,8 +1558,8 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, } // Update successor info - SwitchBB->addSuccessor(CB.TrueBB); - SwitchBB->addSuccessor(CB.FalseBB); + addSuccessorWithWeight(SwitchBB, CB.TrueBB); + addSuccessorWithWeight(SwitchBB, CB.FalseBB); // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. @@ -1694,8 +1703,8 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock* MBB = B.Cases[0].ThisBB; - SwitchBB->addSuccessor(B.Default); - SwitchBB->addSuccessor(MBB); + addSuccessorWithWeight(SwitchBB, B.Default); + addSuccessorWithWeight(SwitchBB, MBB); SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), MVT::Other, CopyTo, RangeCmp, @@ -1718,7 +1727,8 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg, VT); SDValue Cmp; - if (CountPopulation_64(B.Mask) == 1) { + unsigned PopCount = CountPopulation_64(B.Mask); + if (PopCount == 1) { // Testing for a single bit; just compare the shift count with what it // would need to be to shift a 1 bit in that position. Cmp = DAG.getSetCC(getCurDebugLoc(), @@ -1726,6 +1736,13 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, ShiftOp, DAG.getConstant(CountTrailingZeros_64(B.Mask), VT), ISD::SETEQ); + } else if (PopCount == BB.Range) { + // There is only one zero bit in the range, test for it directly. + Cmp = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(VT), + ShiftOp, + DAG.getConstant(CountTrailingOnes_64(B.Mask), VT), + ISD::SETNE); } else { // Make desired shift SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), VT, @@ -1740,8 +1757,8 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, ISD::SETNE); } - SwitchBB->addSuccessor(B.TargetBB); - SwitchBB->addSuccessor(NextMBB); + addSuccessorWithWeight(SwitchBB, B.TargetBB); + addSuccessorWithWeight(SwitchBB, NextMBB); SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), MVT::Other, getControlRoot(), @@ -1981,8 +1998,9 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, // table. MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB); CurMF->insert(BBI, JumpTableBB); - CR.CaseBB->addSuccessor(Default); - CR.CaseBB->addSuccessor(JumpTableBB); + + addSuccessorWithWeight(CR.CaseBB, Default); + addSuccessorWithWeight(CR.CaseBB, JumpTableBB); // Build a vector of destination BBs, corresponding to each target // of the jump table. If the value of the jump table slot corresponds to @@ -2009,7 +2027,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, E = DestBBs.end(); I != E; ++I) { if (!SuccsHandled[(*I)->getNumber()]) { SuccsHandled[(*I)->getNumber()] = true; - JumpTableBB->addSuccessor(*I); + addSuccessorWithWeight(JumpTableBB, *I); } } @@ -2428,8 +2446,10 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { succs.push_back(I.getSuccessor(i)); array_pod_sort(succs.begin(), succs.end()); succs.erase(std::unique(succs.begin(), succs.end()), succs.end()); - for (unsigned i = 0, e = succs.size(); i != e; ++i) - IndirectBrMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]); + for (unsigned i = 0, e = succs.size(); i != e; ++i) { + MachineBasicBlock *Succ = FuncInfo.MBBMap[succs[i]]; + addSuccessorWithWeight(IndirectBrMBB, Succ); + } DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(), MVT::Other, getControlRoot(), @@ -2489,6 +2509,22 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { Op1.getValueType(), Op1, Op2)); } +void SelectionDAGBuilder::visitSDiv(const User &I) { + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + + // Turn exact SDivs into multiplications. + // FIXME: This should be in DAGCombiner, but it doesn't have access to the + // exact bit. + if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() && + !isa<ConstantSDNode>(Op1) && + isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue()) + setValue(&I, TLI.BuildExactSDIV(Op1, Op2, getCurDebugLoc(), DAG)); + else + setValue(&I, DAG.getNode(ISD::SDIV, getCurDebugLoc(), Op1.getValueType(), + Op1, Op2)); +} + void SelectionDAGBuilder::visitICmp(const User &I) { ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I)) @@ -2855,7 +2891,7 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { bool IntoUndef = isa<UndefValue>(Op0); bool FromUndef = isa<UndefValue>(Op1); - unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end()); + unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); SmallVector<EVT, 4> AggValueVTs; ComputeValueVTs(TLI, AggTy, AggValueVTs); @@ -2895,7 +2931,7 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { const Type *ValTy = I.getType(); bool OutOfUndef = isa<UndefValue>(Op0); - unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end()); + unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); SmallVector<EVT, 4> ValValueVTs; ComputeValueVTs(TLI, ValTy, ValValueVTs); @@ -4623,6 +4659,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::pow: visitPow(I); return 0; + case Intrinsic::fma: + setValue(&I, DAG.getNode(ISD::FMA, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)))); + return 0; case Intrinsic::convert_to_fp16: setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl, MVT::i16, getValue(I.getArgOperand(0)))); @@ -4759,6 +4802,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::flt_rounds: setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32)); return 0; + + case Intrinsic::expect: { + // Just replace __builtin_expect(exp, c) with EXP. + setValue(&I, getValue(I.getArgOperand(0))); + return 0; + } + case Intrinsic::trap: { StringRef TrapFuncName = getTrapFunctionName(); if (TrapFuncName.empty()) { @@ -4789,15 +4839,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return implVisitAluOverflow(I, ISD::SMULO); case Intrinsic::prefetch: { - SDValue Ops[4]; + SDValue Ops[5]; unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); Ops[0] = getRoot(); Ops[1] = getValue(I.getArgOperand(0)); Ops[2] = getValue(I.getArgOperand(1)); Ops[3] = getValue(I.getArgOperand(2)); + Ops[4] = getValue(I.getArgOperand(3)); DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl, DAG.getVTList(MVT::Other), - &Ops[0], 4, + &Ops[0], 5, EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)), 0, /* align */ @@ -5415,54 +5466,6 @@ typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; } // end anonymous namespace -/// isAllocatableRegister - If the specified register is safe to allocate, -/// i.e. it isn't a stack pointer or some other special register, return the -/// register class for the register. Otherwise, return null. -static const TargetRegisterClass * -isAllocatableRegister(unsigned Reg, MachineFunction &MF, - const TargetLowering &TLI, - const TargetRegisterInfo *TRI) { - EVT FoundVT = MVT::Other; - const TargetRegisterClass *FoundRC = 0; - for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(), - E = TRI->regclass_end(); RCI != E; ++RCI) { - EVT ThisVT = MVT::Other; - - const TargetRegisterClass *RC = *RCI; - // If none of the value types for this register class are valid, we - // can't use it. For example, 64-bit reg classes on 32-bit targets. - for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); - I != E; ++I) { - if (TLI.isTypeLegal(*I)) { - // If we have already found this register in a different register class, - // choose the one with the largest VT specified. For example, on - // PowerPC, we favor f64 register classes over f32. - if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) { - ThisVT = *I; - break; - } - } - } - - if (ThisVT == MVT::Other) continue; - - // NOTE: This isn't ideal. In particular, this might allocate the - // frame pointer in functions that need it (due to them not being taken - // out of allocation, because a variable sized allocation hasn't been seen - // yet). This is a slight code pessimization, but should still work. - for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF), - E = RC->allocation_order_end(MF); I != E; ++I) - if (*I == Reg) { - // We found a matching register class. Keep looking at others in case - // we find one with larger registers that this physreg is also in. - FoundRC = RC; - FoundVT = ThisVT; - break; - } - } - return FoundRC; -} - /// GetRegistersForValue - Assign registers (virtual or physical) for the /// specified operand. We prefer to assign virtual registers, to allow the /// register allocator to handle the assignment process. However, if the asm @@ -5597,52 +5600,6 @@ static void GetRegistersForValue(SelectionDAG &DAG, return; } - // This is a reference to a register class that doesn't directly correspond - // to an LLVM register class. Allocate NumRegs consecutive, available, - // registers from the class. - std::vector<unsigned> RegClassRegs - = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode, - OpInfo.ConstraintVT); - - const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); - unsigned NumAllocated = 0; - for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) { - unsigned Reg = RegClassRegs[i]; - // See if this register is available. - if ((isOutReg && OutputRegs.count(Reg)) || // Already used. - (isInReg && InputRegs.count(Reg))) { // Already used. - // Make sure we find consecutive registers. - NumAllocated = 0; - continue; - } - - // Check to see if this register is allocatable (i.e. don't give out the - // stack pointer). - const TargetRegisterClass *RC = isAllocatableRegister(Reg, MF, TLI, TRI); - if (!RC) { // Couldn't allocate this register. - // Reset NumAllocated to make sure we return consecutive registers. - NumAllocated = 0; - continue; - } - - // Okay, this register is good, we can use it. - ++NumAllocated; - - // If we allocated enough consecutive registers, succeed. - if (NumAllocated == NumRegs) { - unsigned RegStart = (i-NumAllocated)+1; - unsigned RegEnd = i+1; - // Mark all of the allocated registers used. - for (unsigned i = RegStart; i != RegEnd; ++i) - Regs.push_back(RegClassRegs[i]); - - OpInfo.AssignedRegs = RegsForValue(Regs, *RC->vt_begin(), - OpInfo.ConstraintVT); - OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI); - return; - } - } - // Otherwise, we couldn't allocate enough registers for this. } @@ -5749,10 +5706,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; if (OpInfo.ConstraintVT != Input.ConstraintVT) { + std::pair<unsigned, const TargetRegisterClass*> MatchRC = + TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass*> InputRC = + TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || - (OpInfo.ConstraintVT.getSizeInBits() != - Input.ConstraintVT.getSizeInBits())) { + (MatchRC.second != InputRC.second)) { report_fatal_error("Unsupported asm: input constraint" " with a matching output constraint of" " incompatible type!"); @@ -6015,8 +5975,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { "Don't know how to handle indirect register inputs yet!"); // Copy the input into the appropriate registers. - if (OpInfo.AssignedRegs.Regs.empty() || - !OpInfo.AssignedRegs.areValueTypesLegal(TLI)) + if (OpInfo.AssignedRegs.Regs.empty()) report_fatal_error("Couldn't allocate input reg for constraint '" + Twine(OpInfo.ConstraintCode) + "'!"); @@ -6031,8 +5990,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Add the clobbered value to the operand list, so that the register // allocator is aware that the physreg got clobbered. if (!OpInfo.AssignedRegs.Regs.empty()) - OpInfo.AssignedRegs.AddInlineAsmOperands( - InlineAsm::Kind_RegDefEarlyClobber, + OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber, false, 0, DAG, AsmNodeOperands); break; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 8376d41..a0884eb 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -434,6 +434,9 @@ private: const Value* SV, MachineBasicBlock* Default, MachineBasicBlock *SwitchBB); + + uint32_t getEdgeWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst); + void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst); public: void visitSwitchCase(CaseBlock &CB, MachineBasicBlock *SwitchBB); @@ -464,7 +467,7 @@ private: void visitSRem(const User &I) { visitBinary(I, ISD::SREM); } void visitFRem(const User &I) { visitBinary(I, ISD::FREM); } void visitUDiv(const User &I) { visitBinary(I, ISD::UDIV); } - void visitSDiv(const User &I) { visitBinary(I, ISD::SDIV); } + void visitSDiv(const User &I); void visitFDiv(const User &I) { visitBinary(I, ISD::FDIV); } void visitAnd (const User &I) { visitBinary(I, ISD::AND); } void visitOr (const User &I) { visitBinary(I, ISD::OR); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 771b008..87bb296 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" #include "llvm/Function.h" @@ -68,6 +69,11 @@ static cl::opt<bool> EnableFastISelAbort("fast-isel-abort", cl::Hidden, cl::desc("Enable abort calls when \"fast\" instruction fails")); +static cl::opt<bool> +UseMBPI("use-mbpi", + cl::desc("use Machine Branch Probability Info"), + cl::init(true), cl::Hidden); + #ifndef NDEBUG static cl::opt<bool> ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden, @@ -186,6 +192,7 @@ SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, DAGSize(0) { initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry()); + initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry()); } SelectionDAGISel::~SelectionDAGISel() { @@ -199,6 +206,8 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<AliasAnalysis>(); AU.addRequired<GCModuleInfo>(); AU.addPreserved<GCModuleInfo>(); + if (UseMBPI && OptLevel != CodeGenOpt::None) + AU.addRequired<BranchProbabilityInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -262,6 +271,12 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { CurDAG->init(*MF); FuncInfo->set(Fn, *MF); + + if (UseMBPI && OptLevel != CodeGenOpt::None) + FuncInfo->BPI = &getAnalysis<BranchProbabilityInfo>(); + else + FuncInfo->BPI = 0; + SDB->init(GFI, *AA); SelectAllBasicBlocks(Fn); @@ -339,9 +354,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { const MachineBasicBlock *MBB = I; for (MachineBasicBlock::const_iterator II = MBB->begin(), IE = MBB->end(); II != IE; ++II) { - const TargetInstrDesc &TID = TM.getInstrInfo()->get(II->getOpcode()); + const MCInstrDesc &MCID = TM.getInstrInfo()->get(II->getOpcode()); - if ((TID.isCall() && !TID.isReturn()) || + if ((MCID.isCall() && !MCID.isReturn()) || II->isStackAligningInlineAsm()) { MFI->setHasCalls(true); goto done; @@ -666,7 +681,7 @@ void SelectionDAGISel::PrepareEHLandingPad() { // landing pad can thus be detected via the MachineModuleInfo. MCSymbol *Label = MF->getMMI().addLandingPad(FuncInfo->MBB); - const TargetInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL); + const MCInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL); BuildMI(*FuncInfo->MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) .addSym(Label); @@ -2596,9 +2611,9 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (EmitNodeInfo & OPFL_MemRefs) { // Only attach load or store memory operands if the generated // instruction may load or store. - const TargetInstrDesc &TID = TM.getInstrInfo()->get(TargetOpc); - bool mayLoad = TID.mayLoad(); - bool mayStore = TID.mayStore(); + const MCInstrDesc &MCID = TM.getInstrInfo()->get(TargetOpc); + bool mayLoad = MCID.mayLoad(); + bool mayStore = MCID.mayStore(); unsigned NumMemRefs = 0; for (SmallVector<MachineMemOperand*, 2>::const_iterator I = diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index cf6069a..2626ac3 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -81,6 +81,9 @@ static void InitLibcallNames(const char **Names) { Names[RTLIB::MUL_I32] = "__mulsi3"; Names[RTLIB::MUL_I64] = "__muldi3"; Names[RTLIB::MUL_I128] = "__multi3"; + Names[RTLIB::MULO_I32] = "__mulosi4"; + Names[RTLIB::MULO_I64] = "__mulodi4"; + Names[RTLIB::MULO_I128] = "__muloti4"; Names[RTLIB::SDIV_I8] = "__divqi3"; Names[RTLIB::SDIV_I16] = "__divhi3"; Names[RTLIB::SDIV_I32] = "__divsi3"; @@ -136,6 +139,10 @@ static void InitLibcallNames(const char **Names) { Names[RTLIB::REM_F64] = "fmod"; Names[RTLIB::REM_F80] = "fmodl"; Names[RTLIB::REM_PPCF128] = "fmodl"; + Names[RTLIB::FMA_F32] = "fmaf"; + Names[RTLIB::FMA_F64] = "fma"; + Names[RTLIB::FMA_F80] = "fmal"; + Names[RTLIB::FMA_PPCF128] = "fmal"; Names[RTLIB::POWI_F32] = "__powisf2"; Names[RTLIB::POWI_F64] = "__powidf2"; Names[RTLIB::POWI_F80] = "__powixf2"; @@ -673,10 +680,16 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, NewVT = EltTy; IntermediateVT = NewVT; + unsigned NewVTSize = NewVT.getSizeInBits(); + + // Convert sizes such as i33 to i64. + if (!isPowerOf2_32(NewVTSize)) + NewVTSize = NextPowerOf2(NewVTSize); + EVT DestVT = TLI->getRegisterType(NewVT); RegisterVT = DestVT; if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. - return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); + return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); // Otherwise, promotion or legal types use the same number of registers as // the vector decimated to the appropriate level. @@ -965,8 +978,14 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, EVT DestVT = getRegisterType(Context, NewVT); RegisterVT = DestVT; + unsigned NewVTSize = NewVT.getSizeInBits(); + + // Convert sizes such as i33 to i64. + if (!isPowerOf2_32(NewVTSize)) + NewVTSize = NextPowerOf2(NewVTSize); + if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. - return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); + return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); // Otherwise, promotion or legal types use the same number of registers as // the vector decimated to the appropriate level. @@ -1762,9 +1781,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::BITCAST: // If this is an FP->Int bitcast and if the sign bit is the only // thing demanded, turn this into a FGETSIGN. - if (NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) && - Op.getOperand(0).getValueType().isFloatingPoint() && - !Op.getOperand(0).getValueType().isVector()) { + if (!Op.getOperand(0).getValueType().isVector() && + NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) && + Op.getOperand(0).getValueType().isFloatingPoint()) { bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple()) { @@ -1902,7 +1921,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // comparisons. if (isa<ConstantSDNode>(N0.getNode())) return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond)); - + if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { const APInt &C1 = N1C->getAPIntValue(); @@ -2608,7 +2627,6 @@ PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { TargetLowering::ConstraintType TargetLowering::getConstraintType(const std::string &Constraint) const { - // FIXME: lots more standard ones to handle. if (Constraint.size() == 1) { switch (Constraint[0]) { default: break; @@ -2661,9 +2679,9 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, SelectionDAG &DAG) const { - + if (Constraint.length() > 1) return; - + char ConstraintLetter = Constraint[0]; switch (ConstraintLetter) { default: break; @@ -2722,13 +2740,6 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, } } -std::vector<unsigned> TargetLowering:: -getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { - return std::vector<unsigned>(); -} - - std::pair<unsigned, const TargetRegisterClass*> TargetLowering:: getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { @@ -2853,7 +2864,7 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( report_fatal_error("Indirect operand for inline asm not a pointer!"); OpTy = PtrTy->getElementType(); } - + // Look for vector wrapped in a struct. e.g. { <16 x i8> }. if (const StructType *STy = dyn_cast<StructType>(OpTy)) if (STy->getNumElements() == 1) @@ -2955,10 +2966,13 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; if (OpInfo.ConstraintVT != Input.ConstraintVT) { + std::pair<unsigned, const TargetRegisterClass*> MatchRC = + getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass*> InputRC = + getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || - (OpInfo.ConstraintVT.getSizeInBits() != - Input.ConstraintVT.getSizeInBits())) { + (MatchRC.second != InputRC.second)) { report_fatal_error("Unsupported asm: input constraint" " with a matching output constraint of" " incompatible type!"); @@ -3204,6 +3218,32 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM, return true; } +/// BuildExactDiv - Given an exact SDIV by a constant, create a multiplication +/// with the multiplicative inverse of the constant. +SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl, + SelectionDAG &DAG) const { + ConstantSDNode *C = cast<ConstantSDNode>(Op2); + APInt d = C->getAPIntValue(); + assert(d != 0 && "Division by zero!"); + + // Shift the value upfront if it is even, so the LSB is one. + unsigned ShAmt = d.countTrailingZeros(); + if (ShAmt) { + // TODO: For UDIV use SRL instead of SRA. + SDValue Amt = DAG.getConstant(ShAmt, getShiftAmountTy(Op1.getValueType())); + Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt); + d = d.ashr(ShAmt); + } + + // Calculate the multiplicative inverse, using Newton's method. + APInt t, xn = d; + while ((t = d*xn) != 1) + xn *= APInt(d.getBitWidth(), 2) - t; + + Op2 = DAG.getConstant(xn, Op1.getValueType()); + return DAG.getNode(ISD::MUL, dl, Op1.getValueType(), Op1, Op2); +} + /// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. See: |