diff options
author | rdivacky <rdivacky@FreeBSD.org> | 2010-05-04 16:11:02 +0000 |
---|---|---|
committer | rdivacky <rdivacky@FreeBSD.org> | 2010-05-04 16:11:02 +0000 |
commit | 750ce4d809c7e2a298a389a512a17652ff5be3f2 (patch) | |
tree | 70fbd90da02177c8e6ef82adba9fa8ace285a5e3 /lib/CodeGen/SelectionDAG/DAGCombiner.cpp | |
parent | 5f970ec96e421f64db6b1c6509a902ea73d98cc7 (diff) | |
download | FreeBSD-src-750ce4d809c7e2a298a389a512a17652ff5be3f2.zip FreeBSD-src-750ce4d809c7e2a298a389a512a17652ff5be3f2.tar.gz |
Update LLVM to r103004.
Diffstat (limited to 'lib/CodeGen/SelectionDAG/DAGCombiner.cpp')
-rw-r--r-- | lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 612 |
1 files changed, 558 insertions, 54 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index a336e0a..3639f80 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -129,6 +129,14 @@ namespace { bool CombineToPreIndexedLoadStore(SDNode *N); bool CombineToPostIndexedLoadStore(SDNode *N); + void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); + SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); + SDValue SExtPromoteOperand(SDValue Op, EVT PVT); + SDValue ZExtPromoteOperand(SDValue Op, EVT PVT); + SDValue PromoteIntBinOp(SDValue Op); + SDValue PromoteIntShiftOp(SDValue Op); + SDValue PromoteExtend(SDValue Op); + bool PromoteLoad(SDValue Op); /// combine - call the node-specific routine that knows how to fold each /// particular type of node. If that doesn't do anything, try the @@ -254,24 +262,28 @@ namespace { /// looking for a better chain (aliasing node.) SDValue FindBetterChain(SDNode *N, SDValue Chain); - /// getShiftAmountTy - Returns a type large enough to hold any valid - /// shift amount - before type legalization these can be huge. - EVT getShiftAmountTy() { - return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy(); - } - -public: + public: DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) - : DAG(D), - TLI(D.getTargetLoweringInfo()), - Level(Unrestricted), - OptLevel(OL), - LegalOperations(false), - LegalTypes(false), - AA(A) {} + : DAG(D), TLI(D.getTargetLoweringInfo()), Level(Unrestricted), + OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {} /// Run - runs the dag combiner on all nodes in the work list void Run(CombineLevel AtLevel); + + SelectionDAG &getDAG() const { return DAG; } + + /// getShiftAmountTy - Returns a type large enough to hold any valid + /// shift amount - before type legalization these can be huge. + EVT getShiftAmountTy() { + return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy(); + } + + /// isTypeLegal - This method returns true if we are running before type + /// legalization or if the specified VT is legal. + bool isTypeLegal(const EVT &VT) { + if (!LegalTypes) return true; + return TLI.isTypeLegal(VT); + } }; } @@ -577,9 +589,8 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, return SDValue(N, 0); } -void -DAGCombiner::CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt & - TLO) { +void DAGCombiner:: +CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { // Replace all uses. If any nodes become isomorphic to other nodes and // are deleted, make sure to remove them from our worklist. WorkListRemover DeadNodes(*this); @@ -609,7 +620,7 @@ DAGCombiner::CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt & /// it can be simplified or if things it uses can be simplified by bit /// propagation. If so, return true. bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { - TargetLowering::TargetLoweringOpt TLO(DAG); + TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); APInt KnownZero, KnownOne; if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) return false; @@ -629,6 +640,274 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { return true; } +void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { + DebugLoc dl = Load->getDebugLoc(); + EVT VT = Load->getValueType(0); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0)); + + DEBUG(dbgs() << "\nReplacing.9 "; + Load->dump(&DAG); + dbgs() << "\nWith: "; + Trunc.getNode()->dump(&DAG); + dbgs() << '\n'); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc, &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1), + &DeadNodes); + removeFromWorkList(Load); + DAG.DeleteNode(Load); + AddToWorkList(Trunc.getNode()); +} + +SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { + Replace = false; + DebugLoc dl = Op.getDebugLoc(); + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { + EVT MemVT = LD->getMemoryVT(); + ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) + : LD->getExtensionType(); + Replace = true; + return DAG.getExtLoad(ExtType, dl, PVT, + LD->getChain(), LD->getBasePtr(), + LD->getSrcValue(), LD->getSrcValueOffset(), + MemVT, LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); + } + + unsigned Opc = Op.getOpcode(); + switch (Opc) { + default: break; + case ISD::AssertSext: + return DAG.getNode(ISD::AssertSext, dl, PVT, + SExtPromoteOperand(Op.getOperand(0), PVT), + Op.getOperand(1)); + case ISD::AssertZext: + return DAG.getNode(ISD::AssertZext, dl, PVT, + ZExtPromoteOperand(Op.getOperand(0), PVT), + Op.getOperand(1)); + case ISD::Constant: { + unsigned ExtOpc = + Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + return DAG.getNode(ExtOpc, dl, PVT, Op); + } + } + + if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT)) + return SDValue(); + return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op); +} + +SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { + if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT)) + return SDValue(); + EVT OldVT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + bool Replace = false; + SDValue NewOp = PromoteOperand(Op, PVT, Replace); + if (NewOp.getNode() == 0) + return SDValue(); + AddToWorkList(NewOp.getNode()); + + if (Replace) + ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp, + DAG.getValueType(OldVT)); +} + +SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { + EVT OldVT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + bool Replace = false; + SDValue NewOp = PromoteOperand(Op, PVT, Replace); + if (NewOp.getNode() == 0) + return SDValue(); + AddToWorkList(NewOp.getNode()); + + if (Replace) + ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); + return DAG.getZeroExtendInReg(NewOp, dl, OldVT); +} + +/// PromoteIntBinOp - Promote the specified integer binary operation if the +/// target indicates it is beneficial. e.g. On x86, it's usually better to +/// promote i16 operations to i32 since i16 instructions are longer. +SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { + if (!LegalOperations) + return SDValue(); + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return SDValue(); + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return SDValue(); + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. + if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + + bool Replace0 = false; + SDValue N0 = Op.getOperand(0); + SDValue NN0 = PromoteOperand(N0, PVT, Replace0); + if (NN0.getNode() == 0) + return SDValue(); + + bool Replace1 = false; + SDValue N1 = Op.getOperand(1); + SDValue NN1 = PromoteOperand(N1, PVT, Replace1); + if (NN1.getNode() == 0) + return SDValue(); + + AddToWorkList(NN0.getNode()); + AddToWorkList(NN1.getNode()); + + if (Replace0) + ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode()); + if (Replace1) + ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode()); + + DEBUG(dbgs() << "\nPromoting "; + Op.getNode()->dump(&DAG)); + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getNode(Opc, dl, PVT, NN0, NN1)); + } + return SDValue(); +} + +/// PromoteIntShiftOp - Promote the specified integer shift operation if the +/// target indicates it is beneficial. e.g. On x86, it's usually better to +/// promote i16 operations to i32 since i16 instructions are longer. +SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { + if (!LegalOperations) + return SDValue(); + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return SDValue(); + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return SDValue(); + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. + if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + + bool Replace = false; + SDValue N0 = Op.getOperand(0); + if (Opc == ISD::SRA) + N0 = SExtPromoteOperand(Op.getOperand(0), PVT); + else if (Opc == ISD::SRL) + N0 = ZExtPromoteOperand(Op.getOperand(0), PVT); + else + N0 = PromoteOperand(N0, PVT, Replace); + if (N0.getNode() == 0) + return SDValue(); + + AddToWorkList(N0.getNode()); + if (Replace) + ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode()); + + DEBUG(dbgs() << "\nPromoting "; + Op.getNode()->dump(&DAG)); + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1))); + } + return SDValue(); +} + +SDValue DAGCombiner::PromoteExtend(SDValue Op) { + if (!LegalOperations) + return SDValue(); + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return SDValue(); + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return SDValue(); + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. + if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + // fold (aext (aext x)) -> (aext x) + // fold (aext (zext x)) -> (zext x) + // fold (aext (sext x)) -> (sext x) + DEBUG(dbgs() << "\nPromoting "; + Op.getNode()->dump(&DAG)); + return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), VT, Op.getOperand(0)); + } + return SDValue(); +} + +bool DAGCombiner::PromoteLoad(SDValue Op) { + if (!LegalOperations) + return false; + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return false; + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return false; + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. + if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + + DebugLoc dl = Op.getDebugLoc(); + SDNode *N = Op.getNode(); + LoadSDNode *LD = cast<LoadSDNode>(N); + EVT MemVT = LD->getMemoryVT(); + ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) + : LD->getExtensionType(); + SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, + LD->getChain(), LD->getBasePtr(), + LD->getSrcValue(), LD->getSrcValueOffset(), + MemVT, LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); + SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD); + + DEBUG(dbgs() << "\nPromoting "; + N->dump(&DAG); + dbgs() << "\nTo: "; + Result.getNode()->dump(&DAG); + dbgs() << '\n'); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result, &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1), &DeadNodes); + removeFromWorkList(N); + DAG.DeleteNode(N); + AddToWorkList(Result.getNode()); + return true; + } + return false; +} + + //===----------------------------------------------------------------------===// // Main DAG Combiner implementation //===----------------------------------------------------------------------===// @@ -732,7 +1011,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { } SDValue DAGCombiner::visit(SDNode *N) { - switch(N->getOpcode()) { + switch (N->getOpcode()) { default: break; case ISD::TokenFactor: return visitTokenFactor(N); case ISD::MERGE_VALUES: return visitMERGE_VALUES(N); @@ -817,6 +1096,35 @@ SDValue DAGCombiner::combine(SDNode *N) { } } + // If nothing happened still, try promoting the operation. + if (RV.getNode() == 0) { + switch (N->getOpcode()) { + default: break; + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + RV = PromoteIntBinOp(SDValue(N, 0)); + break; + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + RV = PromoteIntShiftOp(SDValue(N, 0)); + break; + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + RV = PromoteExtend(SDValue(N, 0)); + break; + case ISD::LOAD: + if (PromoteLoad(SDValue(N, 0))) + RV = SDValue(N, 0); + break; + } + } + // If N is a commutative binary node, try commuting it to enable more // sdisel CSE. if (RV.getNode() == 0 && @@ -1720,8 +2028,10 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // into a vsetcc. EVT Op0VT = N0.getOperand(0).getValueType(); if ((N0.getOpcode() == ISD::ZERO_EXTEND || - N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND || + // Avoid infinite looping with PromoteIntBinOp. + (N0.getOpcode() == ISD::ANY_EXTEND && + (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) || (N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) && !VT.isVector() && Op0VT == N1.getOperand(0).getValueType() && @@ -2579,7 +2889,13 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { HiBitsMask); } - return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue(); + if (N1C) { + SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue()); + if (NewSHL.getNode()) + return NewSHL; + } + + return SDValue(); } SDValue DAGCombiner::visitSRA(SDNode *N) { @@ -2693,7 +3009,13 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1); - return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue(); + if (N1C) { + SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue()); + if (NewSRA.getNode()) + return NewSRA; + } + + return SDValue(); } SDValue DAGCombiner::visitSRL(SDNode *N) { @@ -2731,6 +3053,15 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), DAG.getConstant(c1 + c2, N1.getValueType())); } + + // fold (srl (shl x, c), c) -> (and x, cst2) + if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 && + N0.getValueSizeInBits() <= 64) { + uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits(); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getConstant(~0ULL >> ShAmt, VT)); + } + // fold (srl (anyextend x), c) -> (anyextend (srl x, c)) if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { @@ -2739,10 +3070,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N1C->getZExtValue() >= SmallVT.getSizeInBits()) return DAG.getUNDEF(VT); - SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT, - N0.getOperand(0), N1); - AddToWorkList(SmallShift.getNode()); - return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift); + if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { + SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT, + N0.getOperand(0), N1); + AddToWorkList(SmallShift.getNode()); + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift); + } } // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign @@ -3205,24 +3538,40 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (N0.getOpcode() == ISD::SETCC) { // sext(setcc) -> sext_in_reg(vsetcc) for vectors. - if (VT.isVector() && + // Only do this before legalize for now. + if (VT.isVector() && !LegalOperations) { + EVT N0VT = N0.getOperand(0).getValueType(); // We know that the # elements of the results is the same as the // # elements of the compare (and the # elements of the compare result // for that matter). Check to see that they are the same size. If so, // we know that the element size of the sext'd result matches the // element size of the compare operands. - VT.getSizeInBits() == N0.getOperand(0).getValueType().getSizeInBits() && - - // Only do this before legalize for now. - !LegalOperations) { - return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), - N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()); + if (VT.getSizeInBits() == N0VT.getSizeInBits()) + return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + // If the desired elements are smaller or larger than the source + // elements we can use a matching integer vector type and then + // truncate/sign extend + else { + EVT MatchingElementType = + EVT::getIntegerVT(*DAG.getContext(), + N0VT.getScalarType().getSizeInBits()); + EVT MatchingVectorType = + EVT::getVectorVT(*DAG.getContext(), MatchingElementType, + N0VT.getVectorNumElements()); + SDValue VsetCC = + DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + } } - + // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc) + unsigned ElementWidth = VT.getScalarType().getSizeInBits(); SDValue NegOne = - DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); + DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT); SDValue SCC = SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), NegOne, DAG.getConstant(0, VT), @@ -3624,7 +3973,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // Do not generate loads of non-round integer types since these can // be expensive (and would be wrong if the type is not byte sized). if (isa<LoadSDNode>(N0) && N0.hasOneUse() && ExtVT.isRound() && - cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() > EVTBits && + cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() >= EVTBits && // Do not change the width of a volatile load. !cast<LoadSDNode>(N0)->isVolatile()) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); @@ -3694,7 +4043,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // if x is small enough. if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N00 = N0.getOperand(0); - if (N00.getValueType().getScalarType().getSizeInBits() < EVTBits) + if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits && + (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1); } @@ -3779,7 +4129,8 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (N0.getOpcode() == ISD::TRUNCATE) return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0)); // fold (truncate (ext x)) -> (ext x) or (truncate x) or x - if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND|| + if (N0.getOpcode() == ISD::ZERO_EXTEND || + N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { if (N0.getOperand(0).getValueType().bitsLT(VT)) // if the source is smaller than the dest, we still need an extend @@ -3805,7 +4156,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // fold (truncate (load x)) -> (smaller load x) // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) - return ReduceLoadWidth(N); + if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) + return ReduceLoadWidth(N); + return SDValue(); } static SDNode *getBuildPairElt(SDNode *N, unsigned i) { @@ -3949,7 +4302,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { VT.isInteger() && !VT.isVector()) { unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); - if (TLI.isTypeLegal(IntXVT) || !LegalTypes) { + if (isTypeLegal(IntXVT)) { SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), IntXVT, N0.getOperand(1)); AddToWorkList(X.getNode()); @@ -4075,8 +4428,8 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { if (Op.getOpcode() == ISD::UNDEF) continue; EltIsUndef = false; - NewBits |= (APInt(cast<ConstantSDNode>(Op)->getAPIntValue()). - zextOrTrunc(SrcBitSize).zext(DstBitSize)); + NewBits |= APInt(cast<ConstantSDNode>(Op)->getAPIntValue()). + zextOrTrunc(SrcBitSize).zext(DstBitSize); } if (EltIsUndef) @@ -4464,7 +4817,7 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); // fold (fp_round_inreg c1fp) -> c1fp - if (N0CFP && (TLI.isTypeLegal(EVT) || !LegalTypes)) { + if (N0CFP && isTypeLegal(EVT)) { SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT); return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round); } @@ -4676,7 +5029,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (Op0.getOpcode() == Op1.getOpcode()) { // Avoid missing important xor optimizations. SDValue Tmp = visitXOR(TheXor); - if (Tmp.getNode()) { + if (Tmp.getNode() && Tmp.getNode() != TheXor) { DEBUG(dbgs() << "\nReplacing.8 "; TheXor->dump(&DAG); dbgs() << "\nWith: "; @@ -5145,6 +5498,136 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { return SDValue(); } +/// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the +/// load is having specific bytes cleared out. If so, return the byte size +/// being masked out and the shift amount. +static std::pair<unsigned, unsigned> +CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { + std::pair<unsigned, unsigned> Result(0, 0); + + // Check for the structure we're looking for. + if (V->getOpcode() != ISD::AND || + !isa<ConstantSDNode>(V->getOperand(1)) || + !ISD::isNormalLoad(V->getOperand(0).getNode())) + return Result; + + // Check the chain and pointer. + LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0)); + if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer. + + // The store should be chained directly to the load or be an operand of a + // tokenfactor. + if (LD == Chain.getNode()) + ; // ok. + else if (Chain->getOpcode() != ISD::TokenFactor) + return Result; // Fail. + else { + bool isOk = false; + for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i) + if (Chain->getOperand(i).getNode() == LD) { + isOk = true; + break; + } + if (!isOk) return Result; + } + + // This only handles simple types. + if (V.getValueType() != MVT::i16 && + V.getValueType() != MVT::i32 && + V.getValueType() != MVT::i64) + return Result; + + // Check the constant mask. Invert it so that the bits being masked out are + // 0 and the bits being kept are 1. Use getSExtValue so that leading bits + // follow the sign bit for uniformity. + uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue(); + unsigned NotMaskLZ = CountLeadingZeros_64(NotMask); + if (NotMaskLZ & 7) return Result; // Must be multiple of a byte. + unsigned NotMaskTZ = CountTrailingZeros_64(NotMask); + if (NotMaskTZ & 7) return Result; // Must be multiple of a byte. + if (NotMaskLZ == 64) return Result; // All zero mask. + + // See if we have a continuous run of bits. If so, we have 0*1+0* + if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64) + return Result; + + // Adjust NotMaskLZ down to be from the actual size of the int instead of i64. + if (V.getValueType() != MVT::i64 && NotMaskLZ) + NotMaskLZ -= 64-V.getValueSizeInBits(); + + unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8; + switch (MaskedBytes) { + case 1: + case 2: + case 4: break; + default: return Result; // All one mask, or 5-byte mask. + } + + // Verify that the first bit starts at a multiple of mask so that the access + // is aligned the same as the access width. + if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result; + + Result.first = MaskedBytes; + Result.second = NotMaskTZ/8; + return Result; +} + + +/// ShrinkLoadReplaceStoreWithStore - Check to see if IVal is something that +/// provides a value as specified by MaskInfo. If so, replace the specified +/// store with a narrower store of truncated IVal. +static SDNode * +ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, + SDValue IVal, StoreSDNode *St, + DAGCombiner *DC) { + unsigned NumBytes = MaskInfo.first; + unsigned ByteShift = MaskInfo.second; + SelectionDAG &DAG = DC->getDAG(); + + // Check to see if IVal is all zeros in the part being masked in by the 'or' + // that uses this. If not, this is not a replacement. + APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(), + ByteShift*8, (ByteShift+NumBytes)*8); + if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0; + + // Check that it is legal on the target to do this. It is legal if the new + // VT we're shrinking to (i8/i16/i32) is legal or we're still before type + // legalization. + MVT VT = MVT::getIntegerVT(NumBytes*8); + if (!DC->isTypeLegal(VT)) + return 0; + + // Okay, we can do this! Replace the 'St' store with a store of IVal that is + // shifted by ByteShift and truncated down to NumBytes. + if (ByteShift) + IVal = DAG.getNode(ISD::SRL, IVal->getDebugLoc(), IVal.getValueType(), IVal, + DAG.getConstant(ByteShift*8, DC->getShiftAmountTy())); + + // Figure out the offset for the store and the alignment of the access. + unsigned StOffset; + unsigned NewAlign = St->getAlignment(); + + if (DAG.getTargetLoweringInfo().isLittleEndian()) + StOffset = ByteShift; + else + StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes; + + SDValue Ptr = St->getBasePtr(); + if (StOffset) { + Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(), + Ptr, DAG.getConstant(StOffset, Ptr.getValueType())); + NewAlign = MinAlign(NewAlign, StOffset); + } + + // Truncate down to the new size. + IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal); + + ++OpsNarrowed; + return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr, + St->getSrcValue(), St->getSrcValueOffset()+StOffset, + false, false, NewAlign).getNode(); +} + /// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is /// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some @@ -5164,6 +5647,28 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { return SDValue(); unsigned Opc = Value.getOpcode(); + + // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst + // is a byte mask indicating a consecutive number of bytes, check to see if + // Y is known to provide just those bytes. If so, we try to replace the + // load + replace + store sequence with a single (narrower) store, which makes + // the load dead. + if (Opc == ISD::OR) { + std::pair<unsigned, unsigned> MaskedLoad; + MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain); + if (MaskedLoad.first) + if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, + Value.getOperand(1), ST,this)) + return SDValue(NewST, 0); + + // Or is commutative, so try swapping X and Y. + MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain); + if (MaskedLoad.first) + if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, + Value.getOperand(0), ST,this)) + return SDValue(NewST, 0); + } + if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || Value.getOperand(1).getOpcode() != ISD::Constant) return SDValue(); @@ -5211,8 +5716,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); - if (NewAlign < - TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForEVT(*DAG.getContext()))) + const Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext()); + if (NewAlign < TLI.getTargetData()->getABITypeAlignment(NewVTTy)) return SDValue(); SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(), @@ -5282,8 +5787,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { case MVT::ppcf128: break; case MVT::f32: - if (((TLI.isTypeLegal(MVT::i32) || !LegalTypes) && !LegalOperations && - !ST->isVolatile()) || + if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). bitcastToAPInt().getZExtValue(), MVT::i32); @@ -5294,7 +5798,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { } break; case MVT::f64: - if (((TLI.isTypeLegal(MVT::i64) || !LegalTypes) && !LegalOperations && + if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && !ST->isVolatile()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). @@ -5551,7 +6055,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { InVec = InVec.getOperand(0); if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast<LoadSDNode>(InVec); - Elt = (Idx < (int)NumElems) ? Idx : Idx - NumElems; + Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; } } @@ -5659,7 +6163,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { } // Add count and size info. - if (!TLI.isTypeLegal(VT) && LegalTypes) + if (!isTypeLegal(VT)) return SDValue(); // Return the new VECTOR_SHUFFLE node. @@ -6287,7 +6791,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { /// FindBaseOffset - Return true if base is a frame index, which is known not // to alias with anything but itself. Provides base object and offset as results. static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, - GlobalValue *&GV, void *&CV) { + const GlobalValue *&GV, void *&CV) { // Assume it is a primitive operation. Base = Ptr; Offset = 0; GV = 0; CV = 0; @@ -6335,7 +6839,7 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, // Gather base node and offset information. SDValue Base1, Base2; int64_t Offset1, Offset2; - GlobalValue *GV1, *GV2; + const GlobalValue *GV1, *GV2; void *CV1, *CV2; bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1); bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2); |