diff options
author | dim <dim@FreeBSD.org> | 2012-04-14 13:54:10 +0000 |
---|---|---|
committer | dim <dim@FreeBSD.org> | 2012-04-14 13:54:10 +0000 |
commit | 1fc08f5e9ef733ef1ce6f363fecedc2260e78974 (patch) | |
tree | 19c69a04768629f2d440944b71cbe90adae0b615 /lib/CodeGen/SelectionDAG | |
parent | 07637c87f826cdf411f0673595e9bc92ebd793f2 (diff) | |
download | FreeBSD-src-1fc08f5e9ef733ef1ce6f363fecedc2260e78974.zip FreeBSD-src-1fc08f5e9ef733ef1ce6f363fecedc2260e78974.tar.gz |
Vendor import of llvm trunk r154661:
http://llvm.org/svn/llvm-project/llvm/trunk@r154661
Diffstat (limited to 'lib/CodeGen/SelectionDAG')
27 files changed, 4721 insertions, 2974 deletions
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index 2282f0e..a6bdc3b 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -10,24 +10,16 @@ add_llvm_library(LLVMSelectionDAG LegalizeTypesGeneric.cpp LegalizeVectorOps.cpp LegalizeVectorTypes.cpp + ResourcePriorityQueue.cpp ScheduleDAGFast.cpp - ScheduleDAGList.cpp ScheduleDAGRRList.cpp ScheduleDAGSDNodes.cpp SelectionDAG.cpp SelectionDAGBuilder.cpp + SelectionDAGDumper.cpp SelectionDAGISel.cpp SelectionDAGPrinter.cpp + ScheduleDAGVLIW.cpp TargetLowering.cpp TargetSelectionDAGInfo.cpp ) - -add_llvm_library_dependencies(LLVMSelectionDAG - LLVMAnalysis - LLVMCodeGen - LLVMCore - LLVMMC - LLVMSupport - LLVMTarget - LLVMTransformUtils - ) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 7b87868..d1b998f 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -22,7 +22,6 @@ #include "llvm/LLVMContext.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" @@ -64,7 +63,24 @@ namespace { bool LegalTypes; // Worklist of all of the nodes that need to be simplified. - std::vector<SDNode*> WorkList; + // + // This has the semantics that when adding to the worklist, + // the item added must be next to be processed. It should + // also only appear once. The naive approach to this takes + // linear time. + // + // To reduce the insert/remove time to logarithmic, we use + // a set and a vector to maintain our worklist. + // + // The set contains the items on the worklist, but does not + // maintain the order they should be visited. + // + // The vector maintains the order nodes should be visited, but may + // contain duplicate or removed nodes. When choosing a node to + // visit, we pop off the order stack until we find an item that is + // also in the contents set. All operations are O(log N). + SmallPtrSet<SDNode*, 64> WorkListContents; + SmallVector<SDNode*, 64> WorkListOrder; // AA - Used for DAG load/store alias analysis. AliasAnalysis &AA; @@ -84,18 +100,17 @@ namespace { SDValue visit(SDNode *N); public: - /// AddToWorkList - Add to the work list making sure it's instance is at the - /// the back (next to be processed.) + /// AddToWorkList - Add to the work list making sure its instance is at the + /// back (next to be processed.) void AddToWorkList(SDNode *N) { - removeFromWorkList(N); - WorkList.push_back(N); + WorkListContents.insert(N); + WorkListOrder.push_back(N); } /// removeFromWorkList - remove all instances of N from the worklist. /// void removeFromWorkList(SDNode *N) { - WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), N), - WorkList.end()); + WorkListContents.erase(N); } SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, @@ -159,7 +174,9 @@ namespace { SDValue visitADD(SDNode *N); SDValue visitSUB(SDNode *N); SDValue visitADDC(SDNode *N); + SDValue visitSUBC(SDNode *N); SDValue visitADDE(SDNode *N); + SDValue visitSUBE(SDNode *N); SDValue visitMUL(SDNode *N); SDValue visitSDIV(SDNode *N); SDValue visitUDIV(SDNode *N); @@ -181,7 +198,9 @@ namespace { SDValue visitSRA(SDNode *N); SDValue visitSRL(SDNode *N); SDValue visitCTLZ(SDNode *N); + SDValue visitCTLZ_ZERO_UNDEF(SDNode *N); SDValue visitCTTZ(SDNode *N); + SDValue visitCTTZ_ZERO_UNDEF(SDNode *N); SDValue visitCTPOP(SDNode *N); SDValue visitSELECT(SDNode *N); SDValue visitSELECT_CC(SDNode *N); @@ -279,7 +298,7 @@ namespace { public: DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) - : DAG(D), TLI(D.getTargetLoweringInfo()), Level(Unrestricted), + : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {} /// Run - runs the dag combiner on all nodes in the work list @@ -362,6 +381,8 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { /// specified expression for the same cost as the expression itself, or 2 if we /// can compute the negated form more cheaply than the expression itself. static char isNegatibleForFree(SDValue Op, bool LegalOperations, + const TargetLowering &TLI, + const TargetOptions *Options, unsigned Depth = 0) { // No compile time optimizations on this type. if (Op.getValueType() == MVT::ppcf128) @@ -384,34 +405,44 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, return LegalOperations ? 0 : 1; case ISD::FADD: // FIXME: determine better conditions for this xform. - if (!UnsafeFPMath) return 0; + if (!Options->UnsafeFPMath) return 0; + + // After operation legalization, it might not be legal to create new FSUBs. + if (LegalOperations && + !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) + return 0; // fold (fsub (fadd A, B)) -> (fsub (fneg A), B) - if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, + Options, Depth + 1)) return V; // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) - return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1); + return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, + Depth + 1); case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros. - if (!UnsafeFPMath) return 0; + if (!Options->UnsafeFPMath) return 0; // fold (fneg (fsub A, B)) -> (fsub B, A) return 1; case ISD::FMUL: case ISD::FDIV: - if (HonorSignDependentRoundingFPMath()) return 0; + if (Options->HonorSignDependentRoundingFPMath()) return 0; // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) - if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, + Options, Depth + 1)) return V; - return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1); + return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, + Depth + 1); case ISD::FP_EXTEND: case ISD::FP_ROUND: case ISD::FSIN: - return isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1); + return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, + Depth + 1); } } @@ -435,10 +466,12 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, } case ISD::FADD: // FIXME: determine better conditions for this xform. - assert(UnsafeFPMath); + assert(DAG.getTarget().Options.UnsafeFPMath); // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) - if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + if (isNegatibleForFree(Op.getOperand(0), LegalOperations, + DAG.getTargetLoweringInfo(), + &DAG.getTarget().Options, Depth+1)) return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), @@ -450,7 +483,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, Op.getOperand(0)); case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros. - assert(UnsafeFPMath); + assert(DAG.getTarget().Options.UnsafeFPMath); // fold (fneg (fsub 0, B)) -> B if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) @@ -463,10 +496,12 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, case ISD::FMUL: case ISD::FDIV: - assert(!HonorSignDependentRoundingFPMath()); + assert(!DAG.getTarget().Options.HonorSignDependentRoundingFPMath()); // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) - if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + if (isNegatibleForFree(Op.getOperand(0), LegalOperations, + DAG.getTargetLoweringInfo(), + &DAG.getTarget().Options, Depth+1)) return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), @@ -944,14 +979,13 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { void DAGCombiner::Run(CombineLevel AtLevel) { // set the instance variables, so that the various visit routines may use it. Level = AtLevel; - LegalOperations = Level >= NoIllegalOperations; - LegalTypes = Level >= NoIllegalTypes; + LegalOperations = Level >= AfterLegalizeVectorOps; + LegalTypes = Level >= AfterLegalizeTypes; // Add all the dag nodes to the worklist. - WorkList.reserve(DAG.allnodes_size()); for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), E = DAG.allnodes_end(); I != E; ++I) - WorkList.push_back(I); + AddToWorkList(I); // Create a dummy node (which is not added to allnodes), that adds a reference // to the root node, preventing it from being deleted, and tracking any @@ -962,11 +996,17 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // done. Set it to null to avoid confusion. DAG.setRoot(SDValue()); - // while the worklist isn't empty, inspect the node on the end of it and + // while the worklist isn't empty, find a node and // try and combine it. - while (!WorkList.empty()) { - SDNode *N = WorkList.back(); - WorkList.pop_back(); + while (!WorkListContents.empty()) { + SDNode *N; + // The WorkListOrder holds the SDNodes in order, but it may contain duplicates. + // In order to avoid a linear scan, we use a set (O(log N)) to hold what the + // worklist *should* contain, and check the node we want to visit is should + // actually be visited. + do { + N = WorkListOrder.pop_back_val(); + } while (!WorkListContents.erase(N)); // If N has no uses, it is dead. Make sure to revisit all N's operands once // N is deleted from the DAG, since they too may now be dead or may have a @@ -1050,7 +1090,9 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::ADD: return visitADD(N); case ISD::SUB: return visitSUB(N); case ISD::ADDC: return visitADDC(N); + case ISD::SUBC: return visitSUBC(N); case ISD::ADDE: return visitADDE(N); + case ISD::SUBE: return visitSUBE(N); case ISD::MUL: return visitMUL(N); case ISD::SDIV: return visitSDIV(N); case ISD::UDIV: return visitUDIV(N); @@ -1071,7 +1113,9 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::SRA: return visitSRA(N); case ISD::SRL: return visitSRL(N); case ISD::CTLZ: return visitCTLZ(N); + case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N); case ISD::CTTZ: return visitCTTZ(N); + case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N); case ISD::CTPOP: return visitCTPOP(N); case ISD::SELECT: return visitSELECT(N); case ISD::SELECT_CC: return visitSELECT_CC(N); @@ -1408,16 +1452,14 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (VT.isInteger() && !VT.isVector()) { APInt LHSZero, LHSOne; APInt RHSZero, RHSOne; - APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); - DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); + DAG.ComputeMaskedBits(N0, LHSZero, LHSOne); if (LHSZero.getBoolValue()) { - DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne); + DAG.ComputeMaskedBits(N1, RHSZero, RHSOne); // If all possibly-set bits on the LHS are clear on the RHS, return an OR. // If all possibly-set bits on the RHS are clear on the LHS, return an OR. - if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) || - (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask)) + if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1); } } @@ -1486,8 +1528,8 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { EVT VT = N0.getValueType(); // If the flag result is dead, turn this into an ADD. - if (N->hasNUsesOfValue(0, 1)) - return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0), + if (!N->hasAnyUseOfValue(1)) + return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, N1), DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), MVT::Glue)); @@ -1503,16 +1545,14 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. APInt LHSZero, LHSOne; APInt RHSZero, RHSOne; - APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); - DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); + DAG.ComputeMaskedBits(N0, LHSZero, LHSOne); if (LHSZero.getBoolValue()) { - DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne); + DAG.ComputeMaskedBits(N1, RHSZero, RHSOne); // If all possibly-set bits on the LHS are clear on the RHS, return an OR. // If all possibly-set bits on the RHS are clear on the LHS, return an OR. - if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) || - (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask)) + if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1), DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), MVT::Glue)); @@ -1535,7 +1575,7 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { // fold (adde x, y, false) -> (addc x, y) if (CarryIn.getOpcode() == ISD::CARRY_FALSE) - return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0); + return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N0, N1); return SDValue(); } @@ -1645,6 +1685,51 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitSUBC(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + EVT VT = N0.getValueType(); + + // If the flag result is dead, turn this into an SUB. + if (!N->hasAnyUseOfValue(1)) + return CombineTo(N, DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1), + DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + MVT::Glue)); + + // fold (subc x, x) -> 0 + no borrow + if (N0 == N1) + return CombineTo(N, DAG.getConstant(0, VT), + DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + MVT::Glue)); + + // fold (subc x, 0) -> x + no borrow + if (N1C && N1C->isNullValue()) + return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + MVT::Glue)); + + // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow + if (N0C && N0C->isAllOnesValue()) + return CombineTo(N, DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0), + DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + MVT::Glue)); + + return SDValue(); +} + +SDValue DAGCombiner::visitSUBE(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue CarryIn = N->getOperand(2); + + // fold (sube x, y, false) -> (subc x, y) + if (CarryIn.getOpcode() == ISD::CARRY_FALSE) + return DAG.getNode(ISD::SUBC, N->getDebugLoc(), N->getVTList(), N0, N1); + + return SDValue(); +} + SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -1756,7 +1841,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { if (N0C && N1C && !N1C->isNullValue()) return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C); // fold (sdiv X, 1) -> X - if (N1C && N1C->getSExtValue() == 1LL) + if (N1C && N1C->getAPIntValue() == 1LL) return N0; // fold (sdiv X, -1) -> 0-X if (N1C && N1C->isAllOnesValue()) @@ -1770,17 +1855,15 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { N0, N1); } // fold (sdiv X, pow2) -> simple ops after legalize - if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap() && - (isPowerOf2_64(N1C->getSExtValue()) || - isPowerOf2_64(-N1C->getSExtValue()))) { + if (N1C && !N1C->isNullValue() && + (N1C->getAPIntValue().isPowerOf2() || + (-N1C->getAPIntValue()).isPowerOf2())) { // If dividing by powers of two is cheap, then don't perform the following // fold. if (TLI.isPow2DivCheap()) return SDValue(); - int64_t pow2 = N1C->getSExtValue(); - int64_t abs2 = pow2 > 0 ? pow2 : -pow2; - unsigned lg2 = Log2_64(abs2); + unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); // Splat the sign bit into the register SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, @@ -1800,7 +1883,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // If we're dividing by a positive value, we're done. Otherwise, we must // negate the result. - if (pow2 > 0) + if (N1C->getAPIntValue().isNonNegative()) return SRA; AddToWorkList(SRA.getNode()); @@ -1810,8 +1893,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // if integer divide is expensive and we satisfy the requirements, emit an // alternate sequence. - if (N1C && (N1C->getSExtValue() < -1 || N1C->getSExtValue() > 1) && - !TLI.isIntDivCheap()) { + if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) { SDValue Op = BuildSDIV(N); if (Op.getNode()) return Op; } @@ -2250,6 +2332,67 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { ORNode, N0.getOperand(1)); } + // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) + // Only perform this optimization after type legalization and before + // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by + // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and + // we don't want to undo this promotion. + // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper + // on scalars. + if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR) + && Level == AfterLegalizeVectorOps) { + SDValue In0 = N0.getOperand(0); + SDValue In1 = N1.getOperand(0); + EVT In0Ty = In0.getValueType(); + EVT In1Ty = In1.getValueType(); + // If both incoming values are integers, and the original types are the same. + if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) { + SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), In0Ty, In0, In1); + SDValue BC = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, Op); + AddToWorkList(Op.getNode()); + return BC; + } + } + + // Xor/and/or are indifferent to the swizzle operation (shuffle of one value). + // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B)) + // If both shuffles use the same mask, and both shuffle within a single + // vector, then it is worthwhile to move the swizzle after the operation. + // The type-legalizer generates this pattern when loading illegal + // vector types from memory. In many cases this allows additional shuffle + // optimizations. + if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && + N0.getOperand(1).getOpcode() == ISD::UNDEF && + N1.getOperand(1).getOpcode() == ISD::UNDEF) { + ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0); + ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1); + + assert(N0.getOperand(0).getValueType() == N1.getOperand(1).getValueType() && + "Inputs to shuffles are not the same type"); + + unsigned NumElts = VT.getVectorNumElements(); + + // Check that both shuffles use the same mask. The masks are known to be of + // the same length because the result vector type is the same. + bool SameMask = true; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx0 = SVN0->getMaskElt(i); + int Idx1 = SVN1->getMaskElt(i); + if (Idx0 != Idx1) { + SameMask = false; + break; + } + } + + if (SameMask) { + SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT, + N0.getOperand(0), N1.getOperand(0)); + AddToWorkList(Op.getNode()); + return DAG.getVectorShuffle(VT, N->getDebugLoc(), Op, + DAG.getUNDEF(VT), &SVN0->getMask()[0]); + } + } + return SDValue(); } @@ -2312,6 +2455,88 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } } + // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> + // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must + // already be zero by virtue of the width of the base type of the load. + // + // the 'X' node here can either be nothing or an extract_vector_elt to catch + // more cases. + if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + N0.getOperand(0).getOpcode() == ISD::LOAD) || + N0.getOpcode() == ISD::LOAD) { + LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ? + N0 : N0.getOperand(0) ); + + // Get the constant (if applicable) the zero'th operand is being ANDed with. + // This can be a pure constant or a vector splat, in which case we treat the + // vector as a scalar and use the splat value. + APInt Constant = APInt::getNullValue(1); + if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { + Constant = C->getAPIntValue(); + } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef, + SplatBitSize, HasAnyUndefs); + if (IsSplat) { + // Undef bits can contribute to a possible optimisation if set, so + // set them. + SplatValue |= SplatUndef; + + // The splat value may be something like "0x00FFFFFF", which means 0 for + // the first vector value and FF for the rest, repeating. We need a mask + // that will apply equally to all members of the vector, so AND all the + // lanes of the constant together. + EVT VT = Vector->getValueType(0); + unsigned BitWidth = VT.getVectorElementType().getSizeInBits(); + Constant = APInt::getAllOnesValue(BitWidth); + for (unsigned i = 0, n = VT.getVectorNumElements(); i < n; ++i) + Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); + } + } + + // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is + // actually legal and isn't going to get expanded, else this is a false + // optimisation. + bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD, + Load->getMemoryVT()); + + // Resize the constant to the same size as the original memory access before + // extension. If it is still the AllOnesValue then this AND is completely + // unneeded. + Constant = + Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits()); + + bool B; + switch (Load->getExtensionType()) { + default: B = false; break; + case ISD::EXTLOAD: B = CanZextLoadProfitably; break; + case ISD::ZEXTLOAD: + case ISD::NON_EXTLOAD: B = true; break; + } + + if (B && Constant.isAllOnesValue()) { + // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to + // preserve semantics once we get rid of the AND. + SDValue NewLoad(Load, 0); + if (Load->getExtensionType() == ISD::EXTLOAD) { + NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD, + Load->getValueType(0), Load->getDebugLoc(), + Load->getChain(), Load->getBasePtr(), + Load->getOffset(), Load->getMemoryVT(), + Load->getMemOperand()); + // Replace uses of the EXTLOAD with the new ZEXTLOAD. + CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1)); + } + + // Fold the AND away, taking care not to fold to the old load node if we + // replaced it. + CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0); + + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); @@ -3323,7 +3548,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or // (and (srl x, (sub c1, c2), MASK) - if (N1C && N0.getOpcode() == ISD::SRL && + // Only fold this if the inner shift has no other uses -- if it does, folding + // this will increase the total number of instructions. + if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() && N0.getOperand(1).getOpcode() == ISD::Constant) { uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); if (c1 < VT.getSizeInBits()) { @@ -3603,8 +3830,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N1C && N0.getOpcode() == ISD::CTLZ && N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) { APInt KnownZero, KnownOne; - APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); - DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne); + DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne); // If any of the input bits are KnownOne, then the input couldn't be all // zeros, thus the result of the srl will always be zero. @@ -3612,7 +3838,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // If all of the bits input the to ctlz node are known to be zero, then // the result of the ctlz is "32" and the result of the shift is one. - APInt UnknownBits = ~KnownZero & Mask; + APInt UnknownBits = ~KnownZero; if (UnknownBits == 0) return DAG.getConstant(1, VT); // Otherwise, check to see if there is exactly one bit input to the ctlz. @@ -3713,6 +3939,16 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (ctlz_zero_undef c1) -> c2 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0); + return SDValue(); +} + SDValue DAGCombiner::visitCTTZ(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -3723,6 +3959,16 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (cttz_zero_undef c1) -> c2 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0); + return SDValue(); +} + SDValue DAGCombiner::visitCTPOP(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -4108,12 +4354,17 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // Only do this before legalize for now. if (VT.isVector() && !LegalOperations) { EVT N0VT = N0.getOperand(0).getValueType(); - // We know that the # elements of the results is the same as the - // # elements of the compare (and the # elements of the compare result - // for that matter). Check to see that they are the same size. If so, - // we know that the element size of the sext'd result matches the - // element size of the compare operands. - if (VT.getSizeInBits() == N0VT.getSizeInBits()) + // On some architectures (such as SSE/NEON/etc) the SETCC result type is + // of the same size as the compared operands. Only optimize sext(setcc()) + // if this is the case. + EVT SVT = TLI.getSetCCResultType(N0VT); + + // We know that the # elements of the results is the same as the + // # elements of the compare (and the # elements of the compare result + // for that matter). Check to see that they are the same size. If so, + // we know that the element size of the sext'd result matches the + // element size of the compare operands. + if (VT.getSizeInBits() == SVT.getSizeInBits()) return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); @@ -4127,11 +4378,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { EVT MatchingVectorType = EVT::getVectorVT(*DAG.getContext(), MatchingElementType, N0VT.getVectorNumElements()); - SDValue VsetCC = - DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), - N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + + if (SVT == MatchingVectorType) { + SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, + N0.getOperand(0), N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + } } } @@ -4162,6 +4415,44 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { return SDValue(); } +// isTruncateOf - If N is a truncate of some other value, return true, record +// the value being truncated in Op and which of Op's bits are zero in KnownZero. +// This function computes KnownZero to avoid a duplicated call to +// ComputeMaskedBits in the caller. +static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, + APInt &KnownZero) { + APInt KnownOne; + if (N->getOpcode() == ISD::TRUNCATE) { + Op = N->getOperand(0); + DAG.ComputeMaskedBits(Op, KnownZero, KnownOne); + return true; + } + + if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 || + cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE) + return false; + + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + assert(Op0.getValueType() == Op1.getValueType()); + + ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0); + ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1); + if (COp0 && COp0->isNullValue()) + Op = Op1; + else if (COp1 && COp1->isNullValue()) + Op = Op0; + else + return false; + + DAG.ComputeMaskedBits(Op, KnownZero, KnownOne); + + if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue()) + return false; + + return true; +} + SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -4175,6 +4466,30 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0.getOperand(0)); + // fold (zext (truncate x)) -> (zext x) or + // (zext (truncate x)) -> (truncate x) + // This is valid when the truncated bits of x are already zero. + // FIXME: We should extend this to work for vectors too. + SDValue Op; + APInt KnownZero; + if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) { + APInt TruncatedBits = + (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ? + APInt(Op.getValueSizeInBits(), 0) : + APInt::getBitsSet(Op.getValueSizeInBits(), + N0.getValueSizeInBits(), + std::min(Op.getValueSizeInBits(), + VT.getSizeInBits())); + if (TruncatedBits == (KnownZero & TruncatedBits)) { + if (VT.bitsGT(Op.getValueType())) + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, Op); + if (VT.bitsLT(Op.getValueType())) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); + + return Op; + } + } + // fold (zext (truncate (load x))) -> (zext (smaller load x)) // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n))) if (N0.getOpcode() == ISD::TRUNCATE) { @@ -4567,6 +4882,16 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { switch (V.getOpcode()) { default: break; + case ISD::Constant: { + const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode()); + assert(CV != 0 && "Const value should be ConstSDNode."); + const APInt &CVal = CV->getAPIntValue(); + APInt NewVal = CVal & Mask; + if (NewVal != CVal) { + return DAG.getConstant(NewVal, V.getValueType()); + } + break; + } case ISD::OR: case ISD::XOR: // If the LHS or RHS don't contribute bits to the or, drop them. @@ -4705,7 +5030,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { if (ExtType == ISD::NON_EXTLOAD) Load = DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), - LN0->isVolatile(), LN0->isNonTemporal(), NewAlign); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->isInvariant(), NewAlign); else Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), @@ -4844,6 +5170,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + bool isLE = TLI.isLittleEndian(); // noop truncate if (N0.getValueType() == N->getValueType(0)) @@ -4871,6 +5198,44 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { return N0.getOperand(0); } + // Fold extract-and-trunc into a narrow extract. For example: + // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1) + // i32 y = TRUNCATE(i64 x) + // -- becomes -- + // v16i8 b = BITCAST (v2i64 val) + // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8) + // + // Note: We only run this optimization after type legalization (which often + // creates this pattern) and before operation legalization after which + // we need to be more careful about the vector instructions that we generate. + if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + LegalTypes && !LegalOperations && N0->hasOneUse()) { + + EVT VecTy = N0.getOperand(0).getValueType(); + EVT ExTy = N0.getValueType(); + EVT TrTy = N->getValueType(0); + + unsigned NumElem = VecTy.getVectorNumElements(); + unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits(); + + EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem); + assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size"); + + SDValue EltNo = N0->getOperand(1); + if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) { + int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + + int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); + + SDValue V = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + NVT, N0.getOperand(0)); + + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, + N->getDebugLoc(), TrTy, V, + DAG.getConstant(Index, MVT::i32)); + } + } + // See if we can simplify the input to this truncate through knowledge that // only the low bits are being used. // For example "trunc (or (shl x, 8), y)" // -> trunc y @@ -4934,7 +5299,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(), LD1->getBasePtr(), LD1->getPointerInfo(), - false, false, Align); + false, false, false, Align); } return SDValue(); @@ -5004,7 +5369,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getPointerInfo(), LN0->isVolatile(), LN0->isNonTemporal(), - OrigAlign); + LN0->isInvariant(), OrigAlign); AddToWorkList(N); CombineTo(N0.getNode(), DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), @@ -5017,7 +5382,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) // This often reduces constant pool loads. - if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) && + if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) || + (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) && N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) { SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT, N0.getOperand(0)); @@ -5247,20 +5613,24 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { if (N0CFP && !N1CFP) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0); // fold (fadd A, 0) -> A - if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && + N1CFP->getValueAPF().isZero()) return N0; // fold (fadd A, (fneg B)) -> (fsub A, B) - if (isNegatibleForFree(N1, LegalOperations) == 2) + if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && + isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2) return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); // fold (fadd (fneg A), B) -> (fsub B, A) - if (isNegatibleForFree(N0, LegalOperations) == 2) + if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && + isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2) return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1, GetNegatedExpression(N0, DAG, LegalOperations)); // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) - if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD && - N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1))) + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && + N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && + isa<ConstantFPSDNode>(N0.getOperand(1))) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0), DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(1), N1)); @@ -5285,20 +5655,39 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (N0CFP && N1CFP && VT != MVT::ppcf128) return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1); // fold (fsub A, 0) -> A - if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) + if (DAG.getTarget().Options.UnsafeFPMath && + N1CFP && N1CFP->getValueAPF().isZero()) return N0; // fold (fsub 0, B) -> -B - if (UnsafeFPMath && N0CFP && N0CFP->getValueAPF().isZero()) { - if (isNegatibleForFree(N1, LegalOperations)) + if (DAG.getTarget().Options.UnsafeFPMath && + N0CFP && N0CFP->getValueAPF().isZero()) { + if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) return GetNegatedExpression(N1, DAG, LegalOperations); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1); } // fold (fsub A, (fneg B)) -> (fadd A, B) - if (isNegatibleForFree(N1, LegalOperations)) + if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); + // If 'unsafe math' is enabled, fold + // (fsub x, (fadd x, y)) -> (fneg y) & + // (fsub x, (fadd y, x)) -> (fneg y) + if (DAG.getTarget().Options.UnsafeFPMath) { + if (N1.getOpcode() == ISD::FADD) { + SDValue N10 = N1->getOperand(0); + SDValue N11 = N1->getOperand(1); + + if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, + &DAG.getTarget().Options)) + return GetNegatedExpression(N11, DAG, LegalOperations); + else if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, + &DAG.getTarget().Options)) + return GetNegatedExpression(N10, DAG, LegalOperations); + } + } + return SDValue(); } @@ -5308,6 +5697,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // fold vector ops if (VT.isVector()) { @@ -5322,10 +5712,12 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (N0CFP && !N1CFP) return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0); // fold (fmul A, 0) -> 0 - if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) + if (DAG.getTarget().Options.UnsafeFPMath && + N1CFP && N1CFP->getValueAPF().isZero()) return N1; // fold (fmul A, 0) -> 0, vector edition. - if (UnsafeFPMath && ISD::isBuildVectorAllZeros(N1.getNode())) + if (DAG.getTarget().Options.UnsafeFPMath && + ISD::isBuildVectorAllZeros(N1.getNode())) return N1; // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) @@ -5336,8 +5728,10 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0); // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) { + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, + &DAG.getTarget().Options)) { + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, + &DAG.getTarget().Options)) { // Both can be negated for free, check to see if at least one is cheaper // negated. if (LHSNeg == 2 || RHSNeg == 2) @@ -5348,7 +5742,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { } // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) - if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL && + if (DAG.getTarget().Options.UnsafeFPMath && + N1CFP && N0.getOpcode() == ISD::FMUL && N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1))) return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0), DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, @@ -5363,6 +5758,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // fold vector ops if (VT.isVector()) { @@ -5374,10 +5770,30 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (N0CFP && N1CFP && VT != MVT::ppcf128) return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1); + // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. + if (N1CFP && VT != MVT::ppcf128 && DAG.getTarget().Options.UnsafeFPMath) { + // Compute the reciprocal 1.0 / c2. + APFloat N1APF = N1CFP->getValueAPF(); + APFloat Recip(N1APF.getSemantics(), 1); // 1.0 + APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); + // Only do the transform if the reciprocal is a legal fp immediate that + // isn't too nasty (eg NaN, denormal, ...). + if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty + (!LegalOperations || + // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM + // backend)... we should handle this gracefully after Legalize. + // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || + TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || + TLI.isFPImmLegal(Recip, VT))) + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, + DAG.getConstantFP(Recip, VT)); + } // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) { + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, + &DAG.getTarget().Options)) { + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, + &DAG.getTarget().Options)) { // Both can be negated for free, check to see if at least one is cheaper // negated. if (LHSNeg == 2 || RHSNeg == 2) @@ -5463,7 +5879,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { // fold (sint_to_fp c1) -> c1fp if (N0C && OpVT != MVT::ppcf128 && // ...but only if the target supports immediate floating-point values - (Level == llvm::Unrestricted || + (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); @@ -5488,7 +5904,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { // fold (uint_to_fp c1) -> c1fp if (N0C && OpVT != MVT::ppcf128 && // ...but only if the target supports immediate floating-point values - (Level == llvm::Unrestricted || + (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); @@ -5630,12 +6046,13 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - if (isNegatibleForFree(N0, LegalOperations)) + if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), + &DAG.getTarget().Options)) return GetNegatedExpression(N0, DAG, LegalOperations); // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading // constant pool values. - if (N0.getOpcode() == ISD::BITCAST && + if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST && !VT.isVector() && N0.getNode()->hasOneUse() && N0.getOperand(0).getValueType().isInteger()) { @@ -5671,7 +6088,8 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading // constant pool values. - if (N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && + if (!TLI.isFAbsFree(VT) && + N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && N0.getOperand(0).getValueType().isInteger() && !N0.getOperand(0).getValueType().isVector()) { SDValue Int = N0.getOperand(0); @@ -5860,6 +6278,47 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) { return SDValue(); } +/// canFoldInAddressingMode - Return true if 'Use' is a load or a store that +/// uses N as its base pointer and that N may be folded in the load / store +/// addressing mode. +static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, + SelectionDAG &DAG, + const TargetLowering &TLI) { + EVT VT; + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { + if (LD->isIndexed() || LD->getBasePtr().getNode() != N) + return false; + VT = Use->getValueType(0); + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { + if (ST->isIndexed() || ST->getBasePtr().getNode() != N) + return false; + VT = ST->getValue().getValueType(); + } else + return false; + + TargetLowering::AddrMode AM; + if (N->getOpcode() == ISD::ADD) { + ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (Offset) + // [reg +/- imm] + AM.BaseOffs = Offset->getSExtValue(); + else + // [reg +/- reg] + AM.Scale = 1; + } else if (N->getOpcode() == ISD::SUB) { + ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (Offset) + // [reg +/- imm] + AM.BaseOffs = -Offset->getSExtValue(); + else + // [reg +/- reg] + AM.Scale = 1; + } else + return false; + + return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext())); +} + /// CombineToPreIndexedLoadStore - Try turning a load / store into a /// pre-indexed load / store when the base pointer is an add or subtract /// and it has other uses besides the load / store. After the @@ -5867,7 +6326,7 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) { /// the add / subtract in and all of its other uses are redirected to the /// new load / store. bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { - if (!LegalOperations) + if (Level < AfterLegalizeDAG) return false; bool isLoad = true; @@ -5946,10 +6405,9 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { if (N->hasPredecessorHelper(Use, Visited, Worklist)) return false; - if (!((Use->getOpcode() == ISD::LOAD && - cast<LoadSDNode>(Use)->getBasePtr() == Ptr) || - (Use->getOpcode() == ISD::STORE && - cast<StoreSDNode>(Use)->getBasePtr() == Ptr))) + // If Ptr may be folded in addressing mode of other use, then it's + // not profitable to do this transformation. + if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI)) RealUse = true; } @@ -5999,7 +6457,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { /// load / store effectively and all of its uses are redirected to the /// new load / store. bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { - if (!LegalOperations) + if (Level < AfterLegalizeDAG) return false; bool isLoad = true; @@ -6046,7 +6504,8 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { continue; // Try turning it into a post-indexed load / store except when - // 1) All uses are load / store ops that use it as base ptr. + // 1) All uses are load / store ops that use it as base ptr (and + // it may be folded as addressing mmode). // 2) Op must be independent of N, i.e. Op is neither a predecessor // nor a successor of N. Otherwise, if Op is folded that would // create a cycle. @@ -6069,10 +6528,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { for (SDNode::use_iterator III = Use->use_begin(), EEE = Use->use_end(); III != EEE; ++III) { SDNode *UseUse = *III; - if (!((UseUse->getOpcode() == ISD::LOAD && - cast<LoadSDNode>(UseUse)->getBasePtr().getNode() == Use) || - (UseUse->getOpcode() == ISD::STORE && - cast<StoreSDNode>(UseUse)->getBasePtr().getNode() == Use))) + if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) RealUse = true; } @@ -6139,7 +6595,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { if (!LD->isVolatile()) { if (N->getValueType(1) == MVT::Other) { // Unindexed loads. - if (N->hasNUsesOfValue(0, 0)) { + if (!N->hasAnyUseOfValue(0)) { // It's not safe to use the two value CombineTo variant here. e.g. // v1, chain2 = load chain1, loc // v2, chain3 = load chain2, loc @@ -6164,7 +6620,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { } else { // Indexed loads. assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); - if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) { + if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) { SDValue Undef = DAG.getUNDEF(N->getValueType(0)); DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG); @@ -6222,7 +6678,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(), BetterChain, Ptr, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + LD->isInvariant(), LD->getAlignment()); } else { ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), LD->getValueType(0), @@ -6486,7 +6942,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { LD->getChain(), NewPtr, LD->getPointerInfo().getWithOffset(PtrOff), LD->isVolatile(), LD->isNonTemporal(), - NewAlign); + LD->isInvariant(), NewAlign); SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD, DAG.getConstant(NewImm, NewVT)); SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(), @@ -6546,7 +7002,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(), LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), - false, false, LDAlign); + false, false, false, LDAlign); SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(), NewLD, ST->getBasePtr(), @@ -6823,13 +7279,14 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // (vextract (scalar_to_vector val, 0) -> val SDValue InVec = N->getOperand(0); + EVT VT = InVec.getValueType(); + EVT NVT = N->getValueType(0); if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { // Check if the result type doesn't match the inserted element type. A // SCALAR_TO_VECTOR may truncate the inserted element and the // EXTRACT_VECTOR_ELT may widen the extracted vector. SDValue InOp = InVec.getOperand(0); - EVT NVT = N->getValueType(0); if (InOp.getValueType() != NVT) { assert(InOp.getValueType().isInteger() && NVT.isInteger()); return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT); @@ -6837,6 +7294,38 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { return InOp; } + SDValue EltNo = N->getOperand(1); + bool ConstEltNo = isa<ConstantSDNode>(EltNo); + + // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT. + // We only perform this optimization before the op legalization phase because + // we may introduce new vector instructions which are not backed by TD patterns. + // For example on AVX, extracting elements from a wide vector without using + // extract_subvector. + if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE + && ConstEltNo && !LegalOperations) { + int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + int NumElem = VT.getVectorNumElements(); + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec); + // Find the new index to extract from. + int OrigElt = SVOp->getMaskElt(Elt); + + // Extracting an undef index is undef. + if (OrigElt == -1) + return DAG.getUNDEF(NVT); + + // Select the right vector half to extract from. + if (OrigElt < NumElem) { + InVec = InVec->getOperand(0); + } else { + InVec = InVec->getOperand(1); + OrigElt -= NumElem; + } + + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), NVT, + InVec, DAG.getConstant(OrigElt, MVT::i32)); + } + // Perform only after legalization to ensure build_vector / vector_shuffle // optimizations have already been done. if (!LegalOperations) return SDValue(); @@ -6844,17 +7333,24 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size) // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size) // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr) - SDValue EltNo = N->getOperand(1); - if (isa<ConstantSDNode>(EltNo)) { + if (ConstEltNo) { int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); bool NewLoad = false; bool BCNumEltsChanged = false; - EVT VT = InVec.getValueType(); EVT ExtVT = VT.getVectorElementType(); EVT LVT = ExtVT; + // If the result of load has to be truncated, then it's not necessarily + // profitable. + if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT)) + return SDValue(); + if (InVec.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + EVT BCVT = InVec.getOperand(0).getValueType(); if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) return SDValue(); @@ -6872,12 +7368,20 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && InVec.getOperand(0).getValueType() == ExtVT && ISD::isNormalLoad(InVec.getOperand(0).getNode())) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + LN0 = cast<LoadSDNode>(InVec.getOperand(0)); } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) { // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1) // => // (load $addr+1*size) + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + // If the bit convert changed the number of elements, it is unsafe // to examine the mask. if (BCNumEltsChanged) @@ -6888,14 +7392,21 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt); InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); - if (InVec.getOpcode() == ISD::BITCAST) + if (InVec.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + InVec = InVec.getOperand(0); + } if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast<LoadSDNode>(InVec); Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; } } + // Make sure we found a non-volatile load and the extractelement is + // the only use. if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) return SDValue(); @@ -6929,9 +7440,45 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { DAG.getConstant(PtrOff, PtrType)); } - return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, - LN0->getPointerInfo().getWithOffset(PtrOff), - LN0->isVolatile(), LN0->isNonTemporal(), Align); + // The replacement we need to do here is a little tricky: we need to + // replace an extractelement of a load with a load. + // Use ReplaceAllUsesOfValuesWith to do the replacement. + // Note that this replacement assumes that the extractvalue is the only + // use of the load; that's okay because we don't want to perform this + // transformation in other cases anyway. + SDValue Load; + SDValue Chain; + if (NVT.bitsGT(LVT)) { + // If the result type of vextract is wider than the load, then issue an + // extending load instead. + ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT) + ? ISD::ZEXTLOAD : ISD::EXTLOAD; + Load = DAG.getExtLoad(ExtType, N->getDebugLoc(), NVT, LN0->getChain(), + NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), + LVT, LN0->isVolatile(), LN0->isNonTemporal(),Align); + Chain = Load.getValue(1); + } else { + Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, + LN0->getPointerInfo().getWithOffset(PtrOff), + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->isInvariant(), Align); + Chain = Load.getValue(1); + if (NVT.bitsLT(LVT)) + Load = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Load); + else + Load = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), NVT, Load); + } + WorkListRemover DeadNodes(*this); + SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) }; + SDValue To[] = { Load, Chain }; + DAG.ReplaceAllUsesOfValuesWith(From, To, 2, &DeadNodes); + // Since we're explcitly calling ReplaceAllUses, add the new node to the + // worklist explicitly as well. + AddToWorkList(Load.getNode()); + AddUsersToWorkList(Load.getNode()); // Add users too + // Make sure to revisit this node to clean it up; it will usually be dead. + AddToWorkList(N); + return SDValue(N, 0); } return SDValue(); @@ -6939,11 +7486,122 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { unsigned NumInScalars = N->getNumOperands(); + DebugLoc dl = N->getDebugLoc(); EVT VT = N->getValueType(0); + // Check to see if this is a BUILD_VECTOR of a bunch of values + // which come from any_extend or zero_extend nodes. If so, we can create + // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR + // optimizations. We do not handle sign-extend because we can't fill the sign + // using shuffles. + EVT SourceType = MVT::Other; + bool AllAnyExt = true; + bool AllUndef = true; + for (unsigned i = 0; i != NumInScalars; ++i) { + SDValue In = N->getOperand(i); + // Ignore undef inputs. + if (In.getOpcode() == ISD::UNDEF) continue; + AllUndef = false; + + bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND; + bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND; + + // Abort if the element is not an extension. + if (!ZeroExt && !AnyExt) { + SourceType = MVT::Other; + break; + } + + // The input is a ZeroExt or AnyExt. Check the original type. + EVT InTy = In.getOperand(0).getValueType(); + + // Check that all of the widened source types are the same. + if (SourceType == MVT::Other) + // First time. + SourceType = InTy; + else if (InTy != SourceType) { + // Multiple income types. Abort. + SourceType = MVT::Other; + break; + } + + // Check if all of the extends are ANY_EXTENDs. + AllAnyExt &= AnyExt; + } + + if (AllUndef) + return DAG.getUNDEF(VT); + + // In order to have valid types, all of the inputs must be extended from the + // same source type and all of the inputs must be any or zero extend. + // Scalar sizes must be a power of two. + EVT OutScalarTy = N->getValueType(0).getScalarType(); + bool ValidTypes = SourceType != MVT::Other && + isPowerOf2_32(OutScalarTy.getSizeInBits()) && + isPowerOf2_32(SourceType.getSizeInBits()); + + // We perform this optimization post type-legalization because + // the type-legalizer often scalarizes integer-promoted vectors. + // Performing this optimization before may create bit-casts which + // will be type-legalized to complex code sequences. + // We perform this optimization only before the operation legalizer because we + // may introduce illegal operations. + // Create a new simpler BUILD_VECTOR sequence which other optimizations can + // turn into a single shuffle instruction. + if ((Level == AfterLegalizeVectorOps || Level == AfterLegalizeTypes) && + ValidTypes) { + bool isLE = TLI.isLittleEndian(); + unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); + assert(ElemRatio > 1 && "Invalid element size ratio"); + SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType): + DAG.getConstant(0, SourceType); + + unsigned NewBVElems = ElemRatio * N->getValueType(0).getVectorNumElements(); + SmallVector<SDValue, 8> Ops(NewBVElems, Filler); + + // Populate the new build_vector + for (unsigned i=0; i < N->getNumOperands(); ++i) { + SDValue Cast = N->getOperand(i); + assert((Cast.getOpcode() == ISD::ANY_EXTEND || + Cast.getOpcode() == ISD::ZERO_EXTEND || + Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode"); + SDValue In; + if (Cast.getOpcode() == ISD::UNDEF) + In = DAG.getUNDEF(SourceType); + else + In = Cast->getOperand(0); + unsigned Index = isLE ? (i * ElemRatio) : + (i * ElemRatio + (ElemRatio - 1)); + + assert(Index < Ops.size() && "Invalid index"); + Ops[Index] = In; + } + + // The type of the new BUILD_VECTOR node. + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems); + assert(VecVT.getSizeInBits() == N->getValueType(0).getSizeInBits() && + "Invalid vector size"); + // Check if the new vector type is legal. + if (!isTypeLegal(VecVT)) return SDValue(); + + // Make the new BUILD_VECTOR. + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + VecVT, &Ops[0], Ops.size()); + + // The new BUILD_VECTOR node has the potential to be further optimized. + AddToWorkList(BV.getNode()); + // Bitcast to the desired type. + return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), BV); + } // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from // at most two distinct vectors, turn this into a shuffle node. + + // May only combine to shuffle after legalize if shuffle is legal. + if (LegalOperations && + !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)) + return SDValue(); + SDValue VecIn1, VecIn2; for (unsigned i = 0; i != NumInScalars; ++i) { // Ignore undef inputs. @@ -6957,15 +7615,8 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { break; } - // If the input vector type disagrees with the result of the build_vector, - // we can't make a shuffle. + // We allow up to two distinct input vectors. SDValue ExtractedFromVec = N->getOperand(i).getOperand(0); - if (ExtractedFromVec.getValueType() != VT) { - VecIn1 = VecIn2 = SDValue(0, 0); - break; - } - - // Otherwise, remember this. We allow up to two distinct input vectors. if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2) continue; @@ -6980,7 +7631,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { } } - // If everything is good, we can make a shuffle operation. + // If everything is good, we can make a shuffle operation. if (VecIn1.getNode()) { SmallVector<int, 8> Mask; for (unsigned i = 0; i != NumInScalars; ++i) { @@ -7006,14 +7657,39 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { Mask.push_back(Idx+NumInScalars); } - // Add count and size info. + // We can't generate a shuffle node with mismatched input and output types. + // Attempt to transform a single input vector to the correct type. + if ((VT != VecIn1.getValueType())) { + // We don't support shuffeling between TWO values of different types. + if (VecIn2.getNode() != 0) + return SDValue(); + + // We only support widening of vectors which are half the size of the + // output registers. For example XMM->YMM widening on X86 with AVX. + if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits()) + return SDValue(); + + // Widen the input vector by adding undef values. + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, + VecIn1, DAG.getUNDEF(VecIn1.getValueType())); + } + + // If VecIn2 is unused then change it to undef. + VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); + + // Check that we were able to transform all incoming values to the same type. + if (VecIn2.getValueType() != VecIn1.getValueType() || + VecIn1.getValueType() != VT) + return SDValue(); + + // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes. if (!isTypeLegal(VT)) return SDValue(); // Return the new VECTOR_SHUFFLE node. SDValue Ops[2]; Ops[0] = VecIn1; - Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); + Ops[1] = VecIn2; return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]); } @@ -7045,19 +7721,23 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { if (NVT != SmallVT || NVT.getSizeInBits()*2 != BigVT.getSizeInBits()) return SDValue(); - // Combine: - // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) - // Into: - // indicies are equal => V1 - // otherwise => (extract_subvec V1, ExtIdx) - // - SDValue InsIdx = N->getOperand(1); - SDValue ExtIdx = V->getOperand(2); + // Only handle cases where both indexes are constants with the same type. + ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(N->getOperand(1)); + ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(V->getOperand(2)); - if (InsIdx == ExtIdx) - return V->getOperand(1); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT, - V->getOperand(0), N->getOperand(1)); + if (InsIdx && ExtIdx && + InsIdx->getValueType(0).getSizeInBits() <= 64 && + ExtIdx->getValueType(0).getSizeInBits() <= 64) { + // Combine: + // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) + // Into: + // indices are equal => V1 + // otherwise => (extract_subvec V1, ExtIdx) + if (InsIdx->getZExtValue() == ExtIdx->getZExtValue()) + return V->getOperand(1); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT, + V->getOperand(0), N->getOperand(1)); + } } return SDValue(); @@ -7068,15 +7748,63 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { unsigned NumElts = VT.getVectorNumElements(); SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG"); + + // Canonicalize shuffle undef, undef -> undef + if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(VT); + + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); - assert(N0.getValueType().getVectorNumElements() == NumElts && - "Vector shuffle must be normalized in DAG"); + // Canonicalize shuffle v, v -> v, undef + if (N0 == N1) { + SmallVector<int, 8> NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if (Idx >= (int)NumElts) Idx -= NumElts; + NewMask.push_back(Idx); + } + return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, DAG.getUNDEF(VT), + &NewMask[0]); + } + + // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. + if (N0.getOpcode() == ISD::UNDEF) { + SmallVector<int, 8> NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if (Idx >= 0) { + if (Idx < (int)NumElts) + Idx += NumElts; + else + Idx -= NumElts; + } + NewMask.push_back(Idx); + } + return DAG.getVectorShuffle(VT, N->getDebugLoc(), N1, DAG.getUNDEF(VT), + &NewMask[0]); + } - // FIXME: implement canonicalizations from DAG.getVectorShuffle() + // Remove references to rhs if it is undef + if (N1.getOpcode() == ISD::UNDEF) { + bool Changed = false; + SmallVector<int, 8> NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if (Idx >= (int)NumElts) { + Idx = -1; + Changed = true; + } + NewMask.push_back(Idx); + } + if (Changed) + return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, N1, &NewMask[0]); + } // If it is a splat, check if the argument vector is another splat or a // build_vector with all scalar elements the same. - ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { SDNode *V = N0.getNode(); @@ -7115,6 +7843,40 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return N0; } } + + // If this shuffle node is simply a swizzle of another shuffle node, + // and it reverses the swizzle of the previous shuffle then we can + // optimize shuffle(shuffle(x, undef), undef) -> x. + if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && + N1.getOpcode() == ISD::UNDEF) { + + ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); + + // Shuffle nodes can only reverse shuffles with a single non-undef value. + if (N0.getOperand(1).getOpcode() != ISD::UNDEF) + return SDValue(); + + // The incoming shuffle must be of the same type as the result of the + // current shuffle. + assert(OtherSV->getOperand(0).getValueType() == VT && + "Shuffle types don't match"); + + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + assert(Idx < (int)NumElts && "Index references undef operand"); + // Next, this index comes from the first value, which is the incoming + // shuffle. Adopt the incoming index. + if (Idx >= 0) + Idx = OtherSV->getMaskElt(Idx); + + // The combined shuffle must map each index to itself. + if (Idx >= 0 && (unsigned)Idx != i) + return SDValue(); + } + + return OtherSV->getOperand(0); + } + return SDValue(); } @@ -7190,7 +7952,8 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { SDValue Elt = RHS.getOperand(i); if (!isa<ConstantSDNode>(Elt)) return SDValue(); - else if (cast<ConstantSDNode>(Elt)->isAllOnesValue()) + + if (cast<ConstantSDNode>(Elt)->isAllOnesValue()) Indices.push_back(i); else if (cast<ConstantSDNode>(Elt)->isNullValue()) Indices.push_back(NumElts); @@ -7261,8 +8024,19 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { } EVT VT = LHSOp.getValueType(); - assert(RHSOp.getValueType() == VT && - "SimplifyVBinOp with different BUILD_VECTOR element types"); + EVT RVT = RHSOp.getValueType(); + if (RVT != VT) { + // Integer BUILD_VECTOR operands may have types larger than the element + // size (e.g., when the element type is not legal). Prior to type + // legalization, the types may not match between the two BUILD_VECTORS. + // Truncate one of the operands to make them match. + if (RVT.getSizeInBits() > VT.getSizeInBits()) { + RHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, RHSOp); + } else { + LHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), RVT, LHSOp); + VT = RVT; + } + } SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT, LHSOp, RHSOp); if (FoldOp.getOpcode() != ISD::UNDEF && @@ -7374,8 +8148,8 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, if ((LLD->hasAnyUseOfValue(1) && (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) || - (LLD->hasAnyUseOfValue(1) && - (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS)))) + (RLD->hasAnyUseOfValue(1) && + (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS)))) return false; Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(), @@ -7393,7 +8167,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, // FIXME: Discards pointer info. LLD->getChain(), Addr, MachinePointerInfo(), LLD->isVolatile(), LLD->isNonTemporal(), - LLD->getAlignment()); + LLD->isInvariant(), LLD->getAlignment()); } else { Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType() : LLD->getExtensionType(), @@ -7509,7 +8283,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, AddToWorkList(CPIdx.getNode()); return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), false, - false, Alignment); + false, false, Alignment); } } @@ -7517,8 +8291,6 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, // Check to see if we can perform the "gzip trick", transforming // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A) if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT && - N0.getValueType().isInteger() && - N2.getValueType().isInteger() && (N1C->isNullValue() || // (a < 0) ? b : 0 (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0 EVT XType = N0.getValueType(); @@ -7720,7 +8492,7 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> SDValue DAGCombiner::BuildSDIV(SDNode *N) { std::vector<SDNode*> Built; - SDValue S = TLI.BuildSDIV(N, DAG, &Built); + SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, &Built); for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end(); ii != ee; ++ii) @@ -7734,7 +8506,7 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) { /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> SDValue DAGCombiner::BuildUDIV(SDNode *N) { std::vector<SDNode*> Built; - SDValue S = TLI.BuildUDIV(N, DAG, &Built); + SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, &Built); for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end(); ii != ee; ++ii) @@ -7856,30 +8628,20 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, /// FindAliasInfo - Extracts the relevant alias information from the memory /// node. Returns true if the operand was a load. bool DAGCombiner::FindAliasInfo(SDNode *N, - SDValue &Ptr, int64_t &Size, - const Value *&SrcValue, - int &SrcValueOffset, - unsigned &SrcValueAlign, - const MDNode *&TBAAInfo) const { - if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { - Ptr = LD->getBasePtr(); - Size = LD->getMemoryVT().getSizeInBits() >> 3; - SrcValue = LD->getSrcValue(); - SrcValueOffset = LD->getSrcValueOffset(); - SrcValueAlign = LD->getOriginalAlignment(); - TBAAInfo = LD->getTBAAInfo(); - return true; - } - if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { - Ptr = ST->getBasePtr(); - Size = ST->getMemoryVT().getSizeInBits() >> 3; - SrcValue = ST->getSrcValue(); - SrcValueOffset = ST->getSrcValueOffset(); - SrcValueAlign = ST->getOriginalAlignment(); - TBAAInfo = ST->getTBAAInfo(); - return false; - } - llvm_unreachable("FindAliasInfo expected a memory operand"); + SDValue &Ptr, int64_t &Size, + const Value *&SrcValue, + int &SrcValueOffset, + unsigned &SrcValueAlign, + const MDNode *&TBAAInfo) const { + LSBaseSDNode *LS = cast<LSBaseSDNode>(N); + + Ptr = LS->getBasePtr(); + Size = LS->getMemoryVT().getSizeInBits() >> 3; + SrcValue = LS->getSrcValue(); + SrcValueOffset = LS->getSrcValueOffset(); + SrcValueAlign = LS->getOriginalAlignment(); + TBAAInfo = LS->getTBAAInfo(); + return isa<LoadSDNode>(LS); } /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index e8f8c73..0c1ac69 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -39,6 +39,7 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "isel" #include "llvm/Function.h" #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" @@ -58,8 +59,15 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Debug.h" +#include "llvm/ADT/Statistic.h" using namespace llvm; +STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by " + "target-independent selector"); +STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by " + "target-specific selector"); +STATISTIC(NumFastIselDead, "Number of dead insts removed on failure"); + /// startNewBlock - Set the current block to which generated machine /// instructions will be appended, and clear the local CSE map. /// @@ -96,6 +104,11 @@ bool FastISel::hasTrivialKill(const Value *V) const { !hasTrivialKill(Cast->getOperand(0))) return false; + // GEPs with all zero indices are trivially coalesced by fast-isel. + if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) + if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0))) + return false; + // Only instructions with a single use in the same basic block are considered // to have trivial kills. return I->hasOneUse() && @@ -123,15 +136,8 @@ unsigned FastISel::getRegForValue(const Value *V) { return 0; } - // Look up the value to see if we already have a register for it. We - // cache values defined by Instructions across blocks, and other values - // only locally. This is because Instructions already have the SSA - // def-dominates-use requirement enforced. - DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V); - if (I != FuncInfo.ValueMap.end()) - return I->second; - - unsigned Reg = LocalValueMap[V]; + // Look up the value to see if we already have a register for it. + unsigned Reg = lookUpRegForValue(V); if (Reg != 0) return Reg; @@ -186,7 +192,7 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { uint32_t IntBitWidth = IntVT.getSizeInBits(); bool isExact; (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true, - APFloat::rmTowardZero, &isExact); + APFloat::rmTowardZero, &isExact); if (isExact) { APInt IntVal(IntBitWidth, x); @@ -297,6 +303,18 @@ void FastISel::recomputeInsertPt() { ++FuncInfo.InsertPt; } +void FastISel::removeDeadCode(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E) { + assert (I && E && std::distance(I, E) > 0 && "Invalid iterator!"); + while (I != E) { + MachineInstr *Dead = &*I; + ++I; + Dead->eraseFromParent(); + ++NumFastIselDead; + } + recomputeInsertPt(); +} + FastISel::SavePoint FastISel::enterLocalValueArea() { MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt; DebugLoc OldDL = DL; @@ -377,6 +395,13 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { ISDOpcode = ISD::SRA; } + // Transform "urem x, pow2" -> "and x, pow2-1". + if (ISDOpcode == ISD::UREM && isa<BinaryOperator>(I) && + isPowerOf2_64(Imm)) { + --Imm; + ISDOpcode = ISD::AND; + } + unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0, Op0IsKill, Imm, VT.getSimpleVT()); if (ResultReg == 0) return false; @@ -427,6 +452,11 @@ bool FastISel::SelectGetElementPtr(const User *I) { bool NIsKill = hasTrivialKill(I->getOperand(0)); + // Keep a running tab of the total offset to coalesce multiple N = N + Offset + // into a single N = N + TotalOffset. + uint64_t TotalOffs = 0; + // FIXME: What's a good SWAG number for MaxOffs? + uint64_t MaxOffs = 2048; Type *Ty = I->getOperand(0)->getType(); MVT VT = TLI.getPointerTy(); for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1, @@ -436,14 +466,15 @@ bool FastISel::SelectGetElementPtr(const User *I) { unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); if (Field) { // N = N + Offset - uint64_t Offs = TD.getStructLayout(StTy)->getElementOffset(Field); - // FIXME: This can be optimized by combining the add with a - // subsequent one. - N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT); - if (N == 0) - // Unhandled operand. Halt "fast" selection and bail. - return false; - NIsKill = true; + TotalOffs += TD.getStructLayout(StTy)->getElementOffset(Field); + if (TotalOffs >= MaxOffs) { + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + NIsKill = true; + TotalOffs = 0; + } } Ty = StTy->getElementType(Field); } else { @@ -452,14 +483,26 @@ bool FastISel::SelectGetElementPtr(const User *I) { // If this is a constant subscript, handle it quickly. if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { if (CI->isZero()) continue; - uint64_t Offs = + // N = N + Offset + TotalOffs += TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); - N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT); + if (TotalOffs >= MaxOffs) { + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + NIsKill = true; + TotalOffs = 0; + } + continue; + } + if (TotalOffs) { + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); if (N == 0) // Unhandled operand. Halt "fast" selection and bail. return false; NIsKill = true; - continue; + TotalOffs = 0; } // N = N + Idx * ElementSize; @@ -484,6 +527,12 @@ bool FastISel::SelectGetElementPtr(const User *I) { return false; } } + if (TotalOffs) { + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + } // We successfully emitted code for the given LLVM Instruction. UpdateValueMap(I, N); @@ -512,21 +561,32 @@ bool FastISel::SelectCall(const User *I) { return true; } + MachineModuleInfo &MMI = FuncInfo.MF->getMMI(); + ComputeUsesVAFloatArgument(*Call, &MMI); + const Function *F = Call->getCalledFunction(); if (!F) return false; // Handle selected intrinsic function calls. switch (F->getIntrinsicID()) { default: break; + // At -O0 we don't care about the lifetime intrinsics. + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + return true; case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call); if (!DIVariable(DI->getVariable()).Verify() || - !FuncInfo.MF->getMMI().hasDebugInfo()) + !FuncInfo.MF->getMMI().hasDebugInfo()) { + DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); return true; + } const Value *Address = DI->getAddress(); - if (!Address || isa<UndefValue>(Address) || isa<AllocaInst>(Address)) + if (!Address || isa<UndefValue>(Address)) { + DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); return true; + } unsigned Reg = 0; unsigned Offset = 0; @@ -534,16 +594,36 @@ bool FastISel::SelectCall(const User *I) { // Some arguments' frame index is recorded during argument lowering. Offset = FuncInfo.getArgumentFrameIndex(Arg); if (Offset) - Reg = TRI.getFrameRegister(*FuncInfo.MF); + Reg = TRI.getFrameRegister(*FuncInfo.MF); } if (!Reg) - Reg = getRegForValue(Address); + Reg = lookUpRegForValue(Address); + + // If we have a VLA that has a "use" in a metadata node that's then used + // here but it has no other uses, then we have a problem. E.g., + // + // int foo (const int *x) { + // char a[*x]; + // return 0; + // } + // + // If we assign 'a' a vreg and fast isel later on has to use the selection + // DAG isel, it will want to copy the value to the vreg. However, there are + // no uses, which goes counter to what selection DAG isel expects. + if (!Reg && !Address->use_empty() && isa<Instruction>(Address) && + (!isa<AllocaInst>(Address) || + !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address)))) + Reg = FuncInfo.InitializeRegForValue(Address); if (Reg) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::DBG_VALUE)) .addReg(Reg, RegState::Debug).addImm(Offset) .addMetadata(DI->getVariable()); + else + // We can't yet handle anything else here because it would require + // generating code, thus altering codegen because of debug info. + DEBUG(dbgs() << "Dropping debug info for " << DI); return true; } case Intrinsic::dbg_value: { @@ -581,60 +661,6 @@ bool FastISel::SelectCall(const User *I) { } return true; } - case Intrinsic::eh_exception: { - EVT VT = TLI.getValueType(Call->getType()); - if (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)!=TargetLowering::Expand) - break; - - assert(FuncInfo.MBB->isLandingPad() && - "Call to eh.exception not in landing pad!"); - unsigned Reg = TLI.getExceptionAddressRegister(); - const TargetRegisterClass *RC = TLI.getRegClassFor(VT); - unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(Reg); - UpdateValueMap(Call, ResultReg); - return true; - } - case Intrinsic::eh_selector: { - EVT VT = TLI.getValueType(Call->getType()); - if (TLI.getOperationAction(ISD::EHSELECTION, VT) != TargetLowering::Expand) - break; - if (FuncInfo.MBB->isLandingPad()) - AddCatchInfo(*Call, &FuncInfo.MF->getMMI(), FuncInfo.MBB); - else { -#ifndef NDEBUG - FuncInfo.CatchInfoLost.insert(Call); -#endif - // FIXME: Mark exception selector register as live in. Hack for PR1508. - unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) FuncInfo.MBB->addLiveIn(Reg); - } - - unsigned Reg = TLI.getExceptionSelectorRegister(); - EVT SrcVT = TLI.getPointerTy(); - const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT); - unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(Reg); - - bool ResultRegIsKill = hasTrivialKill(Call); - - // Cast the register to the type of the selector. - if (SrcVT.bitsGT(MVT::i32)) - ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE, - ResultReg, ResultRegIsKill); - else if (SrcVT.bitsLT(MVT::i32)) - ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, - ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill); - if (ResultReg == 0) - // Unhandled operand. Halt "fast" selection and bail. - return false; - - UpdateValueMap(Call, ResultReg); - - return true; - } case Intrinsic::objectsize: { ConstantInt *CI = cast<ConstantInt>(Call->getArgOperand(1)); unsigned long long Res = CI->isZero() ? -1ULL : 0; @@ -726,8 +752,8 @@ bool FastISel::SelectBitCast(const User *I) { // First, try to perform the bitcast by inserting a reg-reg copy. unsigned ResultReg = 0; if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) { - TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT); - TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT); + const TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT); + const TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT); // Don't attempt a cross-class copy. It will likely fail. if (SrcClass == DstClass) { ResultReg = createResultReg(DstClass); @@ -758,17 +784,33 @@ FastISel::SelectInstruction(const Instruction *I) { DL = I->getDebugLoc(); + MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt; + // First, try doing target-independent selection. if (SelectOperator(I, I->getOpcode())) { + ++NumFastIselSuccessIndependent; DL = DebugLoc(); return true; } + // Remove dead code. However, ignore call instructions since we've flushed + // the local value map and recomputed the insert point. + if (!isa<CallInst>(I)) { + recomputeInsertPt(); + if (SavedInsertPt != FuncInfo.InsertPt) + removeDeadCode(FuncInfo.InsertPt, SavedInsertPt); + } // Next, try calling the target to attempt to handle the instruction. + SavedInsertPt = FuncInfo.InsertPt; if (TargetSelectInstruction(I)) { + ++NumFastIselSuccessTarget; DL = DebugLoc(); return true; } + // Check for dead code and remove as necessary. + recomputeInsertPt(); + if (SavedInsertPt != FuncInfo.InsertPt) + removeDeadCode(FuncInfo.InsertPt, SavedInsertPt); DL = DebugLoc(); return false; @@ -779,8 +821,11 @@ FastISel::SelectInstruction(const Instruction *I) { /// the CFG. void FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) { - if (FuncInfo.MBB->isLayoutSuccessor(MSucc)) { - // The unconditional fall-through case, which needs no instructions. + + if (FuncInfo.MBB->getBasicBlock()->size() > 1 && FuncInfo.MBB->isLayoutSuccessor(MSucc)) { + // For more accurate line information if this is the only instruction + // in the block then emit it, otherwise we have the unconditional + // fall-through case, which needs no instructions. } else { // The unconditional branch case. TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL, @@ -1354,8 +1399,8 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // exactly one register for each non-void instruction. EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { - // Promote MVT::i1. - if (VT == MVT::i1) + // Handle integer promotions, though, because they're common and easy. + if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT); else { FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index b052740..8dde919 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "function-lowering-info" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -68,7 +69,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { GetReturnInfo(Fn->getReturnType(), Fn->getAttributes().getRetAttributes(), Outs, TLI); CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), *MF, - Fn->isVarArg(), + Fn->isVarArg(), Outs, Fn->getContext()); // Initialize the mapping of values to registers. This is only set up for @@ -92,14 +93,16 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { // candidate. I.e., it would trigger the creation of a stack protector. bool MayNeedSP = (AI->isArrayAllocation() || - (TySize > 8 && isa<ArrayType>(Ty) && + (TySize >= 8 && isa<ArrayType>(Ty) && cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8))); StaticAllocaMap[AI] = - MF->getFrameInfo()->CreateStackObject(TySize, Align, false, MayNeedSP); + MF->getFrameInfo()->CreateStackObject(TySize, Align, false, + MayNeedSP); } for (; BB != EB; ++BB) - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); + I != E; ++I) { // Mark values used outside their block as exported, by allocating // a virtual register for them. if (isUsedOutsideOfDefiningBlock(I)) @@ -355,7 +358,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { /// argument. This overrides previous frame index entry for this argument, /// if any. void FunctionLoweringInfo::setArgumentFrameIndex(const Argument *A, - int FI) { + int FI) { ByValArgFrameIndexMap[A] = FI; } @@ -367,10 +370,34 @@ int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) { ByValArgFrameIndexMap.find(A); if (I != ByValArgFrameIndexMap.end()) return I->second; - DEBUG(dbgs() << "Argument does not have assigned frame index!"); + DEBUG(dbgs() << "Argument does not have assigned frame index!\n"); return 0; } +/// ComputeUsesVAFloatArgument - Determine if any floating-point values are +/// being passed to this variadic function, and set the MachineModuleInfo's +/// usesVAFloatArgument flag if so. This flag is used to emit an undefined +/// reference to _fltused on Windows, which will link in MSVCRT's +/// floating-point support. +void llvm::ComputeUsesVAFloatArgument(const CallInst &I, + MachineModuleInfo *MMI) +{ + FunctionType *FT = cast<FunctionType>( + I.getCalledValue()->getType()->getContainedType(0)); + if (FT->isVarArg() && !MMI->usesVAFloatArgument()) { + for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { + Type* T = I.getArgOperand(i)->getType(); + for (po_iterator<Type*> i = po_begin(T), e = po_end(T); + i != e; ++i) { + if (i->isFloatingPointTy()) { + MMI->setUsesVAFloatArgument(true); + return; + } + } + } + } +} + /// AddCatchInfo - Extract the personality and type infos from an eh.selector /// call, and add them to the specified machine basic block. void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, @@ -425,34 +452,6 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, } } -void llvm::CopyCatchInfo(const BasicBlock *SuccBB, const BasicBlock *LPad, - MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) { - SmallPtrSet<const BasicBlock*, 4> Visited; - - // The 'eh.selector' call may not be in the direct successor of a basic block, - // but could be several successors deeper. If we don't find it, try going one - // level further. <rdar://problem/8824861> - while (Visited.insert(SuccBB)) { - for (BasicBlock::const_iterator I = SuccBB->begin(), E = --SuccBB->end(); - I != E; ++I) - if (const EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) { - // Apply the catch info to LPad. - AddCatchInfo(*EHSel, MMI, FLI.MBBMap[LPad]); -#ifndef NDEBUG - if (!FLI.MBBMap[SuccBB]->isLandingPad()) - FLI.CatchInfoFound.insert(EHSel); -#endif - return; - } - - const BranchInst *Br = dyn_cast<BranchInst>(SuccBB->getTerminator()); - if (Br && Br->isUnconditional()) - SuccBB = Br->getSuccessor(0); - else - break; - } -} - /// AddLandingPadInfo - Extract the exception handling information from the /// landingpad instruction and add them to the specified machine module info. void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI, diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 2ff66f8..1467d88 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -294,7 +294,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, const TargetRegisterClass *DstRC = 0; if (IIOpNum < II->getNumOperands()) DstRC = TII->getRegClass(*II, IIOpNum, TRI); - assert((DstRC || (MCID.isVariadic() && IIOpNum >= MCID.getNumOperands())) && + assert((DstRC || (MI->isVariadic() && IIOpNum >= MCID.getNumOperands())) && "Don't have operand info for this instruction!"); if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) { unsigned NewVReg = MRI->createVirtualRegister(DstRC); @@ -351,6 +351,8 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, MI->addOperand(MachineOperand::CreateFPImm(CFP)); } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) { MI->addOperand(MachineOperand::CreateReg(R->getReg(), false)); + } else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateRegMask(RM->getRegMask())); } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) { MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(), TGA->getOffset(), TGA->getTargetFlags())); @@ -574,14 +576,19 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, for (unsigned i = 1; i != NumOps; ++i) { SDValue Op = Node->getOperand(i); if ((i & 1) == 0) { - unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue(); - unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); - const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); - const TargetRegisterClass *SRC = + RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(i-1)); + // Skip physical registers as they don't have a vreg to get and we'll + // insert copies for them in TwoAddressInstructionPass anyway. + if (!R || !TargetRegisterInfo::isPhysicalRegister(R->getReg())) { + unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue(); + unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); + const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); + const TargetRegisterClass *SRC = TRI->getMatchingSuperRegClass(RC, TRC, SubIdx); - if (SRC && SRC != RC) { - MRI->setRegClass(NewVReg, SRC); - RC = SRC; + if (SRC && SRC != RC) { + MRI->setRegClass(NewVReg, SRC); + RC = SRC; + } } } AddOperand(MI, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false, @@ -700,33 +707,6 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Create the new machine instruction. MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II); - // The MachineInstr constructor adds implicit-def operands. Scan through - // these to determine which are dead. - if (MI->getNumOperands() != 0 && - Node->getValueType(Node->getNumValues()-1) == MVT::Glue) { - // First, collect all used registers. - SmallVector<unsigned, 8> UsedRegs; - for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) - if (F->getOpcode() == ISD::CopyFromReg) - UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg()); - else { - // Collect declared implicit uses. - const MCInstrDesc &MCID = TII->get(F->getMachineOpcode()); - UsedRegs.append(MCID.getImplicitUses(), - MCID.getImplicitUses() + MCID.getNumImplicitUses()); - // In addition to declared implicit uses, we must also check for - // direct RegisterSDNode operands. - for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) - if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) { - unsigned Reg = R->getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) - UsedRegs.push_back(Reg); - } - } - // Then mark unused registers as dead. - MI->setPhysRegsDeadExcept(UsedRegs, *TRI); - } - // Add result register values for things that are defined by this // instruction. if (NumResults) @@ -751,30 +731,63 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // hook knows where in the block to insert the replacement code. MBB->insert(InsertPos, MI); + // The MachineInstr may also define physregs instead of virtregs. These + // physreg values can reach other instructions in different ways: + // + // 1. When there is a use of a Node value beyond the explicitly defined + // virtual registers, we emit a CopyFromReg for one of the implicitly + // defined physregs. This only happens when HasPhysRegOuts is true. + // + // 2. A CopyFromReg reading a physreg may be glued to this instruction. + // + // 3. A glued instruction may implicitly use a physreg. + // + // 4. A glued instruction may use a RegisterSDNode operand. + // + // Collect all the used physreg defs, and make sure that any unused physreg + // defs are marked as dead. + SmallVector<unsigned, 8> UsedRegs; + // Additional results must be physical register defs. if (HasPhysRegOuts) { for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()]; - if (Node->hasAnyUseOfValue(i)) - EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); - // If there are no uses, mark the register as dead now, so that - // MachineLICM/Sink can see that it's dead. Don't do this if the - // node has a Glue value, for the benefit of targets still using - // Glue for values in physregs. - else if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue) - MI->addRegisterDead(Reg, TRI); + if (!Node->hasAnyUseOfValue(i)) + continue; + // This implicitly defined physreg has a use. + UsedRegs.push_back(Reg); + EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); } } - // If the instruction has implicit defs and the node doesn't, mark the - // implicit def as dead. If the node has any glue outputs, we don't do this - // because we don't know what implicit defs are being used by glued nodes. - if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue) - if (const unsigned *IDList = II.getImplicitDefs()) { - for (unsigned i = NumResults, e = II.getNumDefs()+II.getNumImplicitDefs(); - i != e; ++i) - MI->addRegisterDead(IDList[i-II.getNumDefs()], TRI); + // Scan the glue chain for any used physregs. + if (Node->getValueType(Node->getNumValues()-1) == MVT::Glue) { + for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) { + if (F->getOpcode() == ISD::CopyFromReg) { + UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg()); + continue; + } else if (F->getOpcode() == ISD::CopyToReg) { + // Skip CopyToReg nodes that are internal to the glue chain. + continue; + } + // Collect declared implicit uses. + const MCInstrDesc &MCID = TII->get(F->getMachineOpcode()); + UsedRegs.append(MCID.getImplicitUses(), + MCID.getImplicitUses() + MCID.getNumImplicitUses()); + // In addition to declared implicit uses, we must also check for + // direct RegisterSDNode operands. + for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) + if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) { + unsigned Reg = R->getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + UsedRegs.push_back(Reg); + } } + } + + // Finally mark unused registers as dead. + if (!UsedRegs.empty() || II.getImplicitDefs()) + MI->setPhysRegsDeadExcept(UsedRegs, *TRI); // Run post-isel target hook to adjust this instruction if needed. #ifdef NDEBUG @@ -794,10 +807,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, Node->dump(); #endif llvm_unreachable("This target-independent node should have been selected!"); - break; case ISD::EntryToken: llvm_unreachable("EntryToken should have been excluded from the schedule!"); - break; case ISD::MERGE_VALUES: case ISD::TokenFactor: // fall thru break; diff --git a/lib/CodeGen/SelectionDAG/LLVMBuild.txt b/lib/CodeGen/SelectionDAG/LLVMBuild.txt new file mode 100644 index 0000000..81d2e00 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/CodeGen/SelectionDAG/LLVMBuild.txt -----------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = SelectionDAG +parent = CodeGen +required_libraries = Analysis CodeGen Core MC Support Target TransformUtils diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 63255ae..a96a997 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -46,37 +46,18 @@ using namespace llvm; /// will attempt merge setcc and brc instructions into brcc's. /// namespace { -class SelectionDAGLegalize { +class SelectionDAGLegalize : public SelectionDAG::DAGUpdateListener { const TargetMachine &TM; const TargetLowering &TLI; SelectionDAG &DAG; - // Libcall insertion helpers. - - /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been - /// legalized. We use this to ensure that calls are properly serialized - /// against each other, including inserted libcalls. - SDValue LastCALLSEQ_END; - - /// IsLegalizingCall - This member is used *only* for purposes of providing - /// helpful assertions that a libcall isn't created while another call is - /// being legalized (which could lead to non-serialized call sequences). - bool IsLegalizingCall; - - /// LegalizedNodes - For nodes that are of legal width, and that have more - /// than one use, this map indicates what regularized operand to use. This - /// allows us to avoid legalizing the same thing more than once. - DenseMap<SDValue, SDValue> LegalizedNodes; + /// LegalizePosition - The iterator for walking through the node list. + SelectionDAG::allnodes_iterator LegalizePosition; - void AddLegalizedOperand(SDValue From, SDValue To) { - LegalizedNodes.insert(std::make_pair(From, To)); - // If someone requests legalization of the new node, return itself. - if (From != To) - LegalizedNodes.insert(std::make_pair(To, To)); + /// LegalizedNodes - The set of nodes which have already been legalized. + SmallPtrSet<SDNode *, 16> LegalizedNodes; - // Transfer SDDbgValues. - DAG.TransferDbgValues(From, To); - } + // Libcall insertion helpers. public: explicit SelectionDAGLegalize(SelectionDAG &DAG); @@ -84,9 +65,8 @@ public: void LegalizeDAG(); private: - /// LegalizeOp - Return a legal replacement for the given operation, with - /// all legal operands. - SDValue LegalizeOp(SDValue O); + /// LegalizeOp - Legalizes the given operation. + void LegalizeOp(SDNode *Node); SDValue OptimizeFloatStore(StoreSDNode *ST); @@ -105,10 +85,7 @@ private: /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, SDValue N1, SDValue N2, - SmallVectorImpl<int> &Mask) const; - - bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, - SmallPtrSet<SDNode*, 32> &NodesLeadingTo); + ArrayRef<int> Mask) const; void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, DebugLoc dl); @@ -150,10 +127,46 @@ private: SDValue ExpandInsertToVectorThroughStack(SDValue Op); SDValue ExpandVectorBuildThroughStack(SDNode* Node); + SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP); + std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); - void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results); - void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results); + void ExpandNode(SDNode *Node); + void PromoteNode(SDNode *Node); + + void ForgetNode(SDNode *N) { + LegalizedNodes.erase(N); + if (LegalizePosition == SelectionDAG::allnodes_iterator(N)) + ++LegalizePosition; + } + +public: + // DAGUpdateListener implementation. + virtual void NodeDeleted(SDNode *N, SDNode *E) { + ForgetNode(N); + } + virtual void NodeUpdated(SDNode *N) {} + + // Node replacement helpers + void ReplacedNode(SDNode *N) { + if (N->use_empty()) { + DAG.RemoveDeadNode(N, this); + } else { + ForgetNode(N); + } + } + void ReplaceNode(SDNode *Old, SDNode *New) { + DAG.ReplaceAllUsesWith(Old, New, this); + ReplacedNode(Old); + } + void ReplaceNode(SDValue Old, SDValue New) { + DAG.ReplaceAllUsesWith(Old, New, this); + ReplacedNode(Old.getNode()); + } + void ReplaceNode(SDNode *Old, const SDValue *New) { + DAG.ReplaceAllUsesWith(Old, New, this); + ReplacedNode(Old); + } }; } @@ -164,7 +177,7 @@ private: SDValue SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, SDValue N1, SDValue N2, - SmallVectorImpl<int> &Mask) const { + ArrayRef<int> Mask) const { unsigned NumMaskElts = VT.getVectorNumElements(); unsigned NumDestElts = NVT.getVectorNumElements(); unsigned NumEltsGrowth = NumDestElts / NumMaskElts; @@ -195,145 +208,37 @@ SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag) } void SelectionDAGLegalize::LegalizeDAG() { - LastCALLSEQ_END = DAG.getEntryNode(); - IsLegalizingCall = false; - - // The legalize process is inherently a bottom-up recursive process (users - // legalize their uses before themselves). Given infinite stack space, we - // could just start legalizing on the root and traverse the whole graph. In - // practice however, this causes us to run out of stack space on large basic - // blocks. To avoid this problem, compute an ordering of the nodes where each - // node is only legalized after all of its operands are legalized. DAG.AssignTopologicalOrder(); - for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) - LegalizeOp(SDValue(I, 0)); - // Finally, it's possible the root changed. Get the new root. - SDValue OldRoot = DAG.getRoot(); - assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?"); - DAG.setRoot(LegalizedNodes[OldRoot]); - - LegalizedNodes.clear(); - - // Remove dead nodes now. - DAG.RemoveDeadNodes(); -} - - -/// FindCallEndFromCallStart - Given a chained node that is part of a call -/// sequence, find the CALLSEQ_END node that terminates the call sequence. -static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) { - // Nested CALLSEQ_START/END constructs aren't yet legal, - // but we can DTRT and handle them correctly here. - if (Node->getOpcode() == ISD::CALLSEQ_START) - depth++; - else if (Node->getOpcode() == ISD::CALLSEQ_END) { - depth--; - if (depth == 0) - return Node; - } - if (Node->use_empty()) - return 0; // No CallSeqEnd - - // The chain is usually at the end. - SDValue TheChain(Node, Node->getNumValues()-1); - if (TheChain.getValueType() != MVT::Other) { - // Sometimes it's at the beginning. - TheChain = SDValue(Node, 0); - if (TheChain.getValueType() != MVT::Other) { - // Otherwise, hunt for it. - for (unsigned i = 1, e = Node->getNumValues(); i != e; ++i) - if (Node->getValueType(i) == MVT::Other) { - TheChain = SDValue(Node, i); - break; - } - - // Otherwise, we walked into a node without a chain. - if (TheChain.getValueType() != MVT::Other) - return 0; + // Visit all the nodes. We start in topological order, so that we see + // nodes with their original operands intact. Legalization can produce + // new nodes which may themselves need to be legalized. Iterate until all + // nodes have been legalized. + for (;;) { + bool AnyLegalized = false; + for (LegalizePosition = DAG.allnodes_end(); + LegalizePosition != DAG.allnodes_begin(); ) { + --LegalizePosition; + + SDNode *N = LegalizePosition; + if (LegalizedNodes.insert(N)) { + AnyLegalized = true; + LegalizeOp(N); + } } - } - - for (SDNode::use_iterator UI = Node->use_begin(), - E = Node->use_end(); UI != E; ++UI) { - - // Make sure to only follow users of our token chain. - SDNode *User = *UI; - for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) - if (User->getOperand(i) == TheChain) - if (SDNode *Result = FindCallEndFromCallStart(User, depth)) - return Result; - } - return 0; -} - -/// FindCallStartFromCallEnd - Given a chained node that is part of a call -/// sequence, find the CALLSEQ_START node that initiates the call sequence. -static SDNode *FindCallStartFromCallEnd(SDNode *Node) { - int nested = 0; - assert(Node && "Didn't find callseq_start for a call??"); - while (Node->getOpcode() != ISD::CALLSEQ_START || nested) { - Node = Node->getOperand(0).getNode(); - assert(Node->getOperand(0).getValueType() == MVT::Other && - "Node doesn't have a token chain argument!"); - switch (Node->getOpcode()) { - default: + if (!AnyLegalized) break; - case ISD::CALLSEQ_START: - if (!nested) - return Node; - nested--; - break; - case ISD::CALLSEQ_END: - nested++; - break; - } - } - return 0; -} - -/// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to -/// see if any uses can reach Dest. If no dest operands can get to dest, -/// legalize them, legalize ourself, and return false, otherwise, return true. -/// -/// Keep track of the nodes we fine that actually do lead to Dest in -/// NodesLeadingTo. This avoids retraversing them exponential number of times. -/// -bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, - SmallPtrSet<SDNode*, 32> &NodesLeadingTo) { - if (N == Dest) return true; // N certainly leads to Dest :) - - // If we've already processed this node and it does lead to Dest, there is no - // need to reprocess it. - if (NodesLeadingTo.count(N)) return true; - - // If the first result of this node has been already legalized, then it cannot - // reach N. - if (LegalizedNodes.count(SDValue(N, 0))) return false; - - // Okay, this node has not already been legalized. Check and legalize all - // operands. If none lead to Dest, then we can legalize this node. - bool OperandsLeadToDest = false; - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - OperandsLeadToDest |= // If an operand leads to Dest, so do we. - LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest, - NodesLeadingTo); - if (OperandsLeadToDest) { - NodesLeadingTo.insert(N); - return true; } - // Okay, this node looks safe, legalize it and return false. - LegalizeOp(SDValue(N, 0)); - return false; + // Remove dead nodes now. + DAG.RemoveDeadNodes(); } /// ExpandConstantFP - Expands the ConstantFP node to an integer constant or /// a load from the constant pool. -static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, - SelectionDAG &DAG, const TargetLowering &TLI) { +SDValue +SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { bool Extend = false; DebugLoc dl = CFP->getDebugLoc(); @@ -369,20 +274,27 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy()); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); - if (Extend) - return DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, - DAG.getEntryNode(), - CPIdx, MachinePointerInfo::getConstantPool(), - VT, false, false, Alignment); - return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), false, false, - Alignment); + if (Extend) { + SDValue Result = + DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, + DAG.getEntryNode(), + CPIdx, MachinePointerInfo::getConstantPool(), + VT, false, false, Alignment); + return Result; + } + SDValue Result = + DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(), false, false, false, + Alignment); + return Result; } /// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores. -static -SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, - const TargetLowering &TLI) { +static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, + const TargetLowering &TLI, + SelectionDAGLegalize *DAGLegalize) { + assert(ST->getAddressingMode() == ISD::UNINDEXED && + "unaligned indexed stores not implemented!"); SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); SDValue Val = ST->getValue(); @@ -397,8 +309,10 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // same size, then a (misaligned) int store. // FIXME: Does not handle truncating floating point stores! SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val); - return DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), - ST->isVolatile(), ST->isNonTemporal(), Alignment); + Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), + ST->isVolatile(), ST->isNonTemporal(), Alignment); + DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); + return; } // Do a (aligned) store to a stack slot, then copy from the stack slot // to the final destination using (unaligned) integer loads and stores. @@ -427,7 +341,7 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Load one integer register's worth from the stack slot. SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); // Store it to the final location. Remember the store. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, ST->getPointerInfo().getWithOffset(Offset), @@ -458,8 +372,11 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, ST->isNonTemporal(), MinAlign(ST->getAlignment(), Offset))); // The order of the stores doesn't matter - say it with a TokenFactor. - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], - Stores.size()); + SDValue Result = + DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], + Stores.size()); + DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); + return; } assert(ST->getMemoryVT().isInteger() && !ST->getMemoryVT().isVector() && @@ -488,13 +405,18 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), Alignment); - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); + SDValue Result = + DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); + DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); } /// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads. -static -SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, - const TargetLowering &TLI) { +static void +ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, + const TargetLowering &TLI, + SDValue &ValResult, SDValue &ChainResult) { + assert(LD->getAddressingMode() == ISD::UNINDEXED && + "unaligned indexed loads not implemented!"); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); EVT VT = LD->getValueType(0); @@ -507,13 +429,15 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // then bitconvert to floating point or vector. SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(), LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + LD->isNonTemporal(), + LD->isInvariant(), LD->getAlignment()); SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad); if (VT.isFloatingPoint() && LoadedVT != VT) Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result); - SDValue Ops[] = { Result, Chain }; - return DAG.getMergeValues(Ops, 2, dl); + ValResult = Result; + ChainResult = Chain; + return; } // Copy the value to a (aligned) stack slot using (unaligned) integer @@ -537,6 +461,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset), LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), MinAlign(LD->getAlignment(), Offset)); // Follow the load with a store to the stack slot. Remember the store. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, @@ -572,8 +497,9 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, MachinePointerInfo(), LoadedVT, false, false, 0); // Callers expect a MERGE_VALUES node. - SDValue Ops[] = { Load, TF }; - return DAG.getMergeValues(Ops, 2, dl); + ValResult = Load; + ChainResult = TF; + return; } assert(LoadedVT.isInteger() && !LoadedVT.isVector() && "Unaligned load of unsupported type."); @@ -626,8 +552,8 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); - SDValue Ops[] = { Result, TF }; - return DAG.getMergeValues(Ops, 2, dl); + ValResult = Result; + ChainResult = TF; } /// PerformInsertVectorEltInMemory - Some target cannot handle a variable @@ -672,7 +598,8 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, false, false, 0); // Load the updated vector. return DAG.getLoad(VT, dl, Ch, StackPtr, - MachinePointerInfo::getFixedStack(SPFI), false, false, 0); + MachinePointerInfo::getFixedStack(SPFI), false, false, + false, 0); } @@ -763,11 +690,10 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { /// LegalizeOp - Return a legal replacement for the given operation, with /// all legal operands. -SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { - if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. - return Op; +void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { + if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. + return; - SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) @@ -782,13 +708,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Node->getOperand(i).getOpcode() == ISD::TargetConstant) && "Unexpected illegal type!"); - // Note that LegalizeOp may be reentered even from single-use nodes, which - // means that we always must cache transformed nodes. - DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); - if (I != LegalizedNodes.end()) return I->second; - SDValue Tmp1, Tmp2, Tmp3, Tmp4; - SDValue Result = Op; bool isCustom = false; // Figure out the correct action; the way to query this varies by opcode @@ -798,10 +718,15 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_VOID: - case ISD::VAARG: case ISD::STACKSAVE: Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); break; + case ISD::VAARG: + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getValueType(0)); + if (Action != TargetLowering::Promote) + Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); + break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::EXTRACT_VECTOR_ELT: @@ -865,7 +790,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::FRAME_TO_ARGS_OFFSET: case ISD::EH_SJLJ_SETJMP: case ISD::EH_SJLJ_LONGJMP: - case ISD::EH_SJLJ_DISPATCHSETUP: // These operations lie about being legal: when they claim to be legal, // they should actually be expanded. Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); @@ -882,17 +806,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (Action == TargetLowering::Legal) Action = TargetLowering::Custom; break; - case ISD::BUILD_VECTOR: - // A weird case: legalization for BUILD_VECTOR never legalizes the - // operands! - // FIXME: This really sucks... changing it isn't semantically incorrect, - // but it massively pessimizes the code for floating-point BUILD_VECTORs - // because ConstantFP operands get legalized into constant pool loads - // before the BUILD_VECTOR code can see them. It doesn't usually bite, - // though, because BUILD_VECTORS usually get lowered into other nodes - // which get legalized properly. - SimpleFinishLegalizing = false; - break; default: if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { Action = TargetLowering::Legal; @@ -903,22 +816,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } if (SimpleFinishLegalizing) { - SmallVector<SDValue, 8> Ops, ResultVals; + SmallVector<SDValue, 8> Ops; for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) - Ops.push_back(LegalizeOp(Node->getOperand(i))); + Ops.push_back(Node->getOperand(i)); switch (Node->getOpcode()) { default: break; - case ISD::BR: - case ISD::BRIND: - case ISD::BR_JT: - case ISD::BR_CC: - case ISD::BRCOND: - // Branches tweak the chain to include LastCALLSEQ_END - Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0], - LastCALLSEQ_END); - Ops[0] = LegalizeOp(Ops[0]); - LastCALLSEQ_END = DAG.getEntryNode(); - break; case ISD::SHL: case ISD::SRL: case ISD::SRA: @@ -926,57 +828,66 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::ROTR: // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. - if (!Ops[1].getValueType().isVector()) - Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(), - Ops[1])); + if (!Ops[1].getValueType().isVector()) { + SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[1]); + HandleSDNode Handle(SAO); + LegalizeOp(SAO.getNode()); + Ops[1] = Handle.getValue(); + } break; case ISD::SRL_PARTS: case ISD::SRA_PARTS: case ISD::SHL_PARTS: // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. - if (!Ops[2].getValueType().isVector()) - Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(), - Ops[2])); + if (!Ops[2].getValueType().isVector()) { + SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[2]); + HandleSDNode Handle(SAO); + LegalizeOp(SAO.getNode()); + Ops[2] = Handle.getValue(); + } break; } - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), Ops.data(), - Ops.size()), 0); + SDNode *NewNode = DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size()); + if (NewNode != Node) { + DAG.ReplaceAllUsesWith(Node, NewNode, this); + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + DAG.TransferDbgValues(SDValue(Node, i), SDValue(NewNode, i)); + ReplacedNode(Node); + Node = NewNode; + } switch (Action) { case TargetLowering::Legal: - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - ResultVals.push_back(Result.getValue(i)); - break; + return; case TargetLowering::Custom: // FIXME: The handling for custom lowering with multiple results is // a complete mess. - Tmp1 = TLI.LowerOperation(Result, DAG); + Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG); if (Tmp1.getNode()) { + SmallVector<SDValue, 8> ResultVals; for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) { if (e == 1) ResultVals.push_back(Tmp1); else ResultVals.push_back(Tmp1.getValue(i)); } - break; + if (Tmp1.getNode() != Node || Tmp1.getResNo() != 0) { + DAG.ReplaceAllUsesWith(Node, ResultVals.data(), this); + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]); + ReplacedNode(Node); + } + return; } // FALL THROUGH case TargetLowering::Expand: - ExpandNode(Result.getNode(), ResultVals); - break; + ExpandNode(Node); + return; case TargetLowering::Promote: - PromoteNode(Result.getNode(), ResultVals); - break; - } - if (!ResultVals.empty()) { - for (unsigned i = 0, e = ResultVals.size(); i != e; ++i) { - if (ResultVals[i] != SDValue(Node, i)) - ResultVals[i] = LegalizeOp(ResultVals[i]); - AddLegalizedOperand(SDValue(Node, i), ResultVals[i]); - } - return ResultVals[Op.getResNo()]; + PromoteNode(Node); + return; } } @@ -987,160 +898,24 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Node->dump( &DAG); dbgs() << "\n"; #endif - assert(0 && "Do not know how to legalize this operator!"); + llvm_unreachable("Do not know how to legalize this operator!"); - case ISD::SRA: - case ISD::SRL: - case ISD::SHL: { - // Scalarize vector SRA/SRL/SHL. - EVT VT = Node->getValueType(0); - assert(VT.isVector() && "Unable to legalize non-vector shift"); - assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal"); - unsigned NumElem = VT.getVectorNumElements(); - - SmallVector<SDValue, 8> Scalars; - for (unsigned Idx = 0; Idx < NumElem; Idx++) { - SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - VT.getScalarType(), - Node->getOperand(0), DAG.getIntPtrConstant(Idx)); - SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - VT.getScalarType(), - Node->getOperand(1), DAG.getIntPtrConstant(Idx)); - Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, - VT.getScalarType(), Ex, Sh)); - } - Result = DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), - &Scalars[0], Scalars.size()); - break; - } - - case ISD::BUILD_VECTOR: - switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) { - default: assert(0 && "This action is not supported yet!"); - case TargetLowering::Custom: - Tmp3 = TLI.LowerOperation(Result, DAG); - if (Tmp3.getNode()) { - Result = Tmp3; - break; - } - // FALLTHROUGH - case TargetLowering::Expand: - Result = ExpandBUILD_VECTOR(Result.getNode()); - break; - } - break; - case ISD::CALLSEQ_START: { - SDNode *CallEnd = FindCallEndFromCallStart(Node); - - // Recursively Legalize all of the inputs of the call end that do not lead - // to this call start. This ensures that any libcalls that need be inserted - // are inserted *before* the CALLSEQ_START. - {SmallPtrSet<SDNode*, 32> NodesLeadingTo; - for (unsigned i = 0, e = CallEnd->getNumOperands(); i != e; ++i) - LegalizeAllNodesNotLeadingTo(CallEnd->getOperand(i).getNode(), Node, - NodesLeadingTo); - } - - // Now that we have legalized all of the inputs (which may have inserted - // libcalls), create the new CALLSEQ_START node. - Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. - - // Merge in the last call to ensure that this call starts after the last - // call ended. - if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) { - Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - Tmp1, LastCALLSEQ_END); - Tmp1 = LegalizeOp(Tmp1); - } - - // Do not try to legalize the target-specific arguments (#1+). - if (Tmp1 != Node->getOperand(0)) { - SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end()); - Ops[0] = Tmp1; - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), &Ops[0], - Ops.size()), Result.getResNo()); - } - - // Remember that the CALLSEQ_START is legalized. - AddLegalizedOperand(Op.getValue(0), Result); - if (Node->getNumValues() == 2) // If this has a flag result, remember it. - AddLegalizedOperand(Op.getValue(1), Result.getValue(1)); - - // Now that the callseq_start and all of the non-call nodes above this call - // sequence have been legalized, legalize the call itself. During this - // process, no libcalls can/will be inserted, guaranteeing that no calls - // can overlap. - assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!"); - // Note that we are selecting this call! - LastCALLSEQ_END = SDValue(CallEnd, 0); - IsLegalizingCall = true; - - // Legalize the call, starting from the CALLSEQ_END. - LegalizeOp(LastCALLSEQ_END); - assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!"); - return Result; - } + case ISD::CALLSEQ_START: case ISD::CALLSEQ_END: - // If the CALLSEQ_START node hasn't been legalized first, legalize it. This - // will cause this node to be legalized as well as handling libcalls right. - if (LastCALLSEQ_END.getNode() != Node) { - LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0)); - DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); - assert(I != LegalizedNodes.end() && - "Legalizing the call start should have legalized this node!"); - return I->second; - } - - // Otherwise, the call start has been legalized and everything is going - // according to plan. Just legalize ourselves normally here. - Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. - // Do not try to legalize the target-specific arguments (#1+), except for - // an optional flag input. - if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Glue){ - if (Tmp1 != Node->getOperand(0)) { - SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end()); - Ops[0] = Tmp1; - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - &Ops[0], Ops.size()), - Result.getResNo()); - } - } else { - Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1)); - if (Tmp1 != Node->getOperand(0) || - Tmp2 != Node->getOperand(Node->getNumOperands()-1)) { - SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end()); - Ops[0] = Tmp1; - Ops.back() = Tmp2; - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - &Ops[0], Ops.size()), - Result.getResNo()); - } - } - assert(IsLegalizingCall && "Call sequence imbalance between start/end?"); - // This finishes up call legalization. - IsLegalizingCall = false; - - // If the CALLSEQ_END node has a flag, remember that we legalized it. - AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0)); - if (Node->getNumValues() == 2) - AddLegalizedOperand(SDValue(Node, 1), Result.getValue(1)); - return Result.getValue(Op.getResNo()); + break; case ISD::LOAD: { LoadSDNode *LD = cast<LoadSDNode>(Node); - Tmp1 = LegalizeOp(LD->getChain()); // Legalize the chain. - Tmp2 = LegalizeOp(LD->getBasePtr()); // Legalize the base pointer. + Tmp1 = LD->getChain(); // Legalize the chain. + Tmp2 = LD->getBasePtr(); // Legalize the base pointer. ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) { EVT VT = Node->getValueType(0); - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - Tmp1, Tmp2, LD->getOffset()), - Result.getResNo()); - Tmp3 = Result.getValue(0); - Tmp4 = Result.getValue(1); + Tmp3 = SDValue(Node, 0); + Tmp4 = SDValue(Node, 1); switch (TLI.getOperationAction(Node->getOpcode(), VT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned load and the target doesn't support it, // expand it. @@ -1148,20 +923,16 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), - DAG, TLI); - Tmp3 = Result.getOperand(0); - Tmp4 = Result.getOperand(1); - Tmp3 = LegalizeOp(Tmp3); - Tmp4 = LegalizeOp(Tmp4); + ExpandUnalignedLoad(cast<LoadSDNode>(Node), + DAG, TLI, Tmp3, Tmp4); } } break; case TargetLowering::Custom: Tmp1 = TLI.LowerOperation(Tmp3, DAG); if (Tmp1.getNode()) { - Tmp3 = LegalizeOp(Tmp1); - Tmp4 = LegalizeOp(Tmp1.getValue(1)); + Tmp3 = Tmp1; + Tmp4 = Tmp1.getValue(1); } break; case TargetLowering::Promote: { @@ -1172,17 +943,19 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); - Tmp3 = LegalizeOp(DAG.getNode(ISD::BITCAST, dl, VT, Tmp1)); - Tmp4 = LegalizeOp(Tmp1.getValue(1)); + LD->isInvariant(), LD->getAlignment()); + Tmp3 = DAG.getNode(ISD::BITCAST, dl, VT, Tmp1); + Tmp4 = Tmp1.getValue(1); break; } } - // Since loads produce two values, make sure to remember that we - // legalized both of them. - AddLegalizedOperand(SDValue(Node, 0), Tmp3); - AddLegalizedOperand(SDValue(Node, 1), Tmp4); - return Op.getResNo() ? Tmp4 : Tmp3; + if (Tmp4.getNode() != Node) { + assert(Tmp3.getNode() != Node && "Load must be completely replaced"); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp3); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp4); + ReplacedNode(Node); + } + return; } EVT SrcVT = LD->getMemoryVT(); @@ -1213,9 +986,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { ISD::LoadExtType NewExtType = ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; - Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), - Tmp1, Tmp2, LD->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); + SDValue Result = + DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), + Tmp1, Tmp2, LD->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); Ch = Result.getValue(1); // The chain. @@ -1230,8 +1004,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result.getValueType(), Result, DAG.getValueType(SrcVT)); - Tmp1 = LegalizeOp(Result); - Tmp2 = LegalizeOp(Ch); + Tmp1 = Result; + Tmp2 = Ch; } else if (SrcWidth & (SrcWidth - 1)) { // If not loading a power-of-2 number of bits, expand as two loads. assert(!SrcVT.isVector() && "Unsupported extload!"); @@ -1274,7 +1048,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. - Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); } else { // Big endian - avoid unaligned loads. // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 @@ -1304,29 +1078,25 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. - Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); } - Tmp1 = LegalizeOp(Result); - Tmp2 = LegalizeOp(Ch); + Tmp2 = Ch; } else { switch (TLI.getLoadExtAction(ExtType, SrcVT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Custom: isCustom = true; // FALLTHROUGH case TargetLowering::Legal: - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - Tmp1, Tmp2, LD->getOffset()), - Result.getResNo()); - Tmp1 = Result.getValue(0); - Tmp2 = Result.getValue(1); + Tmp1 = SDValue(Node, 0); + Tmp2 = SDValue(Node, 1); if (isCustom) { - Tmp3 = TLI.LowerOperation(Result, DAG); + Tmp3 = TLI.LowerOperation(SDValue(Node, 0), DAG); if (Tmp3.getNode()) { - Tmp1 = LegalizeOp(Tmp3); - Tmp2 = LegalizeOp(Tmp3.getValue(1)); + Tmp1 = Tmp3; + Tmp2 = Tmp3.getValue(1); } } else { // If this is an unaligned load and the target doesn't support it, @@ -1337,12 +1107,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), - DAG, TLI); - Tmp1 = Result.getOperand(0); - Tmp2 = Result.getOperand(1); - Tmp1 = LegalizeOp(Tmp1); - Tmp2 = LegalizeOp(Tmp2); + ExpandUnalignedLoad(cast<LoadSDNode>(Node), + DAG, TLI, Tmp1, Tmp2); } } } @@ -1352,7 +1118,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + LD->isInvariant(), LD->getAlignment()); unsigned ExtendOp; switch (ExtType) { case ISD::EXTLOAD: @@ -1363,95 +1129,13 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; default: llvm_unreachable("Unexpected extend load type!"); } - Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); - Tmp1 = LegalizeOp(Result); // Relegalize new nodes. - Tmp2 = LegalizeOp(Load.getValue(1)); + Tmp1 = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); + Tmp2 = Load.getValue(1); break; } - // If this is a promoted vector load, and the vector element types are - // legal, then scalarize it. - if (ExtType == ISD::EXTLOAD && SrcVT.isVector() && - TLI.isTypeLegal(Node->getValueType(0).getScalarType())) { - SmallVector<SDValue, 8> LoadVals; - SmallVector<SDValue, 8> LoadChains; - unsigned NumElem = SrcVT.getVectorNumElements(); - unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; - - for (unsigned Idx=0; Idx<NumElem; Idx++) { - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(Stride)); - SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, - Node->getValueType(0).getScalarType(), - Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride), - SrcVT.getScalarType(), - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); - - LoadVals.push_back(ScalarLoad.getValue(0)); - LoadChains.push_back(ScalarLoad.getValue(1)); - } - Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &LoadChains[0], LoadChains.size()); - SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl, - Node->getValueType(0), &LoadVals[0], LoadVals.size()); - - Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. - Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes. - break; - } - - // If this is a promoted vector load, and the vector element types are - // illegal, create the promoted vector from bitcasted segments. - if (ExtType == ISD::EXTLOAD && SrcVT.isVector()) { - EVT MemElemTy = Node->getValueType(0).getScalarType(); - EVT SrcSclrTy = SrcVT.getScalarType(); - unsigned SizeRatio = - (MemElemTy.getSizeInBits() / SrcSclrTy.getSizeInBits()); - - SmallVector<SDValue, 8> LoadVals; - SmallVector<SDValue, 8> LoadChains; - unsigned NumElem = SrcVT.getVectorNumElements(); - unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; - - for (unsigned Idx=0; Idx<NumElem; Idx++) { - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(Stride)); - SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, - SrcVT.getScalarType(), - Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride), - SrcVT.getScalarType(), - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); - if (TLI.isBigEndian()) { - // MSB (which is garbage, comes first) - LoadVals.push_back(ScalarLoad.getValue(0)); - for (unsigned i = 0; i<SizeRatio-1; ++i) - LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType())); - } else { - // LSB (which is data, comes first) - for (unsigned i = 0; i<SizeRatio-1; ++i) - LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType())); - LoadVals.push_back(ScalarLoad.getValue(0)); - } - LoadChains.push_back(ScalarLoad.getValue(1)); - } - - Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &LoadChains[0], LoadChains.size()); - EVT TempWideVector = EVT::getVectorVT(*DAG.getContext(), - SrcVT.getScalarType(), NumElem*SizeRatio); - SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl, - TempWideVector, &LoadVals[0], LoadVals.size()); - - // Cast to the correct type - ValRes = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), ValRes); - - Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. - Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes. - break; - - } + assert(!SrcVT.isVector() && + "Vector Loads are handled in LegalizeVectorOps"); // FIXME: This does not work for vectors on most targets. Sign- and // zero-extend operations are currently folded into extending loads, @@ -1461,10 +1145,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { "EXTLOAD should always be supported!"); // Turn the unsupported load into an EXTLOAD followed by an explicit // zero/sign extend inreg. - Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), - Tmp1, Tmp2, LD->getPointerInfo(), SrcVT, - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), + Tmp1, Tmp2, LD->getPointerInfo(), SrcVT, + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); SDValue ValRes; if (ExtType == ISD::SEXTLOAD) ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, @@ -1472,42 +1156,41 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result, DAG.getValueType(SrcVT)); else ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); - Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. - Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes. + Tmp1 = ValRes; + Tmp2 = Result.getValue(1); break; } } // Since loads produce two values, make sure to remember that we legalized // both of them. - AddLegalizedOperand(SDValue(Node, 0), Tmp1); - AddLegalizedOperand(SDValue(Node, 1), Tmp2); - return Op.getResNo() ? Tmp2 : Tmp1; + if (Tmp2.getNode() != Node) { + assert(Tmp1.getNode() != Node && "Load must be completely replaced"); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp1); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp2); + ReplacedNode(Node); + } + break; } case ISD::STORE: { StoreSDNode *ST = cast<StoreSDNode>(Node); - Tmp1 = LegalizeOp(ST->getChain()); // Legalize the chain. - Tmp2 = LegalizeOp(ST->getBasePtr()); // Legalize the pointer. + Tmp1 = ST->getChain(); + Tmp2 = ST->getBasePtr(); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); if (!ST->isTruncatingStore()) { if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { - Result = SDValue(OptStore, 0); + ReplaceNode(ST, OptStore); break; } { - Tmp3 = LegalizeOp(ST->getValue()); - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - Tmp1, Tmp3, Tmp2, - ST->getOffset()), - Result.getResNo()); - + Tmp3 = ST->getValue(); EVT VT = Tmp3.getValueType(); switch (TLI.getOperationAction(ISD::STORE, VT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned store and the target doesn't support it, // expand it. @@ -1515,27 +1198,31 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) - Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), - DAG, TLI); + ExpandUnalignedStore(cast<StoreSDNode>(Node), + DAG, TLI, this); } break; case TargetLowering::Custom: - Tmp1 = TLI.LowerOperation(Result, DAG); - if (Tmp1.getNode()) Result = Tmp1; + Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Tmp1.getNode()) + ReplaceNode(SDValue(Node, 0), Tmp1); break; - case TargetLowering::Promote: + case TargetLowering::Promote: { assert(VT.isVector() && "Unknown legal promote case!"); Tmp3 = DAG.getNode(ISD::BITCAST, dl, TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3); - Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, - ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment); + SDValue Result = + DAG.getStore(Tmp1, dl, Tmp3, Tmp2, + ST->getPointerInfo(), isVolatile, + isNonTemporal, Alignment); + ReplaceNode(SDValue(Node, 0), Result); break; } + } break; } } else { - Tmp3 = LegalizeOp(ST->getValue()); + Tmp3 = ST->getValue(); EVT StVT = ST->getMemoryVT(); unsigned StWidth = StVT.getSizeInBits(); @@ -1547,8 +1234,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StVT.getStoreSizeInBits()); Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT); - Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); + SDValue Result = + DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); + ReplaceNode(SDValue(Node, 0), Result); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. assert(!StVT.isVector() && "Unsupported truncstore!"); @@ -1602,17 +1291,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } // The order of the stores doesn't matter. - Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + ReplaceNode(SDValue(Node, 0), Result); } else { - if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() || - Tmp2 != ST->getBasePtr()) - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - Tmp1, Tmp3, Tmp2, - ST->getOffset()), - Result.getResNo()); - switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned store and the target doesn't support it, // expand it. @@ -1620,120 +1303,24 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) - Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), - DAG, TLI); + ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); } break; case TargetLowering::Custom: - Result = TLI.LowerOperation(Result, DAG); + ReplaceNode(SDValue(Node, 0), + TLI.LowerOperation(SDValue(Node, 0), DAG)); break; case TargetLowering::Expand: - - EVT WideScalarVT = Tmp3.getValueType().getScalarType(); - EVT NarrowScalarVT = StVT.getScalarType(); - - if (StVT.isVector()) { - unsigned NumElem = StVT.getVectorNumElements(); - // The type of the data we want to save - EVT RegVT = Tmp3.getValueType(); - EVT RegSclVT = RegVT.getScalarType(); - // The type of data as saved in memory. - EVT MemSclVT = StVT.getScalarType(); - - bool RegScalarLegal = TLI.isTypeLegal(RegSclVT); - bool MemScalarLegal = TLI.isTypeLegal(MemSclVT); - - // We need to expand this store. If the register element type - // is legal then we can scalarize the vector and use - // truncating stores. - if (RegScalarLegal) { - // Cast floats into integers - unsigned ScalarSize = MemSclVT.getSizeInBits(); - EVT EltVT = EVT::getIntegerVT(*DAG.getContext(), ScalarSize); - - // Round odd types to the next pow of two. - if (!isPowerOf2_32(ScalarSize)) - ScalarSize = NextPowerOf2(ScalarSize); - - // Store Stride in bytes - unsigned Stride = ScalarSize/8; - // Extract each of the elements from the original vector - // and save them into memory individually. - SmallVector<SDValue, 8> Stores; - for (unsigned Idx = 0; Idx < NumElem; Idx++) { - SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - RegSclVT, Tmp3, DAG.getIntPtrConstant(Idx)); - - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(Stride)); - - // This scalar TruncStore may be illegal, but we lehalize it - // later. - SDValue Store = DAG.getTruncStore(Tmp1, dl, Ex, Tmp2, - ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT, - isVolatile, isNonTemporal, Alignment); - - Stores.push_back(Store); - } - - Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &Stores[0], Stores.size()); - break; - } - - // The scalar register type is illegal. - // For example saving <2 x i64> -> <2 x i32> on a x86. - // In here we bitcast the value into a vector of smaller parts and - // save it using smaller scalars. - if (!RegScalarLegal && MemScalarLegal) { - // Store Stride in bytes - unsigned Stride = MemSclVT.getSizeInBits()/8; - - unsigned SizeRatio = - (RegSclVT.getSizeInBits() / MemSclVT.getSizeInBits()); - - EVT CastValueVT = EVT::getVectorVT(*DAG.getContext(), - MemSclVT, - SizeRatio * NumElem); - - // Cast the wide elem vector to wider vec with smaller elem type. - // Example <2 x i64> -> <4 x i32> - Tmp3 = DAG.getNode(ISD::BITCAST, dl, CastValueVT, Tmp3); - - SmallVector<SDValue, 8> Stores; - for (unsigned Idx=0; Idx < NumElem * SizeRatio; Idx++) { - // Extract the Ith element. - SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - NarrowScalarVT, Tmp3, DAG.getIntPtrConstant(Idx)); - // Bump pointer. - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(Stride)); - - // Store if, this element is: - // - First element on big endian, or - // - Last element on little endian - if (( TLI.isBigEndian() && (Idx % SizeRatio == 0)) || - ((!TLI.isBigEndian() && (Idx % SizeRatio == SizeRatio-1)))) { - SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2, - ST->getPointerInfo().getWithOffset(Idx*Stride), - isVolatile, isNonTemporal, Alignment); - Stores.push_back(Store); - } - } - Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &Stores[0], Stores.size()); - break; - } - - assert(false && "Unable to legalize the vector trunc store!"); - }// is vector - + assert(!StVT.isVector() && + "Vector Stores are handled in LegalizeVectorOps"); // TRUNCSTORE:i16 i32 -> STORE i16 assert(TLI.isTypeLegal(StVT) && "Do not know how to expand this store!"); Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3); - Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + SDValue Result = + DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); + ReplaceNode(SDValue(Node, 0), Result); break; } } @@ -1741,17 +1328,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { break; } } - assert(Result.getValueType() == Op.getValueType() && - "Bad legalization!"); - - // Make sure that the generated code is itself legal. - if (Result != Op) - Result = LegalizeOp(Result); - - // Note that LegalizeOp may be reentered even from single-use nodes, which - // means that we always must cache transformed nodes. - AddLegalizedOperand(Op, Result); - return Result; } SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { @@ -1778,7 +1354,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { if (Op.getValueType().isVector()) return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(), - false, false, 0); + false, false, false, 0); return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, MachinePointerInfo(), Vec.getValueType().getVectorElementType(), @@ -1826,7 +1402,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { // Finally, load the updated vector. return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo, - false, false, 0); + false, false, false, 0); } SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { @@ -1876,7 +1452,8 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { StoreChain = DAG.getEntryNode(); // Result is a load from the stack slot. - return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, false, false, 0); + return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, + false, false, false, 0); } SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { @@ -1905,7 +1482,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { assert(FloatVT.isByteSized() && "Unsupported floating point type!"); // Load out a legal integer with the same sign bit as the float. SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); } else { // Little endian SDValue LoadPtr = StackPtr; // The float may be wider than the integer we are going to load. Advance @@ -1916,7 +1493,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { LoadPtr, DAG.getIntPtrConstant(ByteOffset)); // Load a legal integer containing the sign bit. SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); // Move the sign bit to the top bit of the loaded integer. unsigned BitShift = LoadTy.getSizeInBits() - (FloatVT.getSizeInBits() - 8 * ByteOffset); @@ -1984,7 +1561,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, EVT OpVT = LHS.getValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); switch (TLI.getCondCodeAction(CCCode, OpVT)) { - default: assert(0 && "Unknown condition code action!"); + default: llvm_unreachable("Unknown condition code action!"); case TargetLowering::Legal: // Nothing to do. break; @@ -1992,7 +1569,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID; unsigned Opc = 0; switch (CCCode) { - default: assert(0 && "Don't know how to expand this condition!"); + default: llvm_unreachable("Don't know how to expand this condition!"); case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break; case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break; case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break; @@ -2058,7 +1635,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, // Result is a load from the stack slot. if (SlotSize == DestSize) return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, - false, false, DestAlign); + false, false, false, DestAlign); assert(SlotSize < DestSize && "Unknown extension!"); return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, @@ -2081,7 +1658,7 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { false, false, 0); return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr, MachinePointerInfo::getFixedStack(SPFI), - false, false, 0); + false, false, false, 0); } @@ -2127,7 +1704,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { // If all elements are constants, create a load from the constant pool. if (isConstant) { - std::vector<Constant*> CV; + SmallVector<Constant*, 16> CV; for (unsigned i = 0, e = NumElems; i != e; ++i) { if (ConstantFPSDNode *V = dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) { @@ -2155,7 +1732,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), - false, false, Alignment); + false, false, false, Alignment); } if (!MoreThanTwoValues) { @@ -2190,12 +1767,6 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { // and leave the Hi part unset. SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { - assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); - // The input chain to this libcall is the entry node of the function. - // Legalizing the call will automatically add the previous call to the - // dependence. - SDValue InChain = DAG.getEntryNode(); - TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { @@ -2209,26 +1780,31 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - // Splice the libcall in wherever FindInputOutputChains tells us to. Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + // By default, the input chain to this libcall is the entry node of the + // function. If the libcall is going to be emitted as a tail call then + // TLI.isUsedByReturnOnly will change it to the right chain if the return + // node which is being folded has a non-entry input chain. + SDValue InChain = DAG.getEntryNode(); + // isTailCall may be true since the callee does not reference caller stack // frame. Check if it's in the right position. - bool isTailCall = isInTailCallPosition(DAG, Node, TLI); + SDValue TCChain = InChain; + bool isTailCall = isInTailCallPosition(DAG, Node, TCChain, TLI); + if (isTailCall) + InChain = TCChain; + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), isTailCall, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); if (!CallInfo.second.getNode()) // It's a tailcall, return the chain (which is the DAG root). return DAG.getRoot(); - // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that - // was added by LowerCallTo (guaranteeing proper serialization of calls). - LegalizeOp(CallInfo.second); return CallInfo.first; } @@ -2254,15 +1830,10 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, - false, 0, TLI.getLibcallCallingConv(LC), false, - /*isReturnValueUsed=*/true, + false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); - // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that - // was added by LowerCallTo (guaranteeing proper serialization of calls). - LegalizeOp(CallInfo.second); - return CallInfo.first; } @@ -2272,7 +1843,6 @@ std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { - assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); SDValue InChain = Node->getOperand(0); TargetLowering::ArgListTy Args; @@ -2289,18 +1859,13 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - // Splice the libcall in wherever FindInputOutputChains tells us to. Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); - // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that - // was added by LowerCallTo (guaranteeing proper serialization of calls). - LegalizeOp(CallInfo.second); return CallInfo; } @@ -2311,7 +1876,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, RTLIB::Libcall Call_PPCF128) { RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { - default: assert(0 && "Unexpected request for libcall!"); + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::f32: LC = Call_F32; break; case MVT::f64: LC = Call_F64; break; case MVT::f80: LC = Call_F80; break; @@ -2328,7 +1893,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, RTLIB::Libcall Call_I128) { RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { - default: assert(0 && "Unexpected request for libcall!"); + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC = Call_I8; break; case MVT::i16: LC = Call_I16; break; case MVT::i32: LC = Call_I32; break; @@ -2343,7 +1908,7 @@ static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI) { RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { - default: assert(0 && "Unexpected request for libcall!"); + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; @@ -2388,7 +1953,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { - default: assert(0 && "Unexpected request for libcall!"); + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; @@ -2426,21 +1991,16 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - // Splice the libcall in wherever FindInputOutputChains tells us to. DebugLoc dl = Node->getDebugLoc(); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, - /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); - - // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that - // was added by LowerCallTo (guaranteeing proper serialization of calls). - LegalizeOp(CallInfo.second); + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Callee, Args, DAG, dl); // Remainder is loaded back from the stack frame. - SDValue Rem = DAG.getLoad(RetVT, dl, LastCALLSEQ_END, FIPtr, - MachinePointerInfo(), false, false, 0); + SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr, + MachinePointerInfo(), false, false, false, 0); Results.push_back(CallInfo.first); Results.push_back(Rem); } @@ -2489,7 +2049,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, false, false, 0); // load the constructed double SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, false, 0); // FP constant to bias correct the final result SDValue Bias = DAG.getConstantFP(isSigned ? BitsToDouble(0x4330000080000000ULL) : @@ -2611,7 +2171,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, // offset depending on the data type. uint64_t FF; switch (Op0.getValueType().getSimpleVT().SimpleTy) { - default: assert(0 && "Unsupported integer type!"); + default: llvm_unreachable("Unsupported integer type!"); case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float) case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float) case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float) @@ -2629,13 +2189,15 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, if (DestVT == MVT::f32) FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), - false, false, Alignment); + false, false, false, Alignment); else { - FudgeInReg = - LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, - DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - MVT::f32, false, false, Alignment)); + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, + DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(), + MVT::f32, false, false, Alignment); + HandleSDNode Handle(Load); + LegalizeOp(Load.getNode()); + FudgeInReg = Handle.getValue(); } return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg); @@ -2731,7 +2293,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { EVT SHVT = TLI.getShiftAmountTy(VT); SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; switch (VT.getSimpleVT().SimpleTy) { - default: assert(0 && "Unhandled Expand type in BSWAP!"); + default: llvm_unreachable("Unhandled Expand type in BSWAP!"); case MVT::i16: Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); @@ -2788,7 +2350,7 @@ static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) { SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl) { switch (Opc) { - default: assert(0 && "Cannot expand this yet!"); + default: llvm_unreachable("Cannot expand this yet!"); case ISD::CTPOP: { EVT VT = Op.getValueType(); EVT ShVT = TLI.getShiftAmountTy(VT); @@ -2831,6 +2393,9 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, return Op; } + case ISD::CTLZ_ZERO_UNDEF: + // This trivially expands to CTLZ. + return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op); case ISD::CTLZ: { // for now, we do this: // x = x | (x >> 1); @@ -2852,6 +2417,9 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, Op = DAG.getNOT(dl, Op, VT); return DAG.getNode(ISD::CTPOP, dl, VT, Op); } + case ISD::CTTZ_ZERO_UNDEF: + // This trivially expands to CTTZ. + return DAG.getNode(ISD::CTTZ, dl, Op.getValueType(), Op); case ISD::CTTZ: { // for now, we use: { return popcount(~x & (x - 1)); } // unless the target has ctlz but not ctpop, in which case we use: @@ -2881,7 +2449,6 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { switch (Opc) { default: llvm_unreachable("Unhandled atomic intrinsic Expand!"); - break; case ISD::ATOMIC_SWAP: switch (VT.SimpleTy) { default: llvm_unreachable("Unexpected value type for atomic!"); @@ -2959,14 +2526,16 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { return ExpandChainLibCall(LC, Node, false); } -void SelectionDAGLegalize::ExpandNode(SDNode *Node, - SmallVectorImpl<SDValue> &Results) { +void SelectionDAGLegalize::ExpandNode(SDNode *Node) { + SmallVector<SDValue, 8> Results; DebugLoc dl = Node->getDebugLoc(); SDValue Tmp1, Tmp2, Tmp3, Tmp4; switch (Node->getOpcode()) { case ISD::CTPOP: case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl); Results.push_back(Tmp1); break; @@ -2986,7 +2555,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::PREFETCH: case ISD::VAEND: case ISD::EH_SJLJ_LONGJMP: - case ISD::EH_SJLJ_DISPATCHSETUP: // If the target didn't expand these, there's nothing to do, so just // preserve the chain and be done. Results.push_back(Node->getOperand(0)); @@ -3006,7 +2574,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, /*isTailCall=*/false, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__sync_synchronize", TLI.getPointerTy()), Args, DAG, dl); @@ -3083,7 +2651,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, /*isTailCall=*/false, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("abort", TLI.getPointerTy()), Args, DAG, dl); Results.push_back(CallResult.second); @@ -3166,7 +2734,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, unsigned Align = Node->getConstantOperandVal(3); SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, - MachinePointerInfo(V), false, false, 0); + MachinePointerInfo(V), + false, false, false, 0); SDValue VAList = VAListLoad; if (Align > TLI.getMinStackArgumentAlignment()) { @@ -3191,7 +2760,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, MachinePointerInfo(V), false, false, 0); // Load the actual argument out of the pointer VAList Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(), - false, false, 0)); + false, false, false, 0)); Results.push_back(Results[0].getValue(1)); break; } @@ -3202,7 +2771,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue(); Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0), Node->getOperand(2), MachinePointerInfo(VS), - false, false, 0); + false, false, false, 0); Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), MachinePointerInfo(VD), false, false, 0); Results.push_back(Tmp1); @@ -3236,15 +2805,57 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Node->getOperand(2), dl)); break; case ISD::VECTOR_SHUFFLE: { - SmallVector<int, 8> Mask; - cast<ShuffleVectorSDNode>(Node)->getMask(Mask); + SmallVector<int, 32> NewMask; + ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask(); EVT VT = Node->getValueType(0); EVT EltVT = VT.getVectorElementType(); - if (!TLI.isTypeLegal(EltVT)) - EltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT); + SDValue Op0 = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + if (!TLI.isTypeLegal(EltVT)) { + + EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT); + + // BUILD_VECTOR operands are allowed to be wider than the element type. + // But if NewEltVT is smaller that EltVT the BUILD_VECTOR does not accept it + if (NewEltVT.bitsLT(EltVT)) { + + // Convert shuffle node. + // If original node was v4i64 and the new EltVT is i32, + // cast operands to v8i32 and re-build the mask. + + // Calculate new VT, the size of the new VT should be equal to original. + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, + VT.getSizeInBits()/NewEltVT.getSizeInBits()); + assert(NewVT.bitsEq(VT)); + + // cast operands to new VT + Op0 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op0); + Op1 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op1); + + // Convert the shuffle mask + unsigned int factor = NewVT.getVectorNumElements()/VT.getVectorNumElements(); + + // EltVT gets smaller + assert(factor > 0); + + for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) { + if (Mask[i] < 0) { + for (unsigned fi = 0; fi < factor; ++fi) + NewMask.push_back(Mask[i]); + } + else { + for (unsigned fi = 0; fi < factor; ++fi) + NewMask.push_back(Mask[i]*factor+fi); + } + } + Mask = NewMask; + VT = NewVT; + } + EltVT = NewEltVT; + } unsigned NumElems = VT.getVectorNumElements(); - SmallVector<SDValue, 8> Ops; + SmallVector<SDValue, 16> Ops; for (unsigned i = 0; i != NumElems; ++i) { if (Mask[i] < 0) { Ops.push_back(DAG.getUNDEF(EltVT)); @@ -3253,14 +2864,17 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, unsigned Idx = Mask[i]; if (Idx < NumElems) Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, - Node->getOperand(0), + Op0, DAG.getIntPtrConstant(Idx))); else Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, - Node->getOperand(1), + Op1, DAG.getIntPtrConstant(Idx - NumElems))); } + Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); + // We may have changed the BUILD_VECTOR type. Cast it back to the Node type. + Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Tmp1); Results.push_back(Tmp1); break; } @@ -3408,10 +3022,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node); // Check to see if this FP immediate is already legal. // If this is a legal constant, turn it into a TargetConstantFP node. - if (TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0))) - Results.push_back(SDValue(Node, 0)); - else - Results.push_back(ExpandConstantFP(CFP, true, DAG, TLI)); + if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0))) + Results.push_back(ExpandConstantFP(CFP, true)); break; } case ISD::EHSELECTION: { @@ -3423,13 +3035,23 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, break; } case ISD::EXCEPTIONADDR: { - unsigned Reg = TLI.getExceptionAddressRegister(); + unsigned Reg = TLI.getExceptionPointerRegister(); assert(Reg && "Can't expand to unknown register!"); Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg, Node->getValueType(0))); Results.push_back(Results[0].getValue(1)); break; } + case ISD::FSUB: { + EVT VT = Node->getValueType(0); + assert(TLI.isOperationLegalOrCustom(ISD::FADD, VT) && + TLI.isOperationLegalOrCustom(ISD::FNEG, VT) && + "Don't know how to expand this FP subtraction!"); + Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1)); + Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1); + Results.push_back(Tmp1); + break; + } case ISD::SUB: { EVT VT = Node->getValueType(0); assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) && @@ -3657,6 +3279,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, DAG.getIntPtrConstant(0)); TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, DAG.getIntPtrConstant(1)); + // Ret is a node with an illegal type. Because such things are not + // generally permitted during this phase of legalization, delete the + // node. The above EXTRACT_ELEMENT nodes should have been folded. + DAG.DeleteNode(Ret.getNode()); } if (isSigned) { @@ -3797,7 +3423,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, dl); - LastCALLSEQ_END = DAG.getEntryNode(); assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!"); Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); @@ -3807,6 +3432,35 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Results.push_back(Tmp1); break; } + case ISD::BUILD_VECTOR: + Results.push_back(ExpandBUILD_VECTOR(Node)); + break; + case ISD::SRA: + case ISD::SRL: + case ISD::SHL: { + // Scalarize vector SRA/SRL/SHL. + EVT VT = Node->getValueType(0); + assert(VT.isVector() && "Unable to legalize non-vector shift"); + assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal"); + unsigned NumElem = VT.getVectorNumElements(); + + SmallVector<SDValue, 8> Scalars; + for (unsigned Idx = 0; Idx < NumElem; Idx++) { + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + VT.getScalarType(), + Node->getOperand(0), DAG.getIntPtrConstant(Idx)); + SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + VT.getScalarType(), + Node->getOperand(1), DAG.getIntPtrConstant(Idx)); + Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, + VT.getScalarType(), Ex, Sh)); + } + SDValue Result = + DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), + &Scalars[0], Scalars.size()); + ReplaceNode(SDValue(Node, 0), Result); + break; + } case ISD::GLOBAL_OFFSET_TABLE: case ISD::GlobalAddress: case ISD::GlobalTLSAddress: @@ -3817,13 +3471,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_VOID: // FIXME: Custom lowering for these operations shouldn't return null! - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - Results.push_back(SDValue(Node, i)); break; } + + // Replace the original node with the legalized result. + if (!Results.empty()) + ReplaceNode(Node, Results.data()); } -void SelectionDAGLegalize::PromoteNode(SDNode *Node, - SmallVectorImpl<SDValue> &Results) { + +void SelectionDAGLegalize::PromoteNode(SDNode *Node) { + SmallVector<SDValue, 8> Results; EVT OVT = Node->getValueType(0); if (Node->getOpcode() == ISD::UINT_TO_FP || Node->getOpcode() == ISD::SINT_TO_FP || @@ -3835,20 +3492,24 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, SDValue Tmp1, Tmp2, Tmp3; switch (Node->getOpcode()) { case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: case ISD::CTPOP: // Zero extend the argument. Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); - // Perform the larger operation. + // Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is + // already the correct result. Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); if (Node->getOpcode() == ISD::CTTZ) { - //if Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT) + // FIXME: This should set a bit in the zero extended value instead. Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT), ISD::SETEQ); Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1); - } else if (Node->getOpcode() == ISD::CTLZ) { + } else if (Node->getOpcode() == ISD::CTLZ || + Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) { // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT)) Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1, DAG.getConstant(NVT.getSizeInBits() - @@ -3877,6 +3538,33 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, Node->getOpcode() == ISD::SINT_TO_FP, dl); Results.push_back(Tmp1); break; + case ISD::VAARG: { + SDValue Chain = Node->getOperand(0); // Get the chain. + SDValue Ptr = Node->getOperand(1); // Get the pointer. + + unsigned TruncOp; + if (OVT.isVector()) { + TruncOp = ISD::BITCAST; + } else { + assert(OVT.isInteger() + && "VAARG promotion is supported only for vectors or integer types"); + TruncOp = ISD::TRUNCATE; + } + + // Perform the larger operation, then convert back + Tmp1 = DAG.getVAArg(NVT, dl, Chain, Ptr, Node->getOperand(2), + Node->getConstantOperandVal(3)); + Chain = Tmp1.getValue(1); + + Tmp2 = DAG.getNode(TruncOp, dl, OVT, Tmp1); + + // Modified the chain result - switch anything that used the old chain to + // use the new one. + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp2); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain); + ReplacedNode(Node); + break; + } case ISD::AND: case ISD::OR: case ISD::XOR: { @@ -3924,8 +3612,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, break; } case ISD::VECTOR_SHUFFLE: { - SmallVector<int, 8> Mask; - cast<ShuffleVectorSDNode>(Node)->getMask(Mask); + ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask(); // Cast the two input vectors. Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0)); @@ -3950,7 +3637,31 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, Tmp1, Tmp2, Node->getOperand(2))); break; } + case ISD::FDIV: + case ISD::FREM: + case ISD::FPOW: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); + Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); + Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, + Tmp3, DAG.getIntPtrConstant(0))); + break; } + case ISD::FLOG2: + case ISD::FEXP2: + case ISD::FLOG: + case ISD::FEXP: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); + Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, + Tmp2, DAG.getIntPtrConstant(0))); + break; + } + } + + // Replace the original node with the legalized result. + if (!Results.empty()) + ReplaceNode(Node, Results.data()); } // SelectionDAG::Legalize - This is the entry point for the file. diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 7c1cc69..e393896 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -479,8 +479,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { if (L->getExtensionType() == ISD::NON_EXTLOAD) { NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(), - L->getPointerInfo(), NVT, - L->isVolatile(), L->isNonTemporal(), L->getAlignment()); + L->getPointerInfo(), NVT, L->isVolatile(), + L->isNonTemporal(), false, L->getAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -492,7 +492,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { L->getMemoryVT(), dl, L->getChain(), L->getBasePtr(), L->getOffset(), L->getPointerInfo(), L->getMemoryVT(), L->isVolatile(), - L->isNonTemporal(), L->getAlignment()); + L->isNonTemporal(), false, L->getAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -672,7 +672,7 @@ void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, case ISD::SETUEQ: LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64; break; - default: assert(false && "Do not know how to soften this setcc!"); + default: llvm_unreachable("Do not know how to soften this setcc!"); } } @@ -1212,7 +1212,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, switch (SrcVT.getSimpleVT().SimpleTy) { default: - assert(false && "Unsupported UINT_TO_FP!"); + llvm_unreachable("Unsupported UINT_TO_FP!"); case MVT::i32: Parts = TwoE32; break; diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index a5c4c2d..95ddb1e 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -20,7 +20,6 @@ #include "LegalizeTypes.h" #include "llvm/DerivedTypes.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -57,8 +56,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::Constant: Res = PromoteIntRes_Constant(N); break; case ISD::CONVERT_RNDSAT: Res = PromoteIntRes_CONVERT_RNDSAT(N); break; + case ISD::CTLZ_ZERO_UNDEF: case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break; case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break; + case ISD::CTTZ_ZERO_UNDEF: case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break; @@ -211,13 +212,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { DebugLoc dl = N->getDebugLoc(); switch (getTypeAction(InVT)) { - default: - assert(false && "Unknown type action!"); - break; case TargetLowering::TypeLegal: break; case TargetLowering::TypePromoteInteger: - if (NOutVT.bitsEq(NInVT)) + if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector() && !NInVT.isVector()) // The input promotes to the same size. Convert the promoted value. return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp)); break; @@ -251,9 +249,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp); } case TargetLowering::TypeWidenVector: - if (OutVT.bitsEq(NInVT)) - // The input is widened to the same size. Convert to the widened value. - return DAG.getNode(ISD::BITCAST, dl, OutVT, GetWidenedVector(InOp)); + // The input is widened to the same size. Convert to the widened value. + // Make sure that the outgoing value is not a vector, because this would + // make us bitcast between two vectors which are legalized in different ways. + if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector()) + return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp)); } return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, @@ -312,7 +312,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { DebugLoc dl = N->getDebugLoc(); EVT OVT = N->getValueType(0); EVT NVT = Op.getValueType(); - Op = DAG.getNode(ISD::CTLZ, dl, NVT, Op); + Op = DAG.getNode(N->getOpcode(), dl, NVT, Op); // Subtract off the extra leading bits in the bigger type. return DAG.getNode(ISD::SUB, dl, NVT, Op, DAG.getConstant(NVT.getSizeInBits() - @@ -330,13 +330,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { EVT OVT = N->getValueType(0); EVT NVT = Op.getValueType(); DebugLoc dl = N->getDebugLoc(); - // The count is the same in the promoted type except if the original - // value was zero. This can be handled by setting the bit just off - // the top of the original type. - APInt TopBit(NVT.getSizeInBits(), 0); - TopBit.setBit(OVT.getSizeInBits()); - Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT)); - return DAG.getNode(ISD::CTTZ, dl, NVT, Op); + if (N->getOpcode() == ISD::CTTZ) { + // The count is the same in the promoted type except if the original + // value was zero. This can be handled by setting the bit just off + // the top of the original type. + APInt TopBit(NVT.getSizeInBits(), 0); + TopBit.setBit(OVT.getSizeInBits()); + Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT)); + } + return DAG.getNode(N->getOpcode(), dl, NVT, Op); } SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) { @@ -486,7 +488,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) { - SDValue Mask = GetPromotedInteger(N->getOperand(0)); + SDValue Mask = N->getOperand(0); + EVT OpTy = N->getOperand(1).getValueType(); + + // Promote all the way up to the canonical SetCC type. + Mask = PromoteTargetBoolean(Mask, TLI.getSetCCResultType(OpTy)); SDValue LHS = GetPromotedInteger(N->getOperand(1)); SDValue RHS = GetPromotedInteger(N->getOperand(2)); return DAG.getNode(ISD::VSELECT, N->getDebugLoc(), @@ -1098,8 +1104,10 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break; case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break; case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break; + case ISD::CTLZ_ZERO_UNDEF: case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break; case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break; + case ISD::CTTZ_ZERO_UNDEF: case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break; case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break; case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break; @@ -1171,7 +1179,6 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { switch (Opc) { default: llvm_unreachable("Unhandled atomic intrinsic Expand!"); - break; case ISD::ATOMIC_SWAP: switch (VT.SimpleTy) { default: llvm_unreachable("Unexpected value type for atomic!"); @@ -1355,7 +1362,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits)); APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(N->getOperand(1), HighBitMask, KnownZero, KnownOne); + DAG.ComputeMaskedBits(N->getOperand(1), KnownZero, KnownOne); // If we don't know anything about the high bits, exit. if (((KnownZero|KnownOne) & HighBitMask) == 0) @@ -1390,15 +1397,15 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { } } -#if 0 - // FIXME: This code is broken for shifts with a zero amount! // If we know that all of the high bits of the shift amount are zero, then we // can do this as a couple of simple shifts. if ((KnownZero & HighBitMask) == HighBitMask) { - // Compute 32-amt. - SDValue Amt2 = DAG.getNode(ISD::SUB, ShTy, - DAG.getConstant(NVTBits, ShTy), - Amt); + // Calculate 31-x. 31 is used instead of 32 to avoid creating an undefined + // shift if x is zero. We can use XOR here because x is known to be smaller + // than 32. + SDValue Amt2 = DAG.getNode(ISD::XOR, dl, ShTy, Amt, + DAG.getConstant(NVTBits-1, ShTy)); + unsigned Op1, Op2; switch (N->getOpcode()) { default: llvm_unreachable("Unknown shift"); @@ -1407,13 +1414,23 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break; } - Lo = DAG.getNode(N->getOpcode(), NVT, InL, Amt); - Hi = DAG.getNode(ISD::OR, NVT, - DAG.getNode(Op1, NVT, InH, Amt), - DAG.getNode(Op2, NVT, InL, Amt2)); + // When shifting right the arithmetic for Lo and Hi is swapped. + if (N->getOpcode() != ISD::SHL) + std::swap(InL, InH); + + // Use a little trick to get the bits that move from Lo to Hi. First + // shift by one bit. + SDValue Sh1 = DAG.getNode(Op2, dl, NVT, InL, DAG.getConstant(1, ShTy)); + // Then compute the remaining shift with amount-1. + SDValue Sh2 = DAG.getNode(Op2, dl, NVT, Sh1, Amt2); + + Lo = DAG.getNode(N->getOpcode(), dl, NVT, InL, Amt); + Hi = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(Op1, dl, NVT, InH, Amt),Sh2); + + if (N->getOpcode() != ISD::SHL) + std::swap(Hi, Lo); return true; } -#endif return false; } @@ -1493,8 +1510,6 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); return true; } - - return false; } void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, @@ -1702,8 +1717,8 @@ void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N, SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi, DAG.getConstant(0, NVT), ISD::SETNE); - SDValue LoLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Lo); - SDValue HiLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Hi); + SDValue LoLZ = DAG.getNode(N->getOpcode(), dl, NVT, Lo); + SDValue HiLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Hi); Lo = DAG.getNode(ISD::SELECT, dl, NVT, HiNotZero, HiLZ, DAG.getNode(ISD::ADD, dl, NVT, LoLZ, @@ -1732,8 +1747,8 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, DAG.getConstant(0, NVT), ISD::SETNE); - SDValue LoLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Lo); - SDValue HiLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Hi); + SDValue LoLZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, NVT, Lo); + SDValue HiLZ = DAG.getNode(N->getOpcode(), dl, NVT, Hi); Lo = DAG.getNode(ISD::SELECT, dl, NVT, LoNotZero, LoLZ, DAG.getNode(ISD::ADD, dl, NVT, HiLZ, @@ -1778,6 +1793,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); bool isNonTemporal = N->isNonTemporal(); + bool isInvariant = N->isInvariant(); DebugLoc dl = N->getDebugLoc(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); @@ -1808,7 +1824,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, } else if (TLI.isLittleEndian()) { // Little-endian - low bits are at low addresses. Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, isInvariant, Alignment); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -2305,12 +2321,14 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(Chain, RetTy, true, false, false, false, - 0, TLI.getLibcallCallingConv(LC), false, - true, Func, Args, DAG, dl); + 0, TLI.getLibcallCallingConv(LC), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Func, Args, DAG, dl); SplitInteger(CallInfo.first, Lo, Hi); SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp, - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, false, 0); SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2, DAG.getConstant(0, PtrVT), ISD::SETNE); @@ -2781,7 +2799,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { else if (SrcVT == MVT::i128) FF = APInt(32, F32TwoE128); else - assert(false && "Unsupported UINT_TO_FP!"); + llvm_unreachable("Unsupported UINT_TO_FP!"); // Check whether the sign bit is set. SDValue Lo, Hi; @@ -2926,38 +2944,28 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { DebugLoc dl = N->getDebugLoc(); - SDValue Op0 = N->getOperand(1); - SDValue Op1 = N->getOperand(1); - assert(Op0.getValueType() == Op1.getValueType() && - "Invalid input vector types"); - EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + EVT InElemTy = OutVT.getVectorElementType(); EVT OutElemTy = NOutVT.getVectorElementType(); - unsigned NumElem0 = Op0.getValueType().getVectorNumElements(); - unsigned NumElem1 = Op1.getValueType().getVectorNumElements(); + unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements(); unsigned NumOutElem = NOutVT.getVectorNumElements(); - assert(NumElem0 + NumElem1 == NumOutElem && - "Invalid number of incoming elements"); + unsigned NumOperands = N->getNumOperands(); + assert(NumElem * NumOperands == NumOutElem && + "Unexpected number of elements"); // Take the elements from the first vector. SmallVector<SDValue, 8> Ops(NumOutElem); - for (unsigned i = 0; i < NumElem0; ++i) { - SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - Op0.getValueType().getScalarType(), Op0, - DAG.getIntPtrConstant(i)); - Ops[i] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); - } - - // Take the elements from the second vector - for (unsigned i = 0; i < NumElem1; ++i) { - SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - Op1.getValueType().getScalarType(), Op1, - DAG.getIntPtrConstant(i)); - Ops[i + NumElem0] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); + for (unsigned i = 0; i < NumOperands; ++i) { + SDValue Op = N->getOperand(i); + for (unsigned j = 0; j < NumElem; ++j) { + SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + InElemTy, Op, DAG.getIntPtrConstant(j)); + Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); + } } return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size()); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index a4bb577..439aa4d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -222,8 +222,6 @@ bool DAGTypeLegalizer::run() { for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) { EVT ResultVT = N->getValueType(i); switch (getTypeAction(ResultVT)) { - default: - assert(false && "Unknown action!"); case TargetLowering::TypeLegal: break; // The following calls must take care of *all* of the node's results, @@ -275,8 +273,6 @@ ScanOperands: EVT OpVT = N->getOperand(i).getValueType(); switch (getTypeAction(OpVT)) { - default: - assert(false && "Unknown action!"); case TargetLowering::TypeLegal: continue; // The following calls must either replace all of the node's results @@ -752,7 +748,11 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { } void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { - assert(Result.getValueType() == Op.getValueType().getVectorElementType() && + // Note that in some cases vector operation operands may be greater than + // the vector element type. For example BUILD_VECTOR of type <1 x i1> with + // a constant i8 operand. + assert(Result.getValueType().getSizeInBits() >= + Op.getValueType().getVectorElementType().getSizeInBits() && "Invalid type for scalarized vector"); AnalyzeNewValue(Result); @@ -889,7 +889,7 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, MachinePointerInfo(), false, false, 0); // Result is a load from the stack slot. return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); } /// CustomLowerNode - Replace the node's results with custom code provided @@ -1056,8 +1056,9 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT, Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, - false, 0, TLI.getLibcallCallingConv(LC), false, - /*isReturnValueUsed=*/true, + false, 0, TLI.getLibcallCallingConv(LC), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); return CallInfo.first; } @@ -1084,12 +1085,11 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - // Splice the libcall in wherever FindInputOutputChains tells us to. Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); return CallInfo; diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index abacdac..e866445 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -521,6 +521,7 @@ private: SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); SDValue ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N); + SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT_CC(SDNode *N); SDValue ScalarizeVecRes_SETCC(SDNode *N); @@ -633,6 +634,7 @@ private: SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); + SDValue WidenVecOp_SETCC(SDNode* N); SDValue WidenVecOp_Convert(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 8e7e498..a8ff7c6 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -21,7 +21,6 @@ #include "LegalizeTypes.h" #include "llvm/Target/TargetData.h" -#include "llvm/CodeGen/PseudoSourceValue.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -46,8 +45,6 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Handle some special cases efficiently. switch (getTypeAction(InVT)) { - default: - assert(false && "Unknown type action!"); case TargetLowering::TypeLegal: case TargetLowering::TypePromoteInteger: break; @@ -130,7 +127,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { false, false, 0); // Load the first half from the stack slot. - Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, false, false, 0); + Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, + false, false, false, 0); // Increment the pointer to the other half. unsigned IncrementSize = NOutVT.getSizeInBits() / 8; @@ -140,7 +138,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Load the second half from the stack slot. Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo.getWithOffset(IncrementSize), false, - false, MinAlign(Alignment, IncrementSize)); + false, false, MinAlign(Alignment, IncrementSize)); // Handle endianness of the load. if (TLI.isBigEndian()) @@ -212,11 +210,12 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, unsigned Alignment = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); + bool isInvariant = LD->isInvariant(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, isInvariant, Alignment); // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits() / 8; @@ -224,7 +223,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), - isVolatile, isNonTemporal, + isVolatile, isNonTemporal, isInvariant, MinAlign(Alignment, IncrementSize)); // Build a factor node to remember that this load is independent of the diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index f815b00..3ae8345 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -64,6 +64,8 @@ class VectorLegalizer { // Implement vselect in terms of XOR, AND, OR when blend is not supported // by the target. SDValue ExpandVSELECT(SDValue Op); + SDValue ExpandLoad(SDValue Op); + SDValue ExpandStore(SDValue Op); SDValue ExpandFNEG(SDValue Op); // Implements vector promotion; this is essentially just bitcasting the // operands to a different type and bitcasting the result back to the @@ -124,6 +126,33 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops.data(), Ops.size()), 0); + if (Op.getOpcode() == ISD::LOAD) { + LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); + ISD::LoadExtType ExtType = LD->getExtensionType(); + if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) { + if (TLI.isLoadExtLegal(LD->getExtensionType(), LD->getMemoryVT())) + return TranslateLegalizeResults(Op, Result); + Changed = true; + return LegalizeOp(ExpandLoad(Op)); + } + } else if (Op.getOpcode() == ISD::STORE) { + StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); + EVT StVT = ST->getMemoryVT(); + EVT ValVT = ST->getValue().getValueType(); + if (StVT.isVector() && ST->isTruncatingStore()) + switch (TLI.getTruncStoreAction(ValVT, StVT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: + return TranslateLegalizeResults(Op, Result); + case TargetLowering::Custom: + Changed = true; + return LegalizeOp(TLI.LowerOperation(Result, DAG)); + case TargetLowering::Expand: + Changed = true; + return LegalizeOp(ExpandStore(Op)); + } + } + bool HasVectorValue = false; for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end(); J != E; @@ -156,8 +185,10 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::SRL: case ISD::ROTL: case ISD::ROTR: - case ISD::CTTZ: case ISD::CTLZ: + case ISD::CTTZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTTZ_ZERO_UNDEF: case ISD::CTPOP: case ISD::SELECT: case ISD::VSELECT: @@ -262,6 +293,97 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { return DAG.getNode(ISD::BITCAST, dl, VT, Op); } + +SDValue VectorLegalizer::ExpandLoad(SDValue Op) { + DebugLoc dl = Op.getDebugLoc(); + LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); + SDValue Chain = LD->getChain(); + SDValue BasePTR = LD->getBasePtr(); + EVT SrcVT = LD->getMemoryVT(); + ISD::LoadExtType ExtType = LD->getExtensionType(); + + SmallVector<SDValue, 8> LoadVals; + SmallVector<SDValue, 8> LoadChains; + unsigned NumElem = SrcVT.getVectorNumElements(); + unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; + + for (unsigned Idx=0; Idx<NumElem; Idx++) { + SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl, + Op.getNode()->getValueType(0).getScalarType(), + Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride), + SrcVT.getScalarType(), + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); + + BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, + DAG.getIntPtrConstant(Stride)); + + LoadVals.push_back(ScalarLoad.getValue(0)); + LoadChains.push_back(ScalarLoad.getValue(1)); + } + + SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &LoadChains[0], LoadChains.size()); + SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, + Op.getNode()->getValueType(0), &LoadVals[0], LoadVals.size()); + + AddLegalizedOperand(Op.getValue(0), Value); + AddLegalizedOperand(Op.getValue(1), NewChain); + + return (Op.getResNo() ? NewChain : Value); +} + +SDValue VectorLegalizer::ExpandStore(SDValue Op) { + DebugLoc dl = Op.getDebugLoc(); + StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); + SDValue Chain = ST->getChain(); + SDValue BasePTR = ST->getBasePtr(); + SDValue Value = ST->getValue(); + EVT StVT = ST->getMemoryVT(); + + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + + unsigned NumElem = StVT.getVectorNumElements(); + // The type of the data we want to save + EVT RegVT = Value.getValueType(); + EVT RegSclVT = RegVT.getScalarType(); + // The type of data as saved in memory. + EVT MemSclVT = StVT.getScalarType(); + + // Cast floats into integers + unsigned ScalarSize = MemSclVT.getSizeInBits(); + + // Round odd types to the next pow of two. + if (!isPowerOf2_32(ScalarSize)) + ScalarSize = NextPowerOf2(ScalarSize); + + // Store Stride in bytes + unsigned Stride = ScalarSize/8; + // Extract each of the elements from the original vector + // and save them into memory individually. + SmallVector<SDValue, 8> Stores; + for (unsigned Idx = 0; Idx < NumElem; Idx++) { + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + RegSclVT, Value, DAG.getIntPtrConstant(Idx)); + + // This scalar TruncStore may be illegal, but we legalize it later. + SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR, + ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT, + isVolatile, isNonTemporal, Alignment); + + BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, + DAG.getIntPtrConstant(Stride)); + + Stores.push_back(Store); + } + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &Stores[0], Stores.size()); + AddLegalizedOperand(Op, TF); + return TF; +} + SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // Implement VSELECT in terms of XOR, AND, OR // on platforms which do not support blend natively. @@ -274,10 +396,12 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // If we can't even use the basic vector operations of // AND,OR,XOR, we will have to scalarize the op. - if (!TLI.isOperationLegalOrCustom(ISD::AND, VT) || - !TLI.isOperationLegalOrCustom(ISD::XOR, VT) || - !TLI.isOperationLegalOrCustom(ISD::OR, VT)) - return DAG.UnrollVectorOp(Op.getNode()); + // Notice that the operation may be 'promoted' which means that it is + // 'bitcasted' to another type which is handled. + if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand) + return DAG.UnrollVectorOp(Op.getNode()); assert(VT.getSizeInBits() == Op.getOperand(1).getValueType().getSizeInBits() && "Invalid mask size"); @@ -301,9 +425,9 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { DebugLoc DL = Op.getDebugLoc(); // Make sure that the SINT_TO_FP and SRL instructions are available. - if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, VT) || - !TLI.isOperationLegalOrCustom(ISD::SRL, VT)) - return DAG.UnrollVectorOp(Op.getNode()); + if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) + return DAG.UnrollVectorOp(Op.getNode()); EVT SVT = VT.getScalarType(); assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) && diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 107a42b..5f23f01 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -21,7 +21,6 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Target/TargetData.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -59,6 +58,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; + case ISD::VSELECT: R = ScalarizeVecRes_VSELECT(N); break; case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break; case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break; case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break; @@ -194,7 +194,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { N->getPointerInfo(), N->getMemoryVT().getVectorElementType(), N->isVolatile(), N->isNonTemporal(), - N->getOriginalAlignment()); + N->isInvariant(), N->getOriginalAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -227,6 +227,37 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { return InOp; } +SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { + SDValue Cond = GetScalarizedVector(N->getOperand(0)); + SDValue LHS = GetScalarizedVector(N->getOperand(1)); + TargetLowering::BooleanContent ScalarBool = TLI.getBooleanContents(false); + TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true); + if (ScalarBool != VecBool) { + EVT CondVT = Cond.getValueType(); + switch (ScalarBool) { + case TargetLowering::UndefinedBooleanContent: + break; + case TargetLowering::ZeroOrOneBooleanContent: + assert(VecBool == TargetLowering::UndefinedBooleanContent || + VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent); + // Vector read from all ones, scalar expects a single 1 so mask. + Cond = DAG.getNode(ISD::AND, N->getDebugLoc(), CondVT, + Cond, DAG.getConstant(1, CondVT)); + break; + case TargetLowering::ZeroOrNegativeOneBooleanContent: + assert(VecBool == TargetLowering::UndefinedBooleanContent || + VecBool == TargetLowering::ZeroOrOneBooleanContent); + // Vector reads from a one, scalar from all ones so sign extend. + Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), CondVT, + Cond, DAG.getValueType(MVT::i1)); + break; + } + } + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), + LHS.getValueType(), Cond, LHS, + GetScalarizedVector(N->getOperand(2))); +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(1)); return DAG.getNode(ISD::SELECT, N->getDebugLoc(), @@ -405,6 +436,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { N->dump(&DAG); dbgs() << "\n"); SDValue Lo, Hi; + + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(ResNo), true)) + return; switch (N->getOpcode()) { default: @@ -442,8 +477,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::ANY_EXTEND: case ISD::CONVERT_RNDSAT: case ISD::CTLZ: - case ISD::CTPOP: case ISD::CTTZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTTZ_ZERO_UNDEF: + case ISD::CTPOP: case ISD::FABS: case ISD::FCEIL: case ISD::FCOS: @@ -677,7 +714,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Load the Lo part from the stack slot. Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); // Increment the pointer to the other part. unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; @@ -686,7 +723,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Load the Hi part from the stack slot. Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), - false, false, MinAlign(Alignment, IncrementSize)); + false, false, false, MinAlign(Alignment, IncrementSize)); } void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, @@ -713,20 +750,21 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, unsigned Alignment = LD->getOriginalAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); + bool isInvariant = LD->isInvariant(); EVT LoMemVT, HiMemVT; GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal, - Alignment); + isInvariant, Alignment); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, LD->getPointerInfo().getWithOffset(IncrementSize), - HiMemVT, isVolatile, isNonTemporal, Alignment); + HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment); // Build a factor node to remember that this load is independent of the // other one. @@ -773,46 +811,18 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, DebugLoc dl = N->getDebugLoc(); GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); - // Split the input. + // If the input also splits, handle it directly for a compile time speedup. + // Otherwise split it by hand. EVT InVT = N->getOperand(0).getValueType(); - switch (getTypeAction(InVT)) { - default: llvm_unreachable("Unexpected type action!"); - case TargetLowering::TypeLegal: { + if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) { + GetSplitVector(N->getOperand(0), Lo, Hi); + } else { EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), LoVT.getVectorNumElements()); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), DAG.getIntPtrConstant(0)); Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), DAG.getIntPtrConstant(InNVT.getVectorNumElements())); - break; - } - case TargetLowering::TypePromoteInteger: { - SDValue InOp = GetPromotedInteger(N->getOperand(0)); - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), - InOp.getValueType().getVectorElementType(), - LoVT.getVectorNumElements()); - Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(0)); - Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); - break; - } - case TargetLowering::TypeSplitVector: - GetSplitVector(N->getOperand(0), Lo, Hi); - break; - case TargetLowering::TypeWidenVector: { - // If the result needs to be split and the input needs to be widened, - // the two types must have different lengths. Use the widened result - // and extract from it to do the split. - SDValue InOp = GetWidenedVector(N->getOperand(0)); - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), - LoVT.getVectorNumElements()); - Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(0)); - Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); - break; - } } if (N->getOpcode() == ISD::FP_ROUND) { @@ -1239,6 +1249,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break; + case ISD::VSELECT: case ISD::SELECT: Res = WidenVecRes_SELECT(N); break; case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break; case ISD::SETCC: Res = WidenVecRes_SETCC(N); break; @@ -1590,12 +1601,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { DebugLoc dl = N->getDebugLoc(); switch (getTypeAction(InVT)) { - default: - assert(false && "Unknown type action!"); - break; case TargetLowering::TypeLegal: break; case TargetLowering::TypePromoteInteger: + // If the incoming type is a vector that is being promoted, then + // we know that the elements are arranged differently and that we + // must perform the conversion using a stack slot. + if (InVT.isVector()) + break; + // If the InOp is promoted to the same size, convert it. Otherwise, // fall out of the switch and widen the promoted input. InOp = GetPromotedInteger(InOp); @@ -1928,7 +1942,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { SDValue InOp1 = GetWidenedVector(N->getOperand(1)); SDValue InOp2 = GetWidenedVector(N->getOperand(2)); assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT); - return DAG.getNode(ISD::SELECT, N->getDebugLoc(), + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, Cond1, InOp1, InOp2); } @@ -2032,6 +2046,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::STORE: Res = WidenVecOp_STORE(N); break; + case ISD::SETCC: Res = WidenVecOp_SETCC(N); break; case ISD::FP_EXTEND: case ISD::FP_TO_SINT: @@ -2165,6 +2180,32 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { MVT::Other,&StChain[0],StChain.size()); } +SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { + SDValue InOp0 = GetWidenedVector(N->getOperand(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(1)); + DebugLoc dl = N->getDebugLoc(); + + // WARNING: In this code we widen the compare instruction with garbage. + // This garbage may contain denormal floats which may be slow. Is this a real + // concern ? Should we zero the unused lanes if this is a float compare ? + + // Get a new SETCC node to compare the newly widened operands. + // Only some of the compared elements are legal. + EVT SVT = TLI.getSetCCResultType(InOp0.getValueType()); + SDValue WideSETCC = DAG.getNode(ISD::SETCC, N->getDebugLoc(), + SVT, InOp0, InOp1, N->getOperand(2)); + + // Extract the needed results from the result vector. + EVT ResVT = EVT::getVectorVT(*DAG.getContext(), + SVT.getVectorElementType(), + N->getValueType(0).getVectorNumElements()); + SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, + ResVT, WideSETCC, DAG.getIntPtrConstant(0)); + + return PromoteTargetBoolean(CC, N->getValueType(0)); +} + + //===----------------------------------------------------------------------===// // Vector Widening Utilities //===----------------------------------------------------------------------===// @@ -2276,6 +2317,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain, unsigned Align = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); + bool isInvariant = LD->isInvariant(); int LdWidth = LdVT.getSizeInBits(); int WidthDiff = WidenWidth - LdWidth; // Difference @@ -2285,7 +2327,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain, EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); int NewVTWidth = NewVT.getSizeInBits(); SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(), - isVolatile, isNonTemporal, Align); + isVolatile, isNonTemporal, isInvariant, Align); LdChain.push_back(LdOp.getValue(1)); // Check if we can load the element with one instruction @@ -2323,18 +2365,37 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain, BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getIntPtrConstant(Increment)); + SDValue L; if (LdWidth < NewVTWidth) { // Our current type we are using is too large, find a better size NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); NewVTWidth = NewVT.getSizeInBits(); - } - - SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, + L = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo().getWithOffset(Offset), isVolatile, - isNonTemporal, MinAlign(Align, Increment)); - LdChain.push_back(LdOp.getValue(1)); - LdOps.push_back(LdOp); + isNonTemporal, isInvariant, + MinAlign(Align, Increment)); + LdChain.push_back(L.getValue(1)); + if (L->getValueType(0).isVector()) { + SmallVector<SDValue, 16> Loads; + Loads.push_back(L); + unsigned size = L->getValueSizeInBits(0); + while (size < LdOp->getValueSizeInBits(0)) { + Loads.push_back(DAG.getUNDEF(L->getValueType(0))); + size += L->getValueSizeInBits(0); + } + L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0), + &Loads[0], Loads.size()); + } + } else { + L = DAG.getLoad(NewVT, dl, Chain, BasePtr, + LD->getPointerInfo().getWithOffset(Offset), isVolatile, + isNonTemporal, isInvariant, MinAlign(Align, Increment)); + LdChain.push_back(L.getValue(1)); + } + + LdOps.push_back(L); + LdWidth -= NewVTWidth; } diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp new file mode 100644 index 0000000..ff0136e --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -0,0 +1,657 @@ +//===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ResourcePriorityQueue class, which is a +// SchedulingPriorityQueue that prioritizes instructions using DFA state to +// reduce the length of the critical path through the basic block +// on VLIW platforms. +// The scheduler is basically a top-down adaptable list scheduler with DFA +// resource tracking added to the cost function. +// DFA is queried as a state machine to model "packets/bundles" during +// schedule. Currently packets/bundles are discarded at the end of +// scheduling, affecting only order of instructions. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "scheduler" +#include "llvm/CodeGen/ResourcePriorityQueue.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetLowering.h" + +using namespace llvm; + +static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden, + cl::ZeroOrMore, cl::init(false), + cl::desc("Disable use of DFA during scheduling")); + +static cl::opt<signed> RegPressureThreshold( + "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5), + cl::desc("Track reg pressure and switch priority to in-depth")); + + +ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) : + Picker(this), + InstrItins(IS->getTargetLowering().getTargetMachine().getInstrItineraryData()) +{ + TII = IS->getTargetLowering().getTargetMachine().getInstrInfo(); + TRI = IS->getTargetLowering().getTargetMachine().getRegisterInfo(); + TLI = &IS->getTargetLowering(); + + const TargetMachine &tm = (*IS->MF).getTarget(); + ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,NULL); + // This hard requirment could be relaxed, but for now + // do not let it procede. + assert (ResourcesModel && "Unimplemented CreateTargetScheduleState."); + + unsigned NumRC = TRI->getNumRegClasses(); + RegLimit.resize(NumRC); + RegPressure.resize(NumRC); + std::fill(RegLimit.begin(), RegLimit.end(), 0); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) + RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, *IS->MF); + + ParallelLiveRanges = 0; + HorizontalVerticalBalance = 0; +} + +unsigned +ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) { + unsigned NumberDeps = 0; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + + SUnit *PredSU = I->getSUnit(); + const SDNode *ScegN = PredSU->getNode(); + + if (!ScegN) + continue; + + // If value is passed to CopyToReg, it is probably + // live outside BB. + switch (ScegN->getOpcode()) { + default: break; + case ISD::TokenFactor: break; + case ISD::CopyFromReg: NumberDeps++; break; + case ISD::CopyToReg: break; + case ISD::INLINEASM: break; + } + if (!ScegN->isMachineOpcode()) + continue; + + for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) { + EVT VT = ScegN->getValueType(i); + if (TLI->isTypeLegal(VT) + && (TLI->getRegClassFor(VT)->getID() == RCId)) { + NumberDeps++; + break; + } + } + } + return NumberDeps; +} + +unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU, + unsigned RCId) { + unsigned NumberDeps = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + + SUnit *SuccSU = I->getSUnit(); + const SDNode *ScegN = SuccSU->getNode(); + if (!ScegN) + continue; + + // If value is passed to CopyToReg, it is probably + // live outside BB. + switch (ScegN->getOpcode()) { + default: break; + case ISD::TokenFactor: break; + case ISD::CopyFromReg: break; + case ISD::CopyToReg: NumberDeps++; break; + case ISD::INLINEASM: break; + } + if (!ScegN->isMachineOpcode()) + continue; + + for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) { + const SDValue &Op = ScegN->getOperand(i); + EVT VT = Op.getNode()->getValueType(Op.getResNo()); + if (TLI->isTypeLegal(VT) + && (TLI->getRegClassFor(VT)->getID() == RCId)) { + NumberDeps++; + break; + } + } + } + return NumberDeps; +} + +static unsigned numberCtrlDepsInSU(SUnit *SU) { + unsigned NumberDeps = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) + if (I->isCtrl()) + NumberDeps++; + + return NumberDeps; +} + +static unsigned numberCtrlPredInSU(SUnit *SU) { + unsigned NumberDeps = 0; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (I->isCtrl()) + NumberDeps++; + + return NumberDeps; +} + +/// +/// Initialize nodes. +/// +void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) { + SUnits = &sunits; + NumNodesSolelyBlocking.resize(SUnits->size(), 0); + + for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { + SUnit *SU = &(*SUnits)[i]; + initNumRegDefsLeft(SU); + SU->NodeQueueId = 0; + } +} + +/// This heuristic is used if DFA scheduling is not desired +/// for some VLIW platform. +bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { + // The isScheduleHigh flag allows nodes with wraparound dependencies that + // cannot easily be modeled as edges with latencies to be scheduled as + // soon as possible in a top-down schedule. + if (LHS->isScheduleHigh && !RHS->isScheduleHigh) + return false; + + if (!LHS->isScheduleHigh && RHS->isScheduleHigh) + return true; + + unsigned LHSNum = LHS->NodeNum; + unsigned RHSNum = RHS->NodeNum; + + // The most important heuristic is scheduling the critical path. + unsigned LHSLatency = PQ->getLatency(LHSNum); + unsigned RHSLatency = PQ->getLatency(RHSNum); + if (LHSLatency < RHSLatency) return true; + if (LHSLatency > RHSLatency) return false; + + // After that, if two nodes have identical latencies, look to see if one will + // unblock more other nodes than the other. + unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum); + unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum); + if (LHSBlocked < RHSBlocked) return true; + if (LHSBlocked > RHSBlocked) return false; + + // Finally, just to provide a stable ordering, use the node number as a + // deciding factor. + return LHSNum < RHSNum; +} + + +/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor +/// of SU, return it, otherwise return null. +SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) { + SUnit *OnlyAvailablePred = 0; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + SUnit &Pred = *I->getSUnit(); + if (!Pred.isScheduled) { + // We found an available, but not scheduled, predecessor. If it's the + // only one we have found, keep track of it... otherwise give up. + if (OnlyAvailablePred && OnlyAvailablePred != &Pred) + return 0; + OnlyAvailablePred = &Pred; + } + } + return OnlyAvailablePred; +} + +void ResourcePriorityQueue::push(SUnit *SU) { + // Look at all of the successors of this node. Count the number of nodes that + // this node is the sole unscheduled node for. + unsigned NumNodesBlocking = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) + if (getSingleUnscheduledPred(I->getSUnit()) == SU) + ++NumNodesBlocking; + + NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking; + Queue.push_back(SU); +} + +/// Check if scheduling of this SU is possible +/// in the current packet. +bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) { + if (!SU || !SU->getNode()) + return false; + + // If this is a compound instruction, + // it is likely to be a call. Do not delay it. + if (SU->getNode()->getGluedNode()) + return true; + + // First see if the pipeline could receive this instruction + // in the current cycle. + if (SU->getNode()->isMachineOpcode()) + switch (SU->getNode()->getMachineOpcode()) { + default: + if (!ResourcesModel->canReserveResources(&TII->get( + SU->getNode()->getMachineOpcode()))) + return false; + case TargetOpcode::EXTRACT_SUBREG: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::IMPLICIT_DEF: + break; + } + + // Now see if there are no other dependencies + // to instructions alredy in the packet. + for (unsigned i = 0, e = Packet.size(); i != e; ++i) + for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(), + E = Packet[i]->Succs.end(); I != E; ++I) { + // Since we do not add pseudos to packets, might as well + // ignor order deps. + if (I->isCtrl()) + continue; + + if (I->getSUnit() == SU) + return false; + } + + return true; +} + +/// Keep track of available resources. +void ResourcePriorityQueue::reserveResources(SUnit *SU) { + // If this SU does not fit in the packet + // start a new one. + if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) { + ResourcesModel->clearResources(); + Packet.clear(); + } + + if (SU->getNode() && SU->getNode()->isMachineOpcode()) { + switch (SU->getNode()->getMachineOpcode()) { + default: + ResourcesModel->reserveResources(&TII->get( + SU->getNode()->getMachineOpcode())); + break; + case TargetOpcode::EXTRACT_SUBREG: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::IMPLICIT_DEF: + break; + } + Packet.push_back(SU); + } + // Forcefully end packet for PseudoOps. + else { + ResourcesModel->clearResources(); + Packet.clear(); + } + + // If packet is now full, reset the state so in the next cycle + // we start fresh. + if (Packet.size() >= InstrItins->IssueWidth) { + ResourcesModel->clearResources(); + Packet.clear(); + } +} + +signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) { + signed RegBalance = 0; + + if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode()) + return RegBalance; + + // Gen estimate. + for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) { + EVT VT = SU->getNode()->getValueType(i); + if (TLI->isTypeLegal(VT) + && TLI->getRegClassFor(VT) + && TLI->getRegClassFor(VT)->getID() == RCId) + RegBalance += numberRCValSuccInSU(SU, RCId); + } + // Kill estimate. + for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) { + const SDValue &Op = SU->getNode()->getOperand(i); + EVT VT = Op.getNode()->getValueType(Op.getResNo()); + if (isa<ConstantSDNode>(Op.getNode())) + continue; + + if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT) + && TLI->getRegClassFor(VT)->getID() == RCId) + RegBalance -= numberRCValPredInSU(SU, RCId); + } + return RegBalance; +} + +/// Estimates change in reg pressure from this SU. +/// It is acheived by trivial tracking of defined +/// and used vregs in dependent instructions. +/// The RawPressure flag makes this function to ignore +/// existing reg file sizes, and report raw def/use +/// balance. +signed ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) { + signed RegBalance = 0; + + if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode()) + return RegBalance; + + if (RawPressure) { + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) { + const TargetRegisterClass *RC = *I; + RegBalance += rawRegPressureDelta(SU, RC->getID()); + } + } + else { + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) { + const TargetRegisterClass *RC = *I; + if ((RegPressure[RC->getID()] + + rawRegPressureDelta(SU, RC->getID()) > 0) && + (RegPressure[RC->getID()] + + rawRegPressureDelta(SU, RC->getID()) >= RegLimit[RC->getID()])) + RegBalance += rawRegPressureDelta(SU, RC->getID()); + } + } + + return RegBalance; +} + +// Constants used to denote relative importance of +// heuristic components for cost computation. +static const unsigned PriorityOne = 200; +static const unsigned PriorityTwo = 100; +static const unsigned PriorityThree = 50; +static const unsigned PriorityFour = 15; +static const unsigned PriorityFive = 5; +static const unsigned ScaleOne = 20; +static const unsigned ScaleTwo = 10; +static const unsigned ScaleThree = 5; +static const unsigned FactorOne = 2; + +/// Returns single number reflecting benefit of scheduling SU +/// in the current cycle. +signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) { + // Initial trivial priority. + signed ResCount = 1; + + // Do not waste time on a node that is already scheduled. + if (SU->isScheduled) + return ResCount; + + // Forced priority is high. + if (SU->isScheduleHigh) + ResCount += PriorityOne; + + // Adaptable scheduling + // A small, but very parallel + // region, where reg pressure is an issue. + if (HorizontalVerticalBalance > RegPressureThreshold) { + // Critical path first + ResCount += (SU->getHeight() * ScaleTwo); + // If resources are available for it, multiply the + // chance of scheduling. + if (isResourceAvailable(SU)) + ResCount <<= FactorOne; + + // Consider change to reg pressure from scheduling + // this SU. + ResCount -= (regPressureDelta(SU,true) * ScaleOne); + } + // Default heuristic, greeady and + // critical path driven. + else { + // Critical path first. + ResCount += (SU->getHeight() * ScaleTwo); + // Now see how many instructions is blocked by this SU. + ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo); + // If resources are available for it, multiply the + // chance of scheduling. + if (isResourceAvailable(SU)) + ResCount <<= FactorOne; + + ResCount -= (regPressureDelta(SU) * ScaleTwo); + } + + // These are platform specific things. + // Will need to go into the back end + // and accessed from here via a hook. + for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) { + if (N->isMachineOpcode()) { + const MCInstrDesc &TID = TII->get(N->getMachineOpcode()); + if (TID.isCall()) + ResCount += (PriorityThree + (ScaleThree*N->getNumValues())); + } + else + switch (N->getOpcode()) { + default: break; + case ISD::TokenFactor: + case ISD::CopyFromReg: + case ISD::CopyToReg: + ResCount += PriorityFive; + break; + + case ISD::INLINEASM: + ResCount += PriorityFour; + break; + } + } + return ResCount; +} + + +/// Main resource tracking point. +void ResourcePriorityQueue::scheduledNode(SUnit *SU) { + // Use NULL entry as an event marker to reset + // the DFA state. + if (!SU) { + ResourcesModel->clearResources(); + Packet.clear(); + return; + } + + const SDNode *ScegN = SU->getNode(); + // Update reg pressure tracking. + // First update current node. + if (ScegN->isMachineOpcode()) { + // Estimate generated regs. + for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) { + EVT VT = ScegN->getValueType(i); + + if (TLI->isTypeLegal(VT)) { + const TargetRegisterClass *RC = TLI->getRegClassFor(VT); + if (RC) + RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID()); + } + } + // Estimate killed regs. + for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) { + const SDValue &Op = ScegN->getOperand(i); + EVT VT = Op.getNode()->getValueType(Op.getResNo()); + + if (TLI->isTypeLegal(VT)) { + const TargetRegisterClass *RC = TLI->getRegClassFor(VT); + if (RC) { + if (RegPressure[RC->getID()] > + (numberRCValPredInSU(SU, RC->getID()))) + RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID()); + else RegPressure[RC->getID()] = 0; + } + } + } + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl() || (I->getSUnit()->NumRegDefsLeft == 0)) + continue; + --I->getSUnit()->NumRegDefsLeft; + } + } + + // Reserve resources for this SU. + reserveResources(SU); + + // Adjust number of parallel live ranges. + // Heuristic is simple - node with no data successors reduces + // number of live ranges. All others, increase it. + unsigned NumberNonControlDeps = 0; + + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + adjustPriorityOfUnscheduledPreds(I->getSUnit()); + if (!I->isCtrl()) + NumberNonControlDeps++; + } + + if (!NumberNonControlDeps) { + if (ParallelLiveRanges >= SU->NumPreds) + ParallelLiveRanges -= SU->NumPreds; + else + ParallelLiveRanges = 0; + + } + else + ParallelLiveRanges += SU->NumRegDefsLeft; + + // Track parallel live chains. + HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU)); + HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU)); +} + +void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) { + unsigned NodeNumDefs = 0; + for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) + if (N->isMachineOpcode()) { + const MCInstrDesc &TID = TII->get(N->getMachineOpcode()); + // No register need be allocated for this. + if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { + NodeNumDefs = 0; + break; + } + NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs()); + } + else + switch(N->getOpcode()) { + default: break; + case ISD::CopyFromReg: + NodeNumDefs++; + break; + case ISD::INLINEASM: + NodeNumDefs++; + break; + } + + SU->NumRegDefsLeft = NodeNumDefs; +} + +/// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just +/// scheduled. If SU is not itself available, then there is at least one +/// predecessor node that has not been scheduled yet. If SU has exactly ONE +/// unscheduled predecessor, we want to increase its priority: it getting +/// scheduled will make this node available, so it is better than some other +/// node of the same priority that will not make a node available. +void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) { + if (SU->isAvailable) return; // All preds scheduled. + + SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU); + if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) + return; + + // Okay, we found a single predecessor that is available, but not scheduled. + // Since it is available, it must be in the priority queue. First remove it. + remove(OnlyAvailablePred); + + // Reinsert the node into the priority queue, which recomputes its + // NumNodesSolelyBlocking value. + push(OnlyAvailablePred); +} + + +/// Main access point - returns next instructions +/// to be placed in scheduling sequence. +SUnit *ResourcePriorityQueue::pop() { + if (empty()) + return 0; + + std::vector<SUnit *>::iterator Best = Queue.begin(); + if (!DisableDFASched) { + signed BestCost = SUSchedulingCost(*Best); + for (std::vector<SUnit *>::iterator I = Queue.begin(), + E = Queue.end(); I != E; ++I) { + if (*I == *Best) + continue; + + if (SUSchedulingCost(*I) > BestCost) { + BestCost = SUSchedulingCost(*I); + Best = I; + } + } + } + // Use default TD scheduling mechanism. + else { + for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()), + E = Queue.end(); I != E; ++I) + if (Picker(*Best, *I)) + Best = I; + } + + SUnit *V = *Best; + if (Best != prior(Queue.end())) + std::swap(*Best, Queue.back()); + + Queue.pop_back(); + + return V; +} + + +void ResourcePriorityQueue::remove(SUnit *SU) { + assert(!Queue.empty() && "Queue is empty!"); + std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU); + if (I != prior(Queue.end())) + std::swap(*I, Queue.back()); + + Queue.pop_back(); +} + + +#ifdef NDEBUG +void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {} +#else +void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const { + ResourcePriorityQueue q = *this; + while (!q.empty()) { + SUnit *su = q.pop(); + dbgs() << "Height " << su->getHeight() << ": "; + su->dump(DAG); + } +} +#endif diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index b275c63..24da432 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -43,7 +43,7 @@ namespace { SmallVector<SUnit *, 16> Queue; bool empty() const { return Queue.empty(); } - + void push(SUnit *U) { Queue.push_back(U); } @@ -101,8 +101,8 @@ private: bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); void ListScheduleBottomUp(); - /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies. - bool ForceUnitLatencies() const { return true; } + /// forceUnitLatencies - The fast scheduler doesn't care about real latencies. + bool forceUnitLatencies() const { return true; } }; } // end anonymous namespace @@ -112,7 +112,7 @@ void ScheduleDAGFast::Schedule() { DEBUG(dbgs() << "********** List Scheduling **********\n"); NumLiveRegs = 0; - LiveRegDefs.resize(TRI->getNumRegs(), NULL); + LiveRegDefs.resize(TRI->getNumRegs(), NULL); LiveRegCycles.resize(TRI->getNumRegs(), 0); // Build the scheduling graph. @@ -159,7 +159,7 @@ void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { ReleasePred(SU, &*I); if (I->isAssignedRegDep()) { // This is a physical register dependency and it's impossible or - // expensive to copy the register. Make sure nothing that can + // expensive to copy the register. Make sure nothing that can // clobber the register is scheduled between the predecessor and // this node. if (!LiveRegDefs[I->getReg()]) { @@ -245,10 +245,10 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), SDValue(LoadNode, 1)); - SUnit *NewSU = NewSUnit(N); + SUnit *NewSU = newSUnit(N); assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NewSU->NodeNum); - + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { @@ -268,7 +268,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { LoadSU = &SUnits[LoadNode->getNodeId()]; isNewLoad = false; } else { - LoadSU = NewSUnit(LoadNode); + LoadSU = newSUnit(LoadNode); LoadNode->setNodeId(LoadSU->NodeNum); } @@ -329,7 +329,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { D.setSUnit(LoadSU); AddPred(SuccDep, D); } - } + } if (isNewLoad) { AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency)); } @@ -381,11 +381,11 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, const TargetRegisterClass *DestRC, const TargetRegisterClass *SrcRC, SmallVector<SUnit*, 2> &Copies) { - SUnit *CopyFromSU = NewSUnit(static_cast<SDNode *>(NULL)); + SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(NULL)); CopyFromSU->CopySrcRC = SrcRC; CopyFromSU->CopyDstRC = DestRC; - SUnit *CopyToSU = NewSUnit(static_cast<SDNode *>(NULL)); + SUnit *CopyToSU = newSUnit(static_cast<SDNode *>(NULL)); CopyToSU->CopySrcRC = DestRC; CopyToSU->CopyDstRC = SrcRC; @@ -425,7 +425,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); unsigned NumRes = MCID.getNumDefs(); - for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -447,7 +447,7 @@ static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, Added = true; } } - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) + for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { if (RegAdded.insert(*Alias)) { LRegs.push_back(*Alias); @@ -508,7 +508,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; - for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg) { + for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) { CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); } } @@ -630,7 +630,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() { std::reverse(Sequence.begin(), Sequence.end()); #ifndef NDEBUG - VerifySchedule(/*isBottomUp=*/true); + VerifyScheduledSequence(/*isBottomUp=*/true); #endif } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index e757def..2cb5d37 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -45,10 +45,6 @@ static RegisterScheduler "Bottom-up register reduction list scheduling", createBURRListDAGScheduler); static RegisterScheduler - tdrListrDAGScheduler("list-tdrr", - "Top-down register reduction list scheduling", - createTDRRListDAGScheduler); -static RegisterScheduler sourceListDAGScheduler("source", "Similar to list-burr but schedules in source " "order when possible", @@ -93,6 +89,9 @@ static cl::opt<bool> DisableSchedCriticalPath( static cl::opt<bool> DisableSchedHeight( "disable-sched-height", cl::Hidden, cl::init(false), cl::desc("Disable scheduled-height priority in sched=list-ilp")); +static cl::opt<bool> Disable2AddrHack( + "disable-2addr-hack", cl::Hidden, cl::init(true), + cl::desc("Disable scheduler's two-address hack")); static cl::opt<int> MaxReorderWindow( "max-sched-reorder", cl::Hidden, cl::init(6), @@ -103,17 +102,6 @@ static cl::opt<unsigned> AvgIPC( "sched-avg-ipc", cl::Hidden, cl::init(1), cl::desc("Average inst/cycle whan no target itinerary exists.")); -#ifndef NDEBUG -namespace { - // For sched=list-ilp, Count the number of times each factor comes into play. - enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth, - FactStatic, FactOther, NumFactors }; -} -static const char *FactorName[NumFactors] = -{"PressureDiff", "RegUses", "Stall", "Height", "Depth","Static", "Other"}; -static int FactorCount[NumFactors]; -#endif //!NDEBUG - namespace { //===----------------------------------------------------------------------===// /// ScheduleDAGRRList - The actual register reduction list scheduler @@ -121,10 +109,6 @@ namespace { /// class ScheduleDAGRRList : public ScheduleDAGSDNodes { private: - /// isBottomUp - This is true if the scheduling problem is bottom-up, false if - /// it is top-down. - bool isBottomUp; - /// NeedLatency - True if the scheduler will make use of latency information. /// bool NeedLatency; @@ -162,11 +146,15 @@ private: /// and similar queries. ScheduleDAGTopologicalSort Topo; + // Hack to keep track of the inverse of FindCallSeqStart without more crazy + // DAG crawling. + DenseMap<SUnit*, SUnit*> CallSeqEndForStart; + public: ScheduleDAGRRList(MachineFunction &mf, bool needlatency, SchedulingPriorityQueue *availqueue, CodeGenOpt::Level OptLevel) - : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()), + : ScheduleDAGSDNodes(mf), NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0), Topo(SUnits) { @@ -221,8 +209,6 @@ private: void ReleasePred(SUnit *SU, const SDep *PredEdge); void ReleasePredecessors(SUnit *SU); - void ReleaseSucc(SUnit *SU, const SDep *SuccEdge); - void ReleaseSuccessors(SUnit *SU); void ReleasePending(); void AdvanceToCycle(unsigned NextCycle); void AdvancePastStalls(SUnit *SU); @@ -242,15 +228,11 @@ private: SUnit *PickNodeToScheduleBottomUp(); void ListScheduleBottomUp(); - void ScheduleNodeTopDown(SUnit*); - void ListScheduleTopDown(); - - /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. /// Updates the topological ordering if required. SUnit *CreateNewSUnit(SDNode *N) { unsigned NumSUnits = SUnits.size(); - SUnit *NewNode = NewSUnit(N); + SUnit *NewNode = newSUnit(N); // Update the topological ordering. if (NewNode->NodeNum >= NumSUnits) Topo.InitDAGTopologicalSorting(); @@ -268,9 +250,9 @@ private: return NewNode; } - /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't + /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't /// need actual latency information but the hybrid scheduler does. - bool ForceUnitLatencies() const { + bool forceUnitLatencies() const { return !NeedLatency; } }; @@ -278,7 +260,7 @@ private: /// GetCostForDef - Looks up the register class and cost for a given definition. /// Typically this just means looking up the representative register class, -/// but for untyped values (MVT::untyped) it means inspecting the node's +/// but for untyped values (MVT::Untyped) it means inspecting the node's /// opcode to determine what register class is being generated. static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, const TargetLowering *TLI, @@ -289,7 +271,7 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, // Special handling for untyped values. These values can only come from // the expansion of custom DAG-to-DAG patterns. - if (VT == MVT::untyped) { + if (VT == MVT::Untyped) { const SDNode *Node = RegDefPos.GetNode(); unsigned Opcode = Node->getMachineOpcode(); @@ -319,18 +301,16 @@ void ScheduleDAGRRList::Schedule() { DEBUG(dbgs() << "********** List Scheduling BB#" << BB->getNumber() << " '" << BB->getName() << "' **********\n"); -#ifndef NDEBUG - for (int i = 0; i < NumFactors; ++i) { - FactorCount[i] = 0; - } -#endif //!NDEBUG CurCycle = 0; IssueCount = 0; MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX; NumLiveRegs = 0; - LiveRegDefs.resize(TRI->getNumRegs(), NULL); - LiveRegGens.resize(TRI->getNumRegs(), NULL); + // Allocate slots for each physical register, plus one for a special register + // to track the virtual resource of a calling sequence. + LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL); + LiveRegGens.resize(TRI->getNumRegs() + 1, NULL); + CallSeqEndForStart.clear(); // Build the scheduling graph. BuildSchedGraph(NULL); @@ -343,18 +323,16 @@ void ScheduleDAGRRList::Schedule() { HazardRec->Reset(); - // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. - if (isBottomUp) - ListScheduleBottomUp(); - else - ListScheduleTopDown(); + // Execute the actual scheduling loop. + ListScheduleBottomUp(); -#ifndef NDEBUG - for (int i = 0; i < NumFactors; ++i) { - DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n"); - } -#endif // !NDEBUG AvailableQueue->releaseState(); + + DEBUG({ + dbgs() << "*** Final schedule ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); } //===----------------------------------------------------------------------===// @@ -376,7 +354,7 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { #endif --PredSU->NumSuccsLeft; - if (!ForceUnitLatencies()) { + if (!forceUnitLatencies()) { // Updating predecessor's height. This is now the cycle when the // predecessor can be scheduled without causing a pipeline stall. PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency()); @@ -403,6 +381,109 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { } } +/// IsChainDependent - Test if Outer is reachable from Inner through +/// chain dependencies. +static bool IsChainDependent(SDNode *Outer, SDNode *Inner, + unsigned NestLevel, + const TargetInstrInfo *TII) { + SDNode *N = Outer; + for (;;) { + if (N == Inner) + return true; + // For a TokenFactor, examine each operand. There may be multiple ways + // to get to the CALLSEQ_BEGIN, but we need to find the path with the + // most nesting in order to ensure that we find the corresponding match. + if (N->getOpcode() == ISD::TokenFactor) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (IsChainDependent(N->getOperand(i).getNode(), Inner, NestLevel, TII)) + return true; + return false; + } + // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END. + if (N->isMachineOpcode()) { + if (N->getMachineOpcode() == + (unsigned)TII->getCallFrameDestroyOpcode()) { + ++NestLevel; + } else if (N->getMachineOpcode() == + (unsigned)TII->getCallFrameSetupOpcode()) { + if (NestLevel == 0) + return false; + --NestLevel; + } + } + // Otherwise, find the chain and continue climbing. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getValueType() == MVT::Other) { + N = N->getOperand(i).getNode(); + goto found_chain_operand; + } + return false; + found_chain_operand:; + if (N->getOpcode() == ISD::EntryToken) + return false; + } +} + +/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate +/// the corresponding (lowered) CALLSEQ_BEGIN node. +/// +/// NestLevel and MaxNested are used in recursion to indcate the current level +/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum +/// level seen so far. +/// +/// TODO: It would be better to give CALLSEQ_END an explicit operand to point +/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it. +static SDNode * +FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, + const TargetInstrInfo *TII) { + for (;;) { + // For a TokenFactor, examine each operand. There may be multiple ways + // to get to the CALLSEQ_BEGIN, but we need to find the path with the + // most nesting in order to ensure that we find the corresponding match. + if (N->getOpcode() == ISD::TokenFactor) { + SDNode *Best = 0; + unsigned BestMaxNest = MaxNest; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + unsigned MyNestLevel = NestLevel; + unsigned MyMaxNest = MaxNest; + if (SDNode *New = FindCallSeqStart(N->getOperand(i).getNode(), + MyNestLevel, MyMaxNest, TII)) + if (!Best || (MyMaxNest > BestMaxNest)) { + Best = New; + BestMaxNest = MyMaxNest; + } + } + assert(Best); + MaxNest = BestMaxNest; + return Best; + } + // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END. + if (N->isMachineOpcode()) { + if (N->getMachineOpcode() == + (unsigned)TII->getCallFrameDestroyOpcode()) { + ++NestLevel; + MaxNest = std::max(MaxNest, NestLevel); + } else if (N->getMachineOpcode() == + (unsigned)TII->getCallFrameSetupOpcode()) { + assert(NestLevel != 0); + --NestLevel; + if (NestLevel == 0) + return N; + } + } + // Otherwise, find the chain and continue climbing. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getValueType() == MVT::Other) { + N = N->getOperand(i).getNode(); + goto found_chain_operand; + } + return 0; + found_chain_operand:; + if (N->getOpcode() == ISD::EntryToken) + return 0; + } +} + /// Call ReleasePred for each predecessor, then update register live def/gen. /// Always update LiveRegDefs for a register dependence even if the current SU /// also defines the register. This effectively create one large live range @@ -440,6 +521,27 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) { } } } + + // If we're scheduling a lowered CALLSEQ_END, find the corresponding + // CALLSEQ_BEGIN. Inject an artificial physical register dependence between + // these nodes, to prevent other calls from being interscheduled with them. + unsigned CallResource = TRI->getNumRegs(); + if (!LiveRegDefs[CallResource]) + for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) + if (Node->isMachineOpcode() && + Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { + unsigned NestLevel = 0; + unsigned MaxNest = 0; + SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII); + + SUnit *Def = &SUnits[N->getNodeId()]; + CallSeqEndForStart[Def] = SU; + + ++NumLiveRegs; + LiveRegDefs[CallResource] = Def; + LiveRegGens[CallResource] = SU; + break; + } } /// Check to see if any of the pending instructions are ready to issue. If @@ -457,8 +559,7 @@ void ScheduleDAGRRList::ReleasePending() { // Check to see if any of the pending instructions are ready to issue. If // so, add them to the available queue. for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { - unsigned ReadyCycle = - isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth(); + unsigned ReadyCycle = PendingQueue[i]->getHeight(); if (ReadyCycle < MinAvailableCycle) MinAvailableCycle = ReadyCycle; @@ -487,10 +588,7 @@ void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) { } else { for (; CurCycle != NextCycle; ++CurCycle) { - if (isBottomUp) - HazardRec->RecedeCycle(); - else - HazardRec->AdvanceCycle(); + HazardRec->RecedeCycle(); } } // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the @@ -511,7 +609,7 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { // currently need to treat these nodes like real instructions. // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return; - unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth(); + unsigned ReadyCycle = SU->getHeight(); // Bump CurCycle to account for latency. We assume the latency of other // available instructions may be hidden by the stall (not a full pipe stall). @@ -522,7 +620,7 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { // Calls are scheduled in their preceding cycle, so don't conflict with // hazards from instructions after the call. EmitNode will reset the // scoreboard state before emitting the call. - if (isBottomUp && SU->isCall) + if (SU->isCall) return; // FIXME: For resource conflicts in very long non-pipelined stages, we @@ -530,7 +628,7 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { int Stalls = 0; while (true) { ScheduleHazardRecognizer::HazardType HT = - HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls); + HazardRec->getHazardType(SU, -Stalls); if (HT == ScheduleHazardRecognizer::NoHazard) break; @@ -568,17 +666,13 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) { HazardRec->Reset(); return; } - if (isBottomUp && SU->isCall) { + if (SU->isCall) { // Calls are scheduled with their preceding instructions. For bottom-up // scheduling, clear the pipeline state before emitting. HazardRec->Reset(); } HazardRec->EmitInstruction(SU); - - if (!isBottomUp && SU->isCall) { - HazardRec->Reset(); - } } static void resetVRegCycle(SUnit *SU); @@ -607,7 +701,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { Sequence.push_back(SU); - AvailableQueue->ScheduledNode(SU); + AvailableQueue->scheduledNode(SU); // If HazardRec is disabled, and each inst counts as one cycle, then // advance CurCycle before ReleasePredecessors to avoid useless pushes to @@ -630,6 +724,20 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { LiveRegGens[I->getReg()] = NULL; } } + // Release the special call resource dependence, if this is the beginning + // of a call. + unsigned CallResource = TRI->getNumRegs(); + if (LiveRegDefs[CallResource] == SU) + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->isMachineOpcode() && + SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + --NumLiveRegs; + LiveRegDefs[CallResource] = NULL; + LiveRegGens[CallResource] = NULL; + } + } resetVRegCycle(SU); @@ -686,15 +794,41 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { } } + // Reclaim the special call resource dependence, if this is the beginning + // of a call. + unsigned CallResource = TRI->getNumRegs(); + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->isMachineOpcode() && + SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) { + ++NumLiveRegs; + LiveRegDefs[CallResource] = SU; + LiveRegGens[CallResource] = CallSeqEndForStart[SU]; + } + } + + // Release the special call resource dependence, if this is the end + // of a call. + if (LiveRegGens[CallResource] == SU) + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->isMachineOpcode() && + SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + --NumLiveRegs; + LiveRegDefs[CallResource] = NULL; + LiveRegGens[CallResource] = NULL; + } + } + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { if (I->isAssignedRegDep()) { + if (!LiveRegDefs[I->getReg()]) + ++NumLiveRegs; // This becomes the nearest def. Note that an earlier def may still be // pending if this is a two-address node. LiveRegDefs[I->getReg()] = SU; - if (!LiveRegDefs[I->getReg()]) { - ++NumLiveRegs; - } if (LiveRegGens[I->getReg()] == NULL || I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight()) LiveRegGens[I->getReg()] = I->getSUnit(); @@ -714,7 +848,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { else { AvailableQueue->push(SU); } - AvailableQueue->UnscheduledNode(SU); + AvailableQueue->unscheduledNode(SU); } /// After backtracking, the hazard checker needs to be restored to a state @@ -805,6 +939,11 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) return NULL; + // unfolding an x86 DEC64m operation results in store, dec, load which + // can't be handled here so quit + if (NewNodes.size() == 3) + return NULL; + DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); assert(NewNodes.size() == 2 && "Expected a load folding node!"); @@ -830,7 +969,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { LoadNode->setNodeId(LoadSU->NodeNum); InitNumRegDefsLeft(LoadSU); - ComputeLatency(LoadSU); + computeLatency(LoadSU); } SUnit *NewSU = CreateNewSUnit(N); @@ -848,7 +987,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { NewSU->isCommutable = true; InitNumRegDefsLeft(NewSU); - ComputeLatency(NewSU); + computeLatency(NewSU); // Record all the edges to and from the old SU, by category. SmallVector<SDep, 4> ChainPreds; @@ -1027,7 +1166,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); unsigned NumRes = MCID.getNumDefs(); - for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -1042,7 +1181,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, SmallSet<unsigned, 4> &RegAdded, SmallVector<unsigned, 4> &LRegs, const TargetRegisterInfo *TRI) { - for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) { + for (const uint16_t *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) { // Check if Ref is live. if (!LiveRegDefs[*AliasI]) continue; @@ -1057,6 +1196,31 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, } } +/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered +/// by RegMask, and add them to LRegs. +static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, + std::vector<SUnit*> &LiveRegDefs, + SmallSet<unsigned, 4> &RegAdded, + SmallVector<unsigned, 4> &LRegs) { + // Look at all live registers. Skip Reg0 and the special CallResource. + for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) { + if (!LiveRegDefs[i]) continue; + if (LiveRegDefs[i] == SU) continue; + if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue; + if (RegAdded.insert(i)) + LRegs.push_back(i); + } +} + +/// getNodeRegMask - Returns the register mask attached to an SDNode, if any. +static const uint32_t *getNodeRegMask(const SDNode *N) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (const RegisterMaskSDNode *Op = + dyn_cast<RegisterMaskSDNode>(N->getOperand(i).getNode())) + return Op->getRegMask(); + return NULL; +} + /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay /// scheduling of the given node to satisfy live physical register dependencies. /// If the specific node is the last one that's available to schedule, do @@ -1108,10 +1272,27 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) { if (!Node->isMachineOpcode()) continue; + // If we're in the middle of scheduling a call, don't begin scheduling + // another call. Also, don't allow any physical registers to be live across + // the call. + if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { + // Check the special calling-sequence resource. + unsigned CallResource = TRI->getNumRegs(); + if (LiveRegDefs[CallResource]) { + SDNode *Gen = LiveRegGens[CallResource]->getNode(); + while (SDNode *Glued = Gen->getGluedNode()) + Gen = Glued; + if (!IsChainDependent(Gen, Node, 0, TII) && RegAdded.insert(CallResource)) + LRegs.push_back(CallResource); + } + } + if (const uint32_t *RegMask = getNodeRegMask(Node)) + CheckForLiveRegDefMasked(SU, RegMask, LiveRegDefs, RegAdded, LRegs); + const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; - for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg) + for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); } @@ -1300,99 +1481,10 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { std::reverse(Sequence.begin(), Sequence.end()); #ifndef NDEBUG - VerifySchedule(isBottomUp); -#endif -} - -//===----------------------------------------------------------------------===// -// Top-Down Scheduling -//===----------------------------------------------------------------------===// - -/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to -/// the AvailableQueue if the count reaches zero. Also update its cycle bound. -void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) { - SUnit *SuccSU = SuccEdge->getSUnit(); - -#ifndef NDEBUG - if (SuccSU->NumPredsLeft == 0) { - dbgs() << "*** Scheduling failed! ***\n"; - SuccSU->dump(this); - dbgs() << " has been released too many times!\n"; - llvm_unreachable(0); - } -#endif - --SuccSU->NumPredsLeft; - - // If all the node's predecessors are scheduled, this node is ready - // to be scheduled. Ignore the special ExitSU node. - if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) { - SuccSU->isAvailable = true; - AvailableQueue->push(SuccSU); - } -} - -void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) { - // Top down: release successors - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - assert(!I->isAssignedRegDep() && - "The list-tdrr scheduler doesn't yet support physreg dependencies!"); - - ReleaseSucc(SU, &*I); - } -} - -/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending -/// count of its successors. If a successor pending count is zero, add it to -/// the Available queue. -void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) { - DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); - DEBUG(SU->dump(this)); - - assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); - SU->setDepthToAtLeast(CurCycle); - Sequence.push_back(SU); - - ReleaseSuccessors(SU); - SU->isScheduled = true; - AvailableQueue->ScheduledNode(SU); -} - -/// ListScheduleTopDown - The main loop of list scheduling for top-down -/// schedulers. -void ScheduleDAGRRList::ListScheduleTopDown() { - AvailableQueue->setCurCycle(CurCycle); - - // Release any successors of the special Entry node. - ReleaseSuccessors(&EntrySU); - - // All leaves to Available queue. - for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - // It is available if it has no predecessors. - if (SUnits[i].Preds.empty()) { - AvailableQueue->push(&SUnits[i]); - SUnits[i].isAvailable = true; - } - } - - // While Available queue is not empty, grab the node with the highest - // priority. If it is not ready put it back. Schedule the node. - Sequence.reserve(SUnits.size()); - while (!AvailableQueue->empty()) { - SUnit *CurSU = AvailableQueue->pop(); - - if (CurSU) - ScheduleNodeTopDown(CurSU); - ++CurCycle; - AvailableQueue->setCurCycle(CurCycle); - } - -#ifndef NDEBUG - VerifySchedule(isBottomUp); + VerifyScheduledSequence(/*isBottomUp=*/true); #endif } - //===----------------------------------------------------------------------===// // RegReductionPriorityQueue Definition //===----------------------------------------------------------------------===// @@ -1437,21 +1529,6 @@ struct bu_ls_rr_sort : public queue_sort { bool operator()(SUnit* left, SUnit* right) const; }; -// td_ls_rr_sort - Priority function for top down register pressure reduction -// scheduler. -struct td_ls_rr_sort : public queue_sort { - enum { - IsBottomUp = false, - HasReadyFilter = false - }; - - RegReductionPQBase *SPQ; - td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} - td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} - - bool operator()(const SUnit* left, const SUnit* right) const; -}; - // src_ls_rr_sort - Priority function for source order scheduler. struct src_ls_rr_sort : public queue_sort { enum { @@ -1510,6 +1587,7 @@ protected: std::vector<SUnit*> Queue; unsigned CurQueueId; bool TracksRegPressure; + bool SrcOrder; // SUnits - The SUnits for the current graph. std::vector<SUnit> *SUnits; @@ -1535,11 +1613,12 @@ public: RegReductionPQBase(MachineFunction &mf, bool hasReadyFilter, bool tracksrp, + bool srcorder, const TargetInstrInfo *tii, const TargetRegisterInfo *tri, const TargetLowering *tli) : SchedulingPriorityQueue(hasReadyFilter), - CurQueueId(0), TracksRegPressure(tracksrp), + CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) { if (TracksRegPressure) { unsigned NumRC = TRI->getNumRegClasses(); @@ -1610,9 +1689,9 @@ public: int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const; - void ScheduledNode(SUnit *SU); + void scheduledNode(SUnit *SU); - void UnscheduledNode(SUnit *SU); + void unscheduledNode(SUnit *SU); protected: bool canClobber(const SUnit *SU, const SUnit *Op); @@ -1654,10 +1733,12 @@ class RegReductionPriorityQueue : public RegReductionPQBase { public: RegReductionPriorityQueue(MachineFunction &mf, bool tracksrp, + bool srcorder, const TargetInstrInfo *tii, const TargetRegisterInfo *tri, const TargetLowering *tli) - : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli), + : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder, + tii, tri, tli), Picker(this) {} bool isBottomUp() const { return SF::IsBottomUp; } @@ -1680,10 +1761,7 @@ public: SF DumpPicker = Picker; while (!DumpQueue.empty()) { SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG); - if (isBottomUp()) - dbgs() << "Height " << SU->getHeight() << ": "; - else - dbgs() << "Depth " << SU->getDepth() << ": "; + dbgs() << "Height " << SU->getHeight() << ": "; SU->dump(DAG); } } @@ -1692,9 +1770,6 @@ public: typedef RegReductionPriorityQueue<bu_ls_rr_sort> BURegReductionPriorityQueue; -typedef RegReductionPriorityQueue<td_ls_rr_sort> -TDRegReductionPriorityQueue; - typedef RegReductionPriorityQueue<src_ls_rr_sort> SrcRegReductionPriorityQueue; @@ -1919,7 +1994,7 @@ int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const { return PDiff; } -void RegReductionPQBase::ScheduledNode(SUnit *SU) { +void RegReductionPQBase::scheduledNode(SUnit *SU) { if (!TracksRegPressure) return; @@ -1988,7 +2063,7 @@ void RegReductionPQBase::ScheduledNode(SUnit *SU) { dumpRegPressure(); } -void RegReductionPQBase::UnscheduledNode(SUnit *SU) { +void RegReductionPQBase::unscheduledNode(SUnit *SU) { if (!TracksRegPressure) return; @@ -2235,37 +2310,29 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, int LHeight = (int)left->getHeight() + LPenalty; int RHeight = (int)right->getHeight() + RPenalty; - bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) && + bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) && BUHasStall(left, LHeight, SPQ); - bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) && + bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) && BUHasStall(right, RHeight, SPQ); // If scheduling one of the node will cause a pipeline stall, delay it. // If scheduling either one of the node will cause a pipeline stall, sort // them according to their height. if (LStall) { - if (!RStall) { - DEBUG(++FactorCount[FactStall]); + if (!RStall) return 1; - } - if (LHeight != RHeight) { - DEBUG(++FactorCount[FactStall]); + if (LHeight != RHeight) return LHeight > RHeight ? 1 : -1; - } - } else if (RStall) { - DEBUG(++FactorCount[FactStall]); + } else if (RStall) return -1; - } // If either node is scheduling for latency, sort them by height/depth // and latency. - if (!checkPref || (left->SchedulingPref == Sched::Latency || - right->SchedulingPref == Sched::Latency)) { + if (!checkPref || (left->SchedulingPref == Sched::ILP || + right->SchedulingPref == Sched::ILP)) { if (DisableSchedCycles) { - if (LHeight != RHeight) { - DEBUG(++FactorCount[FactHeight]); + if (LHeight != RHeight) return LHeight > RHeight ? 1 : -1; - } } else { // If neither instruction stalls (!LStall && !RStall) then @@ -2274,17 +2341,14 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, int LDepth = left->getDepth() - LPenalty; int RDepth = right->getDepth() - RPenalty; if (LDepth != RDepth) { - DEBUG(++FactorCount[FactDepth]); DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum << ") depth " << LDepth << " vs SU (" << right->NodeNum << ") depth " << RDepth << "\n"); return LDepth < RDepth ? 1 : -1; } } - if (left->Latency != right->Latency) { - DEBUG(++FactorCount[FactOther]); + if (left->Latency != right->Latency) return left->Latency > right->Latency ? 1 : -1; - } } return 0; } @@ -2298,7 +2362,6 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { bool LHasPhysReg = left->hasPhysRegDefs; bool RHasPhysReg = right->hasPhysRegDefs; if (LHasPhysReg != RHasPhysReg) { - DEBUG(++FactorCount[FactRegUses]); #ifndef NDEBUG const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"}; #endif @@ -2324,10 +2387,8 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0; } - if (LPriority != RPriority) { - DEBUG(++FactorCount[FactStatic]); + if (LPriority != RPriority) return LPriority > RPriority; - } // One or both of the nodes are calls and their sethi-ullman numbers are the // same, then keep source order. @@ -2360,18 +2421,14 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { // This creates more short live intervals. unsigned LDist = closestSucc(left); unsigned RDist = closestSucc(right); - if (LDist != RDist) { - DEBUG(++FactorCount[FactOther]); + if (LDist != RDist) return LDist < RDist; - } // How many registers becomes live when the node is scheduled. unsigned LScratch = calcMaxScratches(left); unsigned RScratch = calcMaxScratches(right); - if (LScratch != RScratch) { - DEBUG(++FactorCount[FactOther]); + if (LScratch != RScratch) return LScratch > RScratch; - } // Comparing latency against a call makes little sense unless the node // is register pressure-neutral. @@ -2386,20 +2443,15 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { return result > 0; } else { - if (left->getHeight() != right->getHeight()) { - DEBUG(++FactorCount[FactHeight]); + if (left->getHeight() != right->getHeight()) return left->getHeight() > right->getHeight(); - } - if (left->getDepth() != right->getDepth()) { - DEBUG(++FactorCount[FactDepth]); + if (left->getDepth() != right->getDepth()) return left->getDepth() < right->getDepth(); - } } assert(left->NodeQueueId && right->NodeQueueId && "NodeQueueId cannot be zero"); - DEBUG(++FactorCount[FactOther]); return (left->NodeQueueId > right->NodeQueueId); } @@ -2459,13 +2511,11 @@ bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { // Avoid causing spills. If register pressure is high, schedule for // register pressure reduction. if (LHigh && !RHigh) { - DEBUG(++FactorCount[FactPressureDiff]); DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" << right->NodeNum << ")\n"); return true; } else if (!LHigh && RHigh) { - DEBUG(++FactorCount[FactPressureDiff]); DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" << left->NodeNum << ")\n"); return false; @@ -2529,7 +2579,6 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { RPDiff = SPQ->RegPressureDiff(right, RLiveUses); } if (!DisableSchedRegPressure && LPDiff != RPDiff) { - DEBUG(++FactorCount[FactPressureDiff]); DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff << " != SU(" << right->NodeNum << "): " << RPDiff << "\n"); return LPDiff > RPDiff; @@ -2538,7 +2587,6 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) { bool LReduce = canEnableCoalescing(left); bool RReduce = canEnableCoalescing(right); - DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]); if (LReduce && !RReduce) return false; if (RReduce && !LReduce) return true; } @@ -2546,17 +2594,14 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) { DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n"); - DEBUG(++FactorCount[FactRegUses]); return LLiveUses < RLiveUses; } if (!DisableSchedStalls) { bool LStall = BUHasStall(left, left->getHeight(), SPQ); bool RStall = BUHasStall(right, right->getHeight(), SPQ); - if (LStall != RStall) { - DEBUG(++FactorCount[FactHeight]); + if (LStall != RStall) return left->getHeight() > right->getHeight(); - } } if (!DisableSchedCriticalPath) { @@ -2565,17 +2610,14 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " << left->getDepth() << " != SU(" << right->NodeNum << "): " << right->getDepth() << "\n"); - DEBUG(++FactorCount[FactDepth]); return left->getDepth() < right->getDepth(); } } if (!DisableSchedHeight && left->getHeight() != right->getHeight()) { int spread = (int)left->getHeight() - (int)right->getHeight(); - if (std::abs(spread) > MaxReorderWindow) { - DEBUG(++FactorCount[FactHeight]); + if (std::abs(spread) > MaxReorderWindow) return left->getHeight() > right->getHeight(); - } } return BURRSort(left, right, SPQ); @@ -2584,9 +2626,10 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { SUnits = &sunits; // Add pseudo dependency edges for two-address nodes. - AddPseudoTwoAddrDeps(); + if (!Disable2AddrHack) + AddPseudoTwoAddrDeps(); // Reroute edges to nodes with multiple uses. - if (!TracksRegPressure) + if (!TracksRegPressure && !SrcOrder) PrescheduleNodesWithMultipleUses(); // Calculate node priorities. CalculateSethiUllmanNumbers(); @@ -2628,9 +2671,10 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, ScheduleDAGRRList *scheduleDAG, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { - const unsigned *ImpDefs + const uint16_t *ImpDefs = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs(); - if(!ImpDefs) + const uint32_t *RegMask = getNodeRegMask(SU->getNode()); + if(!ImpDefs && !RegMask) return false; for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end(); @@ -2641,14 +2685,18 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, if (!PI->isAssignedRegDep()) continue; - for (const unsigned *ImpDef = ImpDefs; *ImpDef; ++ImpDef) { - // Return true if SU clobbers this physical register use and the - // definition of the register reaches from DepSU. IsReachable queries a - // topological forward sort of the DAG (following the successors). - if (TRI->regsOverlap(*ImpDef, PI->getReg()) && - scheduleDAG->IsReachable(DepSU, PI->getSUnit())) - return true; - } + if (RegMask && MachineOperand::clobbersPhysReg(RegMask, PI->getReg()) && + scheduleDAG->IsReachable(DepSU, PI->getSUnit())) + return true; + + if (ImpDefs) + for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef) + // Return true if SU clobbers this physical register use and the + // definition of the register reaches from DepSU. IsReachable queries + // a topological forward sort of the DAG (following the successors). + if (TRI->regsOverlap(*ImpDef, PI->getReg()) && + scheduleDAG->IsReachable(DepSU, PI->getSUnit())) + return true; } } return false; @@ -2661,16 +2709,17 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, const TargetRegisterInfo *TRI) { SDNode *N = SuccSU->getNode(); unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); - const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); + const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); assert(ImpDefs && "Caller should check hasPhysRegDefs"); for (const SDNode *SUNode = SU->getNode(); SUNode; SUNode = SUNode->getGluedNode()) { if (!SUNode->isMachineOpcode()) continue; - const unsigned *SUImpDefs = + const uint16_t *SUImpDefs = TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); - if (!SUImpDefs) - return false; + const uint32_t *SURegMask = getNodeRegMask(SUNode); + if (!SUImpDefs && !SURegMask) + continue; for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { EVT VT = N->getValueType(i); if (VT == MVT::Glue || VT == MVT::Other) @@ -2678,6 +2727,10 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, if (!N->hasAnyUseOfValue(i)) continue; unsigned Reg = ImpDefs[i - NumDefs]; + if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg)) + return true; + if (!SUImpDefs) + continue; for (;*SUImpDefs; ++SUImpDefs) { unsigned SUReg = *SUImpDefs; if (TRI->regsOverlap(Reg, SUReg)) @@ -2887,69 +2940,6 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() { } } -/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled -/// predecessors of the successors of the SUnit SU. Stop when the provided -/// limit is exceeded. -static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, - unsigned Limit) { - unsigned Sum = 0; - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - const SUnit *SuccSU = I->getSUnit(); - for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), - EE = SuccSU->Preds.end(); II != EE; ++II) { - SUnit *PredSU = II->getSUnit(); - if (!PredSU->isScheduled) - if (++Sum > Limit) - return Sum; - } - } - return Sum; -} - - -// Top down -bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { - if (int res = checkSpecialNodes(left, right)) - return res < 0; - - unsigned LPriority = SPQ->getNodePriority(left); - unsigned RPriority = SPQ->getNodePriority(right); - bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); - bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode(); - bool LIsFloater = LIsTarget && left->NumPreds == 0; - bool RIsFloater = RIsTarget && right->NumPreds == 0; - unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0; - unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0; - - if (left->NumSuccs == 0 && right->NumSuccs != 0) - return false; - else if (left->NumSuccs != 0 && right->NumSuccs == 0) - return true; - - if (LIsFloater) - LBonus -= 2; - if (RIsFloater) - RBonus -= 2; - if (left->NumSuccs == 1) - LBonus += 2; - if (right->NumSuccs == 1) - RBonus += 2; - - if (LPriority+LBonus != RPriority+RBonus) - return LPriority+LBonus < RPriority+RBonus; - - if (left->getDepth() != right->getDepth()) - return left->getDepth() < right->getDepth(); - - if (left->NumSuccsLeft != right->NumSuccsLeft) - return left->NumSuccsLeft > right->NumSuccsLeft; - - assert(left->NodeQueueId && right->NodeQueueId && - "NodeQueueId cannot be zero"); - return (left->NodeQueueId > right->NodeQueueId); -} - //===----------------------------------------------------------------------===// // Public Constructor Functions //===----------------------------------------------------------------------===// @@ -2962,21 +2952,7 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, const TargetRegisterInfo *TRI = TM.getRegisterInfo(); BURegReductionPriorityQueue *PQ = - new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); - ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); - PQ->setScheduleDAG(SD); - return SD; -} - -llvm::ScheduleDAGSDNodes * -llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, - CodeGenOpt::Level OptLevel) { - const TargetMachine &TM = IS->TM; - const TargetInstrInfo *TII = TM.getInstrInfo(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - - TDRegReductionPriorityQueue *PQ = - new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); + new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, 0); ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; @@ -2990,7 +2966,7 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, const TargetRegisterInfo *TRI = TM.getRegisterInfo(); SrcRegReductionPriorityQueue *PQ = - new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); + new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, 0); ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; @@ -3005,7 +2981,7 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, const TargetLowering *TLI = &IS->getTargetLowering(); HybridBURRPriorityQueue *PQ = - new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); + new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); PQ->setScheduleDAG(SD); @@ -3021,7 +2997,7 @@ llvm::createILPListDAGScheduler(SelectionDAGISel *IS, const TargetLowering *TLI = &IS->getTargetLowering(); ILPBURRPriorityQueue *PQ = - new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); + new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 71f07d6..69dd813 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -17,6 +17,8 @@ #include "ScheduleDAGSDNodes.h" #include "InstrEmitter.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" @@ -44,20 +46,26 @@ static cl::opt<int> HighLatencyCycles( "instructions take for targets with no itinerary")); ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) - : ScheduleDAG(mf), + : ScheduleDAG(mf), BB(0), DAG(0), InstrItins(mf.getTarget().getInstrItineraryData()) {} /// Run - perform scheduling. /// -void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb, - MachineBasicBlock::iterator insertPos) { +void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) { + BB = bb; DAG = dag; - ScheduleDAG::Run(bb, insertPos); + + // Clear the scheduler's SUnit DAG. + ScheduleDAG::clearDAG(); + Sequence.clear(); + + // Invoke the target's selection of scheduler. + Schedule(); } /// NewSUnit - Creates a new SUnit and return a ptr to it. /// -SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) { +SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) { #ifndef NDEBUG const SUnit *Addr = 0; if (!SUnits.empty()) @@ -79,7 +87,7 @@ SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) { } SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { - SUnit *SU = NewSUnit(Old->getNode()); + SUnit *SU = newSUnit(Old->getNode()); SU->OrigNode = Old->OrigNode; SU->Latency = Old->Latency; SU->isVRegCycle = Old->isVRegCycle; @@ -302,7 +310,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { // If this node has already been processed, stop now. if (NI->getNodeId() != -1) continue; - SUnit *NodeSUnit = NewSUnit(NI); + SUnit *NodeSUnit = newSUnit(NI); // See if anything is glued to this node, if so, add them to glued // nodes. Nodes can have at most one glue input and one glue output. Glue @@ -360,7 +368,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { InitNumRegDefsLeft(NodeSUnit); // Assign the Latency field of NodeSUnit using target-provided information. - ComputeLatency(NodeSUnit); + computeLatency(NodeSUnit); } // Find all call operands. @@ -382,7 +390,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); // Check to see if the scheduler cares about latencies. - bool UnitLatencies = ForceUnitLatencies(); + bool UnitLatencies = forceUnitLatencies(); // Pass 2: add the preds, succs, etc. for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { @@ -448,7 +456,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, OpLatency, PhysReg); if (!isChain && !UnitLatencies) { - ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep)); + computeOperandLatency(OpN, N, i, const_cast<SDep &>(dep)); ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep)); } @@ -541,7 +549,7 @@ void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) { } } -void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { +void ScheduleDAGSDNodes::computeLatency(SUnit *SU) { SDNode *N = SU->getNode(); // TokenFactor operands are considered zero latency, and some schedulers @@ -553,7 +561,7 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { } // Check to see if the scheduler cares about latencies. - if (ForceUnitLatencies()) { + if (forceUnitLatencies()) { SU->Latency = 1; return; } @@ -575,10 +583,10 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { SU->Latency += TII->getInstrLatency(InstrItins, N); } -void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use, +void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use, unsigned OpIdx, SDep& dep) const{ // Check to see if the scheduler cares about latencies. - if (ForceUnitLatencies()) + if (forceUnitLatencies()) return; if (dep.getKind() != SDep::Data) @@ -621,6 +629,30 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { } } +void ScheduleDAGSDNodes::dumpSchedule() const { + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + if (SUnit *SU = Sequence[i]) + SU->dump(this); + else + dbgs() << "**** NOOP ****\n"; + } +} + +#ifndef NDEBUG +/// VerifyScheduledSequence - Verify that all SUnits were scheduled and that +/// their state is consistent with the nodes listed in Sequence. +/// +void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) { + unsigned ScheduledNodes = ScheduleDAG::VerifyScheduledDAG(isBottomUp); + unsigned Noops = 0; + for (unsigned i = 0, e = Sequence.size(); i != e; ++i) + if (!Sequence[i]) + ++Noops; + assert(Sequence.size() - Noops == ScheduledNodes && + "The number of nodes scheduled doesn't match the expected number!"); +} +#endif // NDEBUG + namespace { struct OrderSorter { bool operator()(const std::pair<unsigned, MachineInstr*> &A, @@ -686,9 +718,48 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order); } +void ScheduleDAGSDNodes:: +EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap, + MachineBasicBlock::iterator InsertPos) { + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; // ignore chain preds + if (I->getSUnit()->CopyDstRC) { + // Copy to physical register. + DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit()); + assert(VRI != VRBaseMap.end() && "Node emitted out of order - late"); + // Find the destination physical register. + unsigned Reg = 0; + for (SUnit::const_succ_iterator II = SU->Succs.begin(), + EE = SU->Succs.end(); II != EE; ++II) { + if (II->isCtrl()) continue; // ignore chain preds + if (II->getReg()) { + Reg = II->getReg(); + break; + } + } + BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg) + .addReg(VRI->second); + } else { + // Copy from physical register. + assert(I->getReg() && "Unknown physical register!"); + unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC); + bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second; + (void)isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase) + .addReg(I->getReg()); + } + break; + } +} -/// EmitSchedule - Emit the machine code in scheduled order. -MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { +/// EmitSchedule - Emit the machine code in scheduled order. Return the new +/// InsertPos and MachineBasicBlock that contains this insertion +/// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does +/// not necessarily refer to returned BB. The emitter may split blocks. +MachineBasicBlock *ScheduleDAGSDNodes:: +EmitSchedule(MachineBasicBlock::iterator &InsertPos) { InstrEmitter Emitter(BB, InsertPos); DenseMap<SDValue, unsigned> VRBaseMap; DenseMap<SUnit*, unsigned> CopyVRBaseMap; @@ -711,7 +782,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { SUnit *SU = Sequence[i]; if (!SU) { // Null SUnit* is a noop. - EmitNoop(); + TII->insertNoop(*Emitter.getBlock(), InsertPos); continue; } @@ -719,7 +790,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { // SDNode and any glued SDNodes and append them to the block. if (!SU->getNode()) { // Emit a copy. - EmitPhysRegCopy(SU, CopyVRBaseMap); + EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos); continue; } @@ -784,19 +855,24 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { } // Add trailing DbgValue's before the terminator. FIXME: May want to add // some of them before one or more conditional branches? + SmallVector<MachineInstr*, 8> DbgMIs; while (DI != DE) { - MachineBasicBlock *InsertBB = Emitter.getBlock(); - MachineBasicBlock::iterator Pos= Emitter.getBlock()->getFirstTerminator(); - if (!(*DI)->isInvalidated()) { - MachineInstr *DbgMI= Emitter.EmitDbgValue(*DI, VRBaseMap); - if (DbgMI) - InsertBB->insert(Pos, DbgMI); - } + if (!(*DI)->isInvalidated()) + if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap)) + DbgMIs.push_back(DbgMI); ++DI; } + + MachineBasicBlock *InsertBB = Emitter.getBlock(); + MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator(); + InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end()); } - BB = Emitter.getBlock(); InsertPos = Emitter.getInsertPos(); - return BB; + return Emitter.getBlock(); +} + +/// Return the basic block label. +std::string ScheduleDAGSDNodes::getDAGName() const { + return "sunit-dag." + BB->getFullName(); } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 9c27b2e..75940ec 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -35,17 +35,20 @@ namespace llvm { /// class ScheduleDAGSDNodes : public ScheduleDAG { public: + MachineBasicBlock *BB; SelectionDAG *DAG; // DAG of the current basic block const InstrItineraryData *InstrItins; + /// The schedule. Null SUnit*'s represent noop instructions. + std::vector<SUnit*> Sequence; + explicit ScheduleDAGSDNodes(MachineFunction &mf); virtual ~ScheduleDAGSDNodes() {} /// Run - perform scheduling. /// - void Run(SelectionDAG *dag, MachineBasicBlock *bb, - MachineBasicBlock::iterator insertPos); + void Run(SelectionDAG *dag, MachineBasicBlock *bb); /// isPassiveNode - Return true if the node is a non-scheduled leaf. /// @@ -53,6 +56,7 @@ namespace llvm { if (isa<ConstantSDNode>(Node)) return true; if (isa<ConstantFPSDNode>(Node)) return true; if (isa<RegisterSDNode>(Node)) return true; + if (isa<RegisterMaskSDNode>(Node)) return true; if (isa<GlobalAddressSDNode>(Node)) return true; if (isa<BasicBlockSDNode>(Node)) return true; if (isa<FrameIndexSDNode>(Node)) return true; @@ -67,7 +71,7 @@ namespace llvm { /// NewSUnit - Creates a new SUnit and return a ptr to it. /// - SUnit *NewSUnit(SDNode *N); + SUnit *newSUnit(SDNode *N); /// Clone - Creates a clone of the specified SUnit. It does not copy the /// predecessors / successors info nor the temporary scheduling states. @@ -78,7 +82,7 @@ namespace llvm { /// are input. This SUnit graph is similar to the SelectionDAG, but /// excludes nodes that aren't interesting to scheduling, and represents /// flagged together nodes with a single SUnit. - virtual void BuildSchedGraph(AliasAnalysis *AA); + void BuildSchedGraph(AliasAnalysis *AA); /// InitVRegCycleFlag - Set isVRegCycle if this node's single use is /// CopyToReg and its only active data operands are CopyFromReg within a @@ -90,30 +94,41 @@ namespace llvm { /// void InitNumRegDefsLeft(SUnit *SU); - /// ComputeLatency - Compute node latency. + /// computeLatency - Compute node latency. /// - virtual void ComputeLatency(SUnit *SU); + virtual void computeLatency(SUnit *SU); - /// ComputeOperandLatency - Override dependence edge latency using + /// computeOperandLatency - Override dependence edge latency using /// operand use/def information /// - virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use, + virtual void computeOperandLatency(SUnit *Def, SUnit *Use, SDep& dep) const { } - virtual void ComputeOperandLatency(SDNode *Def, SDNode *Use, + virtual void computeOperandLatency(SDNode *Def, SDNode *Use, unsigned OpIdx, SDep& dep) const; - virtual MachineBasicBlock *EmitSchedule(); - /// Schedule - Order nodes according to selected style, filling /// in the Sequence member. /// virtual void Schedule() = 0; + /// VerifyScheduledSequence - Verify that all SUnits are scheduled and + /// consistent with the Sequence of scheduled instructions. + void VerifyScheduledSequence(bool isBottomUp); + + /// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock + /// according to the order specified in Sequence. + /// + MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos); + virtual void dumpNode(const SUnit *SU) const; + void dumpSchedule() const; + virtual std::string getGraphNodeLabel(const SUnit *SU) const; + virtual std::string getDAGName() const; + virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const; /// RegDefIter - In place iteration over the values defined by an @@ -159,6 +174,9 @@ namespace llvm { /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph. void BuildSchedUnits(); void AddSchedEdges(); + + void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap, + MachineBasicBlock::iterator InsertPos); }; } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 430283d..c851291 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -1,4 +1,4 @@ -//===---- ScheduleDAGList.cpp - Implement a list scheduler for isel DAG ---===// +//===- ScheduleDAGVLIW.cpp - SelectionDAG list scheduler for VLIW -*- C++ -*-=// // // The LLVM Compiler Infrastructure // @@ -31,6 +31,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/ResourcePriorityQueue.h" #include <climits> using namespace llvm; @@ -38,15 +39,15 @@ STATISTIC(NumNoops , "Number of noops inserted"); STATISTIC(NumStalls, "Number of pipeline stalls"); static RegisterScheduler - tdListDAGScheduler("list-td", "Top-down list scheduler", - createTDListDAGScheduler); + VLIWScheduler("vliw-td", "VLIW scheduler", + createVLIWDAGScheduler); namespace { //===----------------------------------------------------------------------===// -/// ScheduleDAGList - The actual list scheduler implementation. This supports -/// top-down scheduling. +/// ScheduleDAGVLIW - The actual DFA list scheduler implementation. This +/// supports / top-down scheduling. /// -class ScheduleDAGList : public ScheduleDAGSDNodes { +class ScheduleDAGVLIW : public ScheduleDAGSDNodes { private: /// AvailableQueue - The priority queue to use for the available SUnits. /// @@ -61,16 +62,20 @@ private: /// HazardRec - The hazard recognizer to use. ScheduleHazardRecognizer *HazardRec; + /// AA - AliasAnalysis for making memory reference queries. + AliasAnalysis *AA; + public: - ScheduleDAGList(MachineFunction &mf, + ScheduleDAGVLIW(MachineFunction &mf, + AliasAnalysis *aa, SchedulingPriorityQueue *availqueue) - : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue) { + : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) { const TargetMachine &tm = mf.getTarget(); HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); } - ~ScheduleDAGList() { + ~ScheduleDAGVLIW() { delete HazardRec; delete AvailableQueue; } @@ -78,23 +83,25 @@ public: void Schedule(); private: - void ReleaseSucc(SUnit *SU, const SDep &D); - void ReleaseSuccessors(SUnit *SU); - void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); - void ListScheduleTopDown(); + void releaseSucc(SUnit *SU, const SDep &D); + void releaseSuccessors(SUnit *SU); + void scheduleNodeTopDown(SUnit *SU, unsigned CurCycle); + void listScheduleTopDown(); }; } // end anonymous namespace /// Schedule - Schedule the DAG using list scheduling. -void ScheduleDAGList::Schedule() { - DEBUG(dbgs() << "********** List Scheduling **********\n"); +void ScheduleDAGVLIW::Schedule() { + DEBUG(dbgs() + << "********** List Scheduling BB#" << BB->getNumber() + << " '" << BB->getName() << "' **********\n"); // Build the scheduling graph. - BuildSchedGraph(NULL); + BuildSchedGraph(AA); AvailableQueue->initNodes(SUnits); - ListScheduleTopDown(); + listScheduleTopDown(); AvailableQueue->releaseState(); } @@ -103,9 +110,9 @@ void ScheduleDAGList::Schedule() { // Top-Down Scheduling //===----------------------------------------------------------------------===// -/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to +/// releaseSucc - Decrement the NumPredsLeft count of a successor. Add it to /// the PendingQueue if the count reaches zero. Also update its cycle bound. -void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) { +void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) { SUnit *SuccSU = D.getSUnit(); #ifndef NDEBUG @@ -122,25 +129,26 @@ void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) { // If all the node's predecessors are scheduled, this node is ready // to be scheduled. Ignore the special ExitSU node. - if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) + if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) { PendingQueue.push_back(SuccSU); + } } -void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) { +void ScheduleDAGVLIW::releaseSuccessors(SUnit *SU) { // Top down: release successors. for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { assert(!I->isAssignedRegDep() && "The list-td scheduler doesn't yet support physreg dependencies!"); - ReleaseSucc(SU, *I); + releaseSucc(SU, *I); } } -/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending +/// scheduleNodeTopDown - Add the node to the schedule. Decrement the pending /// count of its successors. If a successor pending count is zero, add it to /// the Available queue. -void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { +void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); @@ -148,20 +156,20 @@ void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); SU->setDepthToAtLeast(CurCycle); - ReleaseSuccessors(SU); + releaseSuccessors(SU); SU->isScheduled = true; - AvailableQueue->ScheduledNode(SU); + AvailableQueue->scheduledNode(SU); } -/// ListScheduleTopDown - The main loop of list scheduling for top-down +/// listScheduleTopDown - The main loop of list scheduling for top-down /// schedulers. -void ScheduleDAGList::ListScheduleTopDown() { +void ScheduleDAGVLIW::listScheduleTopDown() { unsigned CurCycle = 0; // Release any successors of the special Entry node. - ReleaseSuccessors(&EntrySU); + releaseSuccessors(&EntrySU); - // All leaves to Available queue. + // All leaves to AvailableQueue. for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { // It is available if it has no predecessors. if (SUnits[i].Preds.empty()) { @@ -170,7 +178,7 @@ void ScheduleDAGList::ListScheduleTopDown() { } } - // While Available queue is not empty, grab the node with the highest + // While AvailableQueue is not empty, grab the node with the highest // priority. If it is not ready put it back. Schedule the node. std::vector<SUnit*> NotReady; Sequence.reserve(SUnits.size()); @@ -184,7 +192,8 @@ void ScheduleDAGList::ListScheduleTopDown() { PendingQueue[i] = PendingQueue.back(); PendingQueue.pop_back(); --i; --e; - } else { + } + else { assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?"); } } @@ -192,6 +201,8 @@ void ScheduleDAGList::ListScheduleTopDown() { // If there are no instructions available, don't try to issue anything, and // don't advance the hazard recognizer. if (AvailableQueue->empty()) { + // Reset DFA state. + AvailableQueue->scheduledNode(0); ++CurCycle; continue; } @@ -223,7 +234,7 @@ void ScheduleDAGList::ListScheduleTopDown() { // If we found a node to schedule, do it now. if (FoundSUnit) { - ScheduleNodeTopDown(FoundSUnit, CurCycle); + scheduleNodeTopDown(FoundSUnit, CurCycle); HazardRec->EmitInstruction(FoundSUnit); // If this is a pseudo-op node, we don't want to increment the current @@ -250,7 +261,7 @@ void ScheduleDAGList::ListScheduleTopDown() { } #ifndef NDEBUG - VerifySchedule(/*isBottomUp=*/false); + VerifyScheduledSequence(/*isBottomUp=*/false); #endif } @@ -258,8 +269,8 @@ void ScheduleDAGList::ListScheduleTopDown() { // Public Constructor Functions //===----------------------------------------------------------------------===// -/// createTDListDAGScheduler - This creates a top-down list scheduler. +/// createVLIWDAGScheduler - This creates a top-down list scheduler. ScheduleDAGSDNodes * -llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { - return new ScheduleDAGList(*IS->MF, new LatencyPriorityQueue()); +llvm::createVLIWDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + return new ScheduleDAGVLIW(*IS->MF, IS->AA, new ResourcePriorityQueue(IS)); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 20bea8e..92671d1 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -28,7 +28,6 @@ #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" @@ -63,6 +62,7 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { static const fltSemantics *EVTToAPFloatSemantics(EVT VT) { switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unknown FP format"); + case MVT::f16: return &APFloat::IEEEhalf; case MVT::f32: return &APFloat::IEEEsingle; case MVT::f64: return &APFloat::IEEEdouble; case MVT::f80: return &APFloat::x87DoubleExtended; @@ -125,20 +125,29 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) { if (i == e) return false; // Do not accept build_vectors that aren't all constants or which have non-~0 - // elements. + // elements. We have to be a bit careful here, as the type of the constant + // may not be the same as the type of the vector elements due to type + // legalization (the elements are promoted to a legal type for the target and + // a vector of a type may be legal when the base element type is not). + // We only want to check enough bits to cover the vector elements, because + // we care if the resultant vector is all ones, not whether the individual + // constants are. SDValue NotZero = N->getOperand(i); + unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); if (isa<ConstantSDNode>(NotZero)) { - if (!cast<ConstantSDNode>(NotZero)->isAllOnesValue()) + if (cast<ConstantSDNode>(NotZero)->getAPIntValue().countTrailingOnes() < + EltSize) return false; } else if (isa<ConstantFPSDNode>(NotZero)) { - if (!cast<ConstantFPSDNode>(NotZero)->getValueAPF(). - bitcastToAPInt().isAllOnesValue()) + if (cast<ConstantFPSDNode>(NotZero)->getValueAPF() + .bitcastToAPInt().countTrailingOnes() < EltSize) return false; } else return false; // Okay, we have at least one ~0 value, check to see if the rest match or are - // undefs. + // undefs. Even with the above element type twiddling, this should be OK, as + // the same type legalization should have applied to all the elements. for (++i; i != e; ++i) if (N->getOperand(i) != NotZero && N->getOperand(i).getOpcode() != ISD::UNDEF) @@ -384,7 +393,9 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { case ISD::Register: ID.AddInteger(cast<RegisterSDNode>(N)->getReg()); break; - + case ISD::RegisterMask: + ID.AddPointer(cast<RegisterMaskSDNode>(N)->getRegMask()); + break; case ISD::SRCVALUE: ID.AddPointer(cast<SrcValueSDNode>(N)->getValue()); break; @@ -475,7 +486,7 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) { /// static inline unsigned encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile, - bool isNonTemporal) { + bool isNonTemporal, bool isInvariant) { assert((ConvType & 3) == ConvType && "ConvType may not require more than 2 bits!"); assert((AM & 7) == AM && @@ -483,7 +494,8 @@ encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile, return ConvType | (AM << 2) | (isVolatile << 5) | - (isNonTemporal << 6); + (isNonTemporal << 6) | + (isInvariant << 7); } //===----------------------------------------------------------------------===// @@ -564,6 +576,12 @@ void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes, void SelectionDAG::RemoveDeadNode(SDNode *N, DAGUpdateListener *UpdateListener){ SmallVector<SDNode*, 16> DeadNodes(1, N); + + // Create a dummy node that adds a reference to the root node, preventing + // it from being deleted. (This matters if the root is an operand of the + // dead node.) + HandleSDNode Dummy(getRoot()); + RemoveDeadNodes(DeadNodes, UpdateListener); } @@ -834,9 +852,9 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { } // EntryNode could meaningfully have debug info if we can find it... -SelectionDAG::SelectionDAG(const TargetMachine &tm) +SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) : TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()), - EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)), + OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)), Root(getEntryNode()), Ordering(0) { AllNodes.push_back(&EntryNode); Ordering = new SDNodeOrdering(); @@ -1025,16 +1043,14 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { return getConstantFP(APFloat((float)Val), VT, isTarget); else if (EltVT==MVT::f64) return getConstantFP(APFloat(Val), VT, isTarget); - else if (EltVT==MVT::f80 || EltVT==MVT::f128) { + else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::f16) { bool ignored; APFloat apf = APFloat(Val); apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, &ignored); return getConstantFP(apf, VT, isTarget); - } else { - assert(0 && "Unsupported type in getConstantFP"); - return SDValue(); - } + } else + llvm_unreachable("Unsupported type in getConstantFP"); } SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL, @@ -1369,6 +1385,20 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { return SDValue(N, 0); } +SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), 0, 0); + ID.AddPointer(RegMask); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) RegisterMaskSDNode(RegMask); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) { FoldingSetNodeID ID; SDValue Ops[] = { Root }; @@ -1598,7 +1628,7 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth) const { APInt KnownZero, KnownOne; - ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); + ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); return (KnownZero & Mask) == Mask; } @@ -1607,15 +1637,12 @@ bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, /// known to be either zero or one and return them in the KnownZero/KnownOne /// bitsets. This code only analyzes bits in Mask, in order to short-circuit /// processing. -void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, - APInt &KnownZero, APInt &KnownOne, - unsigned Depth) const { - unsigned BitWidth = Mask.getBitWidth(); - assert(BitWidth == Op.getValueType().getScalarType().getSizeInBits() && - "Mask size mismatches value type size!"); +void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, + APInt &KnownOne, unsigned Depth) const { + unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. - if (Depth == 6 || Mask == 0) + if (Depth == 6) return; // Limit search depth. APInt KnownZero2, KnownOne2; @@ -1623,14 +1650,13 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, switch (Op.getOpcode()) { case ISD::Constant: // We know all of the bits for a constant! - KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & Mask; - KnownZero = ~KnownOne & Mask; + KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue(); + KnownZero = ~KnownOne; return; case ISD::AND: // If either the LHS or the RHS are Zero, the result is zero. - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownZero, - KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1640,9 +1666,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, KnownZero |= KnownZero2; return; case ISD::OR: - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownOne, - KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1652,8 +1677,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, KnownOne |= KnownOne2; return; case ISD::XOR: { - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1665,9 +1690,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, return; } case ISD::MUL: { - APInt Mask2 = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1686,33 +1710,29 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, LeadZ = std::min(LeadZ, BitWidth); KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) | APInt::getHighBitsSet(BitWidth, LeadZ); - KnownZero &= Mask; return; } case ISD::UDIV: { // For the purposes of computing leading zeros we can conservatively // treat a udiv as a logical right shift by the power of 2 known to // be less than the denominator. - APInt AllOnes = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(Op.getOperand(0), - AllOnes, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); unsigned LeadZ = KnownZero2.countLeadingOnes(); KnownOne2.clearAllBits(); KnownZero2.clearAllBits(); - ComputeMaskedBits(Op.getOperand(1), - AllOnes, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); if (RHSUnknownLeadingOnes != BitWidth) LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); - KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask; + KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ); return; } case ISD::SELECT: - ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1721,8 +1741,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, KnownZero &= KnownZero2; return; case ISD::SELECT_CC: - ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1754,8 +1774,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (ShAmt >= BitWidth) return; - ComputeMaskedBits(Op.getOperand(0), Mask.lshr(ShAmt), - KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero <<= ShAmt; KnownOne <<= ShAmt; @@ -1772,13 +1791,12 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (ShAmt >= BitWidth) return; - ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt), - KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = KnownZero.lshr(ShAmt); KnownOne = KnownOne.lshr(ShAmt); - APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask; + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); KnownZero |= HighBits; // High bits known zero. } return; @@ -1790,15 +1808,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (ShAmt >= BitWidth) return; - APInt InDemandedMask = (Mask << ShAmt); // If any of the demanded bits are produced by the sign extension, we also // demand the input sign bit. - APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask; - if (HighBits.getBoolValue()) - InDemandedMask |= APInt::getSignBit(BitWidth); + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); - ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne, - Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = KnownZero.lshr(ShAmt); KnownOne = KnownOne.lshr(ShAmt); @@ -1820,10 +1834,10 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // Sign extension. Compute the demanded bits in the result that are not // present in the input. - APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits) & Mask; + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits); APInt InSignBit = APInt::getSignBit(EBits); - APInt InputDemandedBits = Mask & APInt::getLowBitsSet(BitWidth, EBits); + APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EBits); // If the sign extended bits are demanded, we know that the sign // bit is demanded. @@ -1831,8 +1845,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (NewBits.getBoolValue()) InputDemandedBits |= InSignBit; - ComputeMaskedBits(Op.getOperand(0), InputDemandedBits, - KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + KnownOne &= InputDemandedBits; + KnownZero &= InputDemandedBits; assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); // If the sign bit of the input is known set or clear, then we know the @@ -1850,7 +1865,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, return; } case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: case ISD::CTPOP: { unsigned LowBits = Log2_32(BitWidth)+1; KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); @@ -1858,22 +1875,23 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, return; } case ISD::LOAD: { + LoadSDNode *LD = cast<LoadSDNode>(Op); if (ISD::isZEXTLoad(Op.getNode())) { - LoadSDNode *LD = cast<LoadSDNode>(Op); EVT VT = LD->getMemoryVT(); unsigned MemBits = VT.getScalarType().getSizeInBits(); - KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) & Mask; + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); + } else if (const MDNode *Ranges = LD->getRanges()) { + computeMaskedBitsLoad(*Ranges, KnownZero); } return; } case ISD::ZERO_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); - APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; - APInt InMask = Mask.trunc(InBits); + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits); KnownZero = KnownZero.trunc(InBits); KnownOne = KnownOne.trunc(InBits); - ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); KnownZero |= NewBits; @@ -1883,17 +1901,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); APInt InSignBit = APInt::getSignBit(InBits); - APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; - APInt InMask = Mask.trunc(InBits); - - // If any of the sign extended bits are demanded, we know that the sign - // bit is demanded. Temporarily set this bit in the mask for our callee. - if (NewBits.getBoolValue()) - InMask |= InSignBit; + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits); KnownZero = KnownZero.trunc(InBits); KnownOne = KnownOne.trunc(InBits); - ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); // Note if the sign bit is known to be zero or one. bool SignBitKnownZero = KnownZero.isNegative(); @@ -1901,13 +1913,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, assert(!(SignBitKnownZero && SignBitKnownOne) && "Sign bit can't be known to be both zero and one!"); - // If the sign bit wasn't actually demanded by our caller, we don't - // want it set in the KnownZero and KnownOne result values. Reset the - // mask and reapply it to the result values. - InMask = Mask.trunc(InBits); - KnownZero &= InMask; - KnownOne &= InMask; - KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); @@ -1921,10 +1926,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::ANY_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); - APInt InMask = Mask.trunc(InBits); KnownZero = KnownZero.trunc(InBits); KnownOne = KnownOne.trunc(InBits); - ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); return; @@ -1932,10 +1936,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::TRUNCATE: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); - APInt InMask = Mask.zext(InBits); KnownZero = KnownZero.zext(InBits); KnownOne = KnownOne.zext(InBits); - ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = KnownZero.trunc(BitWidth); KnownOne = KnownOne.trunc(BitWidth); @@ -1944,9 +1947,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::AssertZext: { EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); - ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, - KnownOne, Depth+1); - KnownZero |= (~InMask) & Mask; + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + KnownZero |= (~InMask); return; } case ISD::FGETSIGN: @@ -1963,8 +1965,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros(); // NLZ can't be BitWidth with no sign bit APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero2, KnownOne2, - Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); // If all of the MaskV bits are known to be zero, then we know the // output top bits are zero, because we now know that the output is @@ -1972,7 +1973,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if ((KnownZero2 & MaskV) == MaskV) { unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros(); // Top bits known zero. - KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask; + KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2); } } } @@ -1983,13 +1984,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // Output known-0 bits are known if clear or set in both the low clear bits // common to both LHS & RHS. For example, 8+(X<<3) is known to have the // low 3 bits clear. - APInt Mask2 = APInt::getLowBitsSet(BitWidth, - BitWidth - Mask.countLeadingZeros()); - ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); unsigned KnownZeroOut = KnownZero2.countTrailingOnes(); - ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); KnownZeroOut = std::min(KnownZeroOut, KnownZero2.countTrailingOnes()); @@ -2013,7 +2012,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (RA.isPowerOf2()) { APInt LowBits = RA - 1; APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); - ComputeMaskedBits(Op.getOperand(0), Mask2,KnownZero2,KnownOne2,Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1); // The low bits of the first operand are unchanged by the srem. KnownZero = KnownZero2 & LowBits; @@ -2028,10 +2027,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // the upper bits are all one. if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0)) KnownOne |= ~LowBits; - - KnownZero &= Mask; - KnownOne &= Mask; - assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); } } @@ -2041,9 +2036,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, const APInt &RA = Rem->getAPIntValue(); if (RA.isPowerOf2()) { APInt LowBits = (RA - 1); - APInt Mask2 = LowBits & Mask; - KnownZero |= ~LowBits & Mask; - ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero, KnownOne,Depth+1); + KnownZero |= ~LowBits; + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne,Depth+1); assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); break; } @@ -2051,16 +2045,13 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // Since the result is less than or equal to either operand, any leading // zero bits in either operand must also exist in the result. - APInt AllOnes = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(Op.getOperand(0), AllOnes, KnownZero, KnownOne, - Depth+1); - ComputeMaskedBits(Op.getOperand(1), AllOnes, KnownZero2, KnownOne2, - Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); uint32_t Leaders = std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); KnownOne.clearAllBits(); - KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask; + KnownZero = APInt::getHighBitsSet(BitWidth, Leaders); return; } case ISD::FrameIndex: @@ -2080,8 +2071,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_VOID: // Allow the target to implement this method for its nodes. - TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this, - Depth); + TLI.computeMaskedBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth); return; } } @@ -2205,12 +2195,11 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1))) if (CRHS->isAllOnesValue()) { APInt KnownZero, KnownOne; - APInt Mask = APInt::getAllOnesValue(VTBits); - ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((KnownZero | APInt(VTBits, 1)) == Mask) + if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue()) return VTBits; // If we are subtracting one from a positive number, there is no carry @@ -2221,8 +2210,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); if (Tmp2 == 1) return 1; - return std::min(Tmp, Tmp2)-1; - break; + return std::min(Tmp, Tmp2)-1; case ISD::SUB: Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); @@ -2232,11 +2220,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) if (CLHS->isNullValue()) { APInt KnownZero, KnownOne; - APInt Mask = APInt::getAllOnesValue(VTBits); - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((KnownZero | APInt(VTBits, 1)) == Mask) + if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue()) return VTBits; // If the input is known to be positive (the sign bit is known clear), @@ -2251,8 +2238,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ // is, at worst, one more bit than the inputs. Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); if (Tmp == 1) return 1; // Early out. - return std::min(Tmp, Tmp2)-1; - break; + return std::min(Tmp, Tmp2)-1; case ISD::TRUNCATE: // FIXME: it's tricky to do anything useful for this, but it is an important // case for targets like X86. @@ -2286,9 +2272,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ // Finally, if we can prove that the top bits of the result are 0's or 1's, // use this information. APInt KnownZero, KnownOne; - APInt Mask = APInt::getAllOnesValue(VTBits); - ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); + ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); + APInt Mask; if (KnownZero.isNegative()) { // sign bit is 0 Mask = KnownZero; } else if (KnownOne.isNegative()) { // sign bit is 1; @@ -2328,7 +2314,7 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const { bool SelectionDAG::isKnownNeverNaN(SDValue Op) const { // If we're told that NaNs won't happen, assume they won't. - if (NoNaNsFPMath) + if (getTarget().Options.NoNaNsFPMath) return true; // If the value is a constant, we can obviously see if it is a NaN or not. @@ -2423,8 +2409,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, case ISD::CTPOP: return getConstant(Val.countPopulation(), VT); case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: return getConstant(Val.countLeadingZeros(), VT); case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: return getConstant(Val.countTrailingZeros(), VT); } } @@ -2440,7 +2428,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, case ISD::FABS: V.clearSign(); return getConstantFP(V, VT); - case ISD::FP_ROUND: case ISD::FP_EXTEND: { bool ignored; // This can return overflow, underflow, or inexact; we don't care. @@ -2561,17 +2548,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, "Vector element count mismatch!"); if (OpOpcode == ISD::TRUNCATE) return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); - else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || - OpOpcode == ISD::ANY_EXTEND) { + if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || + OpOpcode == ISD::ANY_EXTEND) { // If the source is smaller than the dest, we still need an extend. if (Operand.getNode()->getOperand(0).getValueType().getScalarType() .bitsLT(VT.getScalarType())) return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); - else if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT)) + if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT)) return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); - else - return Operand.getNode()->getOperand(0); + return Operand.getNode()->getOperand(0); } + if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); break; case ISD::BITCAST: // Basic sanity checking. @@ -2601,7 +2589,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, break; case ISD::FNEG: // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 - if (UnsafeFPMath && OpOpcode == ISD::FSUB) + if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB) return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1), Operand.getNode()->getOperand(0)); if (OpOpcode == ISD::FNEG) // --X -> X @@ -2736,7 +2724,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, case ISD::FMUL: case ISD::FDIV: case ISD::FREM: - if (UnsafeFPMath) { + if (getTarget().Options.UnsafeFPMath) { if (Opcode == ISD::FADD) { // 0+x --> x if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) @@ -3005,6 +2993,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, default: break; } } + + if (Opcode == ISD::FP_ROUND) { + APFloat V = N1CFP->getValueAPF(); // make copy + bool ignored; + // This can return overflow, underflow, or inexact; we don't care. + // FIXME need to be more flexible about rounding mode. + (void)V.convert(*EVTToAPFloatSemantics(VT), + APFloat::rmNearestTiesToEven, &ignored); + return getConstantFP(V, VT); + } } // Canonicalize an UNDEF to the RHS, even over a constant. @@ -3059,7 +3057,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, case ISD::FMUL: case ISD::FDIV: case ISD::FREM: - if (UnsafeFPMath) + if (getTarget().Options.UnsafeFPMath) return N2; break; case ISD::MUL: @@ -3133,16 +3131,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, case ISD::SELECT: if (N1C) { if (N1C->getZExtValue()) - return N2; // select true, X, Y -> X - else - return N3; // select false, X, Y -> Y + return N2; // select true, X, Y -> X + return N3; // select false, X, Y -> Y } if (N2 == N3) return N2; // select C, X, X -> X break; case ISD::VECTOR_SHUFFLE: llvm_unreachable("should use getVectorShuffle constructor!"); - break; case ISD::INSERT_SUBVECTOR: { SDValue Index = N3; if (VT.isSimple() && N1.getValueType().isSimple() @@ -3275,8 +3271,7 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, /// used when a memcpy is turned into a memset when the source is a constant /// string ptr. static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, - const TargetLowering &TLI, - std::string &Str, unsigned Offset) { + const TargetLowering &TLI, StringRef Str) { // Handle vector with all elements zero. if (Str.empty()) { if (VT.isInteger()) @@ -3294,15 +3289,18 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, } assert(!VT.isVector() && "Can't handle vector type here!"); - unsigned NumBits = VT.getSizeInBits(); - unsigned MSB = NumBits / 8; + unsigned NumVTBytes = VT.getSizeInBits() / 8; + unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size())); + uint64_t Val = 0; - if (TLI.isLittleEndian()) - Offset = Offset + MSB - 1; - for (unsigned i = 0; i != MSB; ++i) { - Val = (Val << 8) | (unsigned char)Str[Offset]; - Offset += TLI.isLittleEndian() ? -1 : 1; + if (TLI.isLittleEndian()) { + for (unsigned i = 0; i != NumBytes; ++i) + Val |= (uint64_t)(unsigned char)Str[i] << i*8; + } else { + for (unsigned i = 0; i != NumBytes; ++i) + Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8; } + return DAG.getConstant(Val, VT); } @@ -3317,7 +3315,7 @@ static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, /// isMemSrcFromString - Returns true if memcpy source is a string constant. /// -static bool isMemSrcFromString(SDValue Src, std::string &Str) { +static bool isMemSrcFromString(SDValue Src, StringRef &Str) { unsigned SrcDelta = 0; GlobalAddressSDNode *G = NULL; if (Src.getOpcode() == ISD::GlobalAddress) @@ -3331,11 +3329,7 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) { if (!G) return false; - const GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal()); - if (GV && GetConstantStringInfo(GV, Str, SrcDelta, false)) - return true; - - return false; + return getConstantStringInfo(G->getGlobal(), Str, SrcDelta, false); } /// FindOptimalMemOpLowering - Determines the optimial series memory ops @@ -3345,7 +3339,7 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) { static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit, uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool NonScalarIntSafe, + bool IsZeroVal, bool MemcpyStrSrc, SelectionDAG &DAG, const TargetLowering &TLI) { @@ -3359,7 +3353,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does // not need to be loaded. EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, - NonScalarIntSafe, MemcpyStrSrc, + IsZeroVal, MemcpyStrSrc, DAG.getMachineFunction()); if (VT == MVT::Other) { @@ -3438,7 +3432,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, unsigned SrcAlign = DAG.InferPtrAlignment(Src); if (Align > SrcAlign) SrcAlign = Align; - std::string Str; + StringRef Str; bool CopyFromStr = isMemSrcFromString(Src, Str); bool isZeroStr = CopyFromStr && Str.empty(); unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); @@ -3475,7 +3469,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, // We only handle zero vectors here. // FIXME: Handle other cases where store of vector immediate is done in // a single instruction. - Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff); + Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff)); Store = DAG.getStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, DAG), DstPtrInfo.getWithOffset(DstOff), isVol, @@ -3562,7 +3556,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Value = DAG.getLoad(VT, dl, Chain, getMemBasePlusOffset(Src, SrcOff, DAG), SrcPtrInfo.getWithOffset(SrcOff), isVol, - false, SrcAlign); + false, false, SrcAlign); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); SrcOff += VTSize; @@ -3606,11 +3600,11 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; - bool NonScalarIntSafe = + bool IsZeroVal = isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue(); if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize), Size, (DstAlignCanChange ? 0 : Align), 0, - NonScalarIntSafe, false, DAG, TLI)) + IsZeroVal, false, DAG, TLI)) return SDValue(); if (DstAlignCanChange) { @@ -3717,8 +3711,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMCPY), false, - /*isReturnValueUsed=*/false, + TLI.getLibcallCallingConv(RTLIB::MEMCPY), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/false, getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY), TLI.getPointerTy()), Args, *this, dl); @@ -3769,8 +3764,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMMOVE), false, - /*isReturnValueUsed=*/false, + TLI.getLibcallCallingConv(RTLIB::MEMMOVE), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/false, getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE), TLI.getPointerTy()), Args, *this, dl); @@ -3829,8 +3825,9 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMSET), false, - /*isReturnValueUsed=*/false, + TLI.getLibcallCallingConv(RTLIB::MEMSET), + /*isTailCall=*/false, + /*doesNotReturn*/false, /*isReturnValueUsed=*/false, getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), TLI.getPointerTy()), Args, *this, dl); @@ -4138,8 +4135,9 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, SDValue Offset, MachinePointerInfo PtrInfo, EVT MemVT, - bool isVolatile, bool isNonTemporal, - unsigned Alignment, const MDNode *TBAAInfo) { + bool isVolatile, bool isNonTemporal, bool isInvariant, + unsigned Alignment, const MDNode *TBAAInfo, + const MDNode *Ranges) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 @@ -4150,6 +4148,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, Flags |= MachineMemOperand::MOVolatile; if (isNonTemporal) Flags |= MachineMemOperand::MONonTemporal; + if (isInvariant) + Flags |= MachineMemOperand::MOInvariant; // If we don't have a PtrInfo, infer the trivial frame index case to simplify // clients. @@ -4159,7 +4159,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment, - TBAAInfo); + TBAAInfo, Ranges); return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO); } @@ -4196,7 +4196,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3); ID.AddInteger(MemVT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(), - MMO->isNonTemporal())); + MMO->isNonTemporal(), + MMO->isInvariant())); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<LoadSDNode>(E)->refineAlignment(MMO); @@ -4213,10 +4214,13 @@ SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, - unsigned Alignment, const MDNode *TBAAInfo) { + bool isInvariant, unsigned Alignment, + const MDNode *TBAAInfo, + const MDNode *Ranges) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, - PtrInfo, VT, isVolatile, isNonTemporal, Alignment, TBAAInfo); + PtrInfo, VT, isVolatile, isNonTemporal, isInvariant, Alignment, + TBAAInfo, Ranges); } SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, @@ -4226,7 +4230,7 @@ SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, unsigned Alignment, const MDNode *TBAAInfo) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, - PtrInfo, MemVT, isVolatile, isNonTemporal, Alignment, + PtrInfo, MemVT, isVolatile, isNonTemporal, false, Alignment, TBAAInfo); } @@ -4239,8 +4243,8 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, "Load is already a indexed load!"); return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl, LD->getChain(), Base, Offset, LD->getPointerInfo(), - LD->getMemoryVT(), - LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); + LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), + false, LD->getAlignment()); } SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, @@ -4282,7 +4286,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); ID.AddInteger(VT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(), - MMO->isNonTemporal())); + MMO->isNonTemporal(), MMO->isInvariant())); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); @@ -4349,7 +4353,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); ID.AddInteger(SVT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(), - MMO->isNonTemporal())); + MMO->isNonTemporal(), MMO->isInvariant())); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); @@ -4903,6 +4907,20 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, return N; } +/// UpdadeDebugLocOnMergedSDNode - If the opt level is -O0 then it throws away +/// the line number information on the merged node since it is not possible to +/// preserve the information that operation is associated with multiple lines. +/// This will make the debugger working better at -O0, were there is a higher +/// probability having other instructions associated with that line. +/// +SDNode *SelectionDAG::UpdadeDebugLocOnMergedSDNode(SDNode *N, DebugLoc OLoc) { + DebugLoc NLoc = N->getDebugLoc(); + if (!(NLoc.isUnknown()) && (OptLevel == CodeGenOpt::None) && (OLoc != NLoc)) { + N->setDebugLoc(DebugLoc()); + } + return N; +} + /// MorphNodeTo - This *mutates* the specified node to have the specified /// return type, opcode, and operands. /// @@ -4924,7 +4942,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, VTs, Ops, NumOps); if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) - return ON; + return UpdadeDebugLocOnMergedSDNode(ON, N->getDebugLoc()); } if (!RemoveNodeFromCSEMaps(N)) @@ -5128,8 +5146,9 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, FoldingSetNodeID ID; AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps); IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) - return cast<MachineSDNode>(E); + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + return cast<MachineSDNode>(UpdadeDebugLocOnMergedSDNode(E, DL)); + } } // Allocate a new MachineSDNode. @@ -5290,6 +5309,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To, // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User, &Listener); } + + // If we just RAUW'd the root, take note. + if (FromN == getRoot()) + setRoot(To); } /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. @@ -5335,6 +5358,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To, // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User, &Listener); } + + // If we just RAUW'd the root, take note. + if (From == getRoot().getNode()) + setRoot(SDValue(To, getRoot().getResNo())); } /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. @@ -5373,6 +5400,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User, &Listener); } + + // If we just RAUW'd the root, take note. + if (From == getRoot().getNode()) + setRoot(SDValue(To[getRoot().getResNo()])); } /// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving @@ -5431,6 +5462,10 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To, // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User, &Listener); } + + // If we just RAUW'd the root, take note. + if (From == getRoot()) + setRoot(To); } namespace { @@ -5657,7 +5692,7 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt, MachineMemOperand *mmo) : SDNode(Opc, dl, VTs), MemoryVT(memvt), MMO(mmo) { SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), - MMO->isNonTemporal()); + MMO->isNonTemporal(), MMO->isInvariant()); assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); assert(isNonTemporal() == MMO->isNonTemporal() && "Non-temporal encoding error!"); @@ -5670,7 +5705,7 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, : SDNode(Opc, dl, VTs, Ops, NumOps), MemoryVT(memvt), MMO(mmo) { SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), - MMO->isNonTemporal()); + MMO->isNonTemporal(), MMO->isInvariant()); assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!"); } @@ -5846,565 +5881,6 @@ uint64_t SDNode::getConstantOperandVal(unsigned Num) const { return cast<ConstantSDNode>(OperandList[Num])->getZExtValue(); } -std::string SDNode::getOperationName(const SelectionDAG *G) const { - switch (getOpcode()) { - default: - if (getOpcode() < ISD::BUILTIN_OP_END) - return "<<Unknown DAG Node>>"; - if (isMachineOpcode()) { - if (G) - if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo()) - if (getMachineOpcode() < TII->getNumOpcodes()) - return TII->get(getMachineOpcode()).getName(); - return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>"; - } - if (G) { - const TargetLowering &TLI = G->getTargetLoweringInfo(); - const char *Name = TLI.getTargetNodeName(getOpcode()); - if (Name) return Name; - return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>"; - } - return "<<Unknown Node #" + utostr(getOpcode()) + ">>"; - -#ifndef NDEBUG - case ISD::DELETED_NODE: - return "<<Deleted Node!>>"; -#endif - case ISD::PREFETCH: return "Prefetch"; - case ISD::MEMBARRIER: return "MemBarrier"; - case ISD::ATOMIC_FENCE: return "AtomicFence"; - case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; - case ISD::ATOMIC_SWAP: return "AtomicSwap"; - case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd"; - case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub"; - case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd"; - case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr"; - case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor"; - case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand"; - case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin"; - case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax"; - case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin"; - case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax"; - case ISD::ATOMIC_LOAD: return "AtomicLoad"; - case ISD::ATOMIC_STORE: return "AtomicStore"; - case ISD::PCMARKER: return "PCMarker"; - case ISD::READCYCLECOUNTER: return "ReadCycleCounter"; - case ISD::SRCVALUE: return "SrcValue"; - case ISD::MDNODE_SDNODE: return "MDNode"; - case ISD::EntryToken: return "EntryToken"; - case ISD::TokenFactor: return "TokenFactor"; - case ISD::AssertSext: return "AssertSext"; - case ISD::AssertZext: return "AssertZext"; - - case ISD::BasicBlock: return "BasicBlock"; - case ISD::VALUETYPE: return "ValueType"; - case ISD::Register: return "Register"; - - case ISD::Constant: return "Constant"; - case ISD::ConstantFP: return "ConstantFP"; - case ISD::GlobalAddress: return "GlobalAddress"; - case ISD::GlobalTLSAddress: return "GlobalTLSAddress"; - case ISD::FrameIndex: return "FrameIndex"; - case ISD::JumpTable: return "JumpTable"; - case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; - case ISD::RETURNADDR: return "RETURNADDR"; - case ISD::FRAMEADDR: return "FRAMEADDR"; - case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; - case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR"; - case ISD::LSDAADDR: return "LSDAADDR"; - case ISD::EHSELECTION: return "EHSELECTION"; - case ISD::EH_RETURN: return "EH_RETURN"; - case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; - case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; - case ISD::EH_SJLJ_DISPATCHSETUP: return "EH_SJLJ_DISPATCHSETUP"; - case ISD::ConstantPool: return "ConstantPool"; - case ISD::ExternalSymbol: return "ExternalSymbol"; - case ISD::BlockAddress: return "BlockAddress"; - case ISD::INTRINSIC_WO_CHAIN: - case ISD::INTRINSIC_VOID: - case ISD::INTRINSIC_W_CHAIN: { - unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1; - unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue(); - if (IID < Intrinsic::num_intrinsics) - return Intrinsic::getName((Intrinsic::ID)IID); - else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo()) - return TII->getName(IID); - llvm_unreachable("Invalid intrinsic ID"); - } - - case ISD::BUILD_VECTOR: return "BUILD_VECTOR"; - case ISD::TargetConstant: return "TargetConstant"; - case ISD::TargetConstantFP:return "TargetConstantFP"; - case ISD::TargetGlobalAddress: return "TargetGlobalAddress"; - case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress"; - case ISD::TargetFrameIndex: return "TargetFrameIndex"; - case ISD::TargetJumpTable: return "TargetJumpTable"; - case ISD::TargetConstantPool: return "TargetConstantPool"; - case ISD::TargetExternalSymbol: return "TargetExternalSymbol"; - case ISD::TargetBlockAddress: return "TargetBlockAddress"; - - case ISD::CopyToReg: return "CopyToReg"; - case ISD::CopyFromReg: return "CopyFromReg"; - case ISD::UNDEF: return "undef"; - case ISD::MERGE_VALUES: return "merge_values"; - case ISD::INLINEASM: return "inlineasm"; - case ISD::EH_LABEL: return "eh_label"; - case ISD::HANDLENODE: return "handlenode"; - - // Unary operators - case ISD::FABS: return "fabs"; - case ISD::FNEG: return "fneg"; - case ISD::FSQRT: return "fsqrt"; - case ISD::FSIN: return "fsin"; - case ISD::FCOS: return "fcos"; - case ISD::FTRUNC: return "ftrunc"; - case ISD::FFLOOR: return "ffloor"; - case ISD::FCEIL: return "fceil"; - case ISD::FRINT: return "frint"; - case ISD::FNEARBYINT: return "fnearbyint"; - case ISD::FEXP: return "fexp"; - case ISD::FEXP2: return "fexp2"; - case ISD::FLOG: return "flog"; - case ISD::FLOG2: return "flog2"; - case ISD::FLOG10: return "flog10"; - - // Binary operators - case ISD::ADD: return "add"; - case ISD::SUB: return "sub"; - case ISD::MUL: return "mul"; - case ISD::MULHU: return "mulhu"; - case ISD::MULHS: return "mulhs"; - case ISD::SDIV: return "sdiv"; - case ISD::UDIV: return "udiv"; - case ISD::SREM: return "srem"; - case ISD::UREM: return "urem"; - case ISD::SMUL_LOHI: return "smul_lohi"; - case ISD::UMUL_LOHI: return "umul_lohi"; - case ISD::SDIVREM: return "sdivrem"; - case ISD::UDIVREM: return "udivrem"; - case ISD::AND: return "and"; - case ISD::OR: return "or"; - case ISD::XOR: return "xor"; - case ISD::SHL: return "shl"; - case ISD::SRA: return "sra"; - case ISD::SRL: return "srl"; - case ISD::ROTL: return "rotl"; - case ISD::ROTR: return "rotr"; - case ISD::FADD: return "fadd"; - case ISD::FSUB: return "fsub"; - case ISD::FMUL: return "fmul"; - case ISD::FDIV: return "fdiv"; - case ISD::FMA: return "fma"; - case ISD::FREM: return "frem"; - case ISD::FCOPYSIGN: return "fcopysign"; - case ISD::FGETSIGN: return "fgetsign"; - case ISD::FPOW: return "fpow"; - - case ISD::FPOWI: return "fpowi"; - case ISD::SETCC: return "setcc"; - case ISD::SELECT: return "select"; - case ISD::VSELECT: return "vselect"; - case ISD::SELECT_CC: return "select_cc"; - case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt"; - case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt"; - case ISD::CONCAT_VECTORS: return "concat_vectors"; - case ISD::INSERT_SUBVECTOR: return "insert_subvector"; - case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; - case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; - case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; - case ISD::CARRY_FALSE: return "carry_false"; - case ISD::ADDC: return "addc"; - case ISD::ADDE: return "adde"; - case ISD::SADDO: return "saddo"; - case ISD::UADDO: return "uaddo"; - case ISD::SSUBO: return "ssubo"; - case ISD::USUBO: return "usubo"; - case ISD::SMULO: return "smulo"; - case ISD::UMULO: return "umulo"; - case ISD::SUBC: return "subc"; - case ISD::SUBE: return "sube"; - case ISD::SHL_PARTS: return "shl_parts"; - case ISD::SRA_PARTS: return "sra_parts"; - case ISD::SRL_PARTS: return "srl_parts"; - - // Conversion operators. - case ISD::SIGN_EXTEND: return "sign_extend"; - case ISD::ZERO_EXTEND: return "zero_extend"; - case ISD::ANY_EXTEND: return "any_extend"; - case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg"; - case ISD::TRUNCATE: return "truncate"; - case ISD::FP_ROUND: return "fp_round"; - case ISD::FLT_ROUNDS_: return "flt_rounds"; - case ISD::FP_ROUND_INREG: return "fp_round_inreg"; - case ISD::FP_EXTEND: return "fp_extend"; - - case ISD::SINT_TO_FP: return "sint_to_fp"; - case ISD::UINT_TO_FP: return "uint_to_fp"; - case ISD::FP_TO_SINT: return "fp_to_sint"; - case ISD::FP_TO_UINT: return "fp_to_uint"; - case ISD::BITCAST: return "bitcast"; - case ISD::FP16_TO_FP32: return "fp16_to_fp32"; - case ISD::FP32_TO_FP16: return "fp32_to_fp16"; - - case ISD::CONVERT_RNDSAT: { - switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) { - default: llvm_unreachable("Unknown cvt code!"); - case ISD::CVT_FF: return "cvt_ff"; - case ISD::CVT_FS: return "cvt_fs"; - case ISD::CVT_FU: return "cvt_fu"; - case ISD::CVT_SF: return "cvt_sf"; - case ISD::CVT_UF: return "cvt_uf"; - case ISD::CVT_SS: return "cvt_ss"; - case ISD::CVT_SU: return "cvt_su"; - case ISD::CVT_US: return "cvt_us"; - case ISD::CVT_UU: return "cvt_uu"; - } - } - - // Control flow instructions - case ISD::BR: return "br"; - case ISD::BRIND: return "brind"; - case ISD::BR_JT: return "br_jt"; - case ISD::BRCOND: return "brcond"; - case ISD::BR_CC: return "br_cc"; - case ISD::CALLSEQ_START: return "callseq_start"; - case ISD::CALLSEQ_END: return "callseq_end"; - - // Other operators - case ISD::LOAD: return "load"; - case ISD::STORE: return "store"; - case ISD::VAARG: return "vaarg"; - case ISD::VACOPY: return "vacopy"; - case ISD::VAEND: return "vaend"; - case ISD::VASTART: return "vastart"; - case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc"; - case ISD::EXTRACT_ELEMENT: return "extract_element"; - case ISD::BUILD_PAIR: return "build_pair"; - case ISD::STACKSAVE: return "stacksave"; - case ISD::STACKRESTORE: return "stackrestore"; - case ISD::TRAP: return "trap"; - - // Bit manipulation - case ISD::BSWAP: return "bswap"; - case ISD::CTPOP: return "ctpop"; - case ISD::CTTZ: return "cttz"; - case ISD::CTLZ: return "ctlz"; - - // Trampolines - case ISD::INIT_TRAMPOLINE: return "init_trampoline"; - case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline"; - - case ISD::CONDCODE: - switch (cast<CondCodeSDNode>(this)->get()) { - default: llvm_unreachable("Unknown setcc condition!"); - case ISD::SETOEQ: return "setoeq"; - case ISD::SETOGT: return "setogt"; - case ISD::SETOGE: return "setoge"; - case ISD::SETOLT: return "setolt"; - case ISD::SETOLE: return "setole"; - case ISD::SETONE: return "setone"; - - case ISD::SETO: return "seto"; - case ISD::SETUO: return "setuo"; - case ISD::SETUEQ: return "setue"; - case ISD::SETUGT: return "setugt"; - case ISD::SETUGE: return "setuge"; - case ISD::SETULT: return "setult"; - case ISD::SETULE: return "setule"; - case ISD::SETUNE: return "setune"; - - case ISD::SETEQ: return "seteq"; - case ISD::SETGT: return "setgt"; - case ISD::SETGE: return "setge"; - case ISD::SETLT: return "setlt"; - case ISD::SETLE: return "setle"; - case ISD::SETNE: return "setne"; - } - } -} - -const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { - switch (AM) { - default: - return ""; - case ISD::PRE_INC: - return "<pre-inc>"; - case ISD::PRE_DEC: - return "<pre-dec>"; - case ISD::POST_INC: - return "<post-inc>"; - case ISD::POST_DEC: - return "<post-dec>"; - } -} - -std::string ISD::ArgFlagsTy::getArgFlagsString() { - std::string S = "< "; - - if (isZExt()) - S += "zext "; - if (isSExt()) - S += "sext "; - if (isInReg()) - S += "inreg "; - if (isSRet()) - S += "sret "; - if (isByVal()) - S += "byval "; - if (isNest()) - S += "nest "; - if (getByValAlign()) - S += "byval-align:" + utostr(getByValAlign()) + " "; - if (getOrigAlign()) - S += "orig-align:" + utostr(getOrigAlign()) + " "; - if (getByValSize()) - S += "byval-size:" + utostr(getByValSize()) + " "; - return S + ">"; -} - -void SDNode::dump() const { dump(0); } -void SDNode::dump(const SelectionDAG *G) const { - print(dbgs(), G); - dbgs() << '\n'; -} - -void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { - OS << (void*)this << ": "; - - for (unsigned i = 0, e = getNumValues(); i != e; ++i) { - if (i) OS << ","; - if (getValueType(i) == MVT::Other) - OS << "ch"; - else - OS << getValueType(i).getEVTString(); - } - OS << " = " << getOperationName(G); -} - -void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { - if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) { - if (!MN->memoperands_empty()) { - OS << "<"; - OS << "Mem:"; - for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(), - e = MN->memoperands_end(); i != e; ++i) { - OS << **i; - if (llvm::next(i) != e) - OS << " "; - } - OS << ">"; - } - } else if (const ShuffleVectorSDNode *SVN = - dyn_cast<ShuffleVectorSDNode>(this)) { - OS << "<"; - for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) { - int Idx = SVN->getMaskElt(i); - if (i) OS << ","; - if (Idx < 0) - OS << "u"; - else - OS << Idx; - } - OS << ">"; - } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) { - OS << '<' << CSDN->getAPIntValue() << '>'; - } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) { - if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle) - OS << '<' << CSDN->getValueAPF().convertToFloat() << '>'; - else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble) - OS << '<' << CSDN->getValueAPF().convertToDouble() << '>'; - else { - OS << "<APFloat("; - CSDN->getValueAPF().bitcastToAPInt().dump(); - OS << ")>"; - } - } else if (const GlobalAddressSDNode *GADN = - dyn_cast<GlobalAddressSDNode>(this)) { - int64_t offset = GADN->getOffset(); - OS << '<'; - WriteAsOperand(OS, GADN->getGlobal()); - OS << '>'; - if (offset > 0) - OS << " + " << offset; - else - OS << " " << offset; - if (unsigned int TF = GADN->getTargetFlags()) - OS << " [TF=" << TF << ']'; - } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) { - OS << "<" << FIDN->getIndex() << ">"; - } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) { - OS << "<" << JTDN->getIndex() << ">"; - if (unsigned int TF = JTDN->getTargetFlags()) - OS << " [TF=" << TF << ']'; - } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){ - int offset = CP->getOffset(); - if (CP->isMachineConstantPoolEntry()) - OS << "<" << *CP->getMachineCPVal() << ">"; - else - OS << "<" << *CP->getConstVal() << ">"; - if (offset > 0) - OS << " + " << offset; - else - OS << " " << offset; - if (unsigned int TF = CP->getTargetFlags()) - OS << " [TF=" << TF << ']'; - } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) { - OS << "<"; - const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock(); - if (LBB) - OS << LBB->getName() << " "; - OS << (const void*)BBDN->getBasicBlock() << ">"; - } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) { - OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0); - } else if (const ExternalSymbolSDNode *ES = - dyn_cast<ExternalSymbolSDNode>(this)) { - OS << "'" << ES->getSymbol() << "'"; - if (unsigned int TF = ES->getTargetFlags()) - OS << " [TF=" << TF << ']'; - } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) { - if (M->getValue()) - OS << "<" << M->getValue() << ">"; - else - OS << "<null>"; - } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) { - if (MD->getMD()) - OS << "<" << MD->getMD() << ">"; - else - OS << "<null>"; - } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) { - OS << ":" << N->getVT().getEVTString(); - } - else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) { - OS << "<" << *LD->getMemOperand(); - - bool doExt = true; - switch (LD->getExtensionType()) { - default: doExt = false; break; - case ISD::EXTLOAD: OS << ", anyext"; break; - case ISD::SEXTLOAD: OS << ", sext"; break; - case ISD::ZEXTLOAD: OS << ", zext"; break; - } - if (doExt) - OS << " from " << LD->getMemoryVT().getEVTString(); - - const char *AM = getIndexedModeName(LD->getAddressingMode()); - if (*AM) - OS << ", " << AM; - - OS << ">"; - } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) { - OS << "<" << *ST->getMemOperand(); - - if (ST->isTruncatingStore()) - OS << ", trunc to " << ST->getMemoryVT().getEVTString(); - - const char *AM = getIndexedModeName(ST->getAddressingMode()); - if (*AM) - OS << ", " << AM; - - OS << ">"; - } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) { - OS << "<" << *M->getMemOperand() << ">"; - } else if (const BlockAddressSDNode *BA = - dyn_cast<BlockAddressSDNode>(this)) { - OS << "<"; - WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false); - OS << ", "; - WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false); - OS << ">"; - if (unsigned int TF = BA->getTargetFlags()) - OS << " [TF=" << TF << ']'; - } - - if (G) - if (unsigned Order = G->GetOrdering(this)) - OS << " [ORD=" << Order << ']'; - - if (getNodeId() != -1) - OS << " [ID=" << getNodeId() << ']'; - - DebugLoc dl = getDebugLoc(); - if (G && !dl.isUnknown()) { - DIScope - Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext())); - OS << " dbg:"; - // Omit the directory, since it's usually long and uninteresting. - if (Scope.Verify()) - OS << Scope.getFilename(); - else - OS << "<unknown>"; - OS << ':' << dl.getLine(); - if (dl.getCol() != 0) - OS << ':' << dl.getCol(); - } -} - -void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { - print_types(OS, G); - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - if (i) OS << ", "; else OS << " "; - OS << (void*)getOperand(i).getNode(); - if (unsigned RN = getOperand(i).getResNo()) - OS << ":" << RN; - } - print_details(OS, G); -} - -static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N, - const SelectionDAG *G, unsigned depth, - unsigned indent) { - if (depth == 0) - return; - - OS.indent(indent); - - N->print(OS, G); - - if (depth < 1) - return; - - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - // Don't follow chain operands. - if (N->getOperand(i).getValueType() == MVT::Other) - continue; - OS << '\n'; - printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2); - } -} - -void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G, - unsigned depth) const { - printrWithDepthHelper(OS, this, G, depth, 0); -} - -void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const { - // Don't print impossibly deep things. - printrWithDepth(OS, G, 10); -} - -void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const { - printrWithDepth(dbgs(), G, depth); -} - -void SDNode::dumprFull(const SelectionDAG *G) const { - // Don't print impossibly deep things. - dumprWithDepth(G, 10); -} - -static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - if (N->getOperand(i).getNode()->hasOneUse()) - DumpNodes(N->getOperand(i).getNode(), indent+2, G); - else - dbgs() << "\n" << std::string(indent+2, ' ') - << (void*)N->getOperand(i).getNode() << ": <multiple use>"; - - - dbgs() << "\n"; - dbgs().indent(indent); - N->dump(G); -} - SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { assert(N->getNumValues() == 1 && "Can't unroll a vector with multiple results!"); @@ -6527,20 +6003,14 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { const GlobalValue *GV; int64_t GVOffset = 0; if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { - // If GV has specified alignment, then use it. Otherwise, use the preferred - // alignment. - unsigned Align = GV->getAlignment(); - if (!Align) { - if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) { - if (GVar->hasInitializer()) { - const TargetData *TD = TLI.getTargetData(); - Align = TD->getPreferredAlignment(GVar); - } - } - if (!Align) - Align = TLI.getTargetData()->getABITypeAlignment(GV->getType()); - } - return MinAlign(Align, GVOffset); + unsigned PtrWidth = TLI.getPointerTy().getSizeInBits(); + APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); + llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne, + TLI.getTargetData()); + unsigned AlignBits = KnownZero.countTrailingOnes(); + unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; + if (Align) + return MinAlign(Align, GVOffset); } // If this is a direct reference to a stack slot, use information about the @@ -6566,74 +6036,6 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { return 0; } -void SelectionDAG::dump() const { - dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:"; - - for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); - I != E; ++I) { - const SDNode *N = I; - if (!N->hasOneUse() && N != getRoot().getNode()) - DumpNodes(N, 2, this); - } - - if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); - - dbgs() << "\n\n"; -} - -void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { - print_types(OS, G); - print_details(OS, G); -} - -typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet; -static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, - const SelectionDAG *G, VisitedSDNodeSet &once) { - if (!once.insert(N)) // If we've been here before, return now. - return; - - // Dump the current SDNode, but don't end the line yet. - OS << std::string(indent, ' '); - N->printr(OS, G); - - // Having printed this SDNode, walk the children: - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - const SDNode *child = N->getOperand(i).getNode(); - - if (i) OS << ","; - OS << " "; - - if (child->getNumOperands() == 0) { - // This child has no grandchildren; print it inline right here. - child->printr(OS, G); - once.insert(child); - } else { // Just the address. FIXME: also print the child's opcode. - OS << (void*)child; - if (unsigned RN = N->getOperand(i).getResNo()) - OS << ":" << RN; - } - } - - OS << "\n"; - - // Dump children that have grandchildren on their own line(s). - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - const SDNode *child = N->getOperand(i).getNode(); - DumpNodesr(OS, child, indent+2, G, once); - } -} - -void SDNode::dumpr() const { - VisitedSDNodeSet once; - DumpNodesr(dbgs(), this, 0, 0, once); -} - -void SDNode::dumpr(const SelectionDAG *G) const { - VisitedSDNodeSet once; - DumpNodesr(dbgs(), this, 0, G, once); -} - - // getAddressSpace - Return the address space this GlobalAddress belongs to. unsigned GlobalAddressSDNode::getAddressSpace() const { return getGlobal()->getType()->getAddressSpace(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 095b400..94cb958 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -41,13 +41,13 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" @@ -197,7 +197,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, // FP_ROUND's are always exact here. if (ValueVT.bitsLT(Val.getValueType())) return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val, - DAG.getIntPtrConstant(1)); + DAG.getTargetConstant(1, TLI.getPointerTy())); return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); } @@ -206,7 +206,6 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); llvm_unreachable("Unknown mismatch!"); - return SDValue(); } /// getCopyFromParts - Create a value that contains the specified legal parts @@ -353,10 +352,13 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, assert(NumParts == 1 && "Do not know what to promote to!"); Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val); } else { - assert(PartVT.isInteger() && ValueVT.isInteger() && + assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && + ValueVT.isInteger() && "Unknown mismatch!"); ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ExtendKind, DL, ValueVT, Val); + if (PartVT == MVT::x86mmx) + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } } else if (PartBits == ValueVT.getSizeInBits()) { // Different types of the same size. @@ -364,10 +366,13 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { // If the parts cover less bits than value has, truncate the value. - assert(PartVT.isInteger() && ValueVT.isInteger() && + assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && + ValueVT.isInteger() && "Unknown mismatch!"); ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); + if (PartVT == MVT::x86mmx) + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } // The value may have changed - recompute ValueVT. @@ -813,9 +818,11 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, } } -void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) { +void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, + const TargetLibraryInfo *li) { AA = &aa; GFI = gfi; + LibInfo = li; TD = DAG.getTarget().getTargetData(); LPadToCallSiteMap.clear(); } @@ -964,7 +971,7 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, DAG.AddDbgValue(SDV, Val.getNode(), false); } } else - DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); DanglingDebugInfoMap[V] = DanglingDebugInfo(); } } @@ -1054,6 +1061,23 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { return DAG.getMergeValues(&Constants[0], Constants.size(), getCurDebugLoc()); } + + if (const ConstantDataSequential *CDS = + dyn_cast<ConstantDataSequential>(C)) { + SmallVector<SDValue, 4> Ops; + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode(); + // Add each leaf value from the operand to the Constants list + // to form a flattened list of all the values. + for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) + Ops.push_back(SDValue(Val, i)); + } + + if (isa<ArrayType>(CDS->getType())) + return DAG.getMergeValues(&Ops[0], Ops.size(), getCurDebugLoc()); + return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + VT, &Ops[0], Ops.size()); + } if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) && @@ -1088,9 +1112,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { // Now that we know the number and type of the elements, get that number of // elements into the Ops array based on what kind of constant it is. SmallVector<SDValue, 16> Ops; - if (const ConstantVector *CP = dyn_cast<ConstantVector>(C)) { + if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) { for (unsigned i = 0; i != NumElements; ++i) - Ops.push_back(getValue(CP->getOperand(i))); + Ops.push_back(getValue(CV->getOperand(i))); } else { assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); EVT EltVT = TLI.getValueType(VecTy->getElementType()); @@ -1126,7 +1150,6 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } llvm_unreachable("Can't get register for value!"); - return SDValue(); } void SelectionDAGBuilder::visitRet(const ReturnInst &I) { @@ -1285,8 +1308,8 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, } /// Return branch probability calculated by BranchProbabilityInfo for IR blocks. -uint32_t SelectionDAGBuilder::getEdgeWeight(MachineBasicBlock *Src, - MachineBasicBlock *Dst) { +uint32_t SelectionDAGBuilder::getEdgeWeight(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const { BranchProbabilityInfo *BPI = FuncInfo.BPI; if (!BPI) return 0; @@ -1336,6 +1359,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, Condition = getICmpCondCode(IC->getPredicate()); } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) { Condition = getFCmpCondCode(FC->getPredicate()); + if (TM.Options.NoNaNsFPMath) + Condition = getFCmpCodeWithoutNaN(Condition); } else { Condition = ISD::SETEQ; // silence warning. llvm_unreachable("Unknown compare instruction"); @@ -1811,8 +1836,8 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { CopyToExportRegsIfNeeded(&I); // Update successor info - InvokeMBB->addSuccessor(Return); - InvokeMBB->addSuccessor(LandingPad); + addSuccessorWithWeight(InvokeMBB, Return); + addSuccessorWithWeight(InvokeMBB, LandingPad); // Drop into normal successor. DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), @@ -1820,9 +1845,6 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { DAG.getBasicBlock(Return))); } -void SelectionDAGBuilder::visitUnwind(const UnwindInst &I) { -} - void SelectionDAGBuilder::visitResume(const ResumeInst &RI) { llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!"); } @@ -1835,6 +1857,12 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); AddLandingPadInfo(LP, MMI, MBB); + // If there aren't registers to copy the values into (e.g., during SjLj + // exceptions), then don't bother to create these DAG nodes. + if (TLI.getExceptionPointerRegister() == 0 && + TLI.getExceptionSelectorRegister() == 0) + return; + SmallVector<EVT, 2> ValueVTs; ComputeValueVTs(TLI, LP.getType(), ValueVTs); @@ -2003,7 +2031,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, } static inline bool areJTsAllowed(const TargetLowering &TLI) { - return !DisableJumpTables && + return !TLI.getTargetMachine().Options.DisableJumpTables && (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); } @@ -2190,7 +2218,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, CaseRange LHSR(CR.Range.first, Pivot); CaseRange RHSR(Pivot, CR.Range.second); - Constant *C = Pivot->Low; + const Constant *C = Pivot->Low; MachineBasicBlock *FalseBB = 0, *TrueBB = 0; // We know that we branch to the LHS if the Value being switched on is @@ -2383,14 +2411,14 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, BranchProbabilityInfo *BPI = FuncInfo.BPI; // Start with "simple" cases - for (size_t i = 1; i < SI.getNumSuccessors(); ++i) { - BasicBlock *SuccBB = SI.getSuccessor(i); + for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end(); + i != e; ++i) { + const BasicBlock *SuccBB = i.getCaseSuccessor(); MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; uint32_t ExtraWeight = BPI ? BPI->getEdgeWeight(SI.getParent(), SuccBB) : 0; - Cases.push_back(Case(SI.getSuccessorValue(i), - SI.getSuccessorValue(i), + Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(), SMBB, ExtraWeight)); } std::sort(Cases.begin(), Cases.end(), CaseCmp()); @@ -2457,7 +2485,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { // If there is only the default destination, branch to it if it is not the // next basic block. Otherwise, just fall through. - if (SI.getNumCases() == 1) { + if (!SI.getNumCases()) { // Update machine-CFG edges. // If this is not a fall-through branch, emit the branch. @@ -2626,6 +2654,8 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Condition = getFCmpCondCode(predicate); + if (TM.Options.NoNaNsFPMath) + Condition = getFCmpCodeWithoutNaN(Condition); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition)); } @@ -2685,11 +2715,12 @@ void SelectionDAGBuilder::visitFPTrunc(const User &I) { SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(), - DestVT, N, DAG.getIntPtrConstant(0))); + DestVT, N, + DAG.getTargetConstant(0, TLI.getPointerTy()))); } void SelectionDAGBuilder::visitFPExt(const User &I){ - // FPTrunc is never a no-op cast, no need to check + // FPExt is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N)); @@ -2772,33 +2803,25 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) { TLI.getValueType(I.getType()), InVec, InIdx)); } -// Utility for visitShuffleVector - Returns true if the mask is mask starting -// from SIndx and increasing to the element length (undefs are allowed). -static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) { - unsigned MaskNumElts = Mask.size(); - for (unsigned i = 0; i != MaskNumElts; ++i) - if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx))) +// Utility for visitShuffleVector - Return true if every element in Mask, +// begining from position Pos and ending in Pos+Size, falls within the +// specified sequential range [L, L+Pos). or is undef. +static bool isSequentialInRange(const SmallVectorImpl<int> &Mask, + unsigned Pos, unsigned Size, int Low) { + for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low) + if (Mask[i] >= 0 && Mask[i] != Low) return false; return true; } void SelectionDAGBuilder::visitShuffleVector(const User &I) { - SmallVector<int, 8> Mask; SDValue Src1 = getValue(I.getOperand(0)); SDValue Src2 = getValue(I.getOperand(1)); - // Convert the ConstantVector mask operand into an array of ints, with -1 - // representing undef values. - SmallVector<Constant*, 8> MaskElts; - cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts); - unsigned MaskNumElts = MaskElts.size(); - for (unsigned i = 0; i != MaskNumElts; ++i) { - if (isa<UndefValue>(MaskElts[i])) - Mask.push_back(-1); - else - Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue()); - } - + SmallVector<int, 8> Mask; + ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask); + unsigned MaskNumElts = Mask.size(); + EVT VT = TLI.getValueType(I.getType()); EVT SrcVT = Src1.getValueType(); unsigned SrcNumElts = SrcVT.getVectorNumElements(); @@ -2814,11 +2837,23 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // Mask is longer than the source vectors and is a multiple of the source // vectors. We can use concatenate vector to make the mask and vectors // lengths match. - if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) { - // The shuffle is concatenating two vectors together. - setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), - VT, Src1, Src2)); - return; + if (SrcNumElts*2 == MaskNumElts) { + // First check for Src1 in low and Src2 in high + if (isSequentialInRange(Mask, 0, SrcNumElts, 0) && + isSequentialInRange(Mask, SrcNumElts, SrcNumElts, SrcNumElts)) { + // The shuffle is concatenating two vectors together. + setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), + VT, Src1, Src2)); + return; + } + // Then check for Src2 in low and Src1 in high + if (isSequentialInRange(Mask, 0, SrcNumElts, SrcNumElts) && + isSequentialInRange(Mask, SrcNumElts, SrcNumElts, 0)) { + // The shuffle is concatenating two vectors together. + setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), + VT, Src2, Src1)); + return; + } } // Pad both vectors with undefs to make them the same length as the mask. @@ -2843,10 +2878,9 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SmallVector<int, 8> MappedOps; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; - if (Idx < (int)SrcNumElts) - MappedOps.push_back(Idx); - else - MappedOps.push_back(Idx + MaskNumElts - SrcNumElts); + if (Idx >= (int)SrcNumElts) + Idx -= SrcNumElts - MaskNumElts; + MappedOps.push_back(Idx); } setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, @@ -2858,13 +2892,13 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // Analyze the access pattern of the vector to see if we can extract // two subvectors and do the shuffle. The analysis is done by calculating // the range of elements the mask access on both vectors. - int MinRange[2] = { static_cast<int>(SrcNumElts+1), - static_cast<int>(SrcNumElts+1)}; + int MinRange[2] = { static_cast<int>(SrcNumElts), + static_cast<int>(SrcNumElts)}; int MaxRange[2] = {-1, -1}; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; - int Input = 0; + unsigned Input = 0; if (Idx < 0) continue; @@ -2880,35 +2914,31 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // Check if the access is smaller than the vector size and can we find // a reasonable extract index. - int RangeUse[2] = { 2, 2 }; // 0 = Unused, 1 = Extract, 2 = Can not - // Extract. + int RangeUse[2] = { -1, -1 }; // 0 = Unused, 1 = Extract, -1 = Can not + // Extract. int StartIdx[2]; // StartIdx to extract from - for (int Input=0; Input < 2; ++Input) { - if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) { + for (unsigned Input = 0; Input < 2; ++Input) { + if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) { RangeUse[Input] = 0; // Unused StartIdx[Input] = 0; - } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) { - // Fits within range but we should see if we can find a good - // start index that is a multiple of the mask length. - if (MaxRange[Input] < (int)MaskNumElts) { - RangeUse[Input] = 1; // Extract from beginning of the vector - StartIdx[Input] = 0; - } else { - StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts; - if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && - StartIdx[Input] + MaskNumElts <= SrcNumElts) - RangeUse[Input] = 1; // Extract from a multiple of the mask length. - } + continue; } + + // Find a good start index that is a multiple of the mask length. Then + // see if the rest of the elements are in range. + StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts; + if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && + StartIdx[Input] + MaskNumElts <= SrcNumElts) + RangeUse[Input] = 1; // Extract from a multiple of the mask length. } if (RangeUse[0] == 0 && RangeUse[1] == 0) { setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used. return; } - else if (RangeUse[0] < 2 && RangeUse[1] < 2) { + if (RangeUse[0] >= 0 && RangeUse[1] >= 0) { // Extract appropriate subvector and generate a vector shuffle - for (int Input=0; Input < 2; ++Input) { + for (unsigned Input = 0; Input < 2; ++Input) { SDValue &Src = Input == 0 ? Src1 : Src2; if (RangeUse[Input] == 0) Src = DAG.getUNDEF(VT); @@ -2921,12 +2951,13 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SmallVector<int, 8> MappedOps; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; - if (Idx < 0) - MappedOps.push_back(Idx); - else if (Idx < (int)SrcNumElts) - MappedOps.push_back(Idx - StartIdx[0]); - else - MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts); + if (Idx >= 0) { + if (Idx < (int)SrcNumElts) + Idx -= StartIdx[0]; + else + Idx -= SrcNumElts + StartIdx[1] - MaskNumElts; + } + MappedOps.push_back(Idx); } setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, @@ -2942,22 +2973,20 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { EVT PtrVT = TLI.getPointerTy(); SmallVector<SDValue,8> Ops; for (unsigned i = 0; i != MaskNumElts; ++i) { - if (Mask[i] < 0) { - Ops.push_back(DAG.getUNDEF(EltVT)); - } else { - int Idx = Mask[i]; - SDValue Res; + int Idx = Mask[i]; + SDValue Res; - if (Idx < (int)SrcNumElts) - Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), - EltVT, Src1, DAG.getConstant(Idx, PtrVT)); - else - Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), - EltVT, Src2, - DAG.getConstant(Idx - SrcNumElts, PtrVT)); + if (Idx < 0) { + Res = DAG.getUNDEF(EltVT); + } else { + SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2; + if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; - Ops.push_back(Res); + Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), + EltVT, Src, DAG.getConstant(Idx, PtrVT)); } + + Ops.push_back(Res); } setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), @@ -3042,7 +3071,9 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { void SelectionDAGBuilder::visitGetElementPtr(const User &I) { SDValue N = getValue(I.getOperand(0)); - Type *Ty = I.getOperand(0)->getType(); + // Note that the pointer operand may be a vector of pointers. Take the scalar + // element which holds a pointer. + Type *Ty = I.getOperand(0)->getType()->getScalarType(); for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); OI != E; ++OI) { @@ -3096,7 +3127,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { unsigned Amt = ElementSize.logBase2(); IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(), N.getValueType(), IdxN, - DAG.getConstant(Amt, TLI.getPointerTy())); + DAG.getConstant(Amt, IdxN.getValueType())); } else { SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy()); IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(), @@ -3175,8 +3206,10 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata("nontemporal") != 0; + bool isInvariant = I.getMetadata("invariant.load") != 0; unsigned Alignment = I.getAlignment(); const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); + const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; @@ -3224,7 +3257,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { DAG.getConstant(Offsets[i], PtrVT)); SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root, A, MachinePointerInfo(SV, Offsets[i]), isVolatile, - isNonTemporal, Alignment, TBAAInfo); + isNonTemporal, isInvariant, Alignment, TBAAInfo, + Ranges); Values[i] = L; Chains[ChainI] = L.getValue(1); @@ -3358,7 +3392,7 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { DebugLoc dl = getCurDebugLoc(); ISD::NodeType NT; switch (I.getOperation()) { - default: llvm_unreachable("Unknown atomicrmw operation"); return; + default: llvm_unreachable("Unknown atomicrmw operation"); case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break; case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break; case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break; @@ -3496,24 +3530,16 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Add the intrinsic ID as an integer operand if it's not a target intrinsic. if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || Info.opc == ISD::INTRINSIC_W_CHAIN) - Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy())); + Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy())); // Add all operands of the call to the operand list. for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { SDValue Op = getValue(I.getArgOperand(i)); - assert(TLI.isTypeLegal(Op.getValueType()) && - "Intrinsic uses a non-legal type?"); Ops.push_back(Op); } SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, I.getType(), ValueVTs); -#ifndef NDEBUG - for (unsigned Val = 0, E = ValueVTs.size(); Val != E; ++Val) { - assert(TLI.isTypeLegal(ValueVTs[Val]) && - "Intrinsic uses a non-legal type?"); - } -#endif // NDEBUG if (HasChain) ValueVTs.push_back(MVT::Other); @@ -3556,6 +3582,12 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, } setValue(&I, Result); + } else { + // Assign order to result here. If the intrinsic does not produce a result, + // it won't be mapped to a SDNode and visit() will not assign it an order + // number. + ++SDNodeOrder; + AssignOrderingToNode(Result.getNode()); } } @@ -3597,17 +3629,6 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt) { return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32); } -// implVisitAluOverflow - Lower arithmetic overflow instrinsics. -const char * -SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) { - SDValue Op1 = getValue(I.getArgOperand(0)); - SDValue Op2 = getValue(I.getArgOperand(1)); - - SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); - setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2)); - return 0; -} - /// visitExp - Lower an exp intrinsic. Handles the special sequences for /// limited-precision mode. void @@ -4367,9 +4388,8 @@ static unsigned getTruncatedArgReg(const SDValue &N) { const SDValue &CFR = Ext.getOperand(0); if (CFR.getOpcode() == ISD::CopyFromReg) return cast<RegisterSDNode>(CFR.getOperand(1))->getReg(); - else - if (CFR.getOpcode() == ISD::TRUNCATE) - return getTruncatedArgReg(CFR); + if (CFR.getOpcode() == ISD::TRUNCATE) + return getTruncatedArgReg(CFR); } return 0; } @@ -4398,7 +4418,7 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, // Some arguments' frame index is recorded during argument lowering. Offset = FuncInfo.getArgumentFrameIndex(Arg); if (Offset) - Reg = TRI->getFrameRegister(MF); + Reg = TRI->getFrameRegister(MF); if (!Reg && N.getNode()) { if (N.getOpcode() == ISD::CopyFromReg) @@ -4473,9 +4493,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)))); return 0; case Intrinsic::setjmp: - return "_setjmp"+!TLI.usesUnderscoreSetJmp(); + return &"_setjmp"[!TLI.usesUnderscoreSetJmp()]; case Intrinsic::longjmp: - return "_longjmp"+!TLI.usesUnderscoreLongJmp(); + return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; case Intrinsic::memcpy: { // Assert for address < 256 since we support only user defined address // spaces. @@ -4531,8 +4551,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); MDNode *Variable = DI.getVariable(); const Value *Address = DI.getAddress(); - if (!Address || !DIVariable(Variable).Verify()) + if (!Address || !DIVariable(Variable).Verify()) { + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return 0; + } // Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder // but do not always have a corresponding SDNode built. The SDNodeOrder @@ -4543,7 +4565,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Check if address has undef value. if (isa<UndefValue>(Address) || (Address->use_empty() && !isa<Argument>(Address))) { - DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return 0; } @@ -4553,11 +4575,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { N = UnusedArgNodeMap[Address]; SDDbgValue *SDV; if (N.getNode()) { - // Parameters are handled specially. - bool isParameter = - DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable; if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) Address = BCI->getOperand(0); + // Parameters are handled specially. + bool isParameter = + (DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable || + isa<Argument>(Address)); + const AllocaInst *AI = dyn_cast<AllocaInst>(Address); if (isParameter && !AI) { @@ -4577,7 +4601,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { 0, dl, SDNodeOrder); else { // Can't do anything with other non-AI cases yet. - DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t"); + DEBUG(Address->dump()); return 0; } DAG.AddDbgValue(SDV, N.getNode(), isParameter); @@ -4599,7 +4625,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } } } - DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); } } return 0; @@ -4645,7 +4671,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } else { // We may expand this to cover more cases. One case where we have no // data available is an unreferenced parameter. - DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); } } @@ -4654,8 +4680,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { V = BCI->getOperand(0); const AllocaInst *AI = dyn_cast<AllocaInst>(V); // Don't handle byval struct arguments or VLAs, for example. - if (!AI) + if (!AI) { + DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n"); + DEBUG(dbgs() << " Last seen at:\n " << *V << "\n"); return 0; + } DenseMap<const AllocaInst*, int>::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI == FuncInfo.StaticAllocaMap.end()) @@ -4667,43 +4696,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc()); return 0; } - case Intrinsic::eh_exception: { - // Insert the EXCEPTIONADDR instruction. - assert(FuncInfo.MBB->isLandingPad() && - "Call to eh.exception not in landing pad!"); - SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); - SDValue Ops[1]; - Ops[0] = DAG.getRoot(); - SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1); - setValue(&I, Op); - DAG.setRoot(Op.getValue(1)); - return 0; - } - - case Intrinsic::eh_selector: { - MachineBasicBlock *CallMBB = FuncInfo.MBB; - MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - if (CallMBB->isLandingPad()) - AddCatchInfo(I, &MMI, CallMBB); - else { -#ifndef NDEBUG - FuncInfo.CatchInfoLost.insert(&I); -#endif - // FIXME: Mark exception selector register as live in. Hack for PR1508. - unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) FuncInfo.MBB->addLiveIn(Reg); - } - - // Insert the EHSELECTION instruction. - SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); - SDValue Ops[2]; - Ops[0] = getValue(I.getArgOperand(0)); - Ops[1] = getRoot(); - SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2); - DAG.setRoot(Op.getValue(1)); - setValue(&I, DAG.getSExtOrTrunc(Op, dl, MVT::i32)); - return 0; - } case Intrinsic::eh_typeid_for: { // Find the type id for the given typeinfo. @@ -4775,11 +4767,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getRoot(), getValue(I.getArgOperand(0)))); return 0; } - case Intrinsic::eh_sjlj_dispatch_setup: { - DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other, - getRoot(), getValue(I.getArgOperand(0)))); - return 0; - } case Intrinsic::x86_mmx_pslli_w: case Intrinsic::x86_mmx_pslli_d: @@ -4841,6 +4828,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, Res); return 0; } + case Intrinsic::x86_avx_vinsertf128_pd_256: + case Intrinsic::x86_avx_vinsertf128_ps_256: + case Intrinsic::x86_avx_vinsertf128_si_256: + case Intrinsic::x86_avx2_vinserti128: { + DebugLoc dl = getCurDebugLoc(); + EVT DestVT = TLI.getValueType(I.getType()); + EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType()); + uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) * + ElVT.getVectorNumElements(); + Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, DestVT, + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + DAG.getConstant(Idx, MVT::i32)); + setValue(&I, Res); + return 0; + } case Intrinsic::convertff: case Intrinsic::convertfsi: case Intrinsic::convertfui: @@ -4852,6 +4855,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::convertuu: { ISD::CvtCode Code = ISD::CVT_INVALID; switch (Intrinsic) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. case Intrinsic::convertff: Code = ISD::CVT_FF; break; case Intrinsic::convertfsi: Code = ISD::CVT_FS; break; case Intrinsic::convertfui: Code = ISD::CVT_FU; break; @@ -4946,14 +4950,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; case Intrinsic::cttz: { SDValue Arg = getValue(I.getArgOperand(0)); + ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); EVT Ty = Arg.getValueType(); - setValue(&I, DAG.getNode(ISD::CTTZ, dl, Ty, Arg)); + setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF, + dl, Ty, Arg)); return 0; } case Intrinsic::ctlz: { SDValue Arg = getValue(I.getArgOperand(0)); + ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); EVT Ty = Arg.getValueType(); - setValue(&I, DAG.getNode(ISD::CTLZ, dl, Ty, Arg)); + setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF, + dl, Ty, Arg)); return 0; } case Intrinsic::ctpop: { @@ -5052,7 +5060,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::gcread: case Intrinsic::gcwrite: llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); - return 0; case Intrinsic::flt_rounds: setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32)); return 0; @@ -5064,7 +5071,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::trap: { - StringRef TrapFuncName = getTrapFunctionName(); + StringRef TrapFuncName = TM.Options.getTrapFunctionName(); if (TrapFuncName.empty()) { DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot())); return 0; @@ -5073,25 +5080,36 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(getRoot(), I.getType(), false, false, false, false, 0, CallingConv::C, - /*isTailCall=*/false, /*isReturnValueUsed=*/true, + /*isTailCall=*/false, + /*doesNotRet=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), Args, DAG, getCurDebugLoc()); DAG.setRoot(Result.second); return 0; } case Intrinsic::uadd_with_overflow: - return implVisitAluOverflow(I, ISD::UADDO); case Intrinsic::sadd_with_overflow: - return implVisitAluOverflow(I, ISD::SADDO); case Intrinsic::usub_with_overflow: - return implVisitAluOverflow(I, ISD::USUBO); case Intrinsic::ssub_with_overflow: - return implVisitAluOverflow(I, ISD::SSUBO); case Intrinsic::umul_with_overflow: - return implVisitAluOverflow(I, ISD::UMULO); - case Intrinsic::smul_with_overflow: - return implVisitAluOverflow(I, ISD::SMULO); + case Intrinsic::smul_with_overflow: { + ISD::NodeType Op; + switch (Intrinsic) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break; + case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break; + case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break; + case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break; + case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break; + case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break; + } + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); + setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2)); + return 0; + } case Intrinsic::prefetch: { SDValue Ops[5]; unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); @@ -5226,7 +5244,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // If there's a possibility that fast-isel has already selected some amount // of the current basic block, don't emit a tail call. - if (isTailCall && EnableFastISel) + if (isTailCall && TM.Options.EnableFastISel) isTailCall = false; std::pair<SDValue,SDValue> Result = @@ -5236,6 +5254,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(), CS.getCallingConv(), isTailCall, + CS.doesNotReturn(), !CS.getInstruction()->use_empty(), Callee, Args, DAG, getCurDebugLoc()); assert((isTailCall || Result.second.getNode()) && @@ -5264,7 +5283,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second, Add, MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), - false, false, 1); + false, false, false, 1); Values[i] = L; Chains[i] = L.getValue(1); } @@ -5375,7 +5394,8 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root, Ptr, MachinePointerInfo(PtrVal), false /*volatile*/, - false /*nontemporal*/, 1 /* align=1 */); + false /*nontemporal*/, + false /*isinvariant*/, 1 /* align=1 */); if (!ConstantMemory) Builder.PendingLoads.push_back(LoadVal.getValue(1)); @@ -5470,23 +5490,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { return; } - // See if any floating point values are being passed to this function. This is - // used to emit an undefined reference to fltused on Windows. - FunctionType *FT = - cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0)); MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - if (FT->isVarArg() && - !MMI.callsExternalVAFunctionWithFloatingPointArguments()) { - for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { - Type* T = I.getArgOperand(i)->getType(); - for (po_iterator<Type*> i = po_begin(T), e = po_end(T); - i != e; ++i) { - if (!i->isFloatingPointTy()) continue; - MMI.setCallsExternalVAFunctionWithFloatingPointArguments(true); - break; - } - } - } + ComputeUsesVAFloatArgument(I, &MMI); const char *RenameFn = 0; if (Function *F = I.getCalledFunction()) { @@ -5509,7 +5514,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { // can't be a library call. if (!F->hasLocalLinkage() && F->hasName()) { StringRef Name = F->getName(); - if (Name == "copysign" || Name == "copysignf" || Name == "copysignl") { + if ((LibInfo->has(LibFunc::copysign) && Name == "copysign") || + (LibInfo->has(LibFunc::copysignf) && Name == "copysignf") || + (LibInfo->has(LibFunc::copysignl) && Name == "copysignl")) { if (I.getNumArgOperands() == 2 && // Basic sanity checks. I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType() && @@ -5520,7 +5527,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { LHS.getValueType(), LHS, RHS)); return; } - } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") { + } else if ((LibInfo->has(LibFunc::fabs) && Name == "fabs") || + (LibInfo->has(LibFunc::fabsf) && Name == "fabsf") || + (LibInfo->has(LibFunc::fabsl) && Name == "fabsl")) { if (I.getNumArgOperands() == 1 && // Basic sanity checks. I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType()) { @@ -5529,7 +5538,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { Tmp.getValueType(), Tmp)); return; } - } else if (Name == "sin" || Name == "sinf" || Name == "sinl") { + } else if ((LibInfo->has(LibFunc::sin) && Name == "sin") || + (LibInfo->has(LibFunc::sinf) && Name == "sinf") || + (LibInfo->has(LibFunc::sinl) && Name == "sinl")) { if (I.getNumArgOperands() == 1 && // Basic sanity checks. I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType() && @@ -5539,7 +5550,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { Tmp.getValueType(), Tmp)); return; } - } else if (Name == "cos" || Name == "cosf" || Name == "cosl") { + } else if ((LibInfo->has(LibFunc::cos) && Name == "cos") || + (LibInfo->has(LibFunc::cosf) && Name == "cosf") || + (LibInfo->has(LibFunc::cosl) && Name == "cosl")) { if (I.getNumArgOperands() == 1 && // Basic sanity checks. I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType() && @@ -5549,7 +5562,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { Tmp.getValueType(), Tmp)); return; } - } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") { + } else if ((LibInfo->has(LibFunc::sqrt) && Name == "sqrt") || + (LibInfo->has(LibFunc::sqrtf) && Name == "sqrtf") || + (LibInfo->has(LibFunc::sqrtl) && Name == "sqrtl")) { if (I.getNumArgOperands() == 1 && // Basic sanity checks. I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType() && @@ -5559,6 +5574,85 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { Tmp.getValueType(), Tmp)); return; } + } else if ((LibInfo->has(LibFunc::floor) && Name == "floor") || + (LibInfo->has(LibFunc::floorf) && Name == "floorf") || + (LibInfo->has(LibFunc::floorl) && Name == "floorl")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FFLOOR, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if ((LibInfo->has(LibFunc::nearbyint) && Name == "nearbyint") || + (LibInfo->has(LibFunc::nearbyintf) && Name == "nearbyintf") || + (LibInfo->has(LibFunc::nearbyintl) && Name == "nearbyintl")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FNEARBYINT, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if ((LibInfo->has(LibFunc::ceil) && Name == "ceil") || + (LibInfo->has(LibFunc::ceilf) && Name == "ceilf") || + (LibInfo->has(LibFunc::ceill) && Name == "ceill")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FCEIL, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if ((LibInfo->has(LibFunc::rint) && Name == "rint") || + (LibInfo->has(LibFunc::rintf) && Name == "rintf") || + (LibInfo->has(LibFunc::rintl) && Name == "rintl")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FRINT, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if ((LibInfo->has(LibFunc::trunc) && Name == "trunc") || + (LibInfo->has(LibFunc::truncf) && Name == "truncf") || + (LibInfo->has(LibFunc::truncl) && Name == "truncl")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FTRUNC, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if ((LibInfo->has(LibFunc::log2) && Name == "log2") || + (LibInfo->has(LibFunc::log2f) && Name == "log2f") || + (LibInfo->has(LibFunc::log2l) && Name == "log2l")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType() && + I.onlyReadsMemory()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FLOG2, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if ((LibInfo->has(LibFunc::exp2) && Name == "exp2") || + (LibInfo->has(LibFunc::exp2f) && Name == "exp2f") || + (LibInfo->has(LibFunc::exp2l) && Name == "exp2l")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType() && + I.onlyReadsMemory()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FEXP2, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } } else if (Name == "memcmp") { if (visitMemCmpCall(I)) return; @@ -5596,22 +5690,6 @@ public: : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) { } - /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers - /// busy in OutputRegs/InputRegs. - void MarkAllocatedRegs(bool isOutReg, bool isInReg, - std::set<unsigned> &OutputRegs, - std::set<unsigned> &InputRegs, - const TargetRegisterInfo &TRI) const { - if (isOutReg) { - for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i) - MarkRegAndAliases(AssignedRegs.Regs[i], OutputRegs, TRI); - } - if (isInReg) { - for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i) - MarkRegAndAliases(AssignedRegs.Regs[i], InputRegs, TRI); - } - } - /// getCallOperandValEVT - Return the EVT of the Value* that this operand /// corresponds to. If there is no Value* for this operand, it returns /// MVT::Other. @@ -5659,18 +5737,6 @@ public: return TLI.getValueType(OpTy, true); } - -private: - /// MarkRegAndAliases - Mark the specified register and all aliases in the - /// specified set. - static void MarkRegAndAliases(unsigned Reg, std::set<unsigned> &Regs, - const TargetRegisterInfo &TRI) { - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Isn't a physreg"); - Regs.insert(Reg); - if (const unsigned *Aliases = TRI.getAliasSet(Reg)) - for (; *Aliases; ++Aliases) - Regs.insert(*Aliases); - } }; typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; @@ -5684,39 +5750,13 @@ typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; /// allocation. This produces generally horrible, but correct, code. /// /// OpInfo describes the operand. -/// Input and OutputRegs are the set of already allocated physical registers. /// static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, DebugLoc DL, - SDISelAsmOperandInfo &OpInfo, - std::set<unsigned> &OutputRegs, - std::set<unsigned> &InputRegs) { + SDISelAsmOperandInfo &OpInfo) { LLVMContext &Context = *DAG.getContext(); - // Compute whether this value requires an input register, an output register, - // or both. - bool isOutReg = false; - bool isInReg = false; - switch (OpInfo.Type) { - case InlineAsm::isOutput: - isOutReg = true; - - // If there is an input constraint that matches this, we need to reserve - // the input register so no other inputs allocate to it. - isInReg = OpInfo.hasMatchingInput(); - break; - case InlineAsm::isInput: - isInReg = true; - isOutReg = false; - break; - case InlineAsm::isClobber: - isOutReg = true; - isInReg = true; - break; - } - - MachineFunction &MF = DAG.getMachineFunction(); SmallVector<unsigned, 4> Regs; @@ -5790,8 +5830,6 @@ static void GetRegistersForValue(SelectionDAG &DAG, } OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); - const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); - OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI); return; } @@ -5822,8 +5860,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { /// ConstraintOperands - Information about all of the constraints. SDISelAsmOperandInfoVector ConstraintOperands; - std::set<unsigned> OutputRegs, InputRegs; - TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(CS); @@ -5956,7 +5992,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // constant pool entry to get its address. const Value *OpVal = OpInfo.CallOperandVal; if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || - isa<ConstantVector>(OpVal)) { + isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) { OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal), TLI.getPointerTy()); } else { @@ -5985,8 +6021,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If this constraint is for a specific register, allocate it before // anything else. if (OpInfo.ConstraintType == TargetLowering::C_Register) - GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs, - InputRegs); + GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo); } // Second pass - Loop over all of the operands, assigning virtual or physregs @@ -5997,8 +6032,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // C_Register operands have already been allocated, Other/Memory don't need // to be. if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) - GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs, - InputRegs); + GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo); } // AsmNodeOperands - The operands for the ISD::INLINEASM node. @@ -6052,9 +6086,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Copy the output from the appropriate register. Find a register that // we can use. - if (OpInfo.AssignedRegs.Regs.empty()) - report_fatal_error("Couldn't allocate output reg for constraint '" + - Twine(OpInfo.ConstraintCode) + "'!"); + if (OpInfo.AssignedRegs.Regs.empty()) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), + "couldn't allocate output register for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + break; + } // If this is an indirect operand, store through the pointer after the // asm. @@ -6154,9 +6192,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { std::vector<SDValue> Ops; TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, Ops, DAG); - if (Ops.empty()) - report_fatal_error("Invalid operand for inline asm constraint '" + - Twine(OpInfo.ConstraintCode) + "'!"); + if (Ops.empty()) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), + "invalid operand for inline asm constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + break; + } // Add information to the INLINEASM node to know about this input. unsigned ResOpType = @@ -6187,9 +6229,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { "Don't know how to handle indirect register inputs yet!"); // Copy the input into the appropriate registers. - if (OpInfo.AssignedRegs.Regs.empty()) - report_fatal_error("Couldn't allocate input reg for constraint '" + - Twine(OpInfo.ConstraintCode) + "'!"); + if (OpInfo.AssignedRegs.Regs.empty()) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), + "couldn't allocate input reg for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + break; + } OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), Chain, &Flag); @@ -6327,7 +6373,7 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy, bool RetSExt, bool RetZExt, bool isVarArg, bool isInreg, unsigned NumFixedArgs, CallingConv::ID CallConv, bool isTailCall, - bool isReturnValueUsed, + bool doesNotRet, bool isReturnValueUsed, SDValue Callee, ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) const { @@ -6424,7 +6470,7 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy, } SmallVector<SDValue, 4> InVals; - Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall, + Chain = LowerCall(Chain, Callee, CallConv, isVarArg, doesNotRet, isTailCall, Outs, OutVals, Ins, dl, DAG, InVals); // Verify that the target's LowerCall behaved as expected. @@ -6493,7 +6539,6 @@ void TargetLowering::LowerOperationWrapper(SDNode *N, SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("LowerOperation not implemented for this target!"); - return SDValue(); } void @@ -6515,10 +6560,10 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { /// isOnlyUsedInEntryBlock - If the specified argument is only used in the /// entry block, return true. This includes arguments used by switches, since /// the switch may expand into multiple basic blocks. -static bool isOnlyUsedInEntryBlock(const Argument *A) { +static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { // With FastISel active, we may be splitting blocks, so force creation // of virtual registers for all non-dead arguments. - if (EnableFastISel) + if (FastISel) return A->use_empty(); const BasicBlock *Entry = A->getParent()->begin(); @@ -6708,7 +6753,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { SDB->getCurDebugLoc()); SDB->setValue(I, Res); - if (!EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { + if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(Res.getOperand(0).getNode())) if (FrameIndexSDNode *FI = @@ -6718,7 +6763,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { // If this argument is live outside of the entry block, insert a copy from // wherever we got it to the vreg that other BB's will reference it as. - if (!EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) { + if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) { // If we can, though, try to skip creating an unnecessary vreg. // FIXME: This isn't very clean... it would be nice to make this more // general. It's also subtly incompatible with the hacks FastISel @@ -6729,7 +6774,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { continue; } } - if (!isOnlyUsedInEntryBlock(I)) { + if (!isOnlyUsedInEntryBlock(I, TM.Options.EnableFastISel)) { FuncInfo->InitializeRegForValue(I); SDB->CopyToExportRegsIfNeeded(I); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 0a21ca3..8393b41 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -67,11 +67,11 @@ class SIToFPInst; class StoreInst; class SwitchInst; class TargetData; +class TargetLibraryInfo; class TargetLowering; class TruncInst; class UIToFPInst; class UnreachableInst; -class UnwindInst; class VAArgInst; class ZExtInst; @@ -129,13 +129,13 @@ private: /// Case - A struct to record the Value for a switch case, and the /// case's target basic block. struct Case { - Constant* Low; - Constant* High; + const Constant *Low; + const Constant *High; MachineBasicBlock* BB; uint32_t ExtraWeight; Case() : Low(0), High(0), BB(0), ExtraWeight(0) { } - Case(Constant* low, Constant* high, MachineBasicBlock* bb, + Case(const Constant *low, const Constant *high, MachineBasicBlock *bb, uint32_t extraweight) : Low(low), High(high), BB(bb), ExtraWeight(extraweight) { } @@ -294,6 +294,7 @@ public: SelectionDAG &DAG; const TargetData *TD; AliasAnalysis *AA; + const TargetLibraryInfo *LibInfo; /// SwitchCases - Vector of CaseBlock structures used to communicate /// SwitchInst code generation information. @@ -338,7 +339,8 @@ public: HasTailCall(false), Context(dag.getContext()) { } - void init(GCFunctionInfo *gfi, AliasAnalysis &aa); + void init(GCFunctionInfo *gfi, AliasAnalysis &aa, + const TargetLibraryInfo *li); /// clear - Clear out the current SelectionDAG and the associated /// state and prepare this SelectionDAGBuilder object to be used @@ -451,7 +453,8 @@ private: MachineBasicBlock* Default, MachineBasicBlock *SwitchBB); - uint32_t getEdgeWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst); + uint32_t getEdgeWeight(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const; void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst, uint32_t Weight = 0); public: @@ -471,7 +474,6 @@ private: // These all get lowered before this pass. void visitInvoke(const InvokeInst &I); void visitResume(const ResumeInst &I); - void visitUnwind(const UnwindInst &I); void visitBinary(const User &I, unsigned OpCode); void visitShift(const User &I, unsigned Opcode); @@ -554,8 +556,6 @@ private: void visitUserOp2(const Instruction &I) { llvm_unreachable("UserOp2 should not exist at instruction selection time!"); } - - const char *implVisitAluOverflow(const CallInst &I, ISD::NodeType Op); void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp new file mode 100644 index 0000000..f981afb --- /dev/null +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -0,0 +1,631 @@ +//===-- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAG::dump method and friends. +// +//===----------------------------------------------------------------------===// + +#include "ScheduleDAGSDNodes.h" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/StringExtras.h" +using namespace llvm; + +std::string SDNode::getOperationName(const SelectionDAG *G) const { + switch (getOpcode()) { + default: + if (getOpcode() < ISD::BUILTIN_OP_END) + return "<<Unknown DAG Node>>"; + if (isMachineOpcode()) { + if (G) + if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo()) + if (getMachineOpcode() < TII->getNumOpcodes()) + return TII->getName(getMachineOpcode()); + return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>"; + } + if (G) { + const TargetLowering &TLI = G->getTargetLoweringInfo(); + const char *Name = TLI.getTargetNodeName(getOpcode()); + if (Name) return Name; + return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>"; + } + return "<<Unknown Node #" + utostr(getOpcode()) + ">>"; + +#ifndef NDEBUG + case ISD::DELETED_NODE: return "<<Deleted Node!>>"; +#endif + case ISD::PREFETCH: return "Prefetch"; + case ISD::MEMBARRIER: return "MemBarrier"; + case ISD::ATOMIC_FENCE: return "AtomicFence"; + case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; + case ISD::ATOMIC_SWAP: return "AtomicSwap"; + case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd"; + case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub"; + case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd"; + case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr"; + case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor"; + case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand"; + case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin"; + case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax"; + case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin"; + case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax"; + case ISD::ATOMIC_LOAD: return "AtomicLoad"; + case ISD::ATOMIC_STORE: return "AtomicStore"; + case ISD::PCMARKER: return "PCMarker"; + case ISD::READCYCLECOUNTER: return "ReadCycleCounter"; + case ISD::SRCVALUE: return "SrcValue"; + case ISD::MDNODE_SDNODE: return "MDNode"; + case ISD::EntryToken: return "EntryToken"; + case ISD::TokenFactor: return "TokenFactor"; + case ISD::AssertSext: return "AssertSext"; + case ISD::AssertZext: return "AssertZext"; + + case ISD::BasicBlock: return "BasicBlock"; + case ISD::VALUETYPE: return "ValueType"; + case ISD::Register: return "Register"; + case ISD::RegisterMask: return "RegisterMask"; + case ISD::Constant: return "Constant"; + case ISD::ConstantFP: return "ConstantFP"; + case ISD::GlobalAddress: return "GlobalAddress"; + case ISD::GlobalTLSAddress: return "GlobalTLSAddress"; + case ISD::FrameIndex: return "FrameIndex"; + case ISD::JumpTable: return "JumpTable"; + case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; + case ISD::RETURNADDR: return "RETURNADDR"; + case ISD::FRAMEADDR: return "FRAMEADDR"; + case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; + case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR"; + case ISD::LSDAADDR: return "LSDAADDR"; + case ISD::EHSELECTION: return "EHSELECTION"; + case ISD::EH_RETURN: return "EH_RETURN"; + case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; + case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; + case ISD::ConstantPool: return "ConstantPool"; + case ISD::ExternalSymbol: return "ExternalSymbol"; + case ISD::BlockAddress: return "BlockAddress"; + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_VOID: + case ISD::INTRINSIC_W_CHAIN: { + unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1; + unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue(); + if (IID < Intrinsic::num_intrinsics) + return Intrinsic::getName((Intrinsic::ID)IID); + else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo()) + return TII->getName(IID); + llvm_unreachable("Invalid intrinsic ID"); + } + + case ISD::BUILD_VECTOR: return "BUILD_VECTOR"; + case ISD::TargetConstant: return "TargetConstant"; + case ISD::TargetConstantFP: return "TargetConstantFP"; + case ISD::TargetGlobalAddress: return "TargetGlobalAddress"; + case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress"; + case ISD::TargetFrameIndex: return "TargetFrameIndex"; + case ISD::TargetJumpTable: return "TargetJumpTable"; + case ISD::TargetConstantPool: return "TargetConstantPool"; + case ISD::TargetExternalSymbol: return "TargetExternalSymbol"; + case ISD::TargetBlockAddress: return "TargetBlockAddress"; + + case ISD::CopyToReg: return "CopyToReg"; + case ISD::CopyFromReg: return "CopyFromReg"; + case ISD::UNDEF: return "undef"; + case ISD::MERGE_VALUES: return "merge_values"; + case ISD::INLINEASM: return "inlineasm"; + case ISD::EH_LABEL: return "eh_label"; + case ISD::HANDLENODE: return "handlenode"; + + // Unary operators + case ISD::FABS: return "fabs"; + case ISD::FNEG: return "fneg"; + case ISD::FSQRT: return "fsqrt"; + case ISD::FSIN: return "fsin"; + case ISD::FCOS: return "fcos"; + case ISD::FTRUNC: return "ftrunc"; + case ISD::FFLOOR: return "ffloor"; + case ISD::FCEIL: return "fceil"; + case ISD::FRINT: return "frint"; + case ISD::FNEARBYINT: return "fnearbyint"; + case ISD::FEXP: return "fexp"; + case ISD::FEXP2: return "fexp2"; + case ISD::FLOG: return "flog"; + case ISD::FLOG2: return "flog2"; + case ISD::FLOG10: return "flog10"; + + // Binary operators + case ISD::ADD: return "add"; + case ISD::SUB: return "sub"; + case ISD::MUL: return "mul"; + case ISD::MULHU: return "mulhu"; + case ISD::MULHS: return "mulhs"; + case ISD::SDIV: return "sdiv"; + case ISD::UDIV: return "udiv"; + case ISD::SREM: return "srem"; + case ISD::UREM: return "urem"; + case ISD::SMUL_LOHI: return "smul_lohi"; + case ISD::UMUL_LOHI: return "umul_lohi"; + case ISD::SDIVREM: return "sdivrem"; + case ISD::UDIVREM: return "udivrem"; + case ISD::AND: return "and"; + case ISD::OR: return "or"; + case ISD::XOR: return "xor"; + case ISD::SHL: return "shl"; + case ISD::SRA: return "sra"; + case ISD::SRL: return "srl"; + case ISD::ROTL: return "rotl"; + case ISD::ROTR: return "rotr"; + case ISD::FADD: return "fadd"; + case ISD::FSUB: return "fsub"; + case ISD::FMUL: return "fmul"; + case ISD::FDIV: return "fdiv"; + case ISD::FMA: return "fma"; + case ISD::FREM: return "frem"; + case ISD::FCOPYSIGN: return "fcopysign"; + case ISD::FGETSIGN: return "fgetsign"; + case ISD::FPOW: return "fpow"; + + case ISD::FPOWI: return "fpowi"; + case ISD::SETCC: return "setcc"; + case ISD::SELECT: return "select"; + case ISD::VSELECT: return "vselect"; + case ISD::SELECT_CC: return "select_cc"; + case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt"; + case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt"; + case ISD::CONCAT_VECTORS: return "concat_vectors"; + case ISD::INSERT_SUBVECTOR: return "insert_subvector"; + case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; + case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; + case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; + case ISD::CARRY_FALSE: return "carry_false"; + case ISD::ADDC: return "addc"; + case ISD::ADDE: return "adde"; + case ISD::SADDO: return "saddo"; + case ISD::UADDO: return "uaddo"; + case ISD::SSUBO: return "ssubo"; + case ISD::USUBO: return "usubo"; + case ISD::SMULO: return "smulo"; + case ISD::UMULO: return "umulo"; + case ISD::SUBC: return "subc"; + case ISD::SUBE: return "sube"; + case ISD::SHL_PARTS: return "shl_parts"; + case ISD::SRA_PARTS: return "sra_parts"; + case ISD::SRL_PARTS: return "srl_parts"; + + // Conversion operators. + case ISD::SIGN_EXTEND: return "sign_extend"; + case ISD::ZERO_EXTEND: return "zero_extend"; + case ISD::ANY_EXTEND: return "any_extend"; + case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg"; + case ISD::TRUNCATE: return "truncate"; + case ISD::FP_ROUND: return "fp_round"; + case ISD::FLT_ROUNDS_: return "flt_rounds"; + case ISD::FP_ROUND_INREG: return "fp_round_inreg"; + case ISD::FP_EXTEND: return "fp_extend"; + + case ISD::SINT_TO_FP: return "sint_to_fp"; + case ISD::UINT_TO_FP: return "uint_to_fp"; + case ISD::FP_TO_SINT: return "fp_to_sint"; + case ISD::FP_TO_UINT: return "fp_to_uint"; + case ISD::BITCAST: return "bitcast"; + case ISD::FP16_TO_FP32: return "fp16_to_fp32"; + case ISD::FP32_TO_FP16: return "fp32_to_fp16"; + + case ISD::CONVERT_RNDSAT: { + switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) { + default: llvm_unreachable("Unknown cvt code!"); + case ISD::CVT_FF: return "cvt_ff"; + case ISD::CVT_FS: return "cvt_fs"; + case ISD::CVT_FU: return "cvt_fu"; + case ISD::CVT_SF: return "cvt_sf"; + case ISD::CVT_UF: return "cvt_uf"; + case ISD::CVT_SS: return "cvt_ss"; + case ISD::CVT_SU: return "cvt_su"; + case ISD::CVT_US: return "cvt_us"; + case ISD::CVT_UU: return "cvt_uu"; + } + } + + // Control flow instructions + case ISD::BR: return "br"; + case ISD::BRIND: return "brind"; + case ISD::BR_JT: return "br_jt"; + case ISD::BRCOND: return "brcond"; + case ISD::BR_CC: return "br_cc"; + case ISD::CALLSEQ_START: return "callseq_start"; + case ISD::CALLSEQ_END: return "callseq_end"; + + // Other operators + case ISD::LOAD: return "load"; + case ISD::STORE: return "store"; + case ISD::VAARG: return "vaarg"; + case ISD::VACOPY: return "vacopy"; + case ISD::VAEND: return "vaend"; + case ISD::VASTART: return "vastart"; + case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc"; + case ISD::EXTRACT_ELEMENT: return "extract_element"; + case ISD::BUILD_PAIR: return "build_pair"; + case ISD::STACKSAVE: return "stacksave"; + case ISD::STACKRESTORE: return "stackrestore"; + case ISD::TRAP: return "trap"; + + // Bit manipulation + case ISD::BSWAP: return "bswap"; + case ISD::CTPOP: return "ctpop"; + case ISD::CTTZ: return "cttz"; + case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef"; + case ISD::CTLZ: return "ctlz"; + case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef"; + + // Trampolines + case ISD::INIT_TRAMPOLINE: return "init_trampoline"; + case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline"; + + case ISD::CONDCODE: + switch (cast<CondCodeSDNode>(this)->get()) { + default: llvm_unreachable("Unknown setcc condition!"); + case ISD::SETOEQ: return "setoeq"; + case ISD::SETOGT: return "setogt"; + case ISD::SETOGE: return "setoge"; + case ISD::SETOLT: return "setolt"; + case ISD::SETOLE: return "setole"; + case ISD::SETONE: return "setone"; + + case ISD::SETO: return "seto"; + case ISD::SETUO: return "setuo"; + case ISD::SETUEQ: return "setue"; + case ISD::SETUGT: return "setugt"; + case ISD::SETUGE: return "setuge"; + case ISD::SETULT: return "setult"; + case ISD::SETULE: return "setule"; + case ISD::SETUNE: return "setune"; + + case ISD::SETEQ: return "seteq"; + case ISD::SETGT: return "setgt"; + case ISD::SETGE: return "setge"; + case ISD::SETLT: return "setlt"; + case ISD::SETLE: return "setle"; + case ISD::SETNE: return "setne"; + + case ISD::SETTRUE: return "settrue"; + case ISD::SETTRUE2: return "settrue2"; + case ISD::SETFALSE: return "setfalse"; + case ISD::SETFALSE2: return "setfalse2"; + } + } +} + +const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { + switch (AM) { + default: return ""; + case ISD::PRE_INC: return "<pre-inc>"; + case ISD::PRE_DEC: return "<pre-dec>"; + case ISD::POST_INC: return "<post-inc>"; + case ISD::POST_DEC: return "<post-dec>"; + } +} + +void SDNode::dump() const { dump(0); } +void SDNode::dump(const SelectionDAG *G) const { + print(dbgs(), G); + dbgs() << '\n'; +} + +void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { + OS << (void*)this << ": "; + + for (unsigned i = 0, e = getNumValues(); i != e; ++i) { + if (i) OS << ","; + if (getValueType(i) == MVT::Other) + OS << "ch"; + else + OS << getValueType(i).getEVTString(); + } + OS << " = " << getOperationName(G); +} + +void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { + if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) { + if (!MN->memoperands_empty()) { + OS << "<"; + OS << "Mem:"; + for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(), + e = MN->memoperands_end(); i != e; ++i) { + OS << **i; + if (llvm::next(i) != e) + OS << " "; + } + OS << ">"; + } + } else if (const ShuffleVectorSDNode *SVN = + dyn_cast<ShuffleVectorSDNode>(this)) { + OS << "<"; + for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) { + int Idx = SVN->getMaskElt(i); + if (i) OS << ","; + if (Idx < 0) + OS << "u"; + else + OS << Idx; + } + OS << ">"; + } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) { + OS << '<' << CSDN->getAPIntValue() << '>'; + } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) { + if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle) + OS << '<' << CSDN->getValueAPF().convertToFloat() << '>'; + else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble) + OS << '<' << CSDN->getValueAPF().convertToDouble() << '>'; + else { + OS << "<APFloat("; + CSDN->getValueAPF().bitcastToAPInt().dump(); + OS << ")>"; + } + } else if (const GlobalAddressSDNode *GADN = + dyn_cast<GlobalAddressSDNode>(this)) { + int64_t offset = GADN->getOffset(); + OS << '<'; + WriteAsOperand(OS, GADN->getGlobal()); + OS << '>'; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; + if (unsigned int TF = GADN->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) { + OS << "<" << FIDN->getIndex() << ">"; + } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) { + OS << "<" << JTDN->getIndex() << ">"; + if (unsigned int TF = JTDN->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){ + int offset = CP->getOffset(); + if (CP->isMachineConstantPoolEntry()) + OS << "<" << *CP->getMachineCPVal() << ">"; + else + OS << "<" << *CP->getConstVal() << ">"; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; + if (unsigned int TF = CP->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) { + OS << "<"; + const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock(); + if (LBB) + OS << LBB->getName() << " "; + OS << (const void*)BBDN->getBasicBlock() << ">"; + } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) { + OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0); + } else if (const ExternalSymbolSDNode *ES = + dyn_cast<ExternalSymbolSDNode>(this)) { + OS << "'" << ES->getSymbol() << "'"; + if (unsigned int TF = ES->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) { + if (M->getValue()) + OS << "<" << M->getValue() << ">"; + else + OS << "<null>"; + } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) { + if (MD->getMD()) + OS << "<" << MD->getMD() << ">"; + else + OS << "<null>"; + } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) { + OS << ":" << N->getVT().getEVTString(); + } + else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) { + OS << "<" << *LD->getMemOperand(); + + bool doExt = true; + switch (LD->getExtensionType()) { + default: doExt = false; break; + case ISD::EXTLOAD: OS << ", anyext"; break; + case ISD::SEXTLOAD: OS << ", sext"; break; + case ISD::ZEXTLOAD: OS << ", zext"; break; + } + if (doExt) + OS << " from " << LD->getMemoryVT().getEVTString(); + + const char *AM = getIndexedModeName(LD->getAddressingMode()); + if (*AM) + OS << ", " << AM; + + OS << ">"; + } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) { + OS << "<" << *ST->getMemOperand(); + + if (ST->isTruncatingStore()) + OS << ", trunc to " << ST->getMemoryVT().getEVTString(); + + const char *AM = getIndexedModeName(ST->getAddressingMode()); + if (*AM) + OS << ", " << AM; + + OS << ">"; + } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) { + OS << "<" << *M->getMemOperand() << ">"; + } else if (const BlockAddressSDNode *BA = + dyn_cast<BlockAddressSDNode>(this)) { + OS << "<"; + WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false); + OS << ", "; + WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false); + OS << ">"; + if (unsigned int TF = BA->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } + + if (G) + if (unsigned Order = G->GetOrdering(this)) + OS << " [ORD=" << Order << ']'; + + if (getNodeId() != -1) + OS << " [ID=" << getNodeId() << ']'; + + DebugLoc dl = getDebugLoc(); + if (G && !dl.isUnknown()) { + DIScope + Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext())); + OS << " dbg:"; + // Omit the directory, since it's usually long and uninteresting. + if (Scope.Verify()) + OS << Scope.getFilename(); + else + OS << "<unknown>"; + OS << ':' << dl.getLine(); + if (dl.getCol() != 0) + OS << ':' << dl.getCol(); + } +} + +static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getNode()->hasOneUse()) + DumpNodes(N->getOperand(i).getNode(), indent+2, G); + else + dbgs() << "\n" << std::string(indent+2, ' ') + << (void*)N->getOperand(i).getNode() << ": <multiple use>"; + + dbgs() << '\n'; + dbgs().indent(indent); + N->dump(G); +} + +void SelectionDAG::dump() const { + dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:"; + + for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); + I != E; ++I) { + const SDNode *N = I; + if (!N->hasOneUse() && N != getRoot().getNode()) + DumpNodes(N, 2, this); + } + + if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); + dbgs() << "\n\n"; +} + +void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { + print_types(OS, G); + print_details(OS, G); +} + +typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet; +static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, + const SelectionDAG *G, VisitedSDNodeSet &once) { + if (!once.insert(N)) // If we've been here before, return now. + return; + + // Dump the current SDNode, but don't end the line yet. + OS.indent(indent); + N->printr(OS, G); + + // Having printed this SDNode, walk the children: + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDNode *child = N->getOperand(i).getNode(); + + if (i) OS << ","; + OS << " "; + + if (child->getNumOperands() == 0) { + // This child has no grandchildren; print it inline right here. + child->printr(OS, G); + once.insert(child); + } else { // Just the address. FIXME: also print the child's opcode. + OS << (void*)child; + if (unsigned RN = N->getOperand(i).getResNo()) + OS << ":" << RN; + } + } + + OS << "\n"; + + // Dump children that have grandchildren on their own line(s). + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDNode *child = N->getOperand(i).getNode(); + DumpNodesr(OS, child, indent+2, G, once); + } +} + +void SDNode::dumpr() const { + VisitedSDNodeSet once; + DumpNodesr(dbgs(), this, 0, 0, once); +} + +void SDNode::dumpr(const SelectionDAG *G) const { + VisitedSDNodeSet once; + DumpNodesr(dbgs(), this, 0, G, once); +} + +static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N, + const SelectionDAG *G, unsigned depth, + unsigned indent) { + if (depth == 0) + return; + + OS.indent(indent); + + N->print(OS, G); + + if (depth < 1) + return; + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + // Don't follow chain operands. + if (N->getOperand(i).getValueType() == MVT::Other) + continue; + OS << '\n'; + printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2); + } +} + +void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G, + unsigned depth) const { + printrWithDepthHelper(OS, this, G, depth, 0); +} + +void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const { + // Don't print impossibly deep things. + printrWithDepth(OS, G, 10); +} + +void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const { + printrWithDepth(dbgs(), G, depth); +} + +void SDNode::dumprFull(const SelectionDAG *G) const { + // Don't print impossibly deep things. + dumprWithDepth(G, 10); +} + +void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { + print_types(OS, G); + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + if (i) OS << ", "; else OS << " "; + OS << (void*)getOperand(i).getNode(); + if (unsigned RN = getOperand(i).getResNo()) + OS << ":" << RN; + } + print_details(OS, G); +} diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 68b9146..605509b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -41,6 +41,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -61,6 +62,80 @@ STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel"); STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG"); STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path"); +#ifndef NDEBUG +static cl::opt<bool> +EnableFastISelVerbose2("fast-isel-verbose2", cl::Hidden, + cl::desc("Enable extra verbose messages in the \"fast\" " + "instruction selector")); + // Terminators +STATISTIC(NumFastIselFailRet,"Fast isel fails on Ret"); +STATISTIC(NumFastIselFailBr,"Fast isel fails on Br"); +STATISTIC(NumFastIselFailSwitch,"Fast isel fails on Switch"); +STATISTIC(NumFastIselFailIndirectBr,"Fast isel fails on IndirectBr"); +STATISTIC(NumFastIselFailInvoke,"Fast isel fails on Invoke"); +STATISTIC(NumFastIselFailResume,"Fast isel fails on Resume"); +STATISTIC(NumFastIselFailUnreachable,"Fast isel fails on Unreachable"); + + // Standard binary operators... +STATISTIC(NumFastIselFailAdd,"Fast isel fails on Add"); +STATISTIC(NumFastIselFailFAdd,"Fast isel fails on FAdd"); +STATISTIC(NumFastIselFailSub,"Fast isel fails on Sub"); +STATISTIC(NumFastIselFailFSub,"Fast isel fails on FSub"); +STATISTIC(NumFastIselFailMul,"Fast isel fails on Mul"); +STATISTIC(NumFastIselFailFMul,"Fast isel fails on FMul"); +STATISTIC(NumFastIselFailUDiv,"Fast isel fails on UDiv"); +STATISTIC(NumFastIselFailSDiv,"Fast isel fails on SDiv"); +STATISTIC(NumFastIselFailFDiv,"Fast isel fails on FDiv"); +STATISTIC(NumFastIselFailURem,"Fast isel fails on URem"); +STATISTIC(NumFastIselFailSRem,"Fast isel fails on SRem"); +STATISTIC(NumFastIselFailFRem,"Fast isel fails on FRem"); + + // Logical operators... +STATISTIC(NumFastIselFailAnd,"Fast isel fails on And"); +STATISTIC(NumFastIselFailOr,"Fast isel fails on Or"); +STATISTIC(NumFastIselFailXor,"Fast isel fails on Xor"); + + // Memory instructions... +STATISTIC(NumFastIselFailAlloca,"Fast isel fails on Alloca"); +STATISTIC(NumFastIselFailLoad,"Fast isel fails on Load"); +STATISTIC(NumFastIselFailStore,"Fast isel fails on Store"); +STATISTIC(NumFastIselFailAtomicCmpXchg,"Fast isel fails on AtomicCmpXchg"); +STATISTIC(NumFastIselFailAtomicRMW,"Fast isel fails on AtomicRWM"); +STATISTIC(NumFastIselFailFence,"Fast isel fails on Frence"); +STATISTIC(NumFastIselFailGetElementPtr,"Fast isel fails on GetElementPtr"); + + // Convert instructions... +STATISTIC(NumFastIselFailTrunc,"Fast isel fails on Trunc"); +STATISTIC(NumFastIselFailZExt,"Fast isel fails on ZExt"); +STATISTIC(NumFastIselFailSExt,"Fast isel fails on SExt"); +STATISTIC(NumFastIselFailFPTrunc,"Fast isel fails on FPTrunc"); +STATISTIC(NumFastIselFailFPExt,"Fast isel fails on FPExt"); +STATISTIC(NumFastIselFailFPToUI,"Fast isel fails on FPToUI"); +STATISTIC(NumFastIselFailFPToSI,"Fast isel fails on FPToSI"); +STATISTIC(NumFastIselFailUIToFP,"Fast isel fails on UIToFP"); +STATISTIC(NumFastIselFailSIToFP,"Fast isel fails on SIToFP"); +STATISTIC(NumFastIselFailIntToPtr,"Fast isel fails on IntToPtr"); +STATISTIC(NumFastIselFailPtrToInt,"Fast isel fails on PtrToInt"); +STATISTIC(NumFastIselFailBitCast,"Fast isel fails on BitCast"); + + // Other instructions... +STATISTIC(NumFastIselFailICmp,"Fast isel fails on ICmp"); +STATISTIC(NumFastIselFailFCmp,"Fast isel fails on FCmp"); +STATISTIC(NumFastIselFailPHI,"Fast isel fails on PHI"); +STATISTIC(NumFastIselFailSelect,"Fast isel fails on Select"); +STATISTIC(NumFastIselFailCall,"Fast isel fails on Call"); +STATISTIC(NumFastIselFailShl,"Fast isel fails on Shl"); +STATISTIC(NumFastIselFailLShr,"Fast isel fails on LShr"); +STATISTIC(NumFastIselFailAShr,"Fast isel fails on AShr"); +STATISTIC(NumFastIselFailVAArg,"Fast isel fails on VAArg"); +STATISTIC(NumFastIselFailExtractElement,"Fast isel fails on ExtractElement"); +STATISTIC(NumFastIselFailInsertElement,"Fast isel fails on InsertElement"); +STATISTIC(NumFastIselFailShuffleVector,"Fast isel fails on ShuffleVector"); +STATISTIC(NumFastIselFailExtractValue,"Fast isel fails on ExtractValue"); +STATISTIC(NumFastIselFailInsertValue,"Fast isel fails on InsertValue"); +STATISTIC(NumFastIselFailLandingPad,"Fast isel fails on LandingPad"); +#endif + static cl::opt<bool> EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, cl::desc("Enable verbose messages in the \"fast\" " @@ -142,14 +217,15 @@ namespace llvm { CodeGenOpt::Level OptLevel) { const TargetLowering &TLI = IS->getTargetLowering(); - if (OptLevel == CodeGenOpt::None) + if (OptLevel == CodeGenOpt::None || + TLI.getSchedulingPreference() == Sched::Source) return createSourceListDAGScheduler(IS, OptLevel); - if (TLI.getSchedulingPreference() == Sched::Latency) - return createTDListDAGScheduler(IS, OptLevel); if (TLI.getSchedulingPreference() == Sched::RegPressure) return createBURRListDAGScheduler(IS, OptLevel); if (TLI.getSchedulingPreference() == Sched::Hybrid) return createHybridListDAGScheduler(IS, OptLevel); + if (TLI.getSchedulingPreference() == Sched::VLIW) + return createVLIWDAGScheduler(IS, OptLevel); assert(TLI.getSchedulingPreference() == Sched::ILP && "Unknown sched type!"); return createILPListDAGScheduler(IS, OptLevel); @@ -174,12 +250,11 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, "TargetLowering::EmitInstrWithCustomInserter!"; #endif llvm_unreachable(0); - return 0; } void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, SDNode *Node) const { - assert(!MI->getDesc().hasPostISelHook() && + assert(!MI->hasPostISelHook() && "If a target marks an instruction with 'hasPostISelHook', " "it must implement TargetLowering::AdjustInstrPostInstrSelection!"); } @@ -188,11 +263,13 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, // SelectionDAGISel code //===----------------------------------------------------------------------===// +void SelectionDAGISel::ISelUpdater::anchor() { } + SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) : MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()), FuncInfo(new FunctionLoweringInfo(TLI)), - CurDAG(new SelectionDAG(tm)), + CurDAG(new SelectionDAG(tm, OL)), SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), GFI(), OptLevel(OL), @@ -200,6 +277,7 @@ SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry()); initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry()); + initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry()); } SelectionDAGISel::~SelectionDAGISel() { @@ -213,6 +291,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<AliasAnalysis>(); AU.addRequired<GCModuleInfo>(); AU.addPreserved<GCModuleInfo>(); + AU.addRequired<TargetLibraryInfo>(); if (UseMBPI && OptLevel != CodeGenOpt::None) AU.addRequired<BranchProbabilityInfo>(); MachineFunctionPass::getAnalysisUsage(AU); @@ -258,9 +337,9 @@ static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) { bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // Do some sanity-checking on the command-line options. - assert((!EnableFastISelVerbose || EnableFastISel) && + assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) && "-fast-isel-verbose requires -fast-isel"); - assert((!EnableFastISelAbort || EnableFastISel) && + assert((!EnableFastISelAbort || TM.Options.EnableFastISel) && "-fast-isel-abort requires -fast-isel"); const Function &Fn = *mf.getFunction(); @@ -270,6 +349,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { MF = &mf; RegInfo = &MF->getRegInfo(); AA = &getAnalysis<AliasAnalysis>(); + LibInfo = &getAnalysis<TargetLibraryInfo>(); GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0; DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); @@ -284,7 +364,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { else FuncInfo->BPI = 0; - SDB->init(GFI, *AA); + SDB->init(GFI, *AA, LibInfo); SelectAllBasicBlocks(Fn); @@ -348,7 +428,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { TII.get(TargetOpcode::DBG_VALUE)) .addReg(CopyUseMI->getOperand(0).getReg(), RegState::Debug) .addImm(Offset).addMetadata(Variable); - EntryMBB->insertAfter(CopyUseMI, NewMI); + MachineBasicBlock::iterator Pos = CopyUseMI; + EntryMBB->insertAfter(Pos, NewMI); } } } @@ -374,7 +455,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { } // Determine if there is a call to setjmp in the machine function. - MF->setCallsSetJmp(Fn.callsFunctionThatReturnsTwice()); + MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice()); // Replace forward-declared registers with the registers containing // the desired value. @@ -427,7 +508,6 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { Worklist.push_back(CurDAG->getRoot().getNode()); - APInt Mask; APInt KnownZero; APInt KnownOne; @@ -458,8 +538,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { continue; unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src); - Mask = APInt::getAllOnesValue(SrcVT.getSizeInBits()); - CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne); + CurDAG->ComputeMaskedBits(Src, KnownZero, KnownOne); FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne); } while (!Worklist.empty()); } @@ -478,8 +557,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #endif { BlockNumber = FuncInfo->MBB->getNumber(); - BlockName = MF->getFunction()->getNameStr() + ":" + - FuncInfo->MBB->getBasicBlock()->getNameStr(); + BlockName = MF->getFunction()->getName().str() + ":" + + FuncInfo->MBB->getBasicBlock()->getName().str(); } DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); @@ -489,7 +568,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Run the DAG combiner in pre-legalize mode. { NamedRegionTimer T("DAG Combining 1", GroupName, TimePassesIsEnabled); - CurDAG->Combine(Unrestricted, *AA, OptLevel); + CurDAG->Combine(BeforeLegalizeTypes, *AA, OptLevel); } DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber @@ -517,7 +596,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { { NamedRegionTimer T("DAG Combining after legalize types", GroupName, TimePassesIsEnabled); - CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); + CurDAG->Combine(AfterLegalizeTypes, *AA, OptLevel); } DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber @@ -542,7 +621,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { { NamedRegionTimer T("DAG Combining after legalize vectors", GroupName, TimePassesIsEnabled); - CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + CurDAG->Combine(AfterLegalizeVectorOps, *AA, OptLevel); } DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#" @@ -564,7 +643,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Run the DAG combiner in post-legalize mode. { NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled); - CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + CurDAG->Combine(AfterLegalizeDAG, *AA, OptLevel); } DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber @@ -592,7 +671,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { { NamedRegionTimer T("Instruction Scheduling", GroupName, TimePassesIsEnabled); - Scheduler->Run(CurDAG, FuncInfo->MBB, FuncInfo->InsertPt); + Scheduler->Run(CurDAG, FuncInfo->MBB); } if (ViewSUnitDAGs) Scheduler->viewGraph(); @@ -603,8 +682,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { { NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled); - LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(); - FuncInfo->InsertPt = Scheduler->InsertPos; + // FuncInfo->InsertPt is passed by reference and set to the end of the + // scheduled instructions. + LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(FuncInfo->InsertPt); } // If the block was split, make sure we update any references that are used to @@ -693,43 +773,18 @@ void SelectionDAGISel::PrepareEHLandingPad() { // Assign the call site to the landing pad's begin label. MF->getMMI().setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]); - + const MCInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL); BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) .addSym(Label); // Mark exception register as live in. - unsigned Reg = TLI.getExceptionAddressRegister(); + unsigned Reg = TLI.getExceptionPointerRegister(); if (Reg) MBB->addLiveIn(Reg); // Mark exception selector register as live in. Reg = TLI.getExceptionSelectorRegister(); if (Reg) MBB->addLiveIn(Reg); - - // FIXME: Hack around an exception handling flaw (PR1508): the personality - // function and list of typeids logically belong to the invoke (or, if you - // like, the basic block containing the invoke), and need to be associated - // with it in the dwarf exception handling tables. Currently however the - // information is provided by an intrinsic (eh.selector) that can be moved - // to unexpected places by the optimizers: if the unwind edge is critical, - // then breaking it can result in the intrinsics being in the successor of - // the landing pad, not the landing pad itself. This results - // in exceptions not being caught because no typeids are associated with - // the invoke. This may not be the only way things can go wrong, but it - // is the only way we try to work around for the moment. - const BasicBlock *LLVMBB = MBB->getBasicBlock(); - const BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator()); - - if (Br && Br->isUnconditional()) { // Critical edge? - BasicBlock::const_iterator I, E; - for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I) - if (isa<EHSelectorInst>(I)) - break; - - if (I == E) - // No catch info found - try to extract some from the successor. - CopyCatchInfo(Br->getSuccessor(0), LLVMBB, &MF->getMMI(), *FuncInfo); - } } /// TryToFoldFastISelLoad - We're checking to see if we can fold the specified @@ -822,10 +877,90 @@ static bool isFoldedOrDeadInstruction(const Instruction *I, !FuncInfo->isExportedInst(I); // Exported instrs must be computed. } +#ifndef NDEBUG +// Collect per Instruction statistics for fast-isel misses. Only those +// instructions that cause the bail are accounted for. It does not account for +// instructions higher in the block. Thus, summing the per instructions stats +// will not add up to what is reported by NumFastIselFailures. +static void collectFailStats(const Instruction *I) { + switch (I->getOpcode()) { + default: assert (0 && "<Invalid operator> "); + + // Terminators + case Instruction::Ret: NumFastIselFailRet++; return; + case Instruction::Br: NumFastIselFailBr++; return; + case Instruction::Switch: NumFastIselFailSwitch++; return; + case Instruction::IndirectBr: NumFastIselFailIndirectBr++; return; + case Instruction::Invoke: NumFastIselFailInvoke++; return; + case Instruction::Resume: NumFastIselFailResume++; return; + case Instruction::Unreachable: NumFastIselFailUnreachable++; return; + + // Standard binary operators... + case Instruction::Add: NumFastIselFailAdd++; return; + case Instruction::FAdd: NumFastIselFailFAdd++; return; + case Instruction::Sub: NumFastIselFailSub++; return; + case Instruction::FSub: NumFastIselFailFSub++; return; + case Instruction::Mul: NumFastIselFailMul++; return; + case Instruction::FMul: NumFastIselFailFMul++; return; + case Instruction::UDiv: NumFastIselFailUDiv++; return; + case Instruction::SDiv: NumFastIselFailSDiv++; return; + case Instruction::FDiv: NumFastIselFailFDiv++; return; + case Instruction::URem: NumFastIselFailURem++; return; + case Instruction::SRem: NumFastIselFailSRem++; return; + case Instruction::FRem: NumFastIselFailFRem++; return; + + // Logical operators... + case Instruction::And: NumFastIselFailAnd++; return; + case Instruction::Or: NumFastIselFailOr++; return; + case Instruction::Xor: NumFastIselFailXor++; return; + + // Memory instructions... + case Instruction::Alloca: NumFastIselFailAlloca++; return; + case Instruction::Load: NumFastIselFailLoad++; return; + case Instruction::Store: NumFastIselFailStore++; return; + case Instruction::AtomicCmpXchg: NumFastIselFailAtomicCmpXchg++; return; + case Instruction::AtomicRMW: NumFastIselFailAtomicRMW++; return; + case Instruction::Fence: NumFastIselFailFence++; return; + case Instruction::GetElementPtr: NumFastIselFailGetElementPtr++; return; + + // Convert instructions... + case Instruction::Trunc: NumFastIselFailTrunc++; return; + case Instruction::ZExt: NumFastIselFailZExt++; return; + case Instruction::SExt: NumFastIselFailSExt++; return; + case Instruction::FPTrunc: NumFastIselFailFPTrunc++; return; + case Instruction::FPExt: NumFastIselFailFPExt++; return; + case Instruction::FPToUI: NumFastIselFailFPToUI++; return; + case Instruction::FPToSI: NumFastIselFailFPToSI++; return; + case Instruction::UIToFP: NumFastIselFailUIToFP++; return; + case Instruction::SIToFP: NumFastIselFailSIToFP++; return; + case Instruction::IntToPtr: NumFastIselFailIntToPtr++; return; + case Instruction::PtrToInt: NumFastIselFailPtrToInt++; return; + case Instruction::BitCast: NumFastIselFailBitCast++; return; + + // Other instructions... + case Instruction::ICmp: NumFastIselFailICmp++; return; + case Instruction::FCmp: NumFastIselFailFCmp++; return; + case Instruction::PHI: NumFastIselFailPHI++; return; + case Instruction::Select: NumFastIselFailSelect++; return; + case Instruction::Call: NumFastIselFailCall++; return; + case Instruction::Shl: NumFastIselFailShl++; return; + case Instruction::LShr: NumFastIselFailLShr++; return; + case Instruction::AShr: NumFastIselFailAShr++; return; + case Instruction::VAArg: NumFastIselFailVAArg++; return; + case Instruction::ExtractElement: NumFastIselFailExtractElement++; return; + case Instruction::InsertElement: NumFastIselFailInsertElement++; return; + case Instruction::ShuffleVector: NumFastIselFailShuffleVector++; return; + case Instruction::ExtractValue: NumFastIselFailExtractValue++; return; + case Instruction::InsertValue: NumFastIselFailInsertValue++; return; + case Instruction::LandingPad: NumFastIselFailLandingPad++; return; + } +} +#endif + void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = 0; - if (EnableFastISel) + if (TM.Options.EnableFastISel) FastIS = TLI.createFastISel(*FuncInfo); // Iterate over all basic blocks in the function. @@ -894,13 +1029,16 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastIS->setLastLocalValue(0); } + unsigned NumFastIselRemaining = std::distance(Begin, End); // Do FastISel on as many instructions as possible. for (; BI != Begin; --BI) { const Instruction *Inst = llvm::prior(BI); // If we no longer require this instruction, skip it. - if (isFoldedOrDeadInstruction(Inst, FuncInfo)) + if (isFoldedOrDeadInstruction(Inst, FuncInfo)) { + --NumFastIselRemaining; continue; + } // Bottom-up: reset the insert pos at the top, after any local-value // instructions. @@ -908,6 +1046,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Try to select the instruction with FastISel. if (FastIS->SelectInstruction(Inst)) { + --NumFastIselRemaining; ++NumFastIselSuccess; // If fast isel succeeded, skip over all the folded instructions, and // then see if there is a load right before the selected instructions. @@ -920,15 +1059,23 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) && BeforeInst->hasOneUse() && - TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS)) + TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS)) { // If we succeeded, don't re-select the load. BI = llvm::next(BasicBlock::const_iterator(BeforeInst)); + --NumFastIselRemaining; + ++NumFastIselSuccess; + } continue; } +#ifndef NDEBUG + if (EnableFastISelVerbose2) + collectFailStats(Inst); +#endif + // Then handle certain instructions as single-LLVM-Instruction blocks. if (isa<CallInst>(Inst)) { - ++NumFastIselFailures; + if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel missed call: "; Inst->dump(); @@ -943,24 +1090,30 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { bool HadTailCall = false; SelectBasicBlock(Inst, BI, HadTailCall); + // Recompute NumFastIselRemaining as Selection DAG instruction + // selection may have handled the call, input args, etc. + unsigned RemainingNow = std::distance(Begin, BI); + NumFastIselFailures += NumFastIselRemaining - RemainingNow; + // If the call was emitted as a tail call, we're done with the block. if (HadTailCall) { --BI; break; } + NumFastIselRemaining = RemainingNow; continue; } if (isa<TerminatorInst>(Inst) && !isa<BranchInst>(Inst)) { // Don't abort, and use a different message for terminator misses. - ++NumFastIselFailures; + NumFastIselFailures += NumFastIselRemaining; if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel missed terminator: "; Inst->dump(); } } else { - ++NumFastIselFailures; + NumFastIselFailures += NumFastIselRemaining; if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel miss: "; Inst->dump(); @@ -1289,7 +1442,7 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, APInt NeededMask = DesiredMask & ~ActualMask; APInt KnownZero, KnownOne; - CurDAG->ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne); + CurDAG->ComputeMaskedBits(LHS, KnownZero, KnownOne); // If all the missing bits in the or are already known to be set, match! if ((NeededMask & KnownOne) == NeededMask) @@ -2025,6 +2178,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case ISD::EntryToken: // These nodes remain the same. case ISD::BasicBlock: case ISD::Register: + case ISD::RegisterMask: //case ISD::VALUETYPE: //case ISD::CONDCODE: case ISD::HANDLENODE: diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index cd1647b..6cde05a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -19,7 +19,6 @@ #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetMachine.h" @@ -28,7 +27,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Config/config.h" using namespace llvm; namespace llvm { @@ -148,7 +146,7 @@ std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node, void SelectionDAG::viewGraph(const std::string &Title) { // This code is only for debugging! #ifndef NDEBUG - ViewGraph(this, "dag." + getMachineFunction().getFunction()->getNameStr(), + ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName(), false, Title); #else errs() << "SelectionDAG::viewGraph is only available in debug builds on " diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 907d8d9..09a2b1f 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -36,31 +36,9 @@ using namespace llvm; /// - the promotion of vector elements. This feature is disabled by default /// and only enabled using this flag. static cl::opt<bool> -AllowPromoteIntElem("promote-elements", cl::Hidden, +AllowPromoteIntElem("promote-elements", cl::Hidden, cl::init(true), cl::desc("Allow promotion of integer vector element types")); -namespace llvm { -TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc) { - bool isLocal = GV->hasLocalLinkage(); - bool isDeclaration = GV->isDeclaration(); - // FIXME: what should we do for protected and internal visibility? - // For variables, is internal different from hidden? - bool isHidden = GV->hasHiddenVisibility(); - - if (reloc == Reloc::PIC_) { - if (isLocal || isHidden) - return TLSModel::LocalDynamic; - else - return TLSModel::GeneralDynamic; - } else { - if (!isDeclaration || isHidden) - return TLSModel::LocalExec; - else - return TLSModel::InitialExec; - } -} -} - /// InitLibcallNames - Set default libcall names. /// static void InitLibcallNames(const char **Names) { @@ -572,21 +550,42 @@ TargetLowering::TargetLowering(const TargetMachine &tm, // ConstantFP nodes default to expand. Targets can either change this to // Legal, in which case all fp constants are legal, or use isFPImmLegal() // to optimize expansions for certain constants. + setOperationAction(ISD::ConstantFP, MVT::f16, Expand); setOperationAction(ISD::ConstantFP, MVT::f32, Expand); setOperationAction(ISD::ConstantFP, MVT::f64, Expand); setOperationAction(ISD::ConstantFP, MVT::f80, Expand); // These library functions default to expand. - setOperationAction(ISD::FLOG , MVT::f64, Expand); - setOperationAction(ISD::FLOG2, MVT::f64, Expand); - setOperationAction(ISD::FLOG10,MVT::f64, Expand); - setOperationAction(ISD::FEXP , MVT::f64, Expand); - setOperationAction(ISD::FEXP2, MVT::f64, Expand); - setOperationAction(ISD::FLOG , MVT::f32, Expand); - setOperationAction(ISD::FLOG2, MVT::f32, Expand); - setOperationAction(ISD::FLOG10,MVT::f32, Expand); - setOperationAction(ISD::FEXP , MVT::f32, Expand); - setOperationAction(ISD::FEXP2, MVT::f32, Expand); + setOperationAction(ISD::FLOG , MVT::f16, Expand); + setOperationAction(ISD::FLOG2, MVT::f16, Expand); + setOperationAction(ISD::FLOG10, MVT::f16, Expand); + setOperationAction(ISD::FEXP , MVT::f16, Expand); + setOperationAction(ISD::FEXP2, MVT::f16, Expand); + setOperationAction(ISD::FFLOOR, MVT::f16, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand); + setOperationAction(ISD::FCEIL, MVT::f16, Expand); + setOperationAction(ISD::FRINT, MVT::f16, Expand); + setOperationAction(ISD::FTRUNC, MVT::f16, Expand); + setOperationAction(ISD::FLOG , MVT::f32, Expand); + setOperationAction(ISD::FLOG2, MVT::f32, Expand); + setOperationAction(ISD::FLOG10, MVT::f32, Expand); + setOperationAction(ISD::FEXP , MVT::f32, Expand); + setOperationAction(ISD::FEXP2, MVT::f32, Expand); + setOperationAction(ISD::FFLOOR, MVT::f32, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand); + setOperationAction(ISD::FCEIL, MVT::f32, Expand); + setOperationAction(ISD::FRINT, MVT::f32, Expand); + setOperationAction(ISD::FTRUNC, MVT::f32, Expand); + setOperationAction(ISD::FLOG , MVT::f64, Expand); + setOperationAction(ISD::FLOG2, MVT::f64, Expand); + setOperationAction(ISD::FLOG10, MVT::f64, Expand); + setOperationAction(ISD::FEXP , MVT::f64, Expand); + setOperationAction(ISD::FEXP2, MVT::f64, Expand); + setOperationAction(ISD::FFLOOR, MVT::f64, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); + setOperationAction(ISD::FCEIL, MVT::f64, Expand); + setOperationAction(ISD::FRINT, MVT::f64, Expand); + setOperationAction(ISD::FTRUNC, MVT::f64, Expand); // Default ISD::TRAP to expand (which turns it into abort). setOperationAction(ISD::TRAP, MVT::Other, Expand); @@ -610,7 +609,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm, ExceptionSelectorRegister = 0; BooleanContents = UndefinedBooleanContent; BooleanVectorContents = UndefinedBooleanContent; - SchedPreferenceInfo = Sched::Latency; + SchedPreferenceInfo = Sched::ILP; JumpBufSize = 0; JumpBufAlignment = 0; MinFunctionAlignment = 0; @@ -1080,8 +1079,12 @@ unsigned TargetLowering::getJumpTableEncoding() const { SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const { // If our PIC model is GP relative, use the global offset table as the base. - if (getJumpTableEncoding() == MachineJumpTableInfo::EK_GPRel32BlockAddress) + unsigned JTEncoding = getJumpTableEncoding(); + + if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) || + (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress)) return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy()); + return Table; } @@ -1223,7 +1226,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (Depth != 0) { // If not at the root, Just compute the KnownZero/KnownOne bits to // simplify things downstream. - TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth); + TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); return false; } // If this is the root being simplified, allow it to have multiple uses, @@ -1242,8 +1245,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, switch (Op.getOpcode()) { case ISD::Constant: // We know all of the bits for a constant! - KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & NewMask; - KnownZero = ~KnownOne & NewMask; + KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue(); + KnownZero = ~KnownOne; return false; // Don't fall through, will infinitely loop. case ISD::AND: // If the RHS is a constant, check to see if the LHS would be zero without @@ -1253,8 +1256,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { APInt LHSZero, LHSOne; // Do not increment Depth here; that can cause an infinite loop. - TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask, - LHSZero, LHSOne, Depth); + TLO.DAG.ComputeMaskedBits(Op.getOperand(0), LHSZero, LHSOne, Depth); // If the LHS already has zeros where RHSC does, this and is dead. if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) return TLO.CombineTo(Op, Op.getOperand(0)); @@ -1473,9 +1475,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) { SDValue InnerOp = InOp.getNode()->getOperand(0); EVT InnerVT = InnerOp.getValueType(); - if ((APInt::getHighBitsSet(BitWidth, - BitWidth - InnerVT.getSizeInBits()) & - DemandedMask) == 0 && + unsigned InnerBits = InnerVT.getSizeInBits(); + if (ShAmt < InnerBits && NewMask.lshr(InnerBits) == 0 && isTypeDesirableForOp(ISD::SHL, InnerVT)) { EVT ShTy = getShiftAmountTy(InnerVT); if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits())) @@ -1545,7 +1546,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // always convert this into a logical shr, even if the shift amount is // variable. The low bit of the shift cannot be an input sign bit unless // the shift amount is >= the size of the datatype, which is undefined. - if (DemandedMask == 1) + if (NewMask == 1) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op.getOperand(0), Op.getOperand(1))); @@ -1588,23 +1589,40 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } break; case ISD::SIGN_EXTEND_INREG: { - EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + + APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1); + // If we only care about the highest bit, don't bother shifting right. + if (MsbMask == DemandedMask) { + unsigned ShAmt = ExVT.getScalarType().getSizeInBits(); + SDValue InOp = Op.getOperand(0); + + // Compute the correct shift amount type, which must be getShiftAmountTy + // for scalar types after legalization. + EVT ShiftAmtTy = Op.getValueType(); + if (TLO.LegalTypes() && !ShiftAmtTy.isVector()) + ShiftAmtTy = getShiftAmountTy(ShiftAmtTy); + + SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, + Op.getValueType(), InOp, ShiftAmt)); + } // Sign extension. Compute the demanded bits in the result that are not // present in the input. APInt NewBits = APInt::getHighBitsSet(BitWidth, - BitWidth - EVT.getScalarType().getSizeInBits()); + BitWidth - ExVT.getScalarType().getSizeInBits()); // If none of the extended bits are demanded, eliminate the sextinreg. if ((NewBits & NewMask) == 0) return TLO.CombineTo(Op, Op.getOperand(0)); APInt InSignBit = - APInt::getSignBit(EVT.getScalarType().getSizeInBits()).zext(BitWidth); + APInt::getSignBit(ExVT.getScalarType().getSizeInBits()).zext(BitWidth); APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, - EVT.getScalarType().getSizeInBits()) & + ExVT.getScalarType().getSizeInBits()) & NewMask; // Since the sign extended bits are demanded, we know that the sign @@ -1622,7 +1640,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the input sign bit is known zero, convert this into a zero extension. if (KnownZero.intersects(InSignBit)) return TLO.CombineTo(Op, - TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT)); + TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,ExVT)); if (KnownOne.intersects(InSignBit)) { // Input sign bit known set KnownOne |= NewBits; @@ -1688,11 +1706,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the sign bit is known one, the top bits match. if (KnownOne.intersects(InSignBit)) { - KnownOne |= NewBits; - KnownZero &= ~NewBits; + KnownOne |= NewBits; + assert((KnownZero & NewBits) == 0); } else { // Otherwise, top bits aren't known. - KnownOne &= ~NewBits; - KnownZero &= ~NewBits; + assert((KnownOne & NewBits) == 0); + assert((KnownZero & NewBits) == 0); } break; } @@ -1783,7 +1801,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::BITCAST: // If this is an FP->Int bitcast and if the sign bit is the only // thing demanded, turn this into a FGETSIGN. - if (!Op.getOperand(0).getValueType().isVector() && + if (!TLO.LegalOperations() && + !Op.getValueType().isVector() && + !Op.getOperand(0).getValueType().isVector() && NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) && Op.getOperand(0).getValueType().isFloatingPoint()) { bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); @@ -1824,7 +1844,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // FALL THROUGH default: // Just use ComputeMaskedBits to compute output bits. - TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth); + TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); break; } @@ -1840,7 +1860,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, /// in Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, @@ -1851,7 +1870,7 @@ void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, Op.getOpcode() == ISD::INTRINSIC_VOID) && "Should use MaskedValueIsZero if you don't know whether Op" " is a target node!"); - KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); + KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); } /// ComputeNumSignBitsForTargetNode - This method can be implemented by @@ -1895,9 +1914,8 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { // Fall back to ComputeMaskedBits to catch other known cases. EVT OpVT = Val.getValueType(); unsigned BitWidth = OpVT.getScalarType().getSizeInBits(); - APInt Mask = APInt::getAllOnesValue(BitWidth); APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne); + DAG.ComputeMaskedBits(Val, KnownZero, KnownOne); return (KnownZero.countPopulation() == BitWidth - 1) && (KnownOne.countPopulation() == 1); } @@ -2060,7 +2078,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, Lod->getPointerInfo().getWithOffset(bestOffset), - false, false, NewAlign); + false, false, false, NewAlign); return DAG.getSetCC(dl, VT, DAG.getNode(ISD::AND, dl, newVT, NewLoad, DAG.getConstant(bestMask.trunc(bestWidth), @@ -2393,8 +2411,15 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (N0 == N1) { // We can always fold X == X for integer setcc's. - if (N0.getValueType().isInteger()) - return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT); + if (N0.getValueType().isInteger()) { + switch (getBooleanContents(N0.getValueType().isVector())) { + case UndefinedBooleanContent: + case ZeroOrOneBooleanContent: + return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT); + case ZeroOrNegativeOneBooleanContent: + return DAG.getConstant(ISD::isTrueWhenEqual(Cond) ? -1 : 0, VT); + } + } unsigned UOF = ISD::getUnorderedFlavor(Cond); if (UOF == 2) // FP operators that are undefined on NaNs. return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT); @@ -2428,6 +2453,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } + // If RHS is a legal immediate value for a compare instruction, we need + // to be careful about increasing register pressure needlessly. + bool LegalRHSImm = false; + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) { if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { // Turn (X+C1) == C2 --> X == C2-C1 @@ -2462,25 +2491,33 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, Cond); } } + + // Could RHSC fold directly into a compare? + if (RHSC->getValueType(0).getSizeInBits() <= 64) + LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue()); } // Simplify (X+Z) == X --> Z == 0 - if (N0.getOperand(0) == N1) - return DAG.getSetCC(dl, VT, N0.getOperand(1), - DAG.getConstant(0, N0.getValueType()), Cond); - if (N0.getOperand(1) == N1) { - if (DAG.isCommutativeBinOp(N0.getOpcode())) - return DAG.getSetCC(dl, VT, N0.getOperand(0), - DAG.getConstant(0, N0.getValueType()), Cond); - else if (N0.getNode()->hasOneUse()) { - assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!"); - // (Z-X) == X --> Z == X<<1 - SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), - N1, + // Don't do this if X is an immediate that can fold into a cmp + // instruction and X+Z has other uses. It could be an induction variable + // chain, and the transform would increase register pressure. + if (!LegalRHSImm || N0.getNode()->hasOneUse()) { + if (N0.getOperand(0) == N1) + return DAG.getSetCC(dl, VT, N0.getOperand(1), + DAG.getConstant(0, N0.getValueType()), Cond); + if (N0.getOperand(1) == N1) { + if (DAG.isCommutativeBinOp(N0.getOpcode())) + return DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(0, N0.getValueType()), Cond); + else if (N0.getNode()->hasOneUse()) { + assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!"); + // (Z-X) == X --> Z == X<<1 + SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N1, DAG.getConstant(1, getShiftAmountTy(N1.getValueType()))); - if (!DCI.isCalledByLegalizer()) - DCI.AddToWorklist(SH.getNode()); - return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(SH.getNode()); + return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond); + } } } } @@ -2984,7 +3021,6 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( /// is. static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { switch (CT) { - default: llvm_unreachable("Unknown constraint type!"); case TargetLowering::C_Other: case TargetLowering::C_Unknown: return 0; @@ -2995,6 +3031,7 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { case TargetLowering::C_Memory: return 3; } + llvm_unreachable("Invalid constraint type"); } /// Examine constraint type and operand type and determine a weight value. @@ -3242,8 +3279,9 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. See: /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> -SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, - std::vector<SDNode*>* Created) const { +SDValue TargetLowering:: +BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, + std::vector<SDNode*>* Created) const { EVT VT = N->getValueType(0); DebugLoc dl= N->getDebugLoc(); @@ -3258,10 +3296,12 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, // Multiply the numerator (operand 0) by the magic value // FIXME: We should support doing a MUL in a wider type SDValue Q; - if (isOperationLegalOrCustom(ISD::MULHS, VT)) + if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT) : + isOperationLegalOrCustom(ISD::MULHS, VT)) Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0), DAG.getConstant(magics.m, VT)); - else if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) + else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT) : + isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N->getOperand(0), DAG.getConstant(magics.m, VT)).getNode(), 1); @@ -3299,8 +3339,9 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. See: /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> -SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, - std::vector<SDNode*>* Created) const { +SDValue TargetLowering:: +BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, + std::vector<SDNode*>* Created) const { EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); @@ -3332,9 +3373,11 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, // Multiply the numerator (operand 0) by the magic value // FIXME: We should support doing a MUL in a wider type - if (isOperationLegalOrCustom(ISD::MULHU, VT)) + if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT) : + isOperationLegalOrCustom(ISD::MULHU, VT)) Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, VT)); - else if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) + else if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT) : + isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q, DAG.getConstant(magics.m, VT)).getNode(), 1); else |