Diffstat (limited to 'lib/CodeGen/SelectionDAG')
25 files changed, 2245 insertions, 1971 deletions
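Among the DAGCombiner changes below, the four select_cc spellings of integer absolute value are unified into a single combine that emits the shift/add/xor recipe quoted in the patch's own comment (Y = sra (X, size(X)-1); xor (add (X, Y), Y)). A minimal scalar sketch of that recipe, assuming a 32-bit int with arithmetic right shift; the function name is illustrative only:

// Branchless abs, matching the combine's output pattern.
int abs32(int x) {
  int y = x >> 31;     // y = sra(x, 31): 0 if x >= 0, -1 if x < 0
  return (x + y) ^ y;  // identity for x >= 0, negates x otherwise
}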
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index 0cfd5e1..799988a 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -1,5 +1,4 @@ add_llvm_library(LLVMSelectionDAG - CallingConvLower.cpp DAGCombiner.cpp FastISel.cpp FunctionLoweringInfo.cpp diff --git a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp deleted file mode 100644 index 4e6c1fc..0000000 --- a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp +++ /dev/null @@ -1,179 +0,0 @@ -//===-- CallingConvLower.cpp - Calling Conventions ------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the CCState class, used for lowering and implementing -// calling conventions. -// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetMachine.h" -using namespace llvm; - -CCState::CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &tm, - SmallVector<CCValAssign, 16> &locs, LLVMContext &C) - : CallingConv(CC), IsVarArg(isVarArg), TM(tm), - TRI(*TM.getRegisterInfo()), Locs(locs), Context(C) { - // No stack is used. - StackOffset = 0; - - UsedRegs.resize((TRI.getNumRegs()+31)/32); -} - -// HandleByVal - Allocate a stack slot large enough to pass an argument by -// value. The size and alignment information of the argument is encoded in its -// parameter attribute. -void CCState::HandleByVal(unsigned ValNo, EVT ValVT, - EVT LocVT, CCValAssign::LocInfo LocInfo, - int MinSize, int MinAlign, - ISD::ArgFlagsTy ArgFlags) { - unsigned Align = ArgFlags.getByValAlign(); - unsigned Size = ArgFlags.getByValSize(); - if (MinSize > (int)Size) - Size = MinSize; - if (MinAlign > (int)Align) - Align = MinAlign; - unsigned Offset = AllocateStack(Size, Align); - - addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); -} - -/// MarkAllocated - Mark a register and all of its aliases as allocated. -void CCState::MarkAllocated(unsigned Reg) { - UsedRegs[Reg/32] |= 1 << (Reg&31); - - if (const unsigned *RegAliases = TRI.getAliasSet(Reg)) - for (; (Reg = *RegAliases); ++RegAliases) - UsedRegs[Reg/32] |= 1 << (Reg&31); -} - -/// AnalyzeFormalArguments - Analyze an array of argument values, -/// incorporating info about the formals into this state. -void -CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins, - CCAssignFn Fn) { - unsigned NumArgs = Ins.size(); - - for (unsigned i = 0; i != NumArgs; ++i) { - EVT ArgVT = Ins[i].VT; - ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; - if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { -#ifndef NDEBUG - dbgs() << "Formal argument #" << i << " has unhandled type " - << ArgVT.getEVTString(); -#endif - llvm_unreachable(0); - } - } -} - -/// CheckReturn - Analyze the return values of a function, returning true if -/// the return can be performed without sret-demotion, and false otherwise. 
-bool CCState::CheckReturn(const SmallVectorImpl<EVT> &OutTys, - const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags, - CCAssignFn Fn) { - // Determine which register each value should be copied into. - for (unsigned i = 0, e = OutTys.size(); i != e; ++i) { - EVT VT = OutTys[i]; - ISD::ArgFlagsTy ArgFlags = ArgsFlags[i]; - if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) - return false; - } - return true; -} - -/// AnalyzeReturn - Analyze the returned values of a return, -/// incorporating info about the result values into this state. -void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, - CCAssignFn Fn) { - // Determine which register each value should be copied into. - for (unsigned i = 0, e = Outs.size(); i != e; ++i) { - EVT VT = Outs[i].Val.getValueType(); - ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; - if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) { -#ifndef NDEBUG - dbgs() << "Return operand #" << i << " has unhandled type " - << VT.getEVTString(); -#endif - llvm_unreachable(0); - } - } -} - - -/// AnalyzeCallOperands - Analyze the outgoing arguments to a call, -/// incorporating info about the passed values into this state. -void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs, - CCAssignFn Fn) { - unsigned NumOps = Outs.size(); - for (unsigned i = 0; i != NumOps; ++i) { - EVT ArgVT = Outs[i].Val.getValueType(); - ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; - if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { -#ifndef NDEBUG - dbgs() << "Call operand #" << i << " has unhandled type " - << ArgVT.getEVTString(); -#endif - llvm_unreachable(0); - } - } -} - -/// AnalyzeCallOperands - Same as above except it takes vectors of types -/// and argument flags. -void CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs, - SmallVectorImpl<ISD::ArgFlagsTy> &Flags, - CCAssignFn Fn) { - unsigned NumOps = ArgVTs.size(); - for (unsigned i = 0; i != NumOps; ++i) { - EVT ArgVT = ArgVTs[i]; - ISD::ArgFlagsTy ArgFlags = Flags[i]; - if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { -#ifndef NDEBUG - dbgs() << "Call operand #" << i << " has unhandled type " - << ArgVT.getEVTString(); -#endif - llvm_unreachable(0); - } - } -} - -/// AnalyzeCallResult - Analyze the return values of a call, -/// incorporating info about the passed values into this state. -void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, - CCAssignFn Fn) { - for (unsigned i = 0, e = Ins.size(); i != e; ++i) { - EVT VT = Ins[i].VT; - ISD::ArgFlagsTy Flags = Ins[i].Flags; - if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) { -#ifndef NDEBUG - dbgs() << "Call result #" << i << " has unhandled type " - << VT.getEVTString(); -#endif - llvm_unreachable(0); - } - } -} - -/// AnalyzeCallResult - Same as above except it's specialized for calls which -/// produce a single value. 
-void CCState::AnalyzeCallResult(EVT VT, CCAssignFn Fn) { - if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) { -#ifndef NDEBUG - dbgs() << "Call result has unhandled type " - << VT.getEVTString(); -#endif - llvm_unreachable(0); - } -} diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 6bddd78..e671752 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -211,6 +211,7 @@ namespace { SDValue visitBUILD_VECTOR(SDNode *N); SDValue visitCONCAT_VECTORS(SDNode *N); SDValue visitVECTOR_SHUFFLE(SDNode *N); + SDValue visitMEMBARRIER(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS); @@ -668,7 +669,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) : LD->getExtensionType(); Replace = true; - return DAG.getExtLoad(ExtType, dl, PVT, + return DAG.getExtLoad(ExtType, PVT, dl, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), LD->getSrcValueOffset(), MemVT, LD->isVolatile(), @@ -890,7 +891,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) : LD->getExtensionType(); - SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, + SDValue NewLD = DAG.getExtLoad(ExtType, PVT, dl, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), LD->getSrcValueOffset(), MemVT, LD->isVolatile(), @@ -1079,6 +1080,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N); case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); + case ISD::MEMBARRIER: return visitMEMBARRIER(N); } return SDValue(); } @@ -1313,7 +1315,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C && GA->getOpcode() == ISD::GlobalAddress) - return DAG.getGlobalAddress(GA->getGlobal(), VT, + return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT, GA->getOffset() + (uint64_t)N1C->getSExtValue()); // fold ((c1-A)+c2) -> (c1+c2)-A @@ -1550,7 +1552,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) { // fold (sub Sym, c) -> Sym-c if (N1C && GA->getOpcode() == ISD::GlobalAddress) - return DAG.getGlobalAddress(GA->getGlobal(), VT, + return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT, GA->getOffset() - (uint64_t)N1C->getSExtValue()); // fold (sub Sym+c1, Sym+c2) -> c1-c2 @@ -2028,7 +2030,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) - // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) + // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) // // do not sink logical op inside of a vector extend, since it may combine // into a vsetcc. @@ -2038,7 +2040,10 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // Avoid infinite looping with PromoteIntBinOp. 
(N0.getOpcode() == ISD::ANY_EXTEND && (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) || - (N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) && + (N0.getOpcode() == ISD::TRUNCATE && + (!TLI.isZExtFree(VT, Op0VT) || + !TLI.isTruncateFree(Op0VT, VT)) && + TLI.isTypeLegal(Op0VT))) && !VT.isVector() && Op0VT == N1.getOperand(0).getValueType() && (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) { @@ -2193,7 +2198,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, @@ -2216,7 +2221,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, @@ -2250,7 +2255,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; SDValue NewLoad = - DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, + DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), @@ -2286,7 +2291,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; SDValue Load = - DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, + DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(), LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), @@ -2317,7 +2322,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } // fold (or x, undef) -> -1 - if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) { + if (!LegalOperations && + (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) { EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); } @@ -2425,6 +2431,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc())) return SDValue(Rot, 0); + // Simplify the operands using demanded-bits information. + if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + return SDValue(); } @@ -3158,6 +3169,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return NewSRL; } + // Attempt to convert a srl of a load into a narrower zero-extending load. + SDValue NarrowLoad = ReduceLoadWidth(N); + if (NarrowLoad.getNode()) + return NarrowLoad; + // Here is a common situation. We want to optimize: // // %a = ... 
@@ -3487,7 +3503,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), @@ -3531,7 +3547,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) { - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, @@ -3557,24 +3573,24 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // we know that the element size of the sext'd result matches the // element size of the compare operands. if (VT.getSizeInBits() == N0VT.getSizeInBits()) - return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), - N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then // truncate/sign extend else { - EVT MatchingElementType = - EVT::getIntegerVT(*DAG.getContext(), - N0VT.getScalarType().getSizeInBits()); - EVT MatchingVectorType = - EVT::getVectorVT(*DAG.getContext(), MatchingElementType, - N0VT.getVectorNumElements()); - SDValue VsetCC = - DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), - N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + EVT MatchingElementType = + EVT::getIntegerVT(*DAG.getContext(), + N0VT.getScalarType().getSizeInBits()); + EVT MatchingVectorType = + EVT::getVectorVT(*DAG.getContext(), MatchingElementType, + N0VT.getVectorNumElements()); + SDValue VsetCC = + DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); } } @@ -3635,10 +3651,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (truncate x)) -> (and x, mask) if (N0.getOpcode() == ISD::TRUNCATE && - (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) && - (!TLI.isTruncateFree(N0.getOperand(0).getValueType(), - N0.getValueType()) || - !TLI.isZExtFree(N0.getValueType(), VT))) { + (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) { SDValue Op = N0.getOperand(0); if (Op.getValueType().bitsLT(VT)) { Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op); @@ -3679,7 +3692,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), @@ -3723,7 +3736,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || 
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, @@ -3881,7 +3894,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), @@ -3925,8 +3938,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { N0.hasOneUse()) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); EVT MemVT = LN0->getMemoryVT(); - SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(), - VT, LN0->getChain(), LN0->getBasePtr(), + SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), VT, + N->getDebugLoc(), + LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, LN0->isVolatile(), LN0->isNonTemporal(), @@ -3950,24 +3964,24 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // we know that the element size of the sext'd result matches the // element size of the compare operands. if (VT.getSizeInBits() == N0VT.getSizeInBits()) - return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), - N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then // truncate/sign extend else { - EVT MatchingElementType = - EVT::getIntegerVT(*DAG.getContext(), - N0VT.getScalarType().getSizeInBits()); - EVT MatchingVectorType = - EVT::getVectorVT(*DAG.getContext(), MatchingElementType, - N0VT.getVectorNumElements()); - SDValue VsetCC = - DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), - N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + EVT MatchingElementType = + EVT::getIntegerVT(*DAG.getContext(), + N0VT.getScalarType().getSizeInBits()); + EVT MatchingVectorType = + EVT::getVectorVT(*DAG.getContext(), MatchingElementType, + N0VT.getVectorNumElements()); + SDValue VsetCC = + DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); } } @@ -4024,6 +4038,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { /// extended, also fold the extension to form an extending load. SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { unsigned Opc = N->getOpcode(); + ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -4040,6 +4055,15 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT(); if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT)) return SDValue(); + } else if (Opc == ISD::SRL) { + // Another special-case: SRL is basically zero-extending a narrower + // value.
+ ExtType = ISD::ZEXTLOAD; + N0 = SDValue(N, 0); + ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (!N01) return SDValue(); + ExtVT = EVT::getIntegerVT(*DAG.getContext(), + VT.getSizeInBits() - N01->getZExtValue()); } unsigned EVTBits = ExtVT.getSizeInBits(); @@ -4085,7 +4109,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { ? DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, LN0->isVolatile(), LN0->isNonTemporal(), NewAlign) - : DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(), NewPtr, + : DAG.getExtLoad(ExtType, VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), NewAlign); @@ -4172,7 +4196,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), EVT, @@ -4189,7 +4213,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), EVT, @@ -4243,8 +4267,17 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // fold (truncate (load x)) -> (smaller load x) // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) - if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) - return ReduceLoadWidth(N); + if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) { + SDValue Reduced = ReduceLoadWidth(N); + if (Reduced.getNode()) + return Reduced; + } + + // Simplify the operands using demanded-bits information. 
+ if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + return SDValue(); } @@ -4943,7 +4976,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), @@ -5527,8 +5560,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > LD->getAlignment()) - return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), - LD->getValueType(0), + return DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0), + N->getDebugLoc(), Chain, Ptr, LD->getSrcValue(), LD->getSrcValueOffset(), LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), Align); @@ -5551,8 +5584,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); } else { - ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), - LD->getValueType(0), + ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0), + LD->getDebugLoc(), BetterChain, Ptr, LD->getSrcValue(), LD->getSrcValueOffset(), LD->getMemoryVT(), @@ -6077,7 +6110,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // Check if the result type doesn't match the inserted element type. A // SCALAR_TO_VECTOR may truncate the inserted element and the // EXTRACT_VECTOR_ELT may widen the extracted vector. - EVT EltVT = InVec.getValueType().getVectorElementType(); SDValue InOp = InVec.getOperand(0); EVT NVT = N->getValueType(0); if (InOp.getValueType() != NVT) { @@ -6277,8 +6309,6 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { } SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { - return SDValue(); - EVT VT = N->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); @@ -6334,6 +6364,59 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitMEMBARRIER(SDNode* N) { + if (!TLI.getShouldFoldAtomicFences()) + return SDValue(); + + SDValue atomic = N->getOperand(0); + switch (atomic.getOpcode()) { + case ISD::ATOMIC_CMP_SWAP: + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + break; + default: + return SDValue(); + } + + SDValue fence = atomic.getOperand(0); + if (fence.getOpcode() != ISD::MEMBARRIER) + return SDValue(); + + switch (atomic.getOpcode()) { + case ISD::ATOMIC_CMP_SWAP: + return SDValue(DAG.UpdateNodeOperands(atomic.getNode(), + fence.getOperand(0), + atomic.getOperand(1), atomic.getOperand(2), + atomic.getOperand(3)), atomic.getResNo()); + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + return SDValue(DAG.UpdateNodeOperands(atomic.getNode(), + fence.getOperand(0), + 
atomic.getOperand(1), atomic.getOperand(2)), + atomic.getResNo()); + default: + return SDValue(); + } +} + /// XformToShuffleWithZero - Returns a vector_shuffle if it is able to transform /// an AND to a vector_shuffle with the destination vector and a zero vector. /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> @@ -6565,8 +6648,8 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, LLD->getAlignment()); } else { Load = DAG.getExtLoad(LLD->getExtensionType(), - TheSelect->getDebugLoc(), TheSelect->getValueType(0), + TheSelect->getDebugLoc(), LLD->getChain(), Addr, 0, 0, LLD->getMemoryVT(), LLD->isVolatile(), @@ -6807,38 +6890,34 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, } } - // Check to see if this is an integer abs. select_cc setl[te] X, 0, -X, X -> + // Check to see if this is an integer abs. + // select_cc setg[te] X, 0, X, -X -> + // select_cc setgt X, -1, X, -X -> + // select_cc setl[te] X, 0, -X, X -> + // select_cc setlt X, 1, -X, X -> // Y = sra (X, size(X)-1); xor (add (X, Y), Y) - if (N1C && N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE) && - N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1) && - N2.getOperand(0) == N1 && N0.getValueType().isInteger()) { + if (N1C) { + ConstantSDNode *SubC = NULL; + if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) || + (N1C->isAllOnesValue() && CC == ISD::SETGT)) && + N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) + SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0)); + else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) || + (N1C->isOne() && CC == ISD::SETLT)) && + N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1)) + SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0)); + EVT XType = N0.getValueType(); - SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0, - DAG.getConstant(XType.getSizeInBits()-1, - getShiftAmountTy())); - SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), XType, - N0, Shift); - AddToWorkList(Shift.getNode()); - AddToWorkList(Add.getNode()); - return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); - } - // Check to see if this is an integer abs.
select_cc setgt X, -1, X, -X -> - // Y = sra (X, size(X)-1); xor (add (X, Y), Y) - if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT && - N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) { - if (ConstantSDNode *SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0))) { - EVT XType = N0.getValueType(); - if (SubC->isNullValue() && XType.isInteger()) { - SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, - N0, - DAG.getConstant(XType.getSizeInBits()-1, - getShiftAmountTy())); - SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), - XType, N0, Shift); - AddToWorkList(Shift.getNode()); - AddToWorkList(Add.getNode()); - return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); - } + if (SubC && SubC->isNullValue() && XType.isInteger()) { + SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, + N0, + DAG.getConstant(XType.getSizeInBits()-1, + getShiftAmountTy())); + SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), + XType, N0, Shift); + AddToWorkList(Shift.getNode()); + AddToWorkList(Add.getNode()); + return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); } } diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 95f4d07..3f7e4a5 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -44,18 +44,38 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Analysis/DebugInfo.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/ErrorHandling.h" -#include "FunctionLoweringInfo.h" using namespace llvm; +/// startNewBlock - Set the current block to which generated machine +/// instructions will be appended, and clear the local CSE map. +/// +void FastISel::startNewBlock() { + LocalValueMap.clear(); + + // Start out as null, meaning no local-value instructions have + // been emitted. + LastLocalValue = 0; + + // Advance the last local value past any EH_LABEL instructions. + MachineBasicBlock::iterator + I = FuncInfo.MBB->begin(), E = FuncInfo.MBB->end(); + while (I != E && I->getOpcode() == TargetOpcode::EH_LABEL) { + LastLocalValue = I; + ++I; + } +} + bool FastISel::hasTrivialKill(const Value *V) const { // Don't consider constants or arguments to have trivial kills. const Instruction *I = dyn_cast<Instruction>(V); @@ -99,25 +119,31 @@ unsigned FastISel::getRegForValue(const Value *V) { // cache values defined by Instructions across blocks, and other values // only locally. This is because Instructions already have the SSA // def-dominates-use requirement enforced. - DenseMap<const Value *, unsigned>::iterator I = ValueMap.find(V); - if (I != ValueMap.end()) - return I->second; + DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V); + if (I != FuncInfo.ValueMap.end()) { + unsigned Reg = I->second; + return Reg; + } unsigned Reg = LocalValueMap[V]; if (Reg != 0) return Reg; // In bottom-up mode, just create the virtual register which will be used // to hold the value. It will be materialized later.
- if (IsBottomUp) { - Reg = createResultReg(TLI.getRegClassFor(VT)); - if (isa<Instruction>(V)) - ValueMap[V] = Reg; - else - LocalValueMap[V] = Reg; - return Reg; - } + if (isa<Instruction>(V) && + (!isa<AllocaInst>(V) || + !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V)))) + return FuncInfo.InitializeRegForValue(V); + + MachineBasicBlock::iterator SaveInsertPt = enterLocalValueArea(); + + // Materialize the value in a register. Emit any instructions in the + // local value area. + Reg = materializeRegForValue(V, VT); - return materializeRegForValue(V, VT); + leaveLocalValueArea(SaveInsertPt); + + return Reg; } /// materializeRegForValue - Helper for getRegForValue. This function is @@ -161,11 +187,15 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { } } } else if (const Operator *Op = dyn_cast<Operator>(V)) { - if (!SelectOperator(Op, Op->getOpcode())) return 0; - Reg = LocalValueMap[Op]; + if (!SelectOperator(Op, Op->getOpcode())) + if (!isa<Instruction>(Op) || + !TargetSelectInstruction(cast<Instruction>(Op))) + return 0; + Reg = lookUpRegForValue(Op); } else if (isa<UndefValue>(V)) { Reg = createResultReg(TLI.getRegClassFor(VT)); - BuildMI(MBB, DL, TII.get(TargetOpcode::IMPLICIT_DEF), Reg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::IMPLICIT_DEF), Reg); } // If target-independent code couldn't handle the value, give target-specific @@ -175,8 +205,10 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { // Don't cache constant materializations in the general ValueMap. // To do so would require tracking what uses they dominate. - if (Reg != 0) + if (Reg != 0) { LocalValueMap[V] = Reg; + LastLocalValue = MRI.getVRegDef(Reg); + } return Reg; } @@ -185,8 +217,9 @@ unsigned FastISel::lookUpRegForValue(const Value *V) { // cache values defined by Instructions across blocks, and other values // only locally. This is because Instructions already have the SSA // def-dominates-use requirement enforced. - if (ValueMap.count(V)) - return ValueMap[V]; + DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V); + if (I != FuncInfo.ValueMap.end()) + return I->second; return LocalValueMap[V]; } @@ -202,14 +235,17 @@ unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) { return Reg; } - unsigned &AssignedReg = ValueMap[I]; + unsigned &AssignedReg = FuncInfo.ValueMap[I]; if (AssignedReg == 0) + // Use the new register. AssignedReg = Reg; else if (Reg != AssignedReg) { - const TargetRegisterClass *RegClass = MRI.getRegClass(Reg); - TII.copyRegToReg(*MBB, MBB->end(), AssignedReg, - Reg, RegClass, RegClass, DL); + // Arrange for uses of AssignedReg to be replaced by uses of Reg. + FuncInfo.RegFixups[AssignedReg] = Reg; + + AssignedReg = Reg; } + return AssignedReg; } @@ -237,6 +273,33 @@ std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) { return std::pair<unsigned, bool>(IdxN, IdxNIsKill); } +void FastISel::recomputeInsertPt() { + if (getLastLocalValue()) { + FuncInfo.InsertPt = getLastLocalValue(); + ++FuncInfo.InsertPt; + } else + FuncInfo.InsertPt = FuncInfo.MBB->getFirstNonPHI(); + + // Now skip past any EH_LABELs, which must remain at the beginning.
+ while (FuncInfo.InsertPt != FuncInfo.MBB->end() && + FuncInfo.InsertPt->getOpcode() == TargetOpcode::EH_LABEL) + ++FuncInfo.InsertPt; +} + +MachineBasicBlock::iterator FastISel::enterLocalValueArea() { + MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt; + recomputeInsertPt(); + return OldInsertPt; +} + +void FastISel::leaveLocalValueArea(MachineBasicBlock::iterator OldInsertPt) { + if (FuncInfo.InsertPt != FuncInfo.MBB->begin()) + LastLocalValue = llvm::prior(FuncInfo.InsertPt); + + // Restore the previous insert position. + FuncInfo.InsertPt = OldInsertPt; +} + /// SelectBinaryOp - Select and emit code for a binary operator instruction, /// which has an opcode which directly corresponds to the given ISD opcode. /// @@ -345,7 +408,7 @@ bool FastISel::SelectGetElementPtr(const User *I) { // If this is a constant subscript, handle it quickly. if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { - if (CI->getZExtValue() == 0) continue; + if (CI->isZero()) continue; uint64_t Offs = TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT); @@ -395,7 +458,7 @@ bool FastISel::SelectCall(const User *I) { case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast<DbgDeclareInst>(I); if (!DIVariable(DI->getVariable()).Verify() || - !MF.getMMI().hasDebugInfo()) + !FuncInfo.MF->getMMI().hasDebugInfo()) return true; const Value *Address = DI->getAddress(); @@ -409,11 +472,12 @@ bool FastISel::SelectCall(const User *I) { // those are handled in SelectionDAGBuilder. if (AI) { DenseMap<const AllocaInst*, int>::iterator SI = - StaticAllocaMap.find(AI); - if (SI == StaticAllocaMap.end()) break; // VLAs. + FuncInfo.StaticAllocaMap.find(AI); + if (SI == FuncInfo.StaticAllocaMap.end()) break; // VLAs. int FI = SI->second; if (!DI->getDebugLoc().isUnknown()) - MF.getMMI().setVariableDbgInfo(DI->getVariable(), FI, DI->getDebugLoc()); + FuncInfo.MF->getMMI().setVariableDbgInfo(DI->getVariable(), + FI, DI->getDebugLoc()); } else // Building the map above is target independent. Generating DBG_VALUE // inline is target dependent; do this now. @@ -428,23 +492,28 @@ bool FastISel::SelectCall(const User *I) { if (!V) { // Currently the optimizer can produce this; insert an undef to // help debugging. Probably the optimizer should not do this. - BuildMI(MBB, DL, II).addReg(0U).addImm(DI->getOffset()). - addMetadata(DI->getVariable()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(0U).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { - BuildMI(MBB, DL, II).addImm(CI->getZExtValue()).addImm(DI->getOffset()). - addMetadata(DI->getVariable()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addImm(CI->getZExtValue()).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { - BuildMI(MBB, DL, II).addFPImm(CF).addImm(DI->getOffset()). - addMetadata(DI->getVariable()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addFPImm(CF).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); } else if (unsigned Reg = lookUpRegForValue(V)) { - BuildMI(MBB, DL, II).addReg(Reg, RegState::Debug).addImm(DI->getOffset()). 
- addMetadata(DI->getVariable()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Reg, RegState::Debug).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. // Insert an undef so we can see what we dropped. - BuildMI(MBB, DL, II).addReg(0U).addImm(DI->getOffset()). - addMetadata(DI->getVariable()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(0U).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); } return true; } @@ -453,14 +522,13 @@ bool FastISel::SelectCall(const User *I) { switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) { default: break; case TargetLowering::Expand: { - assert(MBB->isLandingPad() && "Call to eh.exception not in landing pad!"); + assert(FuncInfo.MBB->isLandingPad() && + "Call to eh.exception not in landing pad!"); unsigned Reg = TLI.getExceptionAddressRegister(); const TargetRegisterClass *RC = TLI.getRegClassFor(VT); unsigned ResultReg = createResultReg(RC); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - Reg, RC, RC, DL); - assert(InsertedCopy && "Can't copy address registers!"); - InsertedCopy = InsertedCopy; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(Reg); UpdateValueMap(I, ResultReg); return true; } @@ -472,25 +540,23 @@ bool FastISel::SelectCall(const User *I) { switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) { default: break; case TargetLowering::Expand: { - if (MBB->isLandingPad()) - AddCatchInfo(*cast<CallInst>(I), &MF.getMMI(), MBB); + if (FuncInfo.MBB->isLandingPad()) + AddCatchInfo(*cast<CallInst>(I), &FuncInfo.MF->getMMI(), FuncInfo.MBB); else { #ifndef NDEBUG - CatchInfoLost.insert(cast<CallInst>(I)); + FuncInfo.CatchInfoLost.insert(cast<CallInst>(I)); #endif // FIXME: Mark exception selector register as live in. Hack for PR1508. unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) MBB->addLiveIn(Reg); + if (Reg) FuncInfo.MBB->addLiveIn(Reg); } unsigned Reg = TLI.getExceptionSelectorRegister(); EVT SrcVT = TLI.getPointerTy(); const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT); unsigned ResultReg = createResultReg(RC); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, Reg, - RC, RC, DL); - assert(InsertedCopy && "Can't copy address registers!"); - InsertedCopy = InsertedCopy; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(Reg); bool ResultRegIsKill = hasTrivialKill(I); @@ -605,12 +671,12 @@ bool FastISel::SelectBitCast(const User *I) { if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) { TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT); TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT); - ResultReg = createResultReg(DstClass); - - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - Op0, DstClass, SrcClass, DL); - if (!InsertedCopy) - ResultReg = 0; + // Don't attempt a cross-class copy. It will likely fail. + if (SrcClass == DstClass) { + ResultReg = createResultReg(DstClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(Op0); + } } // If the reg-reg copy failed, select a BIT_CONVERT opcode. @@ -655,14 +721,15 @@ FastISel::SelectInstruction(const Instruction *I) { /// unless it is the immediate (fall-through) successor, and update /// the CFG. 
void -FastISel::FastEmitBranch(MachineBasicBlock *MSucc) { - if (MBB->isLayoutSuccessor(MSucc)) { +FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) { + if (FuncInfo.MBB->isLayoutSuccessor(MSucc)) { // The unconditional fall-through case, which needs no instructions. } else { // The unconditional branch case. - TII.InsertBranch(*MBB, MSucc, NULL, SmallVector<MachineOperand, 0>()); + TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL, + SmallVector<MachineOperand, 0>(), DL); } - MBB->addSuccessor(MSucc); + FuncInfo.MBB->addSuccessor(MSucc); } /// SelectFNeg - Emit an FNeg operation. @@ -712,8 +779,39 @@ FastISel::SelectFNeg(const User *I) { } bool +FastISel::SelectLoad(const User *I) { + LoadInst *LI = const_cast<LoadInst *>(cast<LoadInst>(I)); + + // For a load from an alloca, make a limited effort to find the value + // already available in a register, avoiding redundant loads. + if (!LI->isVolatile() && isa<AllocaInst>(LI->getPointerOperand())) { + BasicBlock::iterator ScanFrom = LI; + if (const Value *V = FindAvailableLoadedValue(LI->getPointerOperand(), + LI->getParent(), ScanFrom)) { + if (!V->use_empty() && + (!isa<Instruction>(V) || + cast<Instruction>(V)->getParent() == LI->getParent() || + (isa<AllocaInst>(V) && + FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V)))) && + (!isa<Argument>(V) || + LI->getParent() == &LI->getParent()->getParent()->getEntryBlock())) { + unsigned ResultReg = getRegForValue(V); + if (ResultReg != 0) { + UpdateValueMap(I, ResultReg); + return true; + } + } + } + } + + return false; +} + +bool FastISel::SelectOperator(const User *I, unsigned Opcode) { switch (Opcode) { + case Instruction::Load: + return SelectLoad(I); case Instruction::Add: return SelectBinaryOp(I, ISD::ADD); case Instruction::FAdd: @@ -762,8 +860,8 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { if (BI->isUnconditional()) { const BasicBlock *LLVMSucc = BI->getSuccessor(0); - MachineBasicBlock *MSucc = MBBMap[LLVMSucc]; - FastEmitBranch(MSucc); + MachineBasicBlock *MSucc = FuncInfo.MBBMap[LLVMSucc]; + FastEmitBranch(MSucc, BI->getDebugLoc()); return true; } @@ -778,7 +876,7 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { case Instruction::Alloca: // FunctionLowering has the static-sized case covered. - if (StaticAllocaMap.count(cast<AllocaInst>(I))) + if (FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(I))) return true; // Dynamic-sized alloca is not handled yet. 
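The FastISel rework above maintains two insertion points per block: a local-value area at the top for re-usable materializations (managed by enterLocalValueArea and leaveLocalValueArea) and the normal point where instructions for user code are appended. A self-contained analogue of that save/emit-up-front/restore discipline, using a std::list of ints in place of a MachineBasicBlock; everything here is illustrative, and none of these names come from the patch:

#include <iostream>
#include <list>

int main() {
  std::list<int> block = {100, 200};             // "instructions" already emitted
  std::list<int>::iterator savePt = block.end(); // save the normal insert point
  block.insert(block.begin(), 1);                // materialize a "constant" up front
  block.insert(savePt, 300);                     // restored point: ordinary append
  for (int v : block) std::cout << v << ' ';     // prints: 1 100 200 300
  std::cout << '\n';
}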
@@ -824,32 +922,16 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { } } -FastISel::FastISel(MachineFunction &mf, - DenseMap<const Value *, unsigned> &vm, - DenseMap<const BasicBlock *, MachineBasicBlock *> &bm, - DenseMap<const AllocaInst *, int> &am, - std::vector<std::pair<MachineInstr*, unsigned> > &pn -#ifndef NDEBUG - , SmallSet<const Instruction *, 8> &cil -#endif - ) - : MBB(0), - ValueMap(vm), - MBBMap(bm), - StaticAllocaMap(am), - PHINodesToUpdate(pn), -#ifndef NDEBUG - CatchInfoLost(cil), -#endif - MF(mf), - MRI(MF.getRegInfo()), - MFI(*MF.getFrameInfo()), - MCP(*MF.getConstantPool()), - TM(MF.getTarget()), +FastISel::FastISel(FunctionLoweringInfo &funcInfo) + : FuncInfo(funcInfo), + MRI(FuncInfo.MF->getRegInfo()), + MFI(*FuncInfo.MF->getFrameInfo()), + MCP(*FuncInfo.MF->getConstantPool()), + TM(FuncInfo.MF->getTarget()), TD(*TM.getTargetData()), TII(*TM.getInstrInfo()), TLI(*TM.getTargetLowering()), - IsBottomUp(false) { + TRI(*TM.getRegisterInfo()) { } FastISel::~FastISel() {} @@ -978,7 +1060,7 @@ unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const TargetInstrDesc &II = TII.get(MachineInstOpcode); - BuildMI(MBB, DL, II, ResultReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg); return ResultReg; } @@ -989,13 +1071,13 @@ unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg).addReg(Op0, Op0IsKill * RegState::Kill); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill); else { - BuildMI(MBB, DL, II).addReg(Op0, Op0IsKill * RegState::Kill); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC, DL); - if (!InsertedCopy) - ResultReg = 0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; @@ -1009,17 +1091,15 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill); else { - BuildMI(MBB, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC, DL); - if (!InsertedCopy) - ResultReg = 0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1032,17 +1112,15 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addImm(Imm); else { - BuildMI(MBB, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addImm(Imm); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC, DL); - if (!InsertedCopy) - ResultReg = 0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 
TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1055,17 +1133,15 @@ unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addFPImm(FPImm); else { - BuildMI(MBB, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addFPImm(FPImm); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC, DL); - if (!InsertedCopy) - ResultReg = 0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1079,19 +1155,17 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addImm(Imm); else { - BuildMI(MBB, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addImm(Imm); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC, DL); - if (!InsertedCopy) - ResultReg = 0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1103,13 +1177,11 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg).addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm); else { - BuildMI(MBB, DL, II).addImm(Imm); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC, DL); - if (!InsertedCopy) - ResultReg = 0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1117,24 +1189,12 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill, uint32_t Idx) { - const TargetRegisterClass* RC = MRI.getRegClass(Op0); - unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); - const TargetInstrDesc &II = TII.get(TargetOpcode::EXTRACT_SUBREG); - - if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addImm(Idx); - else { - BuildMI(MBB, DL, II) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addImm(Idx); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC, DL); - if (!InsertedCopy) - ResultReg = 0; - } + assert(TargetRegisterInfo::isVirtualRegister(Op0) && + "Cannot yet extract from physregs"); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(TargetOpcode::COPY), ResultReg) + .addReg(Op0, getKillRegState(Op0IsKill), Idx); return ResultReg; } @@ -1154,14 +1214,14 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { const TerminatorInst *TI = LLVMBB->getTerminator(); SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; - unsigned 
OrigNumPHINodesToUpdate = PHINodesToUpdate.size(); + unsigned OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size(); // Check successor nodes' PHI nodes that expect a constant to be available // from this block. for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { const BasicBlock *SuccBB = TI->getSuccessor(succ); if (!isa<PHINode>(SuccBB->begin())) continue; - MachineBasicBlock *SuccMBB = MBBMap[SuccBB]; + MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB]; // If this terminator has multiple identical successors (common for // switches), only handle each succ once. @@ -1182,7 +1242,7 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // by bailing out early, we may leave behind some dead instructions, // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its // own moves. Second, this check is necessary because FastISel doesn't - // use CreateRegForValue to create registers, so it always creates + // use CreateRegs to create registers, so it always creates // exactly one register for each non-void instruction. EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { @@ -1190,7 +1250,7 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { if (VT == MVT::i1) VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT); else { - PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); + FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); return false; } } @@ -1205,10 +1265,10 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { unsigned Reg = getRegForValue(PHIOp); if (Reg == 0) { - PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); + FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); return false; } - PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); + FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); DL = DebugLoc(); } } diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 65c36c1..928e1ec 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -13,7 +13,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "function-lowering-info" -#include "FunctionLoweringInfo.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/Instructions.h" @@ -30,7 +30,6 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/Debug.h" @@ -47,9 +46,11 @@ static bool isUsedOutsideOfDefiningBlock(const Instruction *I) { if (isa<PHINode>(I)) return true; const BasicBlock *BB = I->getParent(); for (Value::const_use_iterator UI = I->use_begin(), E = I->use_end(); - UI != E; ++UI) - if (cast<Instruction>(*UI)->getParent() != BB || isa<PHINode>(*UI)) + UI != E; ++UI) { + const User *U = *UI; + if (cast<Instruction>(U)->getParent() != BB || isa<PHINode>(U)) return true; + } return false; } @@ -64,9 +65,11 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool EnableFastISel) { const BasicBlock *Entry = A->getParent()->begin(); for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end(); - UI != E; ++UI) - if (cast<Instruction>(*UI)->getParent() != Entry
|| isa<SwitchInst>(*UI)) + UI != E; ++UI) { + const User *U = *UI; + if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U)) return false; // Use not in entry block. + } return true; } @@ -74,12 +77,18 @@ FunctionLoweringInfo::FunctionLoweringInfo(const TargetLowering &tli) : TLI(tli) { } -void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, - bool EnableFastISel) { +void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { Fn = &fn; MF = &mf; RegInfo = &MF->getRegInfo(); + // Check whether the function can return without sret-demotion. + SmallVector<ISD::OutputArg, 4> Outs; + GetReturnInfo(Fn->getReturnType(), + Fn->getAttributes().getRetAttributes(), Outs, TLI); + CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), Fn->isVarArg(), + Outs, Fn->getContext()); + // Create a vreg for each argument register that is not dead and is used // outside of the entry block for the function. for (Function::const_arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end(); @@ -172,31 +181,33 @@ void FunctionLoweringInfo::clear() { #endif LiveOutRegInfo.clear(); ArgDbgValues.clear(); + RegFixups.clear(); } -unsigned FunctionLoweringInfo::MakeReg(EVT VT) { +/// CreateReg - Allocate a single virtual register for the given type. +unsigned FunctionLoweringInfo::CreateReg(EVT VT) { return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT)); } -/// CreateRegForValue - Allocate the appropriate number of virtual registers of +/// CreateRegs - Allocate the appropriate number of virtual registers of /// the correctly promoted or expanded types. Assign these registers /// consecutive vreg numbers and return the first assigned number. /// /// In the case that the given value has struct or array type, this function /// will assign registers for each member or element. /// -unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) { +unsigned FunctionLoweringInfo::CreateRegs(const Type *Ty) { SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, V->getType(), ValueVTs); + ComputeValueVTs(TLI, Ty, ValueVTs); unsigned FirstReg = 0; for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { EVT ValueVT = ValueVTs[Value]; - EVT RegisterVT = TLI.getRegisterType(V->getContext(), ValueVT); + EVT RegisterVT = TLI.getRegisterType(Ty->getContext(), ValueVT); - unsigned NumRegs = TLI.getNumRegisters(V->getContext(), ValueVT); + unsigned NumRegs = TLI.getNumRegisters(Ty->getContext(), ValueVT); for (unsigned i = 0; i != NumRegs; ++i) { - unsigned R = MakeReg(RegisterVT); + unsigned R = CreateReg(RegisterVT); if (!FirstReg) FirstReg = R; } } @@ -208,7 +219,7 @@ unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) { void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, MachineBasicBlock *MBB) { // Inform the MachineModuleInfo of the personality for this landing pad. - const ConstantExpr *CE = cast<ConstantExpr>(I.getOperand(2)); + const ConstantExpr *CE = cast<ConstantExpr>(I.getArgOperand(1)); assert(CE->getOpcode() == Instruction::BitCast && isa<Function>(CE->getOperand(0)) && "Personality should be a function"); @@ -217,18 +228,18 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, // Gather all the type infos for this landing pad and pass them along to // MachineModuleInfo. 
std::vector<const GlobalVariable *> TyInfo; - unsigned N = I.getNumOperands(); + unsigned N = I.getNumArgOperands(); - for (unsigned i = N - 1; i > 2; --i) { - if (const ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(i))) { + for (unsigned i = N - 1; i > 1; --i) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(i))) { unsigned FilterLength = CI->getZExtValue(); unsigned FirstCatch = i + FilterLength + !FilterLength; - assert (FirstCatch <= N && "Invalid filter length"); + assert(FirstCatch <= N && "Invalid filter length"); if (FirstCatch < N) { TyInfo.reserve(N - FirstCatch); for (unsigned j = FirstCatch; j < N; ++j) - TyInfo.push_back(ExtractTypeInfo(I.getOperand(j))); + TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j))); MMI->addCatchTypeInfo(MBB, TyInfo); TyInfo.clear(); } @@ -240,7 +251,7 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, // Filter. TyInfo.reserve(FilterLength - 1); for (unsigned j = i + 1; j < FirstCatch; ++j) - TyInfo.push_back(ExtractTypeInfo(I.getOperand(j))); + TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j))); MMI->addFilterTypeInfo(MBB, TyInfo); TyInfo.clear(); } @@ -249,10 +260,10 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, } } - if (N > 3) { - TyInfo.reserve(N - 3); - for (unsigned j = 3; j < N; ++j) - TyInfo.push_back(ExtractTypeInfo(I.getOperand(j))); + if (N > 2) { + TyInfo.reserve(N - 2); + for (unsigned j = 2; j < N; ++j) + TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j))); MMI->addCatchTypeInfo(MBB, TyInfo); } } diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h deleted file mode 100644 index 4067a5b..0000000 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h +++ /dev/null @@ -1,144 +0,0 @@ -//===-- FunctionLoweringInfo.h - Lower functions from LLVM IR to CodeGen --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This implements routines for translating functions from LLVM IR into -// Machine IR. -// -//===----------------------------------------------------------------------===// - -#ifndef FUNCTIONLOWERINGINFO_H -#define FUNCTIONLOWERINGINFO_H - -#include "llvm/InlineAsm.h" -#include "llvm/Instructions.h" -#include "llvm/ADT/APInt.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" -#ifndef NDEBUG -#include "llvm/ADT/SmallSet.h" -#endif -#include "llvm/CodeGen/ValueTypes.h" -#include "llvm/CodeGen/ISDOpcodes.h" -#include "llvm/Support/CallSite.h" -#include <vector> - -namespace llvm { - -class AllocaInst; -class BasicBlock; -class CallInst; -class Function; -class GlobalVariable; -class Instruction; -class MachineInstr; -class MachineBasicBlock; -class MachineFunction; -class MachineModuleInfo; -class MachineRegisterInfo; -class TargetLowering; -class Value; - -//===--------------------------------------------------------------------===// -/// FunctionLoweringInfo - This contains information that is global to a -/// function that is used when lowering a region of the function. -/// -class FunctionLoweringInfo { -public: - const TargetLowering &TLI; - const Function *Fn; - MachineFunction *MF; - MachineRegisterInfo *RegInfo; - - /// CanLowerReturn - true iff the function's return value can be lowered to - /// registers. 
- bool CanLowerReturn; - - /// DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg - /// allocated to hold a pointer to the hidden sret parameter. - unsigned DemoteRegister; - - /// MBBMap - A mapping from LLVM basic blocks to their machine code entry. - DenseMap<const BasicBlock*, MachineBasicBlock *> MBBMap; - - /// ValueMap - Since we emit code for the function a basic block at a time, - /// we must remember which virtual registers hold the values for - /// cross-basic-block values. - DenseMap<const Value*, unsigned> ValueMap; - - /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in - /// the entry block. This allows the allocas to be efficiently referenced - /// anywhere in the function. - DenseMap<const AllocaInst*, int> StaticAllocaMap; - - /// ArgDbgValues - A list of DBG_VALUE instructions created during isel for - /// function arguments that are inserted after scheduling is completed. - SmallVector<MachineInstr*, 8> ArgDbgValues; - -#ifndef NDEBUG - SmallSet<const Instruction *, 8> CatchInfoLost; - SmallSet<const Instruction *, 8> CatchInfoFound; -#endif - - struct LiveOutInfo { - unsigned NumSignBits; - APInt KnownOne, KnownZero; - LiveOutInfo() : NumSignBits(0), KnownOne(1, 0), KnownZero(1, 0) {} - }; - - /// LiveOutRegInfo - Information about live out vregs, indexed by their - /// register number offset by 'FirstVirtualRegister'. - std::vector<LiveOutInfo> LiveOutRegInfo; - - /// PHINodesToUpdate - A list of phi instructions whose operand list will - /// be updated after processing the current basic block. - /// TODO: This isn't per-function state, it's per-basic-block state. But - /// there's no other convenient place for it to live right now. - std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate; - - explicit FunctionLoweringInfo(const TargetLowering &TLI); - - /// set - Initialize this FunctionLoweringInfo with the given Function - /// and its associated MachineFunction. - /// - void set(const Function &Fn, MachineFunction &MF, bool EnableFastISel); - - /// clear - Clear out all the function-specific state. This returns this - /// FunctionLoweringInfo to an empty state, ready to be used for a - /// different function. - void clear(); - - unsigned MakeReg(EVT VT); - - /// isExportedInst - Return true if the specified value is an instruction - /// exported from its block. - bool isExportedInst(const Value *V) { - return ValueMap.count(V); - } - - unsigned CreateRegForValue(const Value *V); - - unsigned InitializeRegForValue(const Value *V) { - unsigned &R = ValueMap[V]; - assert(R == 0 && "Already initialized this value register!"); - return R = CreateRegForValue(V); - } -}; - -/// AddCatchInfo - Extract the personality and type infos from an eh.selector -/// call, and add them to the specified machine basic block. -void AddCatchInfo(const CallInst &I, - MachineModuleInfo *MMI, MachineBasicBlock *MBB); - -/// CopyCatchInfo - Copy catch information from DestBB to SrcBB. 
-void CopyCatchInfo(const BasicBlock *SrcBB, const BasicBlock *DestBB, - MachineModuleInfo *MMI, FunctionLoweringInfo &FLI); - -} // end namespace llvm - -#endif diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 16eb8a7..61c2a90 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -123,7 +123,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, EVT VT = Node->getValueType(ResNo); const TargetRegisterClass *SrcRC = 0, *DstRC = 0; - SrcRC = TRI->getPhysicalRegisterRegClass(SrcReg, VT); + SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT); // Figure out the register class to create for the destreg. if (VRBase) { @@ -142,11 +142,8 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, } else { // Create the reg, emit the copy. VRBase = MRI->createVirtualRegister(DstRC); - bool Emitted = TII->copyRegToReg(*MBB, InsertPos, VRBase, SrcReg, - DstRC, SrcRC, Node->getDebugLoc()); - - assert(Emitted && "Unable to issue a copy instruction!\n"); - (void) Emitted; + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), + VRBase).addReg(SrcReg); } SDValue Op(Node, ResNo); @@ -246,7 +243,7 @@ unsigned InstrEmitter::getVR(SDValue Op, const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType()); VReg = MRI->createVirtualRegister(RC); } - BuildMI(MBB, Op.getDebugLoc(), + BuildMI(*MBB, InsertPos, Op.getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF), VReg); return VReg; } @@ -288,10 +285,8 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, "Don't have operand info for this instruction!"); if (DstRC && SrcRC != DstRC && !SrcRC->hasSuperClass(DstRC)) { unsigned NewVReg = MRI->createVirtualRegister(DstRC); - bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg, - DstRC, SrcRC, Op.getNode()->getDebugLoc()); - assert(Emitted && "Unable to issue a copy instruction!\n"); - (void) Emitted; + BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), + TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); VReg = NewVReg; } } @@ -428,12 +423,9 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, } if (Opc == TargetOpcode::EXTRACT_SUBREG) { + // EXTRACT_SUBREG is lowered as %dst = COPY %src:sub unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); - // Create the extract_subreg machine instruction. - MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), - TII->get(TargetOpcode::EXTRACT_SUBREG)); - // Figure out the register class to create for the destreg. unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); const TargetRegisterClass *TRC = MRI->getRegClass(VReg); @@ -450,11 +442,16 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, VRBase = MRI->createVirtualRegister(SRC); } - // Add def, source, and subreg index - MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + // Create the extract_subreg machine instruction. 
+ MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), + TII->get(TargetOpcode::COPY), VRBase); + + // Add source, and subreg index AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); - MI->addOperand(MachineOperand::CreateImm(SubIdx)); + assert(TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg()) && + "Cannot yet extract from physregs"); + MI->getOperand(1).setSubReg(SubIdx); MBB->insert(InsertPos, MI); } else if (Opc == TargetOpcode::INSERT_SUBREG || Opc == TargetOpcode::SUBREG_TO_REG) { @@ -511,18 +508,13 @@ void InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap) { unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); - const TargetRegisterClass *SrcRC = MRI->getRegClass(VReg); + // Create the new VReg in the destination class and emit a copy. unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); const TargetRegisterClass *DstRC = TRI->getRegClass(DstRCIdx); - - // Create the new VReg in the destination class and emit a copy. unsigned NewVReg = MRI->createVirtualRegister(DstRC); - bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg, - DstRC, SrcRC, Node->getDebugLoc()); - assert(Emitted && - "Unable to issue a copy instruction for a COPY_TO_REGCLASS node!\n"); - (void) Emitted; + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), + NewVReg).addReg(VReg); SDValue Op(Node, 0); bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second; @@ -604,9 +596,10 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, } else if (SD->getKind() == SDDbgValue::CONST) { const Value *V = SD->getConst(); if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { - // FIXME: SDDbgValues aren't updated with legalization, so it's possible - // to have i128 values in them at this point. As a crude workaround, just - // drop the debug info if this happens. + // FIXME: SDDbgValue constants aren't updated with legalization, so it's + // possible to have i128 constants in them at this point. Dwarf writer + // does not handle i128 constants at the moment so, as a crude workaround, + // just drop the debug info if this happens. if (!CI->getValue().isSignedIntN(64)) MIB.addReg(0U); else @@ -676,6 +669,33 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Create the new machine instruction. MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II); + + // The MachineInstr constructor adds implicit-def operands. Scan through + // these to determine which are dead. + if (MI->getNumOperands() != 0 && + Node->getValueType(Node->getNumValues()-1) == MVT::Flag) { + // First, collect all used registers. + SmallVector<unsigned, 8> UsedRegs; + for (SDNode *F = Node->getFlaggedUser(); F; F = F->getFlaggedUser()) + if (F->getOpcode() == ISD::CopyFromReg) + UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg()); + else { + // Collect declared implicit uses. + const TargetInstrDesc &TID = TII->get(F->getMachineOpcode()); + UsedRegs.append(TID.getImplicitUses(), + TID.getImplicitUses() + TID.getNumImplicitUses()); + // In addition to declared implicit uses, we must also check for + // direct RegisterSDNode operands. + for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) + if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) { + unsigned Reg = R->getReg(); + if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) + UsedRegs.push_back(Reg); + } + } + // Then mark unused registers as dead. 
+ MI->setPhysRegsDeadExcept(UsedRegs, *TRI); + } // Add result register values for things that are defined by this // instruction. @@ -696,16 +716,24 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(), cast<MachineSDNode>(Node)->memoperands_end()); + // Insert the instruction into position in the block. This needs to + // happen before any custom inserter hook is called so that the + // hook knows where in the block to insert the replacement code. + MBB->insert(InsertPos, MI); + if (II.usesCustomInsertionHook()) { // Insert this instruction into the basic block using a target // specific inserter which may return a new basic block. - MBB = TLI->EmitInstrWithCustomInserter(MI, MBB); - InsertPos = MBB->end(); + bool AtEnd = InsertPos == MBB->end(); + MachineBasicBlock *NewMBB = TLI->EmitInstrWithCustomInserter(MI, MBB); + if (NewMBB != MBB) { + if (AtEnd) + InsertPos = NewMBB->end(); + MBB = NewMBB; + } return; } - MBB->insert(InsertPos, MI); - // Additional results must be a physical register def. if (HasPhysRegOuts) { for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { @@ -761,24 +789,9 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); if (SrcReg == DestReg) // Coalesced away the copy? Ignore. break; - - const TargetRegisterClass *SrcTRC = 0, *DstTRC = 0; - // Get the register classes of the src/dst. - if (TargetRegisterInfo::isVirtualRegister(SrcReg)) - SrcTRC = MRI->getRegClass(SrcReg); - else - SrcTRC = TRI->getPhysicalRegisterRegClass(SrcReg,SrcVal.getValueType()); - if (TargetRegisterInfo::isVirtualRegister(DestReg)) - DstTRC = MRI->getRegClass(DestReg); - else - DstTRC = TRI->getPhysicalRegisterRegClass(DestReg, - Node->getOperand(1).getValueType()); - - bool Emitted = TII->copyRegToReg(*MBB, InsertPos, DestReg, SrcReg, - DstTRC, SrcTRC, Node->getDebugLoc()); - assert(Emitted && "Unable to issue a copy instruction!\n"); - (void) Emitted; + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), + DestReg).addReg(SrcReg); break; } case ISD::CopyFromReg: { @@ -807,6 +820,12 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol(); MI->addOperand(MachineOperand::CreateES(AsmStr)); + // Add the isAlignStack bit. + int64_t isAlignStack = + cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_IsAlignStack))-> + getZExtValue(); + MI->addOperand(MachineOperand::CreateImm(isAlignStack)); + // Add all of the operand registers to the instruction. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = @@ -821,14 +840,22 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, case InlineAsm::Kind_RegDef: for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); - MI->addOperand(MachineOperand::CreateReg(Reg, true)); + // FIXME: Add dead flags for physical and virtual registers defined. + // For now, mark physical register defs as implicit to help fast + // regalloc. This makes inline asm look a lot like calls.
+ MI->addOperand(MachineOperand::CreateReg(Reg, true, + /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg))); } break; case InlineAsm::Kind_RegDefEarlyClobber: for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); - MI->addOperand(MachineOperand::CreateReg(Reg, true, false, false, - false, false, true)); + MI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/ true, + /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg), + /*isKill=*/ false, + /*isDead=*/ false, + /*isUndef=*/false, + /*isEarlyClobber=*/ true)); } break; case InlineAsm::Kind_RegUse: // Use of register. diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 62a37a5..7a47da4 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -31,6 +31,7 @@ #include "llvm/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseMap.h" @@ -133,7 +134,7 @@ private: /// whose vector element type is narrower than the original shuffle type. /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, - SDValue N1, SDValue N2, + SDValue N1, SDValue N2, SmallVectorImpl<int> &Mask) const; bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, @@ -143,6 +144,8 @@ private: DebugLoc dl); SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); + std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, + SDNode *Node, bool isSigned); SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, RTLIB::Libcall Call_PPCF128); @@ -172,6 +175,8 @@ private: SDValue ExpandExtractFromVectorThroughStack(SDValue Op); SDValue ExpandVectorBuildThroughStack(SDNode* Node); + std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); + void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results); void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results); }; @@ -181,8 +186,8 @@ private: /// performs the same shuffle in terms of order of result bytes, but on a type /// whose vector element type is narrower than the original shuffle type. /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> -SDValue -SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, +SDValue +SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, SDValue N1, SDValue N2, SmallVectorImpl<int> &Mask) const { unsigned NumMaskElts = VT.getVectorNumElements(); @@ -193,12 +198,12 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, if (NumEltsGrowth == 1) return DAG.getVectorShuffle(NVT, dl, N1, N2, &Mask[0]); - + SmallVector<int, 8> NewMask; for (unsigned i = 0; i != NumMaskElts; ++i) { int Idx = Mask[i]; for (unsigned j = 0; j != NumEltsGrowth; ++j) { - if (Idx < 0) + if (Idx < 0) NewMask.push_back(-1); else NewMask.push_back(Idx * NumEltsGrowth + j); @@ -320,7 +325,8 @@ bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, bool OperandsLeadToDest = false; for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) OperandsLeadToDest |= // If an operand leads to Dest, so do we.
- LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest, NodesLeadingTo); + LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest, + NodesLeadingTo); if (OperandsLeadToDest) { NodesLeadingTo.insert(N); @@ -357,7 +363,7 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, EVT SVT = VT; while (SVT != MVT::f32) { SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1); - if (CFP->isValueValidForType(SVT, CFP->getValueAPF()) && + if (ConstantFPSDNode::isValueValidForType(SVT, CFP->getValueAPF()) && // Only do this if the target has a native EXTLOAD instruction from // smaller type. TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) && @@ -372,8 +378,8 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy()); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); if (Extend) - return DAG.getExtLoad(ISD::EXTLOAD, dl, - OrigVT, DAG.getEntryNode(), + return DAG.getExtLoad(ISD::EXTLOAD, OrigVT, dl, + DAG.getEntryNode(), CPIdx, PseudoSourceValue::getConstantPool(), 0, VT, false, false, Alignment); return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, @@ -450,7 +456,7 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, 8 * (StoredBytes - Offset)); // Load from the stack slot. - SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Store, StackPtr, NULL, 0, MemVT, false, false, 0); Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, @@ -552,7 +558,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // The last copy may be partial. Do an extending load. EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), 8 * (LoadedBytes - Offset)); - SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr, + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset + Offset, MemVT, LD->isVolatile(), LD->isNonTemporal(), @@ -568,7 +574,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, Stores.size()); // Finally, perform the original load only redirected to the stack slot. - Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, + Load = DAG.getExtLoad(LD->getExtensionType(), VT, dl, TF, StackBase, NULL, 0, LoadedVT, false, false, 0); // Callers expect a MERGE_VALUES node. 
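A note on the two-part load pattern in the next hunk: when the access is aligned to at least half its width, ExpandUnalignedLoad splits it into two naturally aligned halves and recombines them with shift and or, rather than bouncing the value through a stack slot. A minimal hand-written C++ sketch of the little-endian recombination (hypothetical helper name, illustration only, not code from this patch):

#include <cstdint>
#include <cstring>

// Mirrors the (shl Hi, 16) | Lo pattern the legalizer builds: a
// ZEXTLOAD of the low half, a load of the high half at Ptr +
// IncrementSize, then aggregation of the two parts.
uint32_t LoadUnaligned32(const uint8_t *Ptr) {
  uint16_t Lo, Hi;
  std::memcpy(&Lo, Ptr, sizeof(Lo));              // low RoundWidth bits
  std::memcpy(&Hi, Ptr + sizeof(Lo), sizeof(Hi)); // remaining high bits
  return (uint32_t(Hi) << 16) | uint32_t(Lo);
}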
@@ -597,23 +603,23 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // Load the value in two parts SDValue Lo, Hi; if (TLI.isLittleEndian()) { - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(), + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset, NewLoadedVT, LD->isVolatile(), LD->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, TLI.getPointerTy())); - Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(), + Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), MinAlign(Alignment, IncrementSize)); + LD->isNonTemporal(), MinAlign(Alignment,IncrementSize)); } else { - Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(), + Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset, NewLoadedVT, LD->isVolatile(), LD->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, TLI.getPointerTy())); - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(), + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), MinAlign(Alignment, IncrementSize)); + LD->isNonTemporal(), MinAlign(Alignment,IncrementSize)); } // aggregate the two parts @@ -773,7 +779,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { "Unexpected illegal type!"); for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) - assert((isTypeLegal(Node->getOperand(i).getValueType()) || + assert((isTypeLegal(Node->getOperand(i).getValueType()) || Node->getOperand(i).getOpcode() == ISD::TargetConstant) && "Unexpected illegal type!"); @@ -853,6 +859,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::MERGE_VALUES: case ISD::EH_RETURN: case ISD::FRAME_TO_ARGS_OFFSET: + case ISD::EH_SJLJ_SETJMP: + case ISD::EH_SJLJ_LONGJMP: // These operations lie about being legal: when they claim to be legal, // they should actually be expanded. Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); @@ -925,8 +933,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { break; } - Result = DAG.UpdateNodeOperands(Result.getValue(0), Ops.data(), - Ops.size()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), Ops.data(), + Ops.size()), 0); switch (Action) { case TargetLowering::Legal: for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) @@ -1000,11 +1008,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { NodesLeadingTo); } - // Now that we legalized all of the inputs (which may have inserted - // libcalls) create the new CALLSEQ_START node. + // Now that we have legalized all of the inputs (which may have inserted + // libcalls), create the new CALLSEQ_START node. Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. - // Merge in the last call, to ensure that this call start after the last + // Merge in the last call to ensure that this call starts after the last // call ended. 
if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) { Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, @@ -1016,7 +1024,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (Tmp1 != Node->getOperand(0)) { SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end()); Ops[0] = Tmp1; - Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), &Ops[0], + Ops.size()), Result.getResNo()); } // Remember that the CALLSEQ_START is legalized. @@ -1058,7 +1067,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (Tmp1 != Node->getOperand(0)) { SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end()); Ops[0] = Tmp1; - Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + &Ops[0], Ops.size()), + Result.getResNo()); } } else { Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1)); @@ -1067,7 +1078,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end()); Ops[0] = Tmp1; Ops.back() = Tmp2; - Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + &Ops[0], Ops.size()), + Result.getResNo()); } } assert(IsLegalizingCall && "Call sequence imbalance between start/end?"); @@ -1087,7 +1100,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) { EVT VT = Node->getValueType(0); - Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + Tmp1, Tmp2, LD->getOffset()), + Result.getResNo()); Tmp3 = Result.getValue(0); Tmp4 = Result.getValue(1); @@ -1100,7 +1115,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { const Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), + Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), DAG, TLI); Tmp3 = Result.getOperand(0); Tmp4 = Result.getOperand(1); @@ -1166,7 +1181,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { ISD::LoadExtType NewExtType = ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; - Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), + Result = DAG.getExtLoad(NewExtType, Node->getValueType(0), dl, Tmp1, Tmp2, LD->getSrcValue(), SVOffset, NVT, isVolatile, isNonTemporal, Alignment); @@ -1202,8 +1217,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (TLI.isLittleEndian()) { // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) // Load the bottom RoundWidth bits. 
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, - Node->getValueType(0), Tmp1, Tmp2, + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, Node->getValueType(0), dl, + Tmp1, Tmp2, LD->getSrcValue(), SVOffset, RoundVT, isVolatile, isNonTemporal, Alignment); @@ -1211,13 +1226,13 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { IncrementSize = RoundWidth / 8; Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, DAG.getIntPtrConstant(IncrementSize)); - Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, + Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2, LD->getSrcValue(), SVOffset + IncrementSize, ExtraVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); - // Build a factor node to remember that this load is independent of the - // other one. + // Build a factor node to remember that this load is independent of + // the other one. Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); @@ -1231,7 +1246,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Big endian - avoid unaligned loads. // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 // Load the top RoundWidth bits. - Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, + Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2, LD->getSrcValue(), SVOffset, RoundVT, isVolatile, isNonTemporal, Alignment); @@ -1239,14 +1254,14 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { IncrementSize = RoundWidth / 8; Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, DAG.getIntPtrConstant(IncrementSize)); - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, - Node->getValueType(0), Tmp1, Tmp2, + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, + Node->getValueType(0), dl, Tmp1, Tmp2, LD->getSrcValue(), SVOffset + IncrementSize, ExtraVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); - // Build a factor node to remember that this load is independent of the - // other one. + // Build a factor node to remember that this load is independent of + // the other one. Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); @@ -1267,7 +1282,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { isCustom = true; // FALLTHROUGH case TargetLowering::Legal: - Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + Tmp1, Tmp2, LD->getOffset()), + Result.getResNo()); Tmp1 = Result.getValue(0); Tmp2 = Result.getValue(1); @@ -1281,10 +1298,12 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // If this is an unaligned load and the target doesn't support it, // expand it. 
if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { - const Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); + const Type *Ty = + LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = + TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), + Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), DAG, TLI); Tmp1 = Result.getOperand(0); Tmp2 = Result.getOperand(1); @@ -1310,10 +1329,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp2 = LegalizeOp(Load.getValue(1)); break; } - assert(ExtType != ISD::EXTLOAD &&"EXTLOAD should always be supported!"); + assert(ExtType != ISD::EXTLOAD && + "EXTLOAD should always be supported!"); // Turn the unsupported load into an EXTLOAD followed by an explicit // zero/sign extend inreg. - Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), + Result = DAG.getExtLoad(ISD::EXTLOAD, Node->getValueType(0), dl, Tmp1, Tmp2, LD->getSrcValue(), LD->getSrcValueOffset(), SrcVT, LD->isVolatile(), LD->isNonTemporal(), @@ -1355,8 +1375,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { { Tmp3 = LegalizeOp(ST->getValue()); - Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2, - ST->getOffset()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + Tmp1, Tmp3, Tmp2, + ST->getOffset()), + Result.getResNo()); EVT VT = Tmp3.getValueType(); switch (TLI.getOperationAction(ISD::STORE, VT)) { @@ -1366,7 +1388,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // expand it. if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); + unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), DAG, TLI); @@ -1459,8 +1481,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } else { if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() || Tmp2 != ST->getBasePtr()) - Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2, - ST->getOffset()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + Tmp1, Tmp3, Tmp2, + ST->getOffset()), + Result.getResNo()); switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { default: assert(0 && "This action is not supported yet!"); @@ -1469,7 +1493,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // expand it. 
if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); + unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), DAG, TLI); @@ -1531,7 +1555,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0, false, false, 0); else - return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, + return DAG.getExtLoad(ISD::EXTLOAD, Op.getValueType(), dl, Ch, StackPtr, NULL, 0, Vec.getValueType().getVectorElementType(), false, false, 0); } @@ -1568,7 +1592,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { Node->getOperand(i), Idx, SV, Offset, EltVT, false, false, 0)); } else - Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, + Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i), Idx, SV, Offset, false, false, 0)); } @@ -1763,7 +1787,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, DestAlign); assert(SlotSize < DestSize && "Unknown extension!"); - return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, SV, 0, SlotVT, + return DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl, Store, FIPtr, SV, 0, SlotVT, false, false, DestAlign); } @@ -1926,6 +1950,44 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, return CallInfo.first; } +// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to +// ExpandLibCall except that the first operand is the in-chain. +std::pair<SDValue, SDValue> +SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, + SDNode *Node, + bool isSigned) { + assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); + SDValue InChain = Node->getOperand(0); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) { + EVT ArgVT = Node->getOperand(i).getValueType(); + const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Node->getOperand(i); + Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + // Splice the libcall in wherever FindInputOutputChains tells us to. + const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + std::pair<SDValue, SDValue> CallInfo = + TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, + 0, TLI.getLibcallCallingConv(LC), false, + /*isReturnValueUsed=*/true, + Callee, Args, DAG, Node->getDebugLoc()); + + // Legalize the call sequence, starting with the chain. This will advance + // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that + // was added by LowerCallTo (guaranteeing proper serialization of calls). 
+ LegalizeOp(CallInfo.second); + return CallInfo; +} + SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, @@ -2048,7 +2110,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84); SDValue LoFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, LoOr); SDValue HiFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, HiOr); - SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt, TwoP84PlusTwoP52); + SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt, + TwoP84PlusTwoP52); return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub); } @@ -2058,11 +2121,11 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) { EVT SHVT = TLI.getShiftAmountTy(); - SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, + SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64)); SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, DAG.getConstant(UINT64_C(0x800), MVT::i64)); - SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, + SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, DAG.getConstant(UINT64_C(0x7ff), MVT::i64)); SDValue Ne = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64), And2, DAG.getConstant(UINT64_C(0), MVT::i64), ISD::SETNE); @@ -2122,7 +2185,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, false, false, Alignment); else { FudgeInReg = - LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, + LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl, DAG.getEntryNode(), CPIdx, PseudoSourceValue::getConstantPool(), 0, MVT::f32, false, false, Alignment)); @@ -2332,6 +2395,92 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, } } +std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { + unsigned Opc = Node->getOpcode(); + MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); + RTLIB::Libcall LC; + + switch (Opc) { + default: + llvm_unreachable("Unhandled atomic intrinsic Expand!"); + break; + case ISD::ATOMIC_SWAP: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break; + case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break; + case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break; + case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break; + } + break; + case ISD::ATOMIC_CMP_SWAP: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break; + case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break; + case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break; + case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break; + } + break; + case ISD::ATOMIC_LOAD_ADD: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break; + } + break; + case ISD::ATOMIC_LOAD_SUB: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break; + case MVT::i32: LC = 
RTLIB::SYNC_FETCH_AND_SUB_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break; + } + break; + case ISD::ATOMIC_LOAD_AND: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break; + } + break; + case ISD::ATOMIC_LOAD_OR: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break; + } + break; + case ISD::ATOMIC_LOAD_XOR: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break; + } + break; + case ISD::ATOMIC_LOAD_NAND: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break; + } + break; + } + + return ExpandChainLibCall(LC, Node, false); +} + void SelectionDAGLegalize::ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results) { DebugLoc dl = Node->getDebugLoc(); @@ -2357,10 +2506,48 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::EH_RETURN: case ISD::EH_LABEL: case ISD::PREFETCH: - case ISD::MEMBARRIER: case ISD::VAEND: + case ISD::EH_SJLJ_LONGJMP: + Results.push_back(Node->getOperand(0)); + break; + case ISD::EH_SJLJ_SETJMP: + Results.push_back(DAG.getConstant(0, MVT::i32)); Results.push_back(Node->getOperand(0)); break; + case ISD::MEMBARRIER: { + // If the target didn't lower this, lower it to '__sync_synchronize()' call + TargetLowering::ArgListTy Args; + std::pair<SDValue, SDValue> CallResult = + TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), + false, false, false, false, 0, CallingConv::C, false, + /*isReturnValueUsed=*/true, + DAG.getExternalSymbol("__sync_synchronize", + TLI.getPointerTy()), + Args, DAG, dl); + Results.push_back(CallResult.second); + break; + } + // By default, atomic intrinsics are marked Legal and lowered. Targets + // which don't support them directly, however, may want libcalls, in which + // case they mark them Expand, and we get here. + // FIXME: Unimplemented for now. Add libcalls. 
+ case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_CMP_SWAP: { + std::pair<SDValue, SDValue> Tmp = ExpandAtomic(Node); + Results.push_back(Tmp.first); + Results.push_back(Tmp.second); + break; + } case ISD::DYNAMIC_STACKALLOC: ExpandDYNAMIC_STACKALLOC(Node, Results); break; @@ -2465,15 +2652,31 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, EVT VT = Node->getValueType(0); Tmp1 = Node->getOperand(0); Tmp2 = Node->getOperand(1); - SDValue VAList = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0, - false, false, 0); + unsigned Align = Node->getConstantOperandVal(3); + + SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0, + false, false, 0); + SDValue VAList = VAListLoad; + + if (Align > TLI.getMinStackArgumentAlignment()) { + assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); + + VAList = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, + DAG.getConstant(Align - 1, + TLI.getPointerTy())); + + VAList = DAG.getNode(ISD::AND, dl, TLI.getPointerTy(), VAList, + DAG.getConstant(-Align, + TLI.getPointerTy())); + } + // Increment the pointer, VAList, to the next vaarg Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, DAG.getConstant(TLI.getTargetData()-> getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), TLI.getPointerTy())); // Store the incremented VAList to the legalized pointer - Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0, + Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, V, 0, false, false, 0); // Load the actual argument out of the pointer VAList Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0, @@ -2496,7 +2699,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::EXTRACT_VECTOR_ELT: if (Node->getOperand(0).getValueType().getVectorNumElements() == 1) // This must be an access of the only element. Return it. 
- Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, Node->getValueType(0), + Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, Node->getValueType(0), Node->getOperand(0)); else Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0)); @@ -2948,13 +3151,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, const TargetData &TD = *TLI.getTargetData(); unsigned EntrySize = DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); - + Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(EntrySize, PTy)); SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); - SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr, + SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, PTy, dl, Chain, Addr, PseudoSourceValue::getJumpTable(), 0, MemVT, false, false, 0); Addr = LD; diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index e3eb949..650ee5a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -453,8 +453,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { SDValue NewL; if (L->getExtensionType() == ISD::NON_EXTLOAD) { - NewL = DAG.getLoad(L->getAddressingMode(), dl, L->getExtensionType(), - NVT, L->getChain(), L->getBasePtr(), L->getOffset(), + NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), + NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(), L->getSrcValue(), L->getSrcValueOffset(), NVT, L->isVolatile(), L->isNonTemporal(), L->getAlignment()); // Legalized the chain result - switch anything that used the old chain to @@ -464,8 +464,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { } // Do a non-extending load followed by FP_EXTEND. - NewL = DAG.getLoad(L->getAddressingMode(), dl, ISD::NON_EXTLOAD, - L->getMemoryVT(), L->getChain(), + NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD, + L->getMemoryVT(), dl, L->getChain(), L->getBasePtr(), L->getOffset(), L->getSrcValue(), L->getSrcValueOffset(), L->getMemoryVT(), L->isVolatile(), @@ -504,7 +504,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) { DebugLoc dl = N->getDebugLoc(); SDValue NewVAARG; - NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2)); + NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), + N->getConstantOperandVal(3)); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -698,9 +699,10 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { } // Update N to have the operands specified. - return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), DAG.getCondCode(CCCode), NewLHS, NewRHS, - N->getOperand(4)); + N->getOperand(4)), + 0); } SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) { @@ -739,9 +741,10 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { } // Update N to have the operands specified. - return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, N->getOperand(2), N->getOperand(3), - DAG.getCondCode(CCCode)); + DAG.getCondCode(CCCode)), + 0); } SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { @@ -757,8 +760,9 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { } // Otherwise, update N to have the operands specified. 
- return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, - DAG.getCondCode(CCCode)); + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + DAG.getCondCode(CCCode)), + 0); } SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { @@ -1106,7 +1110,7 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, assert(NVT.isByteSized() && "Expanded type not byte sized!"); assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?"); - Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr, + Hi = DAG.getExtLoad(LD->getExtensionType(), NVT, dl, Chain, Ptr, LD->getSrcValue(), LD->getSrcValueOffset(), LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); @@ -1294,9 +1298,9 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) { } // Update N to have the operands specified. - return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), DAG.getCondCode(CCCode), NewLHS, NewRHS, - N->getOperand(4)); + N->getOperand(4)), 0); } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) { @@ -1375,9 +1379,9 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { } // Update N to have the operands specified. - return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, N->getOperand(2), N->getOperand(3), - DAG.getCondCode(CCCode)); + DAG.getCondCode(CCCode)), 0); } SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) { @@ -1393,8 +1397,8 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) { } // Otherwise, update N to have the operands specified. - return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, - DAG.getCondCode(CCCode)); + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + DAG.getCondCode(CCCode)), 0); } SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 8b382bc..b94ea9a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -369,7 +369,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType(); DebugLoc dl = N->getDebugLoc(); - SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(), + SDValue Res = DAG.getExtLoad(ExtType, NVT, dl, N->getChain(), N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(), N->getMemoryVT(), N->isVolatile(), N->isNonTemporal(), N->getAlignment()); @@ -572,7 +572,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { SmallVector<SDValue, 8> Parts(NumRegs); for (unsigned i = 0; i < NumRegs; ++i) { - Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2)); + Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2), + N->getConstantOperandVal(3)); Chain = Parts[i].getValue(1); } @@ -725,8 +726,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo) { // The chain (Op#0), CC (#1) and basic block destination (Op#4) are always // legal types. 
- return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), - N->getOperand(1), LHS, RHS, N->getOperand(4)); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + N->getOperand(1), LHS, RHS, N->getOperand(4)), + 0); } SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) { @@ -737,8 +739,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) { SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT); // The chain (Op#0) and basic block destination (Op#2) are always legal types. - return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), Cond, - N->getOperand(2)); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Cond, + N->getOperand(2)), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) { @@ -773,7 +775,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { for (unsigned i = 0; i < NumElts; ++i) NewOps.push_back(GetPromotedInteger(N->getOperand(i))); - return DAG.UpdateNodeOperands(SDValue(N, 0), &NewOps[0], NumElts); + return SDValue(DAG.UpdateNodeOperands(N, &NewOps[0], NumElts), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) { @@ -798,17 +800,18 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, assert(N->getOperand(1).getValueType().getSizeInBits() >= N->getValueType(0).getVectorElementType().getSizeInBits() && "Type of inserted value narrower than vector element type!"); - return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), GetPromotedInteger(N->getOperand(1)), - N->getOperand(2)); + N->getOperand(2)), + 0); } assert(OpNo == 2 && "Different operand and result vector types?"); // Promote the index. SDValue Idx = ZExtPromotedInteger(N->getOperand(2)); - return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), - N->getOperand(1), Idx); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + N->getOperand(1), Idx), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) { @@ -819,15 +822,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) { SDValue Flag = GetPromotedInteger(N->getOperand(i)); NewOps[i] = DAG.getZeroExtendInReg(Flag, dl, MVT::i1); } - return DAG.UpdateNodeOperands(SDValue (N, 0), NewOps, - array_lengthof(NewOps)); + return SDValue(DAG.UpdateNodeOperands(N, NewOps, array_lengthof(NewOps)), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) { // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote // the operand in place. 
- return DAG.UpdateNodeOperands(SDValue(N, 0), - GetPromotedInteger(N->getOperand(0))); + return SDValue(DAG.UpdateNodeOperands(N, + GetPromotedInteger(N->getOperand(0))), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { @@ -837,8 +839,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { EVT SVT = TLI.getSetCCResultType(N->getOperand(1).getValueType()); SDValue Cond = PromoteTargetBoolean(N->getOperand(0), SVT); - return DAG.UpdateNodeOperands(SDValue(N, 0), Cond, - N->getOperand(1), N->getOperand(2)); + return SDValue(DAG.UpdateNodeOperands(N, Cond, + N->getOperand(1), N->getOperand(2)), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) { @@ -849,8 +851,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) { PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(4))->get()); // The CC (#4) and the possible return values (#2 and #3) have legal types. - return DAG.UpdateNodeOperands(SDValue(N, 0), LHS, RHS, N->getOperand(2), - N->getOperand(3), N->getOperand(4)); + return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2), + N->getOperand(3), N->getOperand(4)), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) { @@ -861,12 +863,12 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) { PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(2))->get()); // The CC (#2) is always legal. - return DAG.UpdateNodeOperands(SDValue(N, 0), LHS, RHS, N->getOperand(2)); + return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2)), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) { - return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), - ZExtPromotedInteger(N->getOperand(1))); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + ZExtPromotedInteger(N->getOperand(1))), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) { @@ -878,8 +880,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) { - return DAG.UpdateNodeOperands(SDValue(N, 0), - SExtPromotedInteger(N->getOperand(0))); + return SDValue(DAG.UpdateNodeOperands(N, + SExtPromotedInteger(N->getOperand(0))), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ @@ -905,8 +907,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) { - return DAG.UpdateNodeOperands(SDValue(N, 0), - ZExtPromotedInteger(N->getOperand(0))); + return SDValue(DAG.UpdateNodeOperands(N, + ZExtPromotedInteger(N->getOperand(0))), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { @@ -990,6 +992,11 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SHL: case ISD::SRA: case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break; + + case ISD::SADDO: + case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break; + case ISD::UADDO: + case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break; } // If Lo/Hi is null, the sub-method took care of registering results etc. 
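The new SADDO/SSUBO/UADDO/USUBO cases above dispatch to the expansion routines in the hunks that follow. The signed routine uses the classic sign-comparison rule spelled out in its comment; as a standalone C++ cross-check of that rule (illustration only, these helpers are not part of the patch):

#include <cstdint>

// Signed add overflows iff both operands have the same sign and the
// wrapped sum's sign differs; signed sub overflows iff the operand
// signs differ and the difference's sign differs from the LHS.
bool SAddOverflows(int32_t LHS, int32_t RHS) {
  int32_t Sum = int32_t(uint32_t(LHS) + uint32_t(RHS)); // wraps, no UB
  bool LHSSign = LHS >= 0, RHSSign = RHS >= 0, SumSign = Sum >= 0;
  return LHSSign == RHSSign && LHSSign != SumSign;
}

bool SSubOverflows(int32_t LHS, int32_t RHS) {
  int32_t Diff = int32_t(uint32_t(LHS) - uint32_t(RHS));
  bool LHSSign = LHS >= 0, RHSSign = RHS >= 0, DiffSign = Diff >= 0;
  return LHSSign != RHSSign && LHSSign != DiffSign;
}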
@@ -1526,7 +1533,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, if (N->getMemoryVT().bitsLE(NVT)) { EVT MemVT = N->getMemoryVT(); - Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset, + Lo = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset, MemVT, isVolatile, isNonTemporal, Alignment); // Remember the chain. @@ -1559,7 +1566,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, unsigned IncrementSize = NVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); - Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), + Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset+IncrementSize, NEVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); @@ -1577,7 +1584,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, unsigned ExcessBits = (EBytes - IncrementSize)*8; // Load both the high bits and maybe some of the low bits. - Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset, + Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset, EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits() - ExcessBits), isVolatile, isNonTemporal, Alignment); @@ -1586,7 +1593,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); // Load the rest of the low bits. - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getSrcValue(), + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset+IncrementSize, EVT::getIntegerVT(*DAG.getContext(), ExcessBits), isVolatile, isNonTemporal, @@ -1716,6 +1723,48 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, SplitInteger(MakeLibCall(LC, VT, Ops, 2, true/*irrelevant*/, dl), Lo, Hi); } +void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, + SDValue &Lo, SDValue &Hi) { + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + DebugLoc dl = Node->getDebugLoc(); + + // Expand the result by simply replacing it with the equivalent + // non-overflow-checking operation. + SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ? + ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + SplitInteger(Sum, Lo, Hi); + + // Compute the overflow. + // + // LHSSign -> LHS >= 0 + // RHSSign -> RHS >= 0 + // SumSign -> Sum >= 0 + // + // Add: + // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) + // Sub: + // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) + // + EVT OType = Node->getValueType(1); + SDValue Zero = DAG.getConstant(0, LHS.getValueType()); + + SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); + SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); + SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign, + Node->getOpcode() == ISD::SADDO ? + ISD::SETEQ : ISD::SETNE); + + SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE); + SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); + + SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); + + // Use the calculated overflow everywhere. 
+ ReplaceValueWith(SDValue(Node, 1), Cmp); +} + void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); @@ -1912,6 +1961,29 @@ void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi); } +void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + DebugLoc dl = N->getDebugLoc(); + + // Expand the result by simply replacing it with the equivalent + // non-overflow-checking operation. + SDValue Sum = DAG.getNode(N->getOpcode() == ISD::UADDO ? + ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + SplitInteger(Sum, Lo, Hi); + + // Calculate the overflow: addition overflows iff a + b < a, and subtraction + // overflows iff a - b > a. + SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, + N->getOpcode () == ISD::UADDO ? + ISD::SETULT : ISD::SETUGT); + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(N, 1), Ofl); +} + void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); @@ -2154,9 +2226,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) { } // Update N to have the operands specified. - return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), DAG.getCondCode(CCCode), NewLHS, NewRHS, - N->getOperand(4)); + N->getOperand(4)), 0); } SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) { @@ -2172,9 +2244,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) { } // Update N to have the operands specified. - return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, N->getOperand(2), N->getOperand(3), - DAG.getCondCode(CCCode)); + DAG.getCondCode(CCCode)), 0); } SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) { @@ -2190,8 +2262,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) { } // Otherwise, update N to have the operands specified. - return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, - DAG.getCondCode(CCCode)); + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + DAG.getCondCode(CCCode)), 0); } SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) { @@ -2200,7 +2272,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) { // upper half of the shift amount is zero. Just use the lower half. SDValue Lo, Hi; GetExpandedInteger(N->getOperand(1), Lo, Hi); - return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), Lo); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Lo), 0); } SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) { @@ -2209,7 +2281,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) { // constant to valid type. SDValue Lo, Hi; GetExpandedInteger(N->getOperand(0), Lo, Hi); - return DAG.UpdateNodeOperands(SDValue(N, 0), Lo); + return SDValue(DAG.UpdateNodeOperands(N, Lo), 0); } SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { @@ -2384,7 +2456,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { // Load the value out, extending it from f32 to the destination float type. // FIXME: Avoid the extend by constructing the right constant pool? 
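The two new expansion routines above (ExpandIntRes_SADDSUBO, ExpandIntRes_UADDSUBO) reduce an overflow-checking add/sub on an illegal wide type to a plain ADD/SUB plus an explicit overflow predicate. The predicates are ordinary integer identities; here they are as a self-contained sketch, with 32-bit values standing in for the split wide integers (helper names are ours, two's-complement wrap assumed):

    #include <cassert>
    #include <cstdint>

    // Signed add, per the comment in ExpandIntRes_SADDSUBO:
    //   Add overflow = (LHSSign == RHSSign) && (LHSSign != SumSign)
    //   (for SUB, the first test flips to LHSSign != RHSSign).
    bool saddOverflow(int32_t a, int32_t b, int32_t &sum) {
      sum = (int32_t)((uint32_t)a + (uint32_t)b);  // wrapping add, no UB
      bool aSign = a >= 0, bSign = b >= 0, sSign = sum >= 0;
      return (aSign == bSign) && (aSign != sSign);
    }

    // Unsigned, per ExpandIntRes_UADDSUBO: a+b overflows iff a+b < a,
    // and a-b overflows iff a-b > a.
    bool uaddOverflow(uint32_t a, uint32_t b, uint32_t &sum) {
      sum = a + b;
      return sum < a;
    }
    bool usubOverflow(uint32_t a, uint32_t b, uint32_t &diff) {
      diff = a - b;
      return diff > a;
    }

    int main() {
      int32_t s;
      assert(saddOverflow(INT32_MAX, 1, s));      // wraps negative
      assert(!saddOverflow(-1, 1, s) && s == 0);  // mixed signs never overflow
      uint32_t u;
      assert(uaddOverflow(UINT32_MAX, 1, u) && u == 0);
      assert(usubOverflow(0, 1, u) && u == UINT32_MAX);
      return 0;
    }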
- SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), + SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, DstVT, dl, DAG.getEntryNode(), FudgePtr, NULL, 0, MVT::f32, false, false, Alignment); return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 17f131b..6e56c98 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -485,15 +485,14 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { NewOps.push_back(Op); } else if (Op != OrigOp) { // This is the first operand to change - add all operands so far. - NewOps.insert(NewOps.end(), N->op_begin(), N->op_begin() + i); + NewOps.append(N->op_begin(), N->op_begin() + i); NewOps.push_back(Op); } } // Some operands changed - update the node. if (!NewOps.empty()) { - SDNode *M = DAG.UpdateNodeOperands(SDValue(N, 0), &NewOps[0], - NewOps.size()).getNode(); + SDNode *M = DAG.UpdateNodeOperands(N, &NewOps[0], NewOps.size()); if (M != N) { // The node morphed into a different node. Normally for this to happen // the original node would have to be marked NewNode. However this can @@ -684,40 +683,45 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { // can potentially cause recursive merging. SmallSetVector<SDNode*, 16> NodesToAnalyze; NodeUpdateListener NUL(*this, NodesToAnalyze); - DAG.ReplaceAllUsesOfValueWith(From, To, &NUL); - - // The old node may still be present in a map like ExpandedIntegers or - // PromotedIntegers. Inform maps about the replacement. - ReplacedValues[From] = To; - - // Process the list of nodes that need to be reanalyzed. - while (!NodesToAnalyze.empty()) { - SDNode *N = NodesToAnalyze.back(); - NodesToAnalyze.pop_back(); - if (N->getNodeId() != DAGTypeLegalizer::NewNode) - // The node was analyzed while reanalyzing an earlier node - it is safe to - // skip. Note that this is not a morphing node - otherwise it would still - // be marked NewNode. - continue; + do { + DAG.ReplaceAllUsesOfValueWith(From, To, &NUL); + + // The old node may still be present in a map like ExpandedIntegers or + // PromotedIntegers. Inform maps about the replacement. + ReplacedValues[From] = To; + + // Process the list of nodes that need to be reanalyzed. + while (!NodesToAnalyze.empty()) { + SDNode *N = NodesToAnalyze.back(); + NodesToAnalyze.pop_back(); + if (N->getNodeId() != DAGTypeLegalizer::NewNode) + // The node was analyzed while reanalyzing an earlier node - it is safe + // to skip. Note that this is not a morphing node - otherwise it would + // still be marked NewNode. + continue; - // Analyze the node's operands and recalculate the node ID. - SDNode *M = AnalyzeNewNode(N); - if (M != N) { - // The node morphed into a different node. Make everyone use the new node - // instead. - assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!"); - assert(N->getNumValues() == M->getNumValues() && - "Node morphing changed the number of results!"); - for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { - SDValue OldVal(N, i); - SDValue NewVal(M, i); - if (M->getNodeId() == Processed) - RemapValue(NewVal); - DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL); + // Analyze the node's operands and recalculate the node ID. + SDNode *M = AnalyzeNewNode(N); + if (M != N) { + // The node morphed into a different node. Make everyone use the new + // node instead. 
+ assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!"); + assert(N->getNumValues() == M->getNumValues() && + "Node morphing changed the number of results!"); + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { + SDValue OldVal(N, i); + SDValue NewVal(M, i); + if (M->getNodeId() == Processed) + RemapValue(NewVal); + DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL); + } + // The original node continues to exist in the DAG, marked NewNode. } - } + // When recursively updating nodes with new nodes, it is possible to have + // new uses of From due to CSE. If this happens, replace the new uses of + // From with To. + } while (!From.use_empty()); } void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index c665963..bd86694 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -345,6 +345,9 @@ private: void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue &Lo, SDValue &Hi); bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -620,6 +623,7 @@ private: SDValue WidenVecRes_Binary(SDNode *N); SDValue WidenVecRes_Convert(SDNode *N); + SDValue WidenVecRes_POWI(SDNode *N); SDValue WidenVecRes_Shift(SDNode *N); SDValue WidenVecRes_Unary(SDNode *N); SDValue WidenVecRes_InregOp(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 88e1e62..9c2b1d9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -238,13 +238,15 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, } void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT OVT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); SDValue Chain = N->getOperand(0); SDValue Ptr = N->getOperand(1); DebugLoc dl = N->getDebugLoc(); + const unsigned Align = N->getConstantOperandVal(3); - Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2)); - Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2)); + Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), Align); + Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0); // Handle endianness of the load.
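For the ExpandRes_VAARG change above: a va_arg of an illegal wide type becomes two va_args of the half-sized legal type, chained so the reads stay ordered, with the node's real alignment passed only on the first; the isBigEndian handling it feeds continues just below. The observable effect, as a hedged C++ sketch (assumes a host where 32-bit values pass through varargs unchanged):

    #include <cassert>
    #include <cstdarg>
    #include <cstdint>
    #include <utility>

    // What the expansion computes for an i64 va_arg on a 32-bit target:
    // two consecutive half-width pieces, recombined as Lo/Hi, swapped on
    // big-endian hosts (mirroring the TLI.isBigEndian() branch below).
    uint64_t readI64(int bigEndian, ...) {
      va_list ap;
      va_start(ap, bigEndian);
      uint64_t lo = va_arg(ap, uint32_t);  // "Lo": first read, real alignment
      uint64_t hi = va_arg(ap, uint32_t);  // "Hi": chained after Lo, align 0
      va_end(ap);
      if (bigEndian) std::swap(lo, hi);    // low-address piece is Hi on BE
      return (hi << 32) | lo;
    }

    int main() {
      // Little-endian: the low half is read first.
      assert(readI64(0, 0x55667788u, 0x11223344u) == 0x1122334455667788ull);
      return 0;
    }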
if (TLI.isBigEndian()) diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 0e2bd02..621c087 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -116,7 +116,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { Ops.push_back(LegalizeOp(Node->getOperand(i))); SDValue Result = - DAG.UpdateNodeOperands(Op.getValue(0), Ops.data(), Ops.size()); + SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops.data(), Ops.size()), 0); bool HasVectorValue = false; for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end(); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 7efeea1..93aeff5 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -165,9 +165,10 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { assert(N->isUnindexed() && "Indexed vector load?"); - SDValue Result = DAG.getLoad(ISD::UNINDEXED, N->getDebugLoc(), + SDValue Result = DAG.getLoad(ISD::UNINDEXED, N->getExtensionType(), N->getValueType(0).getVectorElementType(), + N->getDebugLoc(), N->getChain(), N->getBasePtr(), DAG.getUNDEF(N->getBasePtr().getValueType()), N->getSrcValue(), N->getSrcValueOffset(), @@ -448,6 +449,11 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FLOG: + case ISD::FLOG2: + case ISD::FLOG10: SplitVecRes_UnaryOp(N, Lo, Hi); break; @@ -755,14 +761,14 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, EVT LoMemVT, HiMemVT; GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); - Lo = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, LoVT, Ch, Ptr, Offset, + Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, SV, SVOffset, LoMemVT, isVolatile, isNonTemporal, Alignment); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); SVOffset += IncrementSize; - Hi = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, HiVT, Ch, Ptr, Offset, + Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, SV, SVOffset, HiMemVT, isVolatile, isNonTemporal, Alignment); // Build a factor node to remember that this load is independent of the @@ -1082,10 +1088,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { uint64_t LoElts = Lo.getValueType().getVectorNumElements(); if (IdxVal < LoElts) - return DAG.UpdateNodeOperands(SDValue(N, 0), Lo, Idx); - return DAG.UpdateNodeOperands(SDValue(N, 0), Hi, + return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0); + return SDValue(DAG.UpdateNodeOperands(N, Hi, DAG.getConstant(IdxVal - LoElts, - Idx.getValueType())); + Idx.getValueType())), + 0); } // Store the vector to the stack. @@ -1099,7 +1106,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { // Load back the required element. 
StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); - return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, + return DAG.getExtLoad(ISD::EXTLOAD, N->getValueType(0), dl, Store, StackPtr, SV, 0, EltVT, false, false, 0); } @@ -1199,7 +1206,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FDIV: case ISD::FMUL: case ISD::FPOW: - case ISD::FPOWI: case ISD::FREM: case ISD::FSUB: case ISD::MUL: @@ -1215,6 +1221,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_Binary(N); break; + case ISD::FPOWI: + Res = WidenVecRes_POWI(N); + break; + case ISD::SHL: case ISD::SRA: case ISD::SRL: @@ -1241,6 +1251,11 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FNEG: case ISD::FSIN: case ISD::FSQRT: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FLOG: + case ISD::FLOG2: + case ISD::FLOG10: Res = WidenVecRes_Unary(N); break; } @@ -1258,7 +1273,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { EVT WidenEltVT = WidenVT.getVectorElementType(); EVT VT = WidenVT; unsigned NumElts = VT.getVectorNumElements(); - while (!TLI.isTypeLegal(VT) && NumElts != 1) { + while (!TLI.isTypeSynthesizable(VT) && NumElts != 1) { NumElts = NumElts / 2; VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); } @@ -1273,13 +1288,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); } else { // Since the operation can trap, apply operation on the original vector. + EVT MaxVT = VT; SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); unsigned CurNumElts = N->getValueType(0).getVectorNumElements(); SmallVector<SDValue, 16> ConcatOps(CurNumElts); unsigned ConcatEnd = 0; // Current ConcatOps index. - unsigned Idx = 0; // Current Idx into input vectors. + int Idx = 0; // Current Idx into input vectors. + + // NumElts := greatest synthesizable vector size (at most WidenVT) + // while (orig. vector has unhandled elements) { + // take munches of size NumElts from the beginning and add to ConcatOps + // NumElts := next smaller supported vector size or 1 + // } while (CurNumElts != 0) { while (CurNumElts >= NumElts) { SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, @@ -1290,26 +1312,21 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { Idx += NumElts; CurNumElts -= NumElts; } - EVT PrevVecVT = VT; do { NumElts = NumElts / 2; VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); - } while (!TLI.isTypeLegal(VT) && NumElts != 1); + } while (!TLI.isTypeSynthesizable(VT) && NumElts != 1); if (NumElts == 1) { - // Since we are using concat vector, build a vector from the scalar ops. 
- SDValue VecOp = DAG.getUNDEF(PrevVecVT); for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp1, DAG.getIntPtrConstant(Idx)); SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp2, DAG.getIntPtrConstant(Idx)); - VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, PrevVecVT, VecOp, - DAG.getNode(Opcode, dl, WidenEltVT, EOp1, EOp2), - DAG.getIntPtrConstant(i)); + ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, + EOp1, EOp2); } CurNumElts = 0; - ConcatOps[ConcatEnd++] = VecOp; } } @@ -1320,23 +1337,65 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { return ConcatOps[0]; } - // Rebuild vector to one with the widen type - Idx = ConcatEnd - 1; - while (Idx != 0) { + // while (Some element of ConcatOps is not of type MaxVT) { + // From the end of ConcatOps, collect elements of the same type and put + // them into an op of the next larger supported type + // } + while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) { + Idx = ConcatEnd - 1; VT = ConcatOps[Idx--].getValueType(); - while (Idx != 0 && ConcatOps[Idx].getValueType() == VT) - --Idx; - if (Idx != 0) { - VT = ConcatOps[Idx].getValueType(); - ConcatOps[Idx+1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, - &ConcatOps[Idx+1], ConcatEnd - Idx - 1); + while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT) + Idx--; + + int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1; + EVT NextVT; + do { + NextSize *= 2; + NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize); + } while (!TLI.isTypeSynthesizable(NextVT)); + + if (!VT.isVector()) { + // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT + SDValue VecOp = DAG.getUNDEF(NextVT); + unsigned NumToInsert = ConcatEnd - Idx - 1; + for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) { + VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, + ConcatOps[OpIdx], DAG.getIntPtrConstant(i)); + } + ConcatOps[Idx+1] = VecOp; ConcatEnd = Idx + 2; + } + else { + // Vector type, create a CONCAT_VECTORS of type NextVT + SDValue undefVec = DAG.getUNDEF(VT); + unsigned OpsToConcat = NextSize/VT.getVectorNumElements(); + SmallVector<SDValue, 16> SubConcatOps(OpsToConcat); + unsigned RealVals = ConcatEnd - Idx - 1; + unsigned SubConcatEnd = 0; + unsigned SubConcatIdx = Idx + 1; + while (SubConcatEnd < RealVals) + SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx]; + while (SubConcatEnd < OpsToConcat) + SubConcatOps[SubConcatEnd++] = undefVec; + ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl, + NextVT, &SubConcatOps[0], + OpsToConcat); + ConcatEnd = SubConcatIdx + 1; } } + + // Check to see if we have a single operation with the widen type. 
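The block comments above sketch WidenVecRes_Binary's trap-safe path in two phases: first munch the input into the largest subvectors the target supports, then (the while loop just added) merge the collected pieces back up toward the widened type. A toy model of the first phase, assuming every power-of-two width is supported where the real code asks TLI.isTypeSynthesizable:

    #include <vector>

    // Consume curNumElts elements in the largest available chunks,
    // halving the chunk size when the remainder is too small and
    // falling back to scalar ops for the last stragglers.
    std::vector<int> chunkElements(int curNumElts, int numElts) {
      std::vector<int> chunks;
      while (curNumElts != 0) {
        while (curNumElts >= numElts) {  // one EXTRACT_SUBVECTOR-sized munch
          chunks.push_back(numElts);
          curNumElts -= numElts;
        }
        while (numElts > 1 && numElts > curNumElts)
          numElts /= 2;                  // next smaller vector size
        if (numElts == 1) {              // leftovers become scalar ops
          for (; curNumElts > 0; --curNumElts)
            chunks.push_back(1);
        }
      }
      return chunks;
    }
    // chunkElements(7, 4) -> {4, 2, 1}: one <4 x T> op, one <2 x T> op and
    // one scalar op -- exactly the ConcatOps pieces that the merge phase
    // then pads with undef and concatenates back to WidenVT.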
+ if (ConcatEnd == 1) { + VT = ConcatOps[0].getValueType(); + if (VT == WidenVT) + return ConcatOps[0]; + } - unsigned NumOps = WidenVT.getVectorNumElements()/VT.getVectorNumElements(); + // add undefs of size MaxVT until ConcatOps grows to length of WidenVT + unsigned NumOps = + WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements(); if (NumOps != ConcatEnd ) { - SDValue UndefVal = DAG.getUNDEF(VT); + SDValue UndefVal = DAG.getUNDEF(MaxVT); for (unsigned j = ConcatEnd; j < NumOps; ++j) ConcatOps[j] = UndefVal; } @@ -1366,7 +1425,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { return DAG.getNode(Opcode, dl, WidenVT, InOp); } - if (TLI.isTypeLegal(InWidenVT)) { + if (TLI.isTypeSynthesizable(InWidenVT)) { // Because the result and the input are different vector types, widening // the result could create a legal type but widening the input might make // it an illegal type that might lead to repeatedly splitting the input @@ -1410,6 +1469,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); } +SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + SDValue ShOp = N->getOperand(1); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp); +} + SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); @@ -1501,7 +1567,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts); } - if (TLI.isTypeLegal(NewInVT)) { + if (TLI.isTypeSynthesizable(NewInVT)) { // Because the result and the input are different vector types, widening // the result could create a legal type but widening the input might make // it an illegal type that might lead to repeatedly splitting the input @@ -1642,7 +1708,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { SatOp, CvtCode); } - if (TLI.isTypeLegal(InWidenVT)) { + if (TLI.isTypeSynthesizable(InWidenVT)) { // Because the result and the input are different vector types, widening // the result could create a legal type but widening the input might make // it an illegal type that might lead to repeatedly splitting the input @@ -1968,7 +2034,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) { if (InWidenSize % Size == 0 && !VT.isVector()) { unsigned NewNumElts = InWidenSize / Size; EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts); - if (TLI.isTypeLegal(NewVT)) { + if (TLI.isTypeSynthesizable(NewVT)) { SDValue BitOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, InOp); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, DAG.getIntPtrConstant(0)); @@ -2066,7 +2132,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, unsigned MemVTWidth = MemVT.getSizeInBits(); if (MemVT.getSizeInBits() <= WidenEltWidth) break; - if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 && + if (TLI.isTypeSynthesizable(MemVT) && (WidenWidth % MemVTWidth) == 0 && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { RetVT = MemVT; @@ -2080,7 +2146,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) { EVT MemVT = (MVT::SimpleValueType) VT; unsigned 
MemVTWidth = MemVT.getSizeInBits(); - if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() && + if (TLI.isTypeSynthesizable(MemVT) && WidenEltVT == MemVT.getVectorElementType() && (WidenWidth % MemVTWidth) == 0 && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { @@ -2286,14 +2352,14 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, unsigned WidenNumElts = WidenVT.getVectorNumElements(); SmallVector<SDValue, 16> Ops(WidenNumElts); unsigned Increment = LdEltVT.getSizeInBits() / 8; - Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, SV, SVOffset, + Ops[0] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, BasePtr, SV, SVOffset, LdEltVT, isVolatile, isNonTemporal, Align); LdChain.push_back(Ops[0].getValue(1)); unsigned i = 0, Offset = Increment; for (i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getIntPtrConstant(Offset)); - Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, SV, + Ops[i] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, NewBasePtr, SV, SVOffset + Offset, LdEltVT, isVolatile, isNonTemporal, Align); LdChain.push_back(Ops[i].getValue(1)); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index ad8630a..3b86c32 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -535,7 +535,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() { SUnit *LRDef = LiveRegDefs[Reg]; EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); const TargetRegisterClass *RC = - TRI->getPhysicalRegisterRegClass(Reg, VT); + TRI->getMinimalPhysRegClass(Reg, VT); const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); // If cross copy register class is null, then it must be possible copy diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 820ba66..3ef521c 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -320,7 +320,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { CapturePred(&*I); - if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) { + if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]){ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); assert(LiveRegDefs[I->getReg()] == I->getSUnit() && "Physical register dependency violated?"); @@ -795,7 +795,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { SUnit *LRDef = LiveRegDefs[Reg]; EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); const TargetRegisterClass *RC = - TRI->getPhysicalRegisterRegClass(Reg, VT); + TRI->getMinimalPhysRegClass(Reg, VT); const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); // If cross copy register class is null, then it must be possible copy @@ -1116,7 +1116,7 @@ namespace { SUnit *pop() { if (empty()) return NULL; std::vector<SUnit *>::iterator Best = Queue.begin(); - for (std::vector<SUnit *>::iterator I = next(Queue.begin()), + for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()), E = Queue.end(); I != E; ++I) if (Picker(*Best, *I)) Best = I; @@ -1275,6 +1275,17 @@ bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{ return left->getHeight() > right->getHeight(); } else if 
(RStall) return false; + + // If either node is scheduling for latency, sort them by height and latency + // first. + if (left->SchedulingPref == Sched::Latency || + right->SchedulingPref == Sched::Latency) { + if (left->getHeight() != right->getHeight()) + return left->getHeight() > right->getHeight(); + if (left->Latency != right->Latency) + return left->Latency > right->Latency; + } + return BURRSort(left, right, SPQ); } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 3185c88..06cf053 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -59,7 +59,11 @@ SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) { SUnits.back().OrigNode = &SUnits.back(); SUnit *SU = &SUnits.back(); const TargetLowering &TLI = DAG->getTargetLoweringInfo(); - SU->SchedulingPref = TLI.getSchedulingPreference(N); + if (N->isMachineOpcode() && + N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) + SU->SchedulingPref = Sched::None; + else + SU->SchedulingPref = TLI.getSchedulingPreference(N); return SU; } @@ -97,7 +101,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) { PhysReg = Reg; const TargetRegisterClass *RC = - TRI->getPhysicalRegisterRegClass(Reg, Def->getValueType(ResNo)); + TRI->getMinimalPhysRegClass(Reg, Def->getValueType(ResNo)); Cost = RC->getCopyCost(); } } @@ -106,17 +110,42 @@ static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag, SelectionDAG *DAG) { SmallVector<EVT, 4> VTs; - for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) - VTs.push_back(N->getValueType(i)); + SDNode *FlagDestNode = Flag.getNode(); + + // Don't add a flag from a node to itself. + if (FlagDestNode == N) return; + + // Don't add a flag to something which already has a flag. + if (N->getValueType(N->getNumValues() - 1) == MVT::Flag) return; + + for (unsigned I = 0, E = N->getNumValues(); I != E; ++I) + VTs.push_back(N->getValueType(I)); + + if (AddFlag) VTs.push_back(MVT::Flag); + SmallVector<SDValue, 4> Ops; - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - Ops.push_back(N->getOperand(i)); - if (Flag.getNode()) + for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I) + Ops.push_back(N->getOperand(I)); + + if (FlagDestNode) Ops.push_back(Flag); + SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size()); + MachineSDNode::mmo_iterator Begin = 0, End = 0; + MachineSDNode *MN = dyn_cast<MachineSDNode>(N); + + // Store memory references. + if (MN) { + Begin = MN->memoperands_begin(); + End = MN->memoperands_end(); + } + DAG->MorphNodeTo(N, N->getOpcode(), VTList, &Ops[0], Ops.size()); + + // Reset the memory references + if (MN) + MN->setMemRefs(Begin, End); } /// ClusterNeighboringLoads - Force nearby loads together by "flagging" them. @@ -124,98 +153,98 @@ static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag, /// offsets are not far apart (target specific), it adds MVT::Flag inputs and /// outputs to ensure they are scheduled together and in order. This /// optimization may benefit some targets by improving cache locality.
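The doc comment above states the contract; the rewrite that follows turns it into a per-candidate routine. Its selection step is easiest to see with plain containers (toy types; maxDist stands in for the per-load TII->shouldScheduleLoadsNear query):

    #include <cstdint>
    #include <map>
    #include <vector>

    struct LoadNode { int64_t offset; };  // stand-in for a machine load SDNode

    // Offsets arrive bucketed per base pointer; keep only the prefix of
    // the sorted offsets that stays "near" the lowest one (a std::map is
    // used here so the offsets come out already sorted).
    std::vector<LoadNode*> pickCluster(const std::map<int64_t, LoadNode*> &o2s,
                                       int64_t maxDist) {
      std::vector<LoadNode*> loads;
      if (o2s.empty())
        return loads;
      int64_t baseOff = o2s.begin()->first;  // smallest offset is the base
      for (std::map<int64_t, LoadNode*>::const_iterator I = o2s.begin(),
           E = o2s.end(); I != E; ++I) {
        if (I->first - baseOff > maxDist)
          break;                             // stop: ignore farther loads
        loads.push_back(I->second);
      }
      if (loads.size() < 2)
        loads.clear();                       // a cluster of one is no cluster
      return loads;
    }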
-void ScheduleDAGSDNodes::ClusterNeighboringLoads() { +void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { + SDNode *Chain = 0; + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Other) + Chain = Node->getOperand(NumOps-1).getNode(); + if (!Chain) + return; + + // Look for other loads of the same chain. Find loads that are loading from + // the same base pointer and different offsets. SmallPtrSet<SDNode*, 16> Visited; SmallVector<int64_t, 4> Offsets; DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode. - for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(), - E = DAG->allnodes_end(); NI != E; ++NI) { - SDNode *Node = &*NI; - if (!Node || !Node->isMachineOpcode()) + bool Cluster = false; + SDNode *Base = Node; + for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end(); + I != E; ++I) { + SDNode *User = *I; + if (User == Node || !Visited.insert(User)) continue; - - unsigned Opc = Node->getMachineOpcode(); - const TargetInstrDesc &TID = TII->get(Opc); - if (!TID.mayLoad()) + int64_t Offset1, Offset2; + if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) || + Offset1 == Offset2) + // FIXME: Should be ok if the addresses are identical. But earlier + // optimizations really should have eliminated one of the loads. continue; + if (O2SMap.insert(std::make_pair(Offset1, Base)).second) + Offsets.push_back(Offset1); + O2SMap.insert(std::make_pair(Offset2, User)); + Offsets.push_back(Offset2); + if (Offset2 < Offset1) + Base = User; + Cluster = true; + } - SDNode *Chain = 0; - unsigned NumOps = Node->getNumOperands(); - if (Node->getOperand(NumOps-1).getValueType() == MVT::Other) - Chain = Node->getOperand(NumOps-1).getNode(); - if (!Chain) - continue; + if (!Cluster) + return; - // Look for other loads of the same chain. Find loads that are loading from - // the same base pointer and different offsets. - Visited.clear(); - Offsets.clear(); - O2SMap.clear(); - bool Cluster = false; - SDNode *Base = Node; - int64_t BaseOffset; - for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end(); - I != E; ++I) { - SDNode *User = *I; - if (User == Node || !Visited.insert(User)) - continue; - int64_t Offset1, Offset2; - if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) || - Offset1 == Offset2) - // FIXME: Should be ok if they addresses are identical. But earlier - // optimizations really should have eliminated one of the loads. - continue; - if (O2SMap.insert(std::make_pair(Offset1, Base)).second) - Offsets.push_back(Offset1); - O2SMap.insert(std::make_pair(Offset2, User)); - Offsets.push_back(Offset2); - if (Offset2 < Offset1) { - Base = User; - BaseOffset = Offset2; - } else { - BaseOffset = Offset1; - } - Cluster = true; - } + // Sort them in increasing order. + std::sort(Offsets.begin(), Offsets.end()); + + // Check if the loads are close enough. + SmallVector<SDNode*, 4> Loads; + unsigned NumLoads = 0; + int64_t BaseOff = Offsets[0]; + SDNode *BaseLoad = O2SMap[BaseOff]; + Loads.push_back(BaseLoad); + for (unsigned i = 1, e = Offsets.size(); i != e; ++i) { + int64_t Offset = Offsets[i]; + SDNode *Load = O2SMap[Offset]; + if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,NumLoads)) + break; // Stop right here. Ignore loads that are further away. + Loads.push_back(Load); + ++NumLoads; + } - if (!Cluster) - continue; + if (NumLoads == 0) + return; - // Sort them in increasing order.
- std::sort(Offsets.begin(), Offsets.end()); - - // Check if the loads are close enough. - SmallVector<SDNode*, 4> Loads; - unsigned NumLoads = 0; - int64_t BaseOff = Offsets[0]; - SDNode *BaseLoad = O2SMap[BaseOff]; - Loads.push_back(BaseLoad); - for (unsigned i = 1, e = Offsets.size(); i != e; ++i) { - int64_t Offset = Offsets[i]; - SDNode *Load = O2SMap[Offset]; - if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset, - NumLoads)) - break; // Stop right here. Ignore loads that are further away. - Loads.push_back(Load); - ++NumLoads; - } + // Cluster loads by adding MVT::Flag outputs and inputs. This also + // ensures they are scheduled in order of increasing addresses. + SDNode *Lead = Loads[0]; + AddFlags(Lead, SDValue(0, 0), true, DAG); + + SDValue InFlag = SDValue(Lead, Lead->getNumValues() - 1); + for (unsigned I = 1, E = Loads.size(); I != E; ++I) { + bool OutFlag = I < E - 1; + SDNode *Load = Loads[I]; + + AddFlags(Load, InFlag, OutFlag, DAG); + + if (OutFlag) + InFlag = SDValue(Load, Load->getNumValues() - 1); + + ++LoadsClustered; + } +} - if (NumLoads == 0) +/// ClusterNodes - Cluster certain nodes which should be scheduled together. +/// +void ScheduleDAGSDNodes::ClusterNodes() { + for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(), + E = DAG->allnodes_end(); NI != E; ++NI) { + SDNode *Node = &*NI; + if (!Node || !Node->isMachineOpcode()) continue; - // Cluster loads by adding MVT::Flag outputs and inputs. This also - // ensure they are scheduled in order of increasing addresses. - SDNode *Lead = Loads[0]; - AddFlags(Lead, SDValue(0,0), true, DAG); - SDValue InFlag = SDValue(Lead, Lead->getNumValues()-1); - for (unsigned i = 1, e = Loads.size(); i != e; ++i) { - bool OutFlag = i < e-1; - SDNode *Load = Loads[i]; - AddFlags(Load, InFlag, OutFlag, DAG); - if (OutFlag) - InFlag = SDValue(Load, Load->getNumValues()-1); - ++LoadsClustered; - } + unsigned Opc = Node->getMachineOpcode(); + const TargetInstrDesc &TID = TII->get(Opc); + if (TID.mayLoad()) + // Cluster loads from "near" addresses into combined SUnits. + ClusterNeighboringLoads(Node); } } @@ -364,8 +393,10 @@ void ScheduleDAGSDNodes::AddSchedEdges() { if (Cost >= 0) PhysReg = 0; - const SDep& dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, - OpSU->Latency, PhysReg); + // If this is a ctrl dep, latency is 1. + unsigned OpLatency = isChain ? 1 : OpSU->Latency; + const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, + OpLatency, PhysReg); if (!isChain && !UnitLatencies) { ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep)); ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep)); @@ -382,8 +413,8 @@ void ScheduleDAGSDNodes::AddSchedEdges() { /// excludes nodes that aren't interesting to scheduling, and represents /// flagged together nodes with a single SUnit. void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) { - // Cluster loads from "near" addresses into combined SUnits. - ClusterNeighboringLoads(); + // Cluster certain nodes which should be scheduled together. + ClusterNodes(); // Populate the SUnits array. BuildSchedUnits(); // Compute all the scheduling dependencies between nodes.
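Once a cluster is picked, the AddFlags calls above weld it into a flag chain: the lead load grows an MVT::Flag output, and each later load consumes its predecessor's flag, producing one of its own unless it is last. A toy rendering of that wiring (hypothetical Node type; the real code guarantees at least two loads reach this point):

    #include <cstddef>
    #include <vector>

    struct Node {
      Node *inFlag;   // flag input, if any
      bool outFlag;   // whether the node produces a flag result
      Node() : inFlag(0), outFlag(false) {}
    };

    // Chain loads[0] -> loads[1] -> ... so the scheduler must emit them
    // adjacently, in increasing-address order.
    void wireFlagChain(std::vector<Node*> &loads) {
      if (loads.empty())
        return;
      loads[0]->outFlag = true;                      // AddFlags(Lead, ..., true)
      for (std::size_t i = 1; i < loads.size(); ++i) {
        loads[i]->inFlag = loads[i - 1];             // consume predecessor flag
        loads[i]->outFlag = (i + 1 < loads.size());  // last load: no out-flag
      }
    }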
@@ -427,15 +458,18 @@ void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use, return; unsigned DefIdx = Use->getOperand(OpIdx).getResNo(); - if (Def->isMachineOpcode() && Use->isMachineOpcode()) { + if (Def->isMachineOpcode()) { const TargetInstrDesc &II = TII->get(Def->getMachineOpcode()); if (DefIdx >= II.getNumDefs()) return; int DefCycle = InstrItins.getOperandCycle(II.getSchedClass(), DefIdx); if (DefCycle < 0) return; - const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass(); - int UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx); + int UseCycle = 1; + if (Use->isMachineOpcode()) { + const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass(); + UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx); + } if (UseCycle >= 0) { int Latency = DefCycle - UseCycle + 1; if (Latency >= 0) @@ -473,7 +507,7 @@ namespace { } // ProcessSourceNode - Process nodes with source order numbers. These are added -// to a vector which EmitSchedule use to determine how to insert dbg_value +// to a vector which EmitSchedule uses to determine how to insert dbg_value // instructions in the right order. static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, @@ -485,13 +519,13 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, return; MachineBasicBlock *BB = Emitter.getBlock(); - if (BB->empty() || BB->back().isPHI()) { + if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) { // Did not insert any instruction. Orders.push_back(std::make_pair(Order, (MachineInstr*)0)); return; } - Orders.push_back(std::make_pair(Order, &BB->back())); + Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos()))); if (!N->getHasDebugValue()) return; // Opportunistically insert immediate dbg_value uses, i.e. those with source @@ -530,7 +564,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { for (; PDI != PDE; ++PDI) { MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap); if (DbgMI) - BB->insert(BB->end(), DbgMI); + BB->insert(InsertPos, DbgMI); } } @@ -574,9 +608,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { // Insert all the dbg_values which have not already been inserted in source // order sequence. if (HasDbg) { - MachineBasicBlock::iterator BBBegin = BB->empty() ? BB->end() : BB->begin(); - while (BBBegin != BB->end() && BBBegin->isPHI()) - ++BBBegin; + MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI(); // Sort the source order instructions and use the order to insert debug // values. @@ -586,14 +618,12 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { SDDbgInfo::DbgIterator DE = DAG->DbgEnd(); // Now emit the rest according to source order. unsigned LastOrder = 0; - MachineInstr *LastMI = 0; for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) { unsigned Order = Orders[i].first; MachineInstr *MI = Orders[i].second; // Insert all SDDbgValue's whose order(s) are before "Order". if (!MI) continue; - MachineBasicBlock *MIBB = MI->getParent(); #ifndef NDEBUG unsigned LastDIOrder = 0; #endif @@ -612,13 +642,14 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { // Insert to start of the BB (after PHIs). BB->insert(BBBegin, DbgMI); else { + // Insert at the instruction, which may be in a different + // block, if the block was split by a custom inserter. 
MachineBasicBlock::iterator Pos = MI; - MIBB->insert(llvm::next(Pos), DbgMI); + MI->getParent()->insert(llvm::next(Pos), DbgMI); } } } LastOrder = Order; - LastMI = MI; } // Add trailing DbgValue's before the terminator. FIXME: May want to add // some of them before one or more conditional branches? diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index e8714ba..842fc8c 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -108,7 +108,10 @@ namespace llvm { private: /// ClusterNeighboringLoads - Cluster loads from "near" addresses into /// combined SUnits. - void ClusterNeighboringLoads(); + void ClusterNeighboringLoads(SDNode *Node); + /// ClusterNodes - Cluster certain nodes which should be scheduled together. + /// + void ClusterNodes(); /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph. void BuildSchedUnits(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 38bf68b..e83a034 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -790,9 +790,8 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { } // EntryNode could meaningfully have debug info if we can find it... -SelectionDAG::SelectionDAG(const TargetMachine &tm, FunctionLoweringInfo &fli) +SelectionDAG::SelectionDAG(const TargetMachine &tm) : TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()), - FLI(fli), EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)), Root(getEntryNode()), Ordering(0) { AllNodes.push_back(&EntryNode); @@ -808,7 +807,6 @@ void SelectionDAG::init(MachineFunction &mf) { SelectionDAG::~SelectionDAG() { allnodes_clear(); delete Ordering; - DbgInfo->clear(); delete DbgInfo; } @@ -835,11 +833,8 @@ void SelectionDAG::clear() { EntryNode.UseList = 0; AllNodes.push_back(&EntryNode); Root = getEntryNode(); - delete Ordering; - Ordering = new SDNodeOrdering(); + Ordering->clear(); DbgInfo->clear(); - delete DbgInfo; - DbgInfo = new SDDbgInfo(); } SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { @@ -980,7 +975,7 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { } } -SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, +SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL, EVT VT, int64_t Offset, bool isTargetGA, unsigned char TargetFlags) { @@ -1015,7 +1010,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, GV, VT, + SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, DL, GV, VT, Offset, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); @@ -2291,7 +2286,6 @@ bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const { SDValue SelectionDAG::getShuffleScalarElt(const ShuffleVectorSDNode *N, unsigned i) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); if (N->getMaskElt(i) < 0) return getUNDEF(VT.getVectorElementType()); unsigned Index = N->getMaskElt(i); @@ -2475,9 +2469,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, VT.getVectorNumElements() == Operand.getValueType().getVectorNumElements()) && "Vector element count mismatch!"); - if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND) + + if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || + 
OpOpcode == ISD::ANY_EXTEND) // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x) return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); + + // (ext (trunc x)) -> x + if (OpOpcode == ISD::TRUNCATE) { + SDValue OpOp = Operand.getNode()->getOperand(0); + if (OpOp.getValueType() == VT) + return OpOp; + } break; case ISD::TRUNCATE: assert(VT.isInteger() && Operand.getValueType().isInteger() && @@ -2622,7 +2625,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, if (N1.getOpcode() == ISD::BUILD_VECTOR && N2.getOpcode() == ISD::BUILD_VECTOR) { SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end()); - Elts.insert(Elts.end(), N2.getNode()->op_begin(), N2.getNode()->op_end()); + Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end()); return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size()); } break; @@ -3011,7 +3014,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3) { // Perform various simplifications. ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); - ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); switch (Opcode) { case ISD::CONCAT_VECTORS: // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to // one big BUILD_VECTOR. if (N1.getOpcode() == ISD::BUILD_VECTOR && N2.getOpcode() == ISD::BUILD_VECTOR && N3.getOpcode() == ISD::BUILD_VECTOR) { SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end()); - Elts.insert(Elts.end(), N2.getNode()->op_begin(), N2.getNode()->op_end()); - Elts.insert(Elts.end(), N3.getNode()->op_begin(), N3.getNode()->op_end()); + Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end()); + Elts.append(N3.getNode()->op_begin(), N3.getNode()->op_end()); return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size()); } break; @@ -3041,14 +3043,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, if (N2 == N3) return N2; // select C, X, X -> X break; - case ISD::BRCOND: - if (N2C) { - if (N2C->getZExtValue()) // Unconditional branch - return getNode(ISD::BR, DL, MVT::Other, N1, N3); - else - return N1; // Never-taken branch - } - break; case ISD::VECTOR_SHUFFLE: llvm_unreachable("should use getVectorShuffle constructor!"); break; @@ -3267,6 +3261,15 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, if (VT.bitsGT(LVT)) VT = LVT; } + + // If we're optimizing for size, and there is a limit, bump the maximum number + // of operations inserted down to 4. This is a wild guess that approximates + // the size of a call to memcpy or memset (3 arguments + call). + if (Limit != ~0U) { + const Function *F = DAG.getMachineFunction().getFunction(); + if (F->hasFnAttr(Attribute::OptimizeForSize)) + Limit = 4; + } unsigned NumMemOps = 0; while (Size != 0) { @@ -3321,9 +3324,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, std::string Str; bool CopyFromStr = isMemSrcFromString(Src, Str); bool isZeroStr = CopyFromStr && Str.empty(); - uint64_t Limit = -1ULL; - if (!AlwaysInline) - Limit = TLI.getMaxStoresPerMemcpy(); + unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(); + if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), (isZeroStr ? 0 : SrcAlign), @@ -3368,7 +3370,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, // FIXME does the case above also need this?
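The FindOptimalMemOpLowering hunk above gates inline memcpy/memmove expansion on the function's size goals. The budget it computes, isolated into one function (a sketch; the names are ours):

    // Inline-expansion budget as the patch computes it: unlimited when the
    // copy must be inlined, the target's cap otherwise, and clamped to 4
    // stores under optsize -- a rough proxy for the size of a libcall
    // (3 arguments plus the call itself).
    unsigned memOpLimit(bool alwaysInline, unsigned targetMax, bool optForSize) {
      unsigned limit = alwaysInline ? ~0u : targetMax;
      if (limit != ~0u && optForSize)
        limit = 4;
      return limit;
    }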
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); assert(NVT.bitsGE(VT)); - Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, + Value = DAG.getExtLoad(ISD::EXTLOAD, NVT, dl, Chain, getMemBasePlusOffset(Src, SrcOff, DAG), SrcSV, SrcSVOff + SrcOff, VT, isVol, false, MinAlign(SrcAlign, SrcOff)); @@ -3401,9 +3403,6 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, // below a certain threshold. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); std::vector<EVT> MemOps; - uint64_t Limit = -1ULL; - if (!AlwaysInline) - Limit = TLI.getMaxStoresPerMemmove(); bool DstAlignCanChange = false; MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); @@ -3412,6 +3411,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, unsigned SrcAlign = DAG.InferPtrAlignment(Src); if (Align > SrcAlign) SrcAlign = Align; + unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(); if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), @@ -3895,8 +3895,8 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, } SDValue -SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, - ISD::LoadExtType ExtType, EVT VT, SDValue Chain, +SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, + EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, SDValue Offset, const Value *SV, int SVOffset, EVT MemVT, bool isVolatile, bool isNonTemporal, @@ -3919,12 +3919,12 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, MachineMemOperand *MMO = MF.getMachineMemOperand(SV, Flags, SVOffset, MemVT.getStoreSize(), Alignment); - return getLoad(AM, dl, ExtType, VT, Chain, Ptr, Offset, MemVT, MMO); + return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO); } SDValue -SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, - ISD::LoadExtType ExtType, EVT VT, SDValue Chain, +SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, + EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, SDValue Offset, EVT MemVT, MachineMemOperand *MMO) { if (VT == MemVT) { @@ -3974,18 +3974,18 @@ SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl, bool isVolatile, bool isNonTemporal, unsigned Alignment) { SDValue Undef = getUNDEF(Ptr.getValueType()); - return getLoad(ISD::UNINDEXED, dl, ISD::NON_EXTLOAD, VT, Chain, Ptr, Undef, + return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, SV, SVOffset, VT, isVolatile, isNonTemporal, Alignment); } -SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, +SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, const Value *SV, int SVOffset, EVT MemVT, bool isVolatile, bool isNonTemporal, unsigned Alignment) { SDValue Undef = getUNDEF(Ptr.getValueType()); - return getLoad(ISD::UNINDEXED, dl, ExtType, VT, Chain, Ptr, Undef, + return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, SV, SVOffset, MemVT, isVolatile, isNonTemporal, Alignment); } @@ -3995,7 +3995,7 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, LoadSDNode *LD = cast<LoadSDNode>(OrigLoad); assert(LD->getOffset().getOpcode() == ISD::UNDEF && "Load is already a indexed load!"); - return getLoad(AM, dl, LD->getExtensionType(), OrigLoad.getValueType(), + return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl, LD->getChain(), Base, Offset, LD->getSrcValue(), 
LD->getSrcValueOffset(), LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); @@ -4141,9 +4141,10 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base, SDValue SelectionDAG::getVAArg(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, - SDValue SV) { - SDValue Ops[] = { Chain, Ptr, SV }; - return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 3); + SDValue SV, + unsigned Align) { + SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, MVT::i32) }; + return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 4); } SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, @@ -4425,17 +4426,16 @@ SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) { /// already exists. If the resultant node does not exist in the DAG, the /// input node is returned. As a degenerate case, if you specify the same /// input operands as the node already has, the input node is returned. -SDValue SelectionDAG::UpdateNodeOperands(SDValue InN, SDValue Op) { - SDNode *N = InN.getNode(); +SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) { assert(N->getNumOperands() == 1 && "Update with wrong number of operands"); // Check to see if there is no change. - if (Op == N->getOperand(0)) return InN; + if (Op == N->getOperand(0)) return N; // See if the modified node already exists. void *InsertPos = 0; if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos)) - return SDValue(Existing, InN.getResNo()); + return Existing; // Nope it doesn't. Remove the node from its current place in the maps. if (InsertPos) @@ -4447,22 +4447,20 @@ SDValue SelectionDAG::UpdateNodeOperands(SDValue InN, SDValue Op) { // If this gets put into a CSE map, add it. if (InsertPos) CSEMap.InsertNode(N, InsertPos); - return InN; + return N; } -SDValue SelectionDAG:: -UpdateNodeOperands(SDValue InN, SDValue Op1, SDValue Op2) { - SDNode *N = InN.getNode(); +SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) { assert(N->getNumOperands() == 2 && "Update with wrong number of operands"); // Check to see if there is no change. if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1)) - return InN; // No operands changed, just return the input node. + return N; // No operands changed, just return the input node. // See if the modified node already exists. void *InsertPos = 0; if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos)) - return SDValue(Existing, InN.getResNo()); + return Existing; // Nope it doesn't. Remove the node from its current place in the maps. if (InsertPos) @@ -4477,32 +4475,31 @@ UpdateNodeOperands(SDValue InN, SDValue Op1, SDValue Op2) { // If this gets put into a CSE map, add it. 
if (InsertPos) CSEMap.InsertNode(N, InsertPos); - return InN; + return N; } -SDValue SelectionDAG:: -UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, SDValue Op3) { +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3) { SDValue Ops[] = { Op1, Op2, Op3 }; return UpdateNodeOperands(N, Ops, 3); } -SDValue SelectionDAG:: -UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3, SDValue Op4) { SDValue Ops[] = { Op1, Op2, Op3, Op4 }; return UpdateNodeOperands(N, Ops, 4); } -SDValue SelectionDAG:: -UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3, SDValue Op4, SDValue Op5) { SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 }; return UpdateNodeOperands(N, Ops, 5); } -SDValue SelectionDAG:: -UpdateNodeOperands(SDValue InN, const SDValue *Ops, unsigned NumOps) { - SDNode *N = InN.getNode(); +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, const SDValue *Ops, unsigned NumOps) { assert(N->getNumOperands() == NumOps && "Update with wrong number of operands"); @@ -4516,12 +4513,12 @@ UpdateNodeOperands(SDValue InN, const SDValue *Ops, unsigned NumOps) { } // No operands changed, just return the input node. - if (!AnyChange) return InN; + if (!AnyChange) return N; // See if the modified node already exists. void *InsertPos = 0; if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, NumOps, InsertPos)) - return SDValue(Existing, InN.getResNo()); + return Existing; // Nope it doesn't. Remove the node from its current place in the maps. if (InsertPos) @@ -4535,7 +4532,7 @@ UpdateNodeOperands(SDValue InN, const SDValue *Ops, unsigned NumOps) { // If this gets put into a CSE map, add it. 
if (InsertPos) CSEMap.InsertNode(N, InsertPos); - return InN; + return N; } /// DropOperands - Release the operands and set this node to have @@ -5378,9 +5375,10 @@ HandleSDNode::~HandleSDNode() { DropOperands(); } -GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, const GlobalValue *GA, +GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, DebugLoc DL, + const GlobalValue *GA, EVT VT, int64_t o, unsigned char TF) - : SDNode(Opc, DebugLoc(), getSDVTList(VT)), Offset(o), TargetFlags(TF) { + : SDNode(Opc, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) { TheGlobal = GA; } @@ -5669,13 +5667,16 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FSQRT: return "fsqrt"; case ISD::FSIN: return "fsin"; case ISD::FCOS: return "fcos"; - case ISD::FPOWI: return "fpowi"; - case ISD::FPOW: return "fpow"; case ISD::FTRUNC: return "ftrunc"; case ISD::FFLOOR: return "ffloor"; case ISD::FCEIL: return "fceil"; case ISD::FRINT: return "frint"; case ISD::FNEARBYINT: return "fnearbyint"; + case ISD::FEXP: return "fexp"; + case ISD::FEXP2: return "fexp2"; + case ISD::FLOG: return "flog"; + case ISD::FLOG2: return "flog2"; + case ISD::FLOG10: return "flog10"; // Binary operators case ISD::ADD: return "add"; @@ -5706,7 +5707,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FREM: return "frem"; case ISD::FCOPYSIGN: return "fcopysign"; case ISD::FGETSIGN: return "fgetsign"; + case ISD::FPOW: return "fpow"; + case ISD::FPOWI: return "fpowi"; case ISD::SETCC: return "setcc"; case ISD::VSETCC: return "vsetcc"; case ISD::SELECT: return "select"; @@ -6260,23 +6263,6 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo(); unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx), FrameOffset); - if (MFI.isFixedObjectIndex(FrameIdx)) { - int64_t ObjectOffset = MFI.getObjectOffset(FrameIdx) + FrameOffset; - - // The alignment of the frame index can be determined from its offset from - // the incoming frame position. If the frame object is at offset 32 and - // the stack is guaranteed to be 16-byte aligned, then we know that the - // object is 16-byte aligned. - unsigned StackAlign = getTarget().getFrameInfo()->getStackAlignment(); - unsigned Align = MinAlign(ObjectOffset, StackAlign); - - // Finally, the frame object itself may have a known alignment. Factor - // the alignment + offset into a new alignment. For example, if we know - // the FI is 8 byte aligned, but the pointer is 4 off, we really have a - // 4-byte alignment of the resultant pointer. Likewise align 4 + 4-byte - // offset = 4-byte alignment, align 4 + 1-byte offset = align 1, etc. 
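// (Worked example of the inference being deleted here, assuming a 16-byte
// stack alignment: a fixed object at incoming offset 40 would have yielded
//   unsigned Align = MinAlign(/*ObjectOffset=*/40, /*StackAlign=*/16); // 8
// i.e. the largest power of two dividing both quantities. After this patch
// only FIInfoAlign is returned, for fixed and non-fixed objects alike.)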
- return std::max(Align, FIInfoAlign); - } return FIInfoAlign; } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index fbe601f..d323c16 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -14,7 +14,6 @@ #define DEBUG_TYPE "isel" #include "SDNodeDbgValue.h" #include "SelectionDAGBuilder.h" -#include "FunctionLoweringInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -32,6 +31,7 @@ #include "llvm/Module.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/MachineFunction.h" @@ -70,113 +70,6 @@ LimitFPPrecision("limit-float-precision", cl::location(LimitFloatPrecision), cl::init(0)); -namespace { - /// RegsForValue - This struct represents the registers (physical or virtual) - /// that a particular set of values is assigned, and the type information - /// about the value. The most common situation is to represent one value at a - /// time, but struct or array values are handled element-wise as multiple - /// values. The splitting of aggregates is performed recursively, so that we - /// never have aggregate-typed registers. The values at this point do not - /// necessarily have legal types, so each value may require one or more - /// registers of some legal type. - /// - struct RegsForValue { - /// TLI - The TargetLowering object. - /// - const TargetLowering *TLI; - - /// ValueVTs - The value types of the values, which may not be legal, and - /// may need be promoted or synthesized from one or more registers. - /// - SmallVector<EVT, 4> ValueVTs; - - /// RegVTs - The value types of the registers. This is the same size as - /// ValueVTs and it records, for each value, what the type of the assigned - /// register or registers are. (Individual values are never synthesized - /// from more than one type of register.) - /// - /// With virtual registers, the contents of RegVTs is redundant with TLI's - /// getRegisterType member function, however when with physical registers - /// it is necessary to have a separate record of the types. - /// - SmallVector<EVT, 4> RegVTs; - - /// Regs - This list holds the registers assigned to the values. - /// Each legal or promoted value requires one register, and each - /// expanded value requires multiple registers. 
- /// - SmallVector<unsigned, 4> Regs; - - RegsForValue() : TLI(0) {} - - RegsForValue(const TargetLowering &tli, - const SmallVector<unsigned, 4> &regs, - EVT regvt, EVT valuevt) - : TLI(&tli), ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} - RegsForValue(const TargetLowering &tli, - const SmallVector<unsigned, 4> &regs, - const SmallVector<EVT, 4> &regvts, - const SmallVector<EVT, 4> &valuevts) - : TLI(&tli), ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {} - RegsForValue(LLVMContext &Context, const TargetLowering &tli, - unsigned Reg, const Type *Ty) : TLI(&tli) { - ComputeValueVTs(tli, Ty, ValueVTs); - - for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT ValueVT = ValueVTs[Value]; - unsigned NumRegs = TLI->getNumRegisters(Context, ValueVT); - EVT RegisterVT = TLI->getRegisterType(Context, ValueVT); - for (unsigned i = 0; i != NumRegs; ++i) - Regs.push_back(Reg + i); - RegVTs.push_back(RegisterVT); - Reg += NumRegs; - } - } - - /// areValueTypesLegal - Return true if types of all the values are legal. - bool areValueTypesLegal() { - for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT RegisterVT = RegVTs[Value]; - if (!TLI->isTypeLegal(RegisterVT)) - return false; - } - return true; - } - - - /// append - Add the specified values to this one. - void append(const RegsForValue &RHS) { - TLI = RHS.TLI; - ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); - RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end()); - Regs.append(RHS.Regs.begin(), RHS.Regs.end()); - } - - - /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from - /// this value and returns the result as a ValueVTs value. This uses - /// Chain/Flag as the input and updates them for the output Chain/Flag. - /// If the Flag pointer is NULL, no flag is used. - SDValue getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const; - - /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the - /// specified value into the registers specified by this object. This uses - /// Chain/Flag as the input and updates them for the output Chain/Flag. - /// If the Flag pointer is NULL, no flag is used. - void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const; - - /// AddInlineAsmOperands - Add this value to the specified inlineasm node - /// operand list. This adds the code marker, matching input operand index - /// (if applicable), and includes the number of values added into it. - void AddInlineAsmOperands(unsigned Kind, - bool HasMatching, unsigned MatchingIdx, - SelectionDAG &DAG, - std::vector<SDValue> &Ops) const; - }; -} - /// getCopyFromParts - Create a value that contains the specified legal parts /// combined into the value they represent. If the parts combine to a type /// larger then ValueVT then AssertOp can be used to specify whether the extra @@ -528,6 +421,268 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, } } +namespace { + /// RegsForValue - This struct represents the registers (physical or virtual) + /// that a particular set of values is assigned, and the type information + /// about the value. The most common situation is to represent one value at a + /// time, but struct or array values are handled element-wise as multiple + /// values. The splitting of aggregates is performed recursively, so that we + /// never have aggregate-typed registers.
The values at this point do not + /// necessarily have legal types, so each value may require one or more + /// registers of some legal type. + /// + struct RegsForValue { + /// ValueVTs - The value types of the values, which may not be legal, and + /// may need be promoted or synthesized from one or more registers. + /// + SmallVector<EVT, 4> ValueVTs; + + /// RegVTs - The value types of the registers. This is the same size as + /// ValueVTs and it records, for each value, what the type of the assigned + /// register or registers are. (Individual values are never synthesized + /// from more than one type of register.) + /// + /// With virtual registers, the contents of RegVTs is redundant with TLI's + /// getRegisterType member function, however when with physical registers + /// it is necessary to have a separate record of the types. + /// + SmallVector<EVT, 4> RegVTs; + + /// Regs - This list holds the registers assigned to the values. + /// Each legal or promoted value requires one register, and each + /// expanded value requires multiple registers. + /// + SmallVector<unsigned, 4> Regs; + + RegsForValue() {} + + RegsForValue(const SmallVector<unsigned, 4> &regs, + EVT regvt, EVT valuevt) + : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} + + RegsForValue(const SmallVector<unsigned, 4> &regs, + const SmallVector<EVT, 4> &regvts, + const SmallVector<EVT, 4> &valuevts) + : ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {} + + RegsForValue(LLVMContext &Context, const TargetLowering &tli, + unsigned Reg, const Type *Ty) { + ComputeValueVTs(tli, Ty, ValueVTs); + + for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { + EVT ValueVT = ValueVTs[Value]; + unsigned NumRegs = tli.getNumRegisters(Context, ValueVT); + EVT RegisterVT = tli.getRegisterType(Context, ValueVT); + for (unsigned i = 0; i != NumRegs; ++i) + Regs.push_back(Reg + i); + RegVTs.push_back(RegisterVT); + Reg += NumRegs; + } + } + + /// areValueTypesLegal - Return true if types of all the values are legal. + bool areValueTypesLegal(const TargetLowering &TLI) { + for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { + EVT RegisterVT = RegVTs[Value]; + if (!TLI.isTypeLegal(RegisterVT)) + return false; + } + return true; + } + + /// append - Add the specified values to this one. + void append(const RegsForValue &RHS) { + ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); + RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end()); + Regs.append(RHS.Regs.begin(), RHS.Regs.end()); + } + + /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from + /// this value and returns the result as a ValueVTs value. This uses + /// Chain/Flag as the input and updates them for the output Chain/Flag. + /// If the Flag pointer is NULL, no flag is used. + SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, + DebugLoc dl, + SDValue &Chain, SDValue *Flag) const; + + /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the + /// specified value into the registers specified by this object. This uses + /// Chain/Flag as the input and updates them for the output Chain/Flag. + /// If the Flag pointer is NULL, no flag is used. + void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, + SDValue &Chain, SDValue *Flag) const; + + /// AddInlineAsmOperands - Add this value to the specified inlineasm node + /// operand list. This adds the code marker, matching input operand index + /// (if applicable), and includes the number of values added into it.
+ void AddInlineAsmOperands(unsigned Kind, + bool HasMatching, unsigned MatchingIdx, + SelectionDAG &DAG, + std::vector<SDValue> &Ops) const; + }; +} + +/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from +/// this value and returns the result as a ValueVT value. This uses +/// Chain/Flag as the input and updates them for the output Chain/Flag. +/// If the Flag pointer is NULL, no flag is used. +SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, + FunctionLoweringInfo &FuncInfo, + DebugLoc dl, + SDValue &Chain, SDValue *Flag) const { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Assemble the legal parts into the final values. + SmallVector<SDValue, 4> Values(ValueVTs.size()); + SmallVector<SDValue, 8> Parts; + for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { + // Copy the legal parts from the registers. + EVT ValueVT = ValueVTs[Value]; + unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT); + EVT RegisterVT = RegVTs[Value]; + + Parts.resize(NumRegs); + for (unsigned i = 0; i != NumRegs; ++i) { + SDValue P; + if (Flag == 0) { + P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT); + } else { + P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag); + *Flag = P.getValue(2); + } + + Chain = P.getValue(1); + + // If the source register was virtual and if we know something about it, + // add an assert node. + if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) && + RegisterVT.isInteger() && !RegisterVT.isVector()) { + unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister; + if (FuncInfo.LiveOutRegInfo.size() > SlotNo) { + const FunctionLoweringInfo::LiveOutInfo &LOI = + FuncInfo.LiveOutRegInfo[SlotNo]; + + unsigned RegSize = RegisterVT.getSizeInBits(); + unsigned NumSignBits = LOI.NumSignBits; + unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes(); + + // FIXME: We capture more information than the dag can represent. For + // now, just use the tightest assertzext/assertsext possible. + bool isSExt = true; + EVT FromVT(MVT::Other); + if (NumSignBits == RegSize) + isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 + else if (NumZeroBits >= RegSize-1) + isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1 + else if (NumSignBits > RegSize-8) + isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8 + else if (NumZeroBits >= RegSize-8) + isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8 + else if (NumSignBits > RegSize-16) + isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16 + else if (NumZeroBits >= RegSize-16) + isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16 + else if (NumSignBits > RegSize-32) + isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32 + else if (NumZeroBits >= RegSize-32) + isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 + + if (FromVT != MVT::Other) + P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl, + RegisterVT, P, DAG.getValueType(FromVT)); + } + } + + Parts[i] = P; + } + + Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), + NumRegs, RegisterVT, ValueVT); + Part += NumRegs; + Parts.clear(); + } + + return DAG.getNode(ISD::MERGE_VALUES, dl, + DAG.getVTList(&ValueVTs[0], ValueVTs.size()), + &Values[0], ValueVTs.size()); +} + +/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the +/// specified value into the registers specified by this object. This uses +/// Chain/Flag as the input and updates them for the output Chain/Flag. +/// If the Flag pointer is NULL, no flag is used. 
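// Worked example for the assert-node tightening above: take an i32 virtual
// register (RegSize == 32) whose LiveOutInfo records 24 known-zero high bits
// and exactly 24 sign bits. The cascade falls through to the
// "NumZeroBits >= RegSize-8" case, so the copy is annotated as having been
// zero-extended from i8:
//   P = DAG.getNode(ISD::AssertZext, dl, RegisterVT, P,
//                   DAG.getValueType(MVT::i8));
// which later combines can exploit even though, as the FIXME says, the DAG
// cannot represent the full known-bits information.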
+void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, + SDValue &Chain, SDValue *Flag) const { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Get the list of the values's legal parts. + unsigned NumRegs = Regs.size(); + SmallVector<SDValue, 8> Parts(NumRegs); + for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { + EVT ValueVT = ValueVTs[Value]; + unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT); + EVT RegisterVT = RegVTs[Value]; + + getCopyToParts(DAG, dl, + Val.getValue(Val.getResNo() + Value), + &Parts[Part], NumParts, RegisterVT); + Part += NumParts; + } + + // Copy the parts into the registers. + SmallVector<SDValue, 8> Chains(NumRegs); + for (unsigned i = 0; i != NumRegs; ++i) { + SDValue Part; + if (Flag == 0) { + Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]); + } else { + Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag); + *Flag = Part.getValue(1); + } + + Chains[i] = Part.getValue(0); + } + + if (NumRegs == 1 || Flag) + // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is + // flagged to it. That is the CopyToReg nodes and the user are considered + // a single scheduling unit. If we create a TokenFactor and return it as + // chain, then the TokenFactor is both a predecessor (operand) of the + // user as well as a successor (the TF operands are flagged to the user). + // c1, f1 = CopyToReg + // c2, f2 = CopyToReg + // c3 = TokenFactor c1, c2 + // ... + // = op c3, ..., f2 + Chain = Chains[NumRegs-1]; + else + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs); +} + +/// AddInlineAsmOperands - Add this value to the specified inlineasm node +/// operand list. This adds the code marker and includes the number of +/// values added into it. +void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, + unsigned MatchingIdx, + SelectionDAG &DAG, + std::vector<SDValue> &Ops) const { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); + if (HasMatching) + Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx); + SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); + Ops.push_back(Res); + + for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { + unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); + EVT RegisterVT = RegVTs[Value]; + for (unsigned i = 0; i != NumRegs; ++i) { + assert(Reg < Regs.size() && "Mismatch in # registers expected"); + Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT)); + } + } +} void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) { AA = &aa; @@ -543,6 +698,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) { /// consumed. void SelectionDAGBuilder::clear() { NodeMap.clear(); + UnusedArgNodeMap.clear(); PendingLoads.clear(); PendingExports.clear(); CurDebugLoc = DebugLoc(); @@ -649,27 +805,63 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { } } +// getValue - Return an SDValue for the given Value. SDValue SelectionDAGBuilder::getValue(const Value *V) { + // If we already have an SDValue for this value, use it. It's important + // to do this first, so that we don't create a CopyFromReg if we already + // have a regular SDValue. + SDValue &N = NodeMap[V]; + if (N.getNode()) return N; + + // If there's a virtual register allocated and initialized for this + // value, use it. 
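// Annotation for the getValue/getNonRegisterValue/getValueImpl split in this
// hunk. Resolution proceeds in order:
//   1) NodeMap            -- an SDValue was already built for V;
//   2) FuncInfo.ValueMap  -- a virtual register exists, so emit CopyFromReg
//                            through RegsForValue;
//   3) getValueImpl       -- materialize constants and the like, then cache
//                            the result in NodeMap.
// getNonRegisterValue skips step 2; presumably this is what lets a value
// that is itself being copied *to* a virtual register avoid reading back
// through that same register.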
+ DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V); + if (It != FuncInfo.ValueMap.end()) { + unsigned InReg = It->second; + RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); + SDValue Chain = DAG.getEntryNode(); + return N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL); + } + + // Otherwise create a new SDValue and remember it. + SDValue Val = getValueImpl(V); + NodeMap[V] = Val; + return Val; +} + +/// getNonRegisterValue - Return an SDValue for the given Value, but +/// don't look in FuncInfo.ValueMap for a virtual register. +SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { + // If we already have an SDValue for this value, use it. SDValue &N = NodeMap[V]; if (N.getNode()) return N; + // Otherwise create a new SDValue and remember it. + SDValue Val = getValueImpl(V); + NodeMap[V] = Val; + return Val; +} + +/// getValueImpl - Helper function for getValue and getMaterializedValue. +/// Create an SDValue for the given value. +SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (const Constant *C = dyn_cast<Constant>(V)) { EVT VT = TLI.getValueType(V->getType(), true); if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) - return N = DAG.getConstant(*CI, VT); + return DAG.getConstant(*CI, VT); if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) - return N = DAG.getGlobalAddress(GV, VT); + return DAG.getGlobalAddress(GV, getCurDebugLoc(), VT); if (isa<ConstantPointerNull>(C)) - return N = DAG.getConstant(0, TLI.getPointerTy()); + return DAG.getConstant(0, TLI.getPointerTy()); if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) - return N = DAG.getConstantFP(*CFP, VT); + return DAG.getConstantFP(*CFP, VT); if (isa<UndefValue>(C) && !V->getType()->isAggregateType()) - return N = DAG.getUNDEF(VT); + return DAG.getUNDEF(VT); if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { visit(CE->getOpcode(), *CE); @@ -757,82 +949,25 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { return DAG.getFrameIndex(SI->second, TLI.getPointerTy()); } - unsigned InReg = FuncInfo.ValueMap[V]; - assert(InReg && "Value not in map!"); - - RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); - SDValue Chain = DAG.getEntryNode(); - return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, NULL); -} - -/// Get the EVTs and ArgFlags collections that represent the legalized return -/// type of the given function. This does not require a DAG or a return value, -/// and is suitable for use before any DAGs for the function are constructed. -static void getReturnInfo(const Type* ReturnType, - Attributes attr, SmallVectorImpl<EVT> &OutVTs, - SmallVectorImpl<ISD::ArgFlagsTy> &OutFlags, - const TargetLowering &TLI, - SmallVectorImpl<uint64_t> *Offsets = 0) { - SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, ReturnType, ValueVTs); - unsigned NumValues = ValueVTs.size(); - if (NumValues == 0) return; - unsigned Offset = 0; - - for (unsigned j = 0, f = NumValues; j != f; ++j) { - EVT VT = ValueVTs[j]; - ISD::NodeType ExtendKind = ISD::ANY_EXTEND; - - if (attr & Attribute::SExt) - ExtendKind = ISD::SIGN_EXTEND; - else if (attr & Attribute::ZExt) - ExtendKind = ISD::ZERO_EXTEND; - - // FIXME: C calling convention requires the return type to be promoted to - // at least 32-bit. But this is not necessary for non-C calling - // conventions. The frontend should mark functions whose return values - // require promoting with signext or zeroext attributes. 
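// (Concrete instance of the promotion the removed lines below implement: a
// signext i8 return, on a target whose 32-bit register type is MVT::i32, has
// VT.bitsLT(MinVT), so VT is widened to i32 before being split into parts --
// exactly the C-convention promotion the FIXME above wants frontends to make
// explicit with signext/zeroext attributes.)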
- if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { - EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32); - if (VT.bitsLT(MinVT)) - VT = MinVT; - } - - unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT); - EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT); - unsigned PartSize = TLI.getTargetData()->getTypeAllocSize( - PartVT.getTypeForEVT(ReturnType->getContext())); - - // 'inreg' on function refers to return value - ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); - if (attr & Attribute::InReg) - Flags.setInReg(); - - // Propagate extension type if any - if (attr & Attribute::SExt) - Flags.setSExt(); - else if (attr & Attribute::ZExt) - Flags.setZExt(); - - for (unsigned i = 0; i < NumParts; ++i) { - OutVTs.push_back(PartVT); - OutFlags.push_back(Flags); - if (Offsets) - { - Offsets->push_back(Offset); - Offset += PartSize; - } - } + // If this is an instruction which fast-isel has deferred, select it now. + if (const Instruction *Inst = dyn_cast<Instruction>(V)) { + unsigned InReg = FuncInfo.InitializeRegForValue(Inst); + RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType()); + SDValue Chain = DAG.getEntryNode(); + return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL); } + + llvm_unreachable("Can't get register for value!"); + return SDValue(); } void SelectionDAGBuilder::visitRet(const ReturnInst &I) { SDValue Chain = getControlRoot(); SmallVector<ISD::OutputArg, 8> Outs; - FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo(); + SmallVector<SDValue, 8> OutVals; - if (!FLI.CanLowerReturn) { - unsigned DemoteReg = FLI.DemoteRegister; + if (!FuncInfo.CanLowerReturn) { + unsigned DemoteReg = FuncInfo.DemoteRegister; const Function *F = I.getParent()->getParent(); // Emit a store of the return value through the virtual register. @@ -908,8 +1043,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { else if (F->paramHasAttr(0, Attribute::ZExt)) Flags.setZExt(); - for (unsigned i = 0; i < NumParts; ++i) - Outs.push_back(ISD::OutputArg(Flags, Parts[i], /*isfixed=*/true)); + for (unsigned i = 0; i < NumParts; ++i) { + Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(), + /*isfixed=*/true)); + OutVals.push_back(Parts[i]); + } } } } @@ -918,7 +1056,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { CallingConv::ID CallConv = DAG.getMachineFunction().getFunction()->getCallingConv(); Chain = TLI.LowerReturn(Chain, CallConv, isVarArg, - Outs, getCurDebugLoc(), DAG); + Outs, OutVals, getCurDebugLoc(), DAG); // Verify that the target's LowerReturn behaved as expected. assert(Chain.getNode() && Chain.getValueType() == MVT::Other && @@ -1119,7 +1257,7 @@ SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){ } void SelectionDAGBuilder::visitBr(const BranchInst &I) { - MachineBasicBlock *BrMBB = FuncInfo.MBBMap[I.getParent()]; + MachineBasicBlock *BrMBB = FuncInfo.MBB; // Update machine-CFG edges. MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; @@ -1269,18 +1407,10 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, MVT::Other, getControlRoot(), Cond, DAG.getBasicBlock(CB.TrueBB)); - // If the branch was constant folded, fix up the CFG. - if (BrCond.getOpcode() == ISD::BR) { - SwitchBB->removeSuccessor(CB.FalseBB); - } else { - // Otherwise, go ahead and insert the false branch. 
- if (BrCond == getControlRoot()) - SwitchBB->removeSuccessor(CB.TrueBB); - - if (CB.FalseBB != NextBlock) - BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, - DAG.getBasicBlock(CB.FalseBB)); - } + // Insert the false branch. + if (CB.FalseBB != NextBlock) + BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, + DAG.getBasicBlock(CB.FalseBB)); DAG.setRoot(BrCond); } @@ -1319,7 +1449,7 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // therefore require extension or truncating. SwitchOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), TLI.getPointerTy()); - unsigned JumpTableReg = FuncInfo.MakeReg(TLI.getPointerTy()); + unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy()); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), JumpTableReg, SwitchOp); JT.Reg = JumpTableReg; @@ -1370,7 +1500,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, SDValue ShiftOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), TLI.getPointerTy()); - B.Reg = FuncInfo.MakeReg(TLI.getPointerTy()); + B.Reg = FuncInfo.CreateReg(TLI.getPointerTy()); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), B.Reg, ShiftOp); @@ -1402,29 +1532,41 @@ void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB) { - // Make desired shift SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg, TLI.getPointerTy()); - SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), - TLI.getPointerTy(), - DAG.getConstant(1, TLI.getPointerTy()), - ShiftOp); - - // Emit bit tests and jumps - SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(), - TLI.getPointerTy(), SwitchVal, - DAG.getConstant(B.Mask, TLI.getPointerTy())); - SDValue AndCmp = DAG.getSetCC(getCurDebugLoc(), - TLI.getSetCCResultType(AndOp.getValueType()), - AndOp, DAG.getConstant(0, TLI.getPointerTy()), - ISD::SETNE); + SDValue Cmp; + if (CountPopulation_64(B.Mask) == 1) { + // Testing for a single bit; just compare the shift count with what it + // would need to be to shift a 1 bit in that position. + Cmp = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(ShiftOp.getValueType()), + ShiftOp, + DAG.getConstant(CountTrailingZeros_64(B.Mask), + TLI.getPointerTy()), + ISD::SETEQ); + } else { + // Make desired shift + SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), + TLI.getPointerTy(), + DAG.getConstant(1, TLI.getPointerTy()), + ShiftOp); + + // Emit bit tests and jumps + SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(), + TLI.getPointerTy(), SwitchVal, + DAG.getConstant(B.Mask, TLI.getPointerTy())); + Cmp = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(AndOp.getValueType()), + AndOp, DAG.getConstant(0, TLI.getPointerTy()), + ISD::SETNE); + } SwitchBB->addSuccessor(B.TargetBB); SwitchBB->addSuccessor(NextMBB); SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), MVT::Other, getControlRoot(), - AndCmp, DAG.getBasicBlock(B.TargetBB)); + Cmp, DAG.getBasicBlock(B.TargetBB)); // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. @@ -1441,7 +1583,7 @@ void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB, } void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { - MachineBasicBlock *InvokeMBB = FuncInfo.MBBMap[I.getParent()]; + MachineBasicBlock *InvokeMBB = FuncInfo.MBB; // Retrieve successors. 
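// Worked example for the new single-bit fast path in visitBitTestCase above,
// assuming a case mask of 0x20: CountPopulation_64(0x20) == 1, so instead of
// materializing the shift-and-test
//   ((1 << Shift) & 0x20) != 0
// the builder emits a single comparison against the bit's position,
//   Cmp = (Shift == CountTrailingZeros_64(0x20))   // i.e. Shift == 5
// which is equivalent because shifting a lone 1 into bit 5 succeeds exactly
// when the shift amount is 5.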
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; @@ -1969,7 +2111,7 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, } void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { - MachineBasicBlock *SwitchMBB = FuncInfo.MBBMap[SI.getParent()]; + MachineBasicBlock *SwitchMBB = FuncInfo.MBB; // Figure out which block is immediately after the current one. MachineBasicBlock *NextBlock = 0; @@ -2035,7 +2177,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { } void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { - MachineBasicBlock *IndirectBrMBB = FuncInfo.MBBMap[I.getParent()]; + MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB; // Update machine-CFG edges with unique successors. SmallVector<BasicBlock*, 32> succs; @@ -2245,7 +2387,6 @@ void SelectionDAGBuilder::visitPtrToInt(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); - EVT SrcVT = N.getValueType(); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); } @@ -2254,7 +2395,6 @@ void SelectionDAGBuilder::visitIntToPtr(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); - EVT SrcVT = N.getValueType(); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); } @@ -2579,7 +2719,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // If this is a constant subscript, handle it quickly. if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { - if (CI->getZExtValue() == 0) continue; + if (CI->isZero()) continue; uint64_t Offs = TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); SDValue OffsVal; @@ -2643,12 +2783,13 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { SDValue AllocSize = getValue(I.getArraySize()); - AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), AllocSize.getValueType(), - AllocSize, - DAG.getConstant(TySize, AllocSize.getValueType())); - EVT IntPtr = TLI.getPointerTy(); - AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr); + if (AllocSize.getValueType() != IntPtr) + AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr); + + AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), IntPtr, + AllocSize, + DAG.getConstant(TySize, IntPtr)); // Handle alignment. If the requested alignment is less than or equal to // the stack alignment, ignore it. If the size is greater than or equal to @@ -2804,8 +2945,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy())); // Add all operands of the call to the operand list. 
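// Annotation for the recurring rewrite that starts below: CallInst's raw
// operand list includes the callee, so intrinsic lowering used to address
// arguments as getOperand(1..N). The getArgOperand(i)/getNumArgOperands()
// accessors index the arguments directly and insulate this code from where
// the callee sits among the operands. For a call %r = call @f(%a, %b):
//   I.getArgOperand(0)  // %a, formerly I.getOperand(1)
//   I.getCalledValue()  // @f, formerly I.getOperand(0)
// Every hunk of this shape through the rest of the file is the same
// mechanical shift by one.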
- for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) { - SDValue Op = getValue(I.getOperand(i)); + for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { + SDValue Op = getValue(I.getArgOperand(i)); assert(TLI.isTypeLegal(Op.getValueType()) && "Intrinsic uses a non-legal type?"); Ops.push_back(Op); @@ -2910,11 +3051,11 @@ SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I, SDValue Root = getRoot(); SDValue L = DAG.getAtomic(Op, getCurDebugLoc(), - getValue(I.getOperand(2)).getValueType().getSimpleVT(), + getValue(I.getArgOperand(1)).getValueType().getSimpleVT(), Root, - getValue(I.getOperand(1)), - getValue(I.getOperand(2)), - I.getOperand(1)); + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + I.getArgOperand(0)); setValue(&I, L); DAG.setRoot(L.getValue(1)); return 0; @@ -2923,8 +3064,8 @@ SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I, // implVisitAluOverflow - Lower arithmetic overflow instrinsics. const char * SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) { - SDValue Op1 = getValue(I.getOperand(1)); - SDValue Op2 = getValue(I.getOperand(2)); + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2)); @@ -2938,9 +3079,9 @@ SelectionDAGBuilder::visitExp(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); - if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getOperand(1)); + SDValue Op = getValue(I.getArgOperand(0)); // Put the exponent in the right bit position for later addition to the // final result: @@ -3050,8 +3191,8 @@ SelectionDAGBuilder::visitExp(const CallInst &I) { } else { // No special expansion. result = DAG.getNode(ISD::FEXP, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0))); } setValue(&I, result); @@ -3064,9 +3205,9 @@ SelectionDAGBuilder::visitLog(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); - if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getOperand(1)); + SDValue Op = getValue(I.getArgOperand(0)); SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); // Scale the exponent by log(2) [0.69314718f]. @@ -3160,8 +3301,8 @@ SelectionDAGBuilder::visitLog(const CallInst &I) { } else { // No special expansion. result = DAG.getNode(ISD::FLOG, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0))); } setValue(&I, result); @@ -3174,9 +3315,9 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); - if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getOperand(1)); + SDValue Op = getValue(I.getArgOperand(0)); SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); // Get the exponent. 
@@ -3269,8 +3410,8 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) { } else { // No special expansion. result = DAG.getNode(ISD::FLOG2, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0))); } setValue(&I, result); @@ -3283,9 +3424,9 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); - if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getOperand(1)); + SDValue Op = getValue(I.getArgOperand(0)); SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); // Scale the exponent by log10(2) [0.30102999f]. @@ -3371,8 +3512,8 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) { } else { // No special expansion. result = DAG.getNode(ISD::FLOG10, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0))); } setValue(&I, result); @@ -3385,9 +3526,9 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); - if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getOperand(1)); + SDValue Op = getValue(I.getArgOperand(0)); SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op); @@ -3485,8 +3626,8 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) { } else { // No special expansion. result = DAG.getNode(ISD::FEXP2, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0))); } setValue(&I, result); @@ -3497,12 +3638,12 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) { void SelectionDAGBuilder::visitPow(const CallInst &I) { SDValue result; - const Value *Val = I.getOperand(1); + const Value *Val = I.getArgOperand(0); DebugLoc dl = getCurDebugLoc(); bool IsExp10 = false; if (getValue(Val).getValueType() == MVT::f32 && - getValue(I.getOperand(2)).getValueType() == MVT::f32 && + getValue(I.getArgOperand(1)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(Val))) { if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) { @@ -3513,7 +3654,7 @@ SelectionDAGBuilder::visitPow(const CallInst &I) { } if (IsExp10 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getOperand(2)); + SDValue Op = getValue(I.getArgOperand(1)); // Put the exponent in the right bit position for later addition to the // final result: @@ -3618,9 +3759,9 @@ SelectionDAGBuilder::visitPow(const CallInst &I) { } else { // No special expansion. 
result = DAG.getNode(ISD::FPOW, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1)), - getValue(I.getOperand(2))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1))); } setValue(&I, result); @@ -3696,7 +3837,7 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const DbgValueInst &DI, if (DV.isInlinedFnArgument(MF.getFunction())) return false; - MachineBasicBlock *MBB = FuncInfo.MBBMap[DI.getParent()]; + MachineBasicBlock *MBB = FuncInfo.MBB; if (MBB != &MF.front()) return false; @@ -3750,11 +3891,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::vacopy: visitVACopy(I); return 0; case Intrinsic::returnaddress: setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(), - getValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)))); return 0; case Intrinsic::frameaddress: setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(), - getValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)))); return 0; case Intrinsic::setjmp: return "_setjmp"+!TLI.usesUnderscoreSetJmp(); @@ -3763,63 +3904,63 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::memcpy: { // Assert for address < 256 since we support only user defined address // spaces. - assert(cast<PointerType>(I.getOperand(1)->getType())->getAddressSpace() + assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() < 256 && - cast<PointerType>(I.getOperand(2)->getType())->getAddressSpace() + cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace() < 256 && "Unknown address space"); - SDValue Op1 = getValue(I.getOperand(1)); - SDValue Op2 = getValue(I.getOperand(2)); - SDValue Op3 = getValue(I.getOperand(3)); - unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue(); - bool isVol = cast<ConstantInt>(I.getOperand(5))->getZExtValue(); + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); + bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false, - I.getOperand(1), 0, I.getOperand(2), 0)); + I.getArgOperand(0), 0, I.getArgOperand(1), 0)); return 0; } case Intrinsic::memset: { // Assert for address < 256 since we support only user defined address // spaces. - assert(cast<PointerType>(I.getOperand(1)->getType())->getAddressSpace() + assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() < 256 && "Unknown address space"); - SDValue Op1 = getValue(I.getOperand(1)); - SDValue Op2 = getValue(I.getOperand(2)); - SDValue Op3 = getValue(I.getOperand(3)); - unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue(); - bool isVol = cast<ConstantInt>(I.getOperand(5))->getZExtValue(); + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); + bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol, - I.getOperand(1), 0)); + I.getArgOperand(0), 0)); return 0; } case Intrinsic::memmove: { // Assert for address < 256 since we support only user defined address // spaces. 
- assert(cast<PointerType>(I.getOperand(1)->getType())->getAddressSpace() + assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() < 256 && - cast<PointerType>(I.getOperand(2)->getType())->getAddressSpace() + cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace() < 256 && "Unknown address space"); - SDValue Op1 = getValue(I.getOperand(1)); - SDValue Op2 = getValue(I.getOperand(2)); - SDValue Op3 = getValue(I.getOperand(3)); - unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue(); - bool isVol = cast<ConstantInt>(I.getOperand(5))->getZExtValue(); + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); + bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); // If the source and destination are known to not be aliases, we can // lower memmove as memcpy. uint64_t Size = -1ULL; if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3)) Size = C->getZExtValue(); - if (AA->alias(I.getOperand(1), Size, I.getOperand(2), Size) == + if (AA->alias(I.getArgOperand(0), Size, I.getArgOperand(1), Size) == AliasAnalysis::NoAlias) { DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, - false, I.getOperand(1), 0, I.getOperand(2), 0)); + false, I.getArgOperand(0), 0, I.getArgOperand(1), 0)); return 0; } DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol, - I.getOperand(1), 0, I.getOperand(2), 0)); + I.getArgOperand(0), 0, I.getArgOperand(1), 0)); return 0; } case Intrinsic::dbg_declare: { @@ -3908,7 +4049,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } else { bool createUndef = false; // FIXME : Why not use getValue() directly ? - SDValue &N = NodeMap[V]; + SDValue N = NodeMap[V]; + if (!N.getNode() && isa<Argument>(V)) + // Check unused arguments map. + N = UnusedArgNodeMap[V]; if (N.getNode()) { if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) { SDV = DAG.getDbgValue(Variable, N.getNode(), @@ -3956,7 +4100,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_exception: { // Insert the EXCEPTIONADDR instruction. - assert(FuncInfo.MBBMap[I.getParent()]->isLandingPad() && + assert(FuncInfo.MBB->isLandingPad() && "Call to eh.exception not in landing pad!"); SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); SDValue Ops[1]; @@ -3968,7 +4112,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_selector: { - MachineBasicBlock *CallMBB = FuncInfo.MBBMap[I.getParent()]; + MachineBasicBlock *CallMBB = FuncInfo.MBB; MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); if (CallMBB->isLandingPad()) AddCatchInfo(I, &MMI, CallMBB); @@ -3978,13 +4122,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { #endif // FIXME: Mark exception selector register as live in. Hack for PR1508. unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) FuncInfo.MBBMap[I.getParent()]->addLiveIn(Reg); + if (Reg) FuncInfo.MBB->addLiveIn(Reg); } // Insert the EHSELECTION instruction. 
SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); SDValue Ops[2]; - Ops[0] = getValue(I.getOperand(1)); + Ops[0] = getValue(I.getArgOperand(0)); Ops[1] = getRoot(); SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2); DAG.setRoot(Op.getValue(1)); @@ -3994,7 +4138,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::eh_typeid_for: { // Find the type id for the given typeinfo. - GlobalVariable *GV = ExtractTypeInfo(I.getOperand(1)); + GlobalVariable *GV = ExtractTypeInfo(I.getArgOperand(0)); unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV); Res = DAG.getConstant(TypeID, MVT::i32); setValue(&I, Res); @@ -4007,15 +4151,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl, MVT::Other, getControlRoot(), - getValue(I.getOperand(1)), - getValue(I.getOperand(2)))); + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); return 0; case Intrinsic::eh_unwind_init: DAG.getMachineFunction().getMMI().setCallsUnwindInit(true); return 0; case Intrinsic::eh_dwarf_cfa: { - EVT VT = getValue(I.getOperand(1)).getValueType(); - SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), dl, + SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), dl, TLI.getPointerTy()); SDValue Offset = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), @@ -4031,7 +4174,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_sjlj_callsite: { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1)); + ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0)); assert(CI && "Non-constant call site value in eh.sjlj.callsite!"); assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!"); @@ -4040,13 +4183,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_sjlj_setjmp: { setValue(&I, DAG.getNode(ISD::EH_SJLJ_SETJMP, dl, MVT::i32, getRoot(), - getValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)))); return 0; } case Intrinsic::eh_sjlj_longjmp: { DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other, getRoot(), - getValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)))); return 0; } @@ -4072,34 +4215,34 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::convertuu: Code = ISD::CVT_UU; break; } EVT DestVT = TLI.getValueType(I.getType()); - const Value *Op1 = I.getOperand(1); + const Value *Op1 = I.getArgOperand(0); Res = DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1), DAG.getValueType(DestVT), DAG.getValueType(getValue(Op1).getValueType()), - getValue(I.getOperand(2)), - getValue(I.getOperand(3)), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)), Code); setValue(&I, Res); return 0; } case Intrinsic::sqrt: setValue(&I, DAG.getNode(ISD::FSQRT, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); return 0; case Intrinsic::powi: - setValue(&I, ExpandPowI(dl, getValue(I.getOperand(1)), - getValue(I.getOperand(2)), DAG)); + setValue(&I, ExpandPowI(dl, getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), DAG)); return 0; case Intrinsic::sin: setValue(&I, DAG.getNode(ISD::FSIN, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1)))); + 
getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); return 0; case Intrinsic::cos: setValue(&I, DAG.getNode(ISD::FCOS, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); return 0; case Intrinsic::log: visitLog(I); @@ -4121,14 +4264,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; case Intrinsic::convert_to_fp16: setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl, - MVT::i16, getValue(I.getOperand(1)))); + MVT::i16, getValue(I.getArgOperand(0)))); return 0; case Intrinsic::convert_from_fp16: setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, dl, - MVT::f32, getValue(I.getOperand(1)))); + MVT::f32, getValue(I.getArgOperand(0)))); return 0; case Intrinsic::pcmarker: { - SDValue Tmp = getValue(I.getOperand(1)); + SDValue Tmp = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp)); return 0; } @@ -4143,23 +4286,23 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::bswap: setValue(&I, DAG.getNode(ISD::BSWAP, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); return 0; case Intrinsic::cttz: { - SDValue Arg = getValue(I.getOperand(1)); + SDValue Arg = getValue(I.getArgOperand(0)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(ISD::CTTZ, dl, Ty, Arg)); return 0; } case Intrinsic::ctlz: { - SDValue Arg = getValue(I.getOperand(1)); + SDValue Arg = getValue(I.getArgOperand(0)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(ISD::CTLZ, dl, Ty, Arg)); return 0; } case Intrinsic::ctpop: { - SDValue Arg = getValue(I.getOperand(1)); + SDValue Arg = getValue(I.getArgOperand(0)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(ISD::CTPOP, dl, Ty, Arg)); return 0; @@ -4173,7 +4316,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; } case Intrinsic::stackrestore: { - Res = getValue(I.getOperand(1)); + Res = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Res)); return 0; } @@ -4183,8 +4326,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MachineFrameInfo *MFI = MF.getFrameInfo(); EVT PtrTy = TLI.getPointerTy(); - SDValue Src = getValue(I.getOperand(1)); // The guard's value. - AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2)); + SDValue Src = getValue(I.getArgOperand(0)); // The guard's value. + AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1)); int FI = FuncInfo.StaticAllocaMap[Slot]; MFI->setStackProtectorIndex(FI); @@ -4201,14 +4344,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::objectsize: { // If we don't know by now, we're never going to know. 
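// Annotation for the objectsize lowering below: the intrinsic's i1 selector
// picks the conservative answer when the size cannot be computed -- with the
// selector false (CI->isZero(), "maximum") the result folds to all-ones,
//   Res = DAG.getConstant(-1ULL, Ty);
// and with it true ("minimum") to zero. Under the new accessors that
// selector is argument index 1 rather than raw operand 2.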
- ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(2)); + ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1)); assert(CI && "Non-constant type in __builtin_object_size?"); - SDValue Arg = getValue(I.getOperand(0)); + SDValue Arg = getValue(I.getCalledValue()); EVT Ty = Arg.getValueType(); - if (CI->getZExtValue() == 0) + if (CI->isZero()) Res = DAG.getConstant(-1ULL, Ty); else Res = DAG.getConstant(0, Ty); @@ -4221,14 +4364,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; case Intrinsic::init_trampoline: { - const Function *F = cast<Function>(I.getOperand(2)->stripPointerCasts()); + const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts()); SDValue Ops[6]; Ops[0] = getRoot(); - Ops[1] = getValue(I.getOperand(1)); - Ops[2] = getValue(I.getOperand(2)); - Ops[3] = getValue(I.getOperand(3)); - Ops[4] = DAG.getSrcValue(I.getOperand(1)); + Ops[1] = getValue(I.getArgOperand(0)); + Ops[2] = getValue(I.getArgOperand(1)); + Ops[3] = getValue(I.getArgOperand(2)); + Ops[4] = DAG.getSrcValue(I.getArgOperand(0)); Ops[5] = DAG.getSrcValue(F); Res = DAG.getNode(ISD::TRAMPOLINE, dl, @@ -4241,8 +4384,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::gcroot: if (GFI) { - const Value *Alloca = I.getOperand(1); - const Constant *TypeMap = cast<Constant>(I.getOperand(2)); + const Value *Alloca = I.getArgOperand(0); + const Constant *TypeMap = cast<Constant>(I.getArgOperand(1)); FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode()); GFI->addStackRoot(FI->getIndex(), TypeMap); @@ -4274,9 +4417,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::prefetch: { SDValue Ops[4]; Ops[0] = getRoot(); - Ops[1] = getValue(I.getOperand(1)); - Ops[2] = getValue(I.getOperand(2)); - Ops[3] = getValue(I.getOperand(3)); + Ops[1] = getValue(I.getArgOperand(0)); + Ops[2] = getValue(I.getArgOperand(1)); + Ops[3] = getValue(I.getArgOperand(2)); DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4)); return 0; } @@ -4285,7 +4428,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Ops[6]; Ops[0] = getRoot(); for (int x = 1; x < 6; ++x) - Ops[x] = getValue(I.getOperand(x)); + Ops[x] = getValue(I.getArgOperand(x - 1)); DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6)); return 0; @@ -4294,12 +4437,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Root = getRoot(); SDValue L = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(), - getValue(I.getOperand(2)).getValueType().getSimpleVT(), + getValue(I.getArgOperand(1)).getValueType().getSimpleVT(), Root, - getValue(I.getOperand(1)), - getValue(I.getOperand(2)), - getValue(I.getOperand(3)), - I.getOperand(1)); + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)), + I.getArgOperand(0)); setValue(&I, L); DAG.setRoot(L.getValue(1)); return 0; @@ -4353,14 +4496,13 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, Args.reserve(CS.arg_size()); // Check whether the function can return without sret-demotion. 
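// Sketch of the sret-demotion check as restructured below: the file-local
// getReturnInfo helper that filled parallel OutVTs/OutsFlags vectors is
// replaced by the shared GetReturnInfo, which emits ISD::OutputArg records
// directly:
//   SmallVector<ISD::OutputArg, 4> Outs;
//   SmallVector<uint64_t, 4> Offsets;
//   GetReturnInfo(RetTy, CS.getAttributes().getRetAttributes(),
//                 Outs, TLI, &Offsets);
//   bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
//                                            FTy->isVarArg(), Outs,
//                                            FTy->getContext());
// so CanLowerReturn no longer needs a SelectionDAG, only an LLVMContext.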
- SmallVector<EVT, 4> OutVTs; - SmallVector<ISD::ArgFlagsTy, 4> OutsFlags; + SmallVector<ISD::OutputArg, 4> Outs; SmallVector<uint64_t, 4> Offsets; - getReturnInfo(RetTy, CS.getAttributes().getRetAttributes(), - OutVTs, OutsFlags, TLI, &Offsets); + GetReturnInfo(RetTy, CS.getAttributes().getRetAttributes(), + Outs, TLI, &Offsets); bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), - FTy->isVarArg(), OutVTs, OutsFlags, DAG); + FTy->isVarArg(), Outs, FTy->getContext()); SDValue DemoteStackSlot; @@ -4453,7 +4595,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, ComputeValueVTs(TLI, PtrRetTy, PVTs); assert(PVTs.size() == 1 && "Pointers should fit in one register"); EVT PtrVT = PVTs[0]; - unsigned NumValues = OutVTs.size(); + unsigned NumValues = Outs.size(); SmallVector<SDValue, 4> Values(NumValues); SmallVector<SDValue, 4> Chains(NumValues); @@ -4461,7 +4603,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, DemoteStackSlot, DAG.getConstant(Offsets[i], PtrVT)); - SDValue L = DAG.getLoad(OutVTs[i], getCurDebugLoc(), Result.second, + SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second, Add, NULL, Offsets[i], false, false, 1); Values[i] = L; Chains[i] = L.getValue(1); @@ -4580,16 +4722,16 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, /// lowered like a normal call. bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { // Verify that the prototype makes sense. int memcmp(void*,void*,size_t) - if (I.getNumOperands() != 4) + if (I.getNumArgOperands() != 3) return false; - const Value *LHS = I.getOperand(1), *RHS = I.getOperand(2); + const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1); if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() || - !I.getOperand(3)->getType()->isIntegerTy() || + !I.getArgOperand(2)->getType()->isIntegerTy() || !I.getType()->isIntegerTy()) return false; - const ConstantInt *Size = dyn_cast<ConstantInt>(I.getOperand(3)); + const ConstantInt *Size = dyn_cast<ConstantInt>(I.getArgOperand(2)); // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 @@ -4656,11 +4798,16 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { void SelectionDAGBuilder::visitCall(const CallInst &I) { + // Handle inline assembly differently. + if (isa<InlineAsm>(I.getCalledValue())) { + visitInlineAsm(&I); + return; + } + const char *RenameFn = 0; if (Function *F = I.getCalledFunction()) { if (F->isDeclaration()) { - const TargetIntrinsicInfo *II = TM.getIntrinsicInfo(); - if (II) { + if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) { if (unsigned IID = II->getIntrinsicID(F)) { RenameFn = visitIntrinsicCall(I, IID); if (!RenameFn) @@ -4679,51 +4826,51 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (!F->hasLocalLinkage() && F->hasName()) { StringRef Name = F->getName(); if (Name == "copysign" || Name == "copysignf" || Name == "copysignl") { - if (I.getNumOperands() == 3 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPointTy() && - I.getType() == I.getOperand(1)->getType() && - I.getType() == I.getOperand(2)->getType()) { - SDValue LHS = getValue(I.getOperand(1)); - SDValue RHS = getValue(I.getOperand(2)); + if (I.getNumArgOperands() == 2 && // Basic sanity checks. 
+ I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType() && + I.getType() == I.getArgOperand(1)->getType()) { + SDValue LHS = getValue(I.getArgOperand(0)); + SDValue RHS = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(), LHS.getValueType(), LHS, RHS)); return; } } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") { - if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPointTy() && - I.getType() == I.getOperand(1)->getType()) { - SDValue Tmp = getValue(I.getOperand(1)); + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType()) { + SDValue Tmp = getValue(I.getArgOperand(0)); setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(), Tmp.getValueType(), Tmp)); return; } } else if (Name == "sin" || Name == "sinf" || Name == "sinl") { - if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPointTy() && - I.getType() == I.getOperand(1)->getType() && + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType() && I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getOperand(1)); + SDValue Tmp = getValue(I.getArgOperand(0)); setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(), Tmp.getValueType(), Tmp)); return; } } else if (Name == "cos" || Name == "cosf" || Name == "cosl") { - if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPointTy() && - I.getType() == I.getOperand(1)->getType() && + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType() && I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getOperand(1)); + SDValue Tmp = getValue(I.getArgOperand(0)); setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(), Tmp.getValueType(), Tmp)); return; } } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") { - if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPointTy() && - I.getType() == I.getOperand(1)->getType() && + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType() && I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getOperand(1)); + SDValue Tmp = getValue(I.getArgOperand(0)); setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(), Tmp.getValueType(), Tmp)); return; @@ -4733,14 +4880,11 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { return; } } - } else if (isa<InlineAsm>(I.getOperand(0))) { - visitInlineAsm(&I); - return; } - + SDValue Callee; if (!RenameFn) - Callee = getValue(I.getOperand(0)); + Callee = getValue(I.getCalledValue()); else Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy()); @@ -4749,210 +4893,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { LowerCallTo(&I, Callee, I.isTailCall()); } -/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from -/// this value and returns the result as a ValueVT value. This uses -/// Chain/Flag as the input and updates them for the output Chain/Flag. -/// If the Flag pointer is NULL, no flag is used. 
-SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const { - // Assemble the legal parts into the final values. - SmallVector<SDValue, 4> Values(ValueVTs.size()); - SmallVector<SDValue, 8> Parts; - for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { - // Copy the legal parts from the registers. - EVT ValueVT = ValueVTs[Value]; - unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVT); - EVT RegisterVT = RegVTs[Value]; - - Parts.resize(NumRegs); - for (unsigned i = 0; i != NumRegs; ++i) { - SDValue P; - if (Flag == 0) { - P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT); - } else { - P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag); - *Flag = P.getValue(2); - } - - Chain = P.getValue(1); - - // If the source register was virtual and if we know something about it, - // add an assert node. - if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) && - RegisterVT.isInteger() && !RegisterVT.isVector()) { - unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister; - FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo(); - if (FLI.LiveOutRegInfo.size() > SlotNo) { - FunctionLoweringInfo::LiveOutInfo &LOI = FLI.LiveOutRegInfo[SlotNo]; - - unsigned RegSize = RegisterVT.getSizeInBits(); - unsigned NumSignBits = LOI.NumSignBits; - unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes(); - - // FIXME: We capture more information than the dag can represent. For - // now, just use the tightest assertzext/assertsext possible. - bool isSExt = true; - EVT FromVT(MVT::Other); - if (NumSignBits == RegSize) - isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 - else if (NumZeroBits >= RegSize-1) - isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1 - else if (NumSignBits > RegSize-8) - isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8 - else if (NumZeroBits >= RegSize-8) - isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8 - else if (NumSignBits > RegSize-16) - isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16 - else if (NumZeroBits >= RegSize-16) - isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16 - else if (NumSignBits > RegSize-32) - isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32 - else if (NumZeroBits >= RegSize-32) - isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 - - if (FromVT != MVT::Other) - P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl, - RegisterVT, P, DAG.getValueType(FromVT)); - } - } - - Parts[i] = P; - } - - Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), - NumRegs, RegisterVT, ValueVT); - Part += NumRegs; - Parts.clear(); - } - - return DAG.getNode(ISD::MERGE_VALUES, dl, - DAG.getVTList(&ValueVTs[0], ValueVTs.size()), - &Values[0], ValueVTs.size()); -} - -/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the -/// specified value into the registers specified by this object. This uses -/// Chain/Flag as the input and updates them for the output Chain/Flag. -/// If the Flag pointer is NULL, no flag is used. -void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const { - // Get the list of the values's legal parts. 
- unsigned NumRegs = Regs.size(); - SmallVector<SDValue, 8> Parts(NumRegs); - for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT ValueVT = ValueVTs[Value]; - unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), ValueVT); - EVT RegisterVT = RegVTs[Value]; - - getCopyToParts(DAG, dl, - Val.getValue(Val.getResNo() + Value), - &Parts[Part], NumParts, RegisterVT); - Part += NumParts; - } - - // Copy the parts into the registers. - SmallVector<SDValue, 8> Chains(NumRegs); - for (unsigned i = 0; i != NumRegs; ++i) { - SDValue Part; - if (Flag == 0) { - Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]); - } else { - Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag); - *Flag = Part.getValue(1); - } - - Chains[i] = Part.getValue(0); - } - - if (NumRegs == 1 || Flag) - // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is - // flagged to it. That is the CopyToReg nodes and the user are considered - // a single scheduling unit. If we create a TokenFactor and return it as - // chain, then the TokenFactor is both a predecessor (operand) of the - // user as well as a successor (the TF operands are flagged to the user). - // c1, f1 = CopyToReg - // c2, f2 = CopyToReg - // c3 = TokenFactor c1, c2 - // ... - // = op c3, ..., f2 - Chain = Chains[NumRegs-1]; - else - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs); -} - -/// AddInlineAsmOperands - Add this value to the specified inlineasm node -/// operand list. This adds the code marker and includes the number of -/// values added into it. -void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, - unsigned MatchingIdx, - SelectionDAG &DAG, - std::vector<SDValue> &Ops) const { - unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); - if (HasMatching) - Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx); - SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); - Ops.push_back(Res); - - for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { - unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVTs[Value]); - EVT RegisterVT = RegVTs[Value]; - for (unsigned i = 0; i != NumRegs; ++i) { - assert(Reg < Regs.size() && "Mismatch in # registers expected"); - Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT)); - } - } -} - -/// isAllocatableRegister - If the specified register is safe to allocate, -/// i.e. it isn't a stack pointer or some other special register, return the -/// register class for the register. Otherwise, return null. -static const TargetRegisterClass * -isAllocatableRegister(unsigned Reg, MachineFunction &MF, - const TargetLowering &TLI, - const TargetRegisterInfo *TRI) { - EVT FoundVT = MVT::Other; - const TargetRegisterClass *FoundRC = 0; - for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(), - E = TRI->regclass_end(); RCI != E; ++RCI) { - EVT ThisVT = MVT::Other; - - const TargetRegisterClass *RC = *RCI; - // If none of the value types for this register class are valid, we - // can't use it. For example, 64-bit reg classes on 32-bit targets. - for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); - I != E; ++I) { - if (TLI.isTypeLegal(*I)) { - // If we have already found this register in a different register class, - // choose the one with the largest VT specified. For example, on - // PowerPC, we favor f64 register classes over f32. 
- if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) { - ThisVT = *I; - break; - } - } - } - - if (ThisVT == MVT::Other) continue; - - // NOTE: This isn't ideal. In particular, this might allocate the - // frame pointer in functions that need it (due to them not being taken - // out of allocation, because a variable sized allocation hasn't been seen - // yet). This is a slight code pessimization, but should still work. - for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF), - E = RC->allocation_order_end(MF); I != E; ++I) - if (*I == Reg) { - // We found a matching register class. Keep looking at others in case - // we find one with larger registers that this physreg is also in. - FoundRC = RC; - FoundVT = ThisVT; - break; - } - } - return FoundRC; -} - - namespace llvm { + /// AsmOperandInfo - This contains information for each constraint that we are /// lowering. class LLVM_LIBRARY_VISIBILITY SDISelAsmOperandInfo : @@ -5041,8 +4983,56 @@ private: Regs.insert(*Aliases); } }; + } // end llvm namespace. +/// isAllocatableRegister - If the specified register is safe to allocate, +/// i.e. it isn't a stack pointer or some other special register, return the +/// register class for the register. Otherwise, return null. +static const TargetRegisterClass * +isAllocatableRegister(unsigned Reg, MachineFunction &MF, + const TargetLowering &TLI, + const TargetRegisterInfo *TRI) { + EVT FoundVT = MVT::Other; + const TargetRegisterClass *FoundRC = 0; + for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(), + E = TRI->regclass_end(); RCI != E; ++RCI) { + EVT ThisVT = MVT::Other; + + const TargetRegisterClass *RC = *RCI; + // If none of the value types for this register class are valid, we + // can't use it. For example, 64-bit reg classes on 32-bit targets. + for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); + I != E; ++I) { + if (TLI.isTypeLegal(*I)) { + // If we have already found this register in a different register class, + // choose the one with the largest VT specified. For example, on + // PowerPC, we favor f64 register classes over f32. + if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) { + ThisVT = *I; + break; + } + } + } + + if (ThisVT == MVT::Other) continue; + + // NOTE: This isn't ideal. In particular, this might allocate the + // frame pointer in functions that need it (due to them not being taken + // out of allocation, because a variable sized allocation hasn't been seen + // yet). This is a slight code pessimization, but should still work. + for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF), + E = RC->allocation_order_end(MF); I != E; ++I) + if (*I == Reg) { + // We found a matching register class. Keep looking at others in case + // we find one with larger registers that this physreg is also in. + FoundRC = RC; + FoundVT = ThisVT; + break; + } + } + return FoundRC; +} /// GetRegistersForValue - Assign registers (virtual or physical) for the /// specified operand. 
We prefer to assign virtual registers, to allow the @@ -5154,7 +5144,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, } } - OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT); + OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI); return; @@ -5172,7 +5162,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, for (; NumRegs; --NumRegs) Regs.push_back(RegInfo.createVirtualRegister(RC)); - OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT); + OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); return; } @@ -5215,7 +5205,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, for (unsigned i = RegStart; i != RegEnd; ++i) Regs.push_back(RegClassRegs[i]); - OpInfo.AssignedRegs = RegsForValue(TLI, Regs, *RC->vt_begin(), + OpInfo.AssignedRegs = RegsForValue(Regs, *RC->vt_begin(), OpInfo.ConstraintVT); OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI); return; @@ -5332,7 +5322,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // Compute the constraint code and ConstraintType to use. - TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, hasMemory, &DAG); + TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); // If this is a memory input, and if the operand is not indirect, do what we // need to to provide an address for the memory input. @@ -5406,6 +5396,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc)); + // Remember the AlignStack bit as operand 3. + AsmNodeOperands.push_back(DAG.getTargetConstant(IA->isAlignStack() ? 1 : 0, + MVT::i1)); + // Loop over all of the inputs, copying the operand values into the // appropriate registers and processing the output regs. RegsForValue RetValRegs; @@ -5497,7 +5491,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } RegsForValue MatchedRegs; - MatchedRegs.TLI = &TLI; MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); EVT RegVT = AsmNodeOperands[CurOp+1].getValueType(); MatchedRegs.RegVTs.push_back(RegVT); @@ -5535,7 +5528,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { std::vector<SDValue> Ops; TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0], - hasMemory, Ops, DAG); + Ops, DAG); if (Ops.empty()) report_fatal_error("Invalid operand for inline asm constraint '" + Twine(OpInfo.ConstraintCode) + "'!"); @@ -5570,7 +5563,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Copy the input into the appropriate registers. if (OpInfo.AssignedRegs.Regs.empty() || - !OpInfo.AssignedRegs.areValueTypesLegal()) + !OpInfo.AssignedRegs.areValueTypesLegal(TLI)) report_fatal_error("Couldn't allocate input reg for constraint '" + Twine(OpInfo.ConstraintCode) + "'!"); @@ -5595,7 +5588,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // Finish up input operands. Set the input chain and add the flag last. - AsmNodeOperands[0] = Chain; + AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; if (Flag.getNode()) AsmNodeOperands.push_back(Flag); Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(), @@ -5606,7 +5599,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If this asm returns a register value, copy the result from that register // and set it as the value of the call. 
if (!RetValRegs.Regs.empty()) { - SDValue Val = RetValRegs.getCopyFromRegs(DAG, getCurDebugLoc(), + SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, &Flag); // FIXME: Why don't we do this for inline asms with MRVs? @@ -5646,7 +5639,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) { RegsForValue &OutRegs = IndirectStoresToEmit[i].first; const Value *Ptr = IndirectStoresToEmit[i].second; - SDValue OutVal = OutRegs.getCopyFromRegs(DAG, getCurDebugLoc(), + SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, &Flag); StoresToEmit.push_back(std::make_pair(OutVal, Ptr)); } @@ -5672,14 +5665,16 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { void SelectionDAGBuilder::visitVAStart(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(), MVT::Other, getRoot(), - getValue(I.getOperand(1)), - DAG.getSrcValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)), + DAG.getSrcValue(I.getArgOperand(0)))); } void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { + const TargetData &TD = *TLI.getTargetData(); SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(), getRoot(), getValue(I.getOperand(0)), - DAG.getSrcValue(I.getOperand(0))); + DAG.getSrcValue(I.getOperand(0)), + TD.getABITypeAlignment(I.getType())); setValue(&I, V); DAG.setRoot(V.getValue(1)); } @@ -5687,17 +5682,17 @@ void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { void SelectionDAGBuilder::visitVAEnd(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(), MVT::Other, getRoot(), - getValue(I.getOperand(1)), - DAG.getSrcValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)), + DAG.getSrcValue(I.getArgOperand(0)))); } void SelectionDAGBuilder::visitVACopy(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(), MVT::Other, getRoot(), - getValue(I.getOperand(1)), - getValue(I.getOperand(2)), - DAG.getSrcValue(I.getOperand(1)), - DAG.getSrcValue(I.getOperand(2)))); + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + DAG.getSrcValue(I.getArgOperand(0)), + DAG.getSrcValue(I.getArgOperand(1)))); } /// TargetLowering::LowerCallTo - This is the default LowerCallTo @@ -5715,6 +5710,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, DebugLoc dl) const { // Handle all of the outgoing arguments. SmallVector<ISD::OutputArg, 32> Outs; + SmallVector<SDValue, 32> OutVals; for (unsigned i = 0, e = Args.size(); i != e; ++i) { SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*this, Args[i].Ty, ValueVTs); @@ -5768,13 +5764,15 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 - ISD::OutputArg MyFlags(Flags, Parts[j], i < NumFixedArgs); + ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), + i < NumFixedArgs); if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); else if (j != 0) MyFlags.Flags.setOrigAlign(1); Outs.push_back(MyFlags); + OutVals.push_back(Parts[j]); } } } @@ -5803,7 +5801,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, SmallVector<SDValue, 4> InVals; Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall, - Outs, Ins, dl, DAG, InVals); + Outs, OutVals, Ins, dl, DAG, InVals); // Verify that the target's LowerCall behaved as expected. 
assert(Chain.getNode() && Chain.getValueType() == MVT::Other && @@ -5876,7 +5874,7 @@ SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { void SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { - SDValue Op = getValue(V); + SDValue Op = getNonRegisterValue(V); assert((Op.getOpcode() != ISD::CopyFromReg || cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) && "Copy from a reg to the same reg!"); @@ -5894,21 +5892,16 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { // If this is the entry block, emit arguments. const Function &F = *LLVMBB->getParent(); SelectionDAG &DAG = SDB->DAG; - SDValue OldRoot = DAG.getRoot(); DebugLoc dl = SDB->getCurDebugLoc(); const TargetData *TD = TLI.getTargetData(); SmallVector<ISD::InputArg, 16> Ins; // Check whether the function can return without sret-demotion. - SmallVector<EVT, 4> OutVTs; - SmallVector<ISD::ArgFlagsTy, 4> OutsFlags; - getReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(), - OutVTs, OutsFlags, TLI); - FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo(); - - FLI.CanLowerReturn = TLI.CanLowerReturn(F.getCallingConv(), F.isVarArg(), - OutVTs, OutsFlags, DAG); - if (!FLI.CanLowerReturn) { + SmallVector<ISD::OutputArg, 4> Outs; + GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(), + Outs, TLI); + + if (!FuncInfo->CanLowerReturn) { // Put in an sret pointer parameter before all the other parameters. SmallVector<EVT, 1> ValueVTs; ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); @@ -6002,7 +5995,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { // Set up the argument values. unsigned i = 0; Idx = 1; - if (!FLI.CanLowerReturn) { + if (!FuncInfo->CanLowerReturn) { // Create a virtual register for the sret pointer, and put in a copy // from the sret argument into it. SmallVector<EVT, 1> ValueVTs; @@ -6016,7 +6009,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { MachineFunction& MF = SDB->DAG.getMachineFunction(); MachineRegisterInfo& RegInfo = MF.getRegInfo(); unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)); - FLI.DemoteRegister = SRetReg; + FuncInfo->DemoteRegister = SRetReg; NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurDebugLoc(), SRetReg, ArgValue); DAG.setRoot(NewRoot); @@ -6032,6 +6025,12 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, I->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); + + // If this argument is unused then remember its value. It is used to generate + // debugging information. 
+  if (I->use_empty() && NumValues)
+    SDB->setUnusedArgValue(I, InVals[i]);
+
   for (unsigned Value = 0; Value != NumValues; ++Value) {
     EVT VT = ValueVTs[Value];
     EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
@@ -6112,17 +6111,20 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
       if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
         unsigned &RegOut = ConstantsOut[C];
         if (RegOut == 0) {
-          RegOut = FuncInfo.CreateRegForValue(C);
+          RegOut = FuncInfo.CreateRegs(C->getType());
           CopyValueToVirtualRegister(C, RegOut);
         }
         Reg = RegOut;
       } else {
-        Reg = FuncInfo.ValueMap[PHIOp];
-        if (Reg == 0) {
+        DenseMap<const Value *, unsigned>::iterator I =
+          FuncInfo.ValueMap.find(PHIOp);
+        if (I != FuncInfo.ValueMap.end())
+          Reg = I->second;
+        else {
           assert(isa<AllocaInst>(PHIOp) &&
                  FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
                  "Didn't codegen value into a register!??");
-          Reg = FuncInfo.CreateRegForValue(PHIOp);
+          Reg = FuncInfo.CreateRegs(PHIOp->getType());
           CopyValueToVirtualRegister(PHIOp, Reg);
         }
       }
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 3fcd4b9..46733d6 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -88,6 +88,10 @@ class SelectionDAGBuilder {
   DebugLoc CurDebugLoc;

   DenseMap<const Value*, SDValue> NodeMap;
+
+  /// UnusedArgNodeMap - Maps argument values for unused arguments. This is
+  /// used to preserve debug information for incoming arguments.
+  DenseMap<const Value*, SDValue> UnusedArgNodeMap;

 public:
   /// PendingLoads - Loads are not emitted to the program immediately. We bunch
@@ -342,6 +346,8 @@ public:
   void visit(unsigned Opcode, const User &I);

   SDValue getValue(const Value *V);
+  SDValue getNonRegisterValue(const Value *V);
+  SDValue getValueImpl(const Value *V);

   void setValue(const Value *V, SDValue NewN) {
     SDValue &N = NodeMap[V];
@@ -349,6 +355,12 @@ public:
     N = NewN;
   }

+  void setUnusedArgValue(const Value *V, SDValue NewN) {
+    SDValue &N = UnusedArgNodeMap[V];
+    assert(N.getNode() == 0 && "Already set a value for this node!");
+    N = NewN;
+  }
+
   void GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
                             std::set<unsigned> &OutputRegs,
                             std::set<unsigned> &InputRegs);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 65b8d4f..08ba548 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -14,7 +14,7 @@
 #define DEBUG_TYPE "isel"
 #include "ScheduleDAGSDNodes.h"
 #include "SelectionDAGBuilder.h"
-#include "FunctionLoweringInfo.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/DebugInfo.h"
@@ -171,7 +171,7 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
 SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) :
   MachineFunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()),
   FuncInfo(new FunctionLoweringInfo(TLI)),
-  CurDAG(new SelectionDAG(tm, *FuncInfo)),
+  CurDAG(new SelectionDAG(tm)),
   SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
   GFI(),
   OptLevel(OL),
@@ -244,7 +244,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
   DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");

   CurDAG->init(*MF);
-  FuncInfo->set(Fn, *MF, EnableFastISel);
+  FuncInfo->set(Fn, *MF);
   SDB->init(GFI, *AA);

   SelectAllBasicBlocks(Fn);
@@ -300,7 +300,11 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
     for (MachineBasicBlock::const_iterator II = MBB->begin(), IE = MBB->end();
          II != IE; ++II) {
       const TargetInstrDesc &TID = TM.getInstrInfo()->get(II->getOpcode());
-      if (II->isInlineAsm() || (TID.isCall() && !TID.isReturn())) {
+
+      // Operand 1 of an inline asm instruction indicates whether the asm
+      // needs the stack or not.
+      if ((II->isInlineAsm() && II->getOperand(1).getImm()) ||
+          (TID.isCall() && !TID.isReturn())) {
         MFI->setHasCalls(true);
         goto done;
       }
@@ -312,6 +316,26 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
   // Determine if there is a call to setjmp in the machine function.
   MF->setCallsSetJmp(FunctionCallsSetJmp(&Fn));

+  // Replace forward-declared registers with the registers containing
+  // the desired value.
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  for (DenseMap<unsigned, unsigned>::iterator
+       I = FuncInfo->RegFixups.begin(), E = FuncInfo->RegFixups.end();
+       I != E; ++I) {
+    unsigned From = I->first;
+    unsigned To = I->second;
+    // If To is also scheduled to be replaced, find what its ultimate
+    // replacement is.
+    for (;;) {
+      DenseMap<unsigned, unsigned>::iterator J =
+        FuncInfo->RegFixups.find(To);
+      if (J == E) break;
+      To = J->second;
+    }
+    // Replace it.
+    MRI.replaceRegWith(From, To);
+  }
+
   // Release function-specific state. SDB and CurDAG are already cleared
   // at this point.
   FuncInfo->clear();
@@ -319,10 +343,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
   return true;
 }

-MachineBasicBlock *
-SelectionDAGISel::SelectBasicBlock(MachineBasicBlock *BB,
-                                   const BasicBlock *LLVMBB,
-                                   BasicBlock::const_iterator Begin,
+void
+SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
                                    BasicBlock::const_iterator End,
                                    bool &HadTailCall) {
   // Lower all of the non-terminator instructions. If a call is emitted
@@ -337,7 +359,7 @@ SelectionDAGISel::SelectBasicBlock(MachineBasicBlock *BB,
   SDB->clear();

   // Final step, emit the lowered DAG as machine code.
-  return CodeGenAndEmitDAG(BB);
+  CodeGenAndEmitDAG();
 }

 namespace {
@@ -372,102 +394,6 @@ public:
 };
 }

-/// TrivialTruncElim - Eliminate some trivial nops that can result from
-/// ShrinkDemandedOps: (trunc (ext n)) -> n.
-static bool TrivialTruncElim(SDValue Op,
-                             TargetLowering::TargetLoweringOpt &TLO) {
-  SDValue N0 = Op.getOperand(0);
-  EVT VT = Op.getValueType();
-  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
-       N0.getOpcode() == ISD::SIGN_EXTEND ||
-       N0.getOpcode() == ISD::ANY_EXTEND) &&
-      N0.getOperand(0).getValueType() == VT) {
-    return TLO.CombineTo(Op, N0.getOperand(0));
-  }
-  return false;
-}
-
-/// ShrinkDemandedOps - A late transformation pass that shrink expressions
-/// using TargetLowering::TargetLoweringOpt::ShrinkDemandedOp. It converts
-/// x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
-void SelectionDAGISel::ShrinkDemandedOps() {
-  SmallVector<SDNode*, 128> Worklist;
-  SmallPtrSet<SDNode*, 128> InWorklist;
-
-  // Add all the dag nodes to the worklist.
- Worklist.reserve(CurDAG->allnodes_size()); - for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), - E = CurDAG->allnodes_end(); I != E; ++I) { - Worklist.push_back(I); - InWorklist.insert(I); - } - - TargetLowering::TargetLoweringOpt TLO(*CurDAG, true, true, true); - while (!Worklist.empty()) { - SDNode *N = Worklist.pop_back_val(); - InWorklist.erase(N); - - if (N->use_empty() && N != CurDAG->getRoot().getNode()) { - // Deleting this node may make its operands dead, add them to the worklist - // if they aren't already there. - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - if (InWorklist.insert(N->getOperand(i).getNode())) - Worklist.push_back(N->getOperand(i).getNode()); - - CurDAG->DeleteNode(N); - continue; - } - - // Run ShrinkDemandedOp on scalar binary operations. - if (N->getNumValues() != 1 || - !N->getValueType(0).isSimple() || !N->getValueType(0).isInteger()) - continue; - - unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits(); - APInt Demanded = APInt::getAllOnesValue(BitWidth); - APInt KnownZero, KnownOne; - if (!TLI.SimplifyDemandedBits(SDValue(N, 0), Demanded, - KnownZero, KnownOne, TLO) && - (N->getOpcode() != ISD::TRUNCATE || - !TrivialTruncElim(SDValue(N, 0), TLO))) - continue; - - // Revisit the node. - assert(!InWorklist.count(N) && "Already in worklist"); - Worklist.push_back(N); - InWorklist.insert(N); - - // Replace the old value with the new one. - DEBUG(errs() << "\nShrinkDemandedOps replacing "; - TLO.Old.getNode()->dump(CurDAG); - errs() << "\nWith: "; - TLO.New.getNode()->dump(CurDAG); - errs() << '\n'); - - if (InWorklist.insert(TLO.New.getNode())) - Worklist.push_back(TLO.New.getNode()); - - SDOPsWorkListRemover DeadNodes(Worklist, InWorklist); - CurDAG->ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes); - - if (!TLO.Old.getNode()->use_empty()) continue; - - for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); - i != e; ++i) { - SDNode *OpNode = TLO.Old.getNode()->getOperand(i).getNode(); - if (OpNode->hasOneUse()) { - // Add OpNode to the end of the list to revisit. - DeadNodes.RemoveFromWorklist(OpNode); - Worklist.push_back(OpNode); - InWorklist.insert(OpNode); - } - } - - DeadNodes.RemoveFromWorklist(TLO.Old.getNode()); - CurDAG->DeleteNode(TLO.Old.getNode()); - } -} - void SelectionDAGISel::ComputeLiveOutVRegInfo() { SmallPtrSet<SDNode*, 128> VisitedNodes; SmallVector<SDNode*, 128> Worklist; @@ -522,7 +448,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { } while (!Worklist.empty()); } -MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) { +void SelectionDAGISel::CodeGenAndEmitDAG() { std::string GroupName; if (TimePassesIsEnabled) GroupName = "Instruction Selection and Scheduling"; @@ -531,23 +457,19 @@ MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) { ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs || ViewSUnitDAGs) BlockName = MF->getFunction()->getNameStr() + ":" + - BB->getBasicBlock()->getNameStr(); + FuncInfo->MBB->getBasicBlock()->getNameStr(); - DEBUG(dbgs() << "Initial selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Initial selection DAG:\n"; CurDAG->dump()); if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName); // Run the DAG combiner in pre-legalize mode. 
- if (TimePassesIsEnabled) { - NamedRegionTimer T("DAG Combining 1", GroupName); - CurDAG->Combine(Unrestricted, *AA, OptLevel); - } else { + { + NamedRegionTimer T("DAG Combining 1", GroupName, TimePassesIsEnabled); CurDAG->Combine(Unrestricted, *AA, OptLevel); } - DEBUG(dbgs() << "Optimized lowered selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Optimized lowered selection DAG:\n"; CurDAG->dump()); // Second step, hack on the DAG until it only uses operations and types that // the target supports. @@ -555,44 +477,36 @@ MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) { BlockName); bool Changed; - if (TimePassesIsEnabled) { - NamedRegionTimer T("Type Legalization", GroupName); - Changed = CurDAG->LegalizeTypes(); - } else { + { + NamedRegionTimer T("Type Legalization", GroupName, TimePassesIsEnabled); Changed = CurDAG->LegalizeTypes(); } - DEBUG(dbgs() << "Type-legalized selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Type-legalized selection DAG:\n"; CurDAG->dump()); if (Changed) { if (ViewDAGCombineLT) CurDAG->viewGraph("dag-combine-lt input for " + BlockName); // Run the DAG combiner in post-type-legalize mode. - if (TimePassesIsEnabled) { - NamedRegionTimer T("DAG Combining after legalize types", GroupName); - CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); - } else { + { + NamedRegionTimer T("DAG Combining after legalize types", GroupName, + TimePassesIsEnabled); CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); } - DEBUG(dbgs() << "Optimized type-legalized selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Optimized type-legalized selection DAG:\n"; + CurDAG->dump()); } - if (TimePassesIsEnabled) { - NamedRegionTimer T("Vector Legalization", GroupName); - Changed = CurDAG->LegalizeVectors(); - } else { + { + NamedRegionTimer T("Vector Legalization", GroupName, TimePassesIsEnabled); Changed = CurDAG->LegalizeVectors(); } if (Changed) { - if (TimePassesIsEnabled) { - NamedRegionTimer T("Type Legalization 2", GroupName); - CurDAG->LegalizeTypes(); - } else { + { + NamedRegionTimer T("Type Legalization 2", GroupName, TimePassesIsEnabled); CurDAG->LegalizeTypes(); } @@ -600,95 +514,79 @@ MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) { CurDAG->viewGraph("dag-combine-lv input for " + BlockName); // Run the DAG combiner in post-type-legalize mode. - if (TimePassesIsEnabled) { - NamedRegionTimer T("DAG Combining after legalize vectors", GroupName); - CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); - } else { + { + NamedRegionTimer T("DAG Combining after legalize vectors", GroupName, + TimePassesIsEnabled); CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); } - DEBUG(dbgs() << "Optimized vector-legalized selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Optimized vector-legalized selection DAG:\n"; + CurDAG->dump()); } if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName); - if (TimePassesIsEnabled) { - NamedRegionTimer T("DAG Legalization", GroupName); - CurDAG->Legalize(OptLevel); - } else { + { + NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled); CurDAG->Legalize(OptLevel); } - DEBUG(dbgs() << "Legalized selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Legalized selection DAG:\n"; CurDAG->dump()); if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName); // Run the DAG combiner in post-legalize mode. 
- if (TimePassesIsEnabled) { - NamedRegionTimer T("DAG Combining 2", GroupName); - CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); - } else { + { + NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled); CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); } - DEBUG(dbgs() << "Optimized legalized selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Optimized legalized selection DAG:\n"; CurDAG->dump()); - if (OptLevel != CodeGenOpt::None) { - ShrinkDemandedOps(); + if (OptLevel != CodeGenOpt::None) ComputeLiveOutVRegInfo(); - } if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName); // Third, instruction select all of the operations to machine code, adding the // code to the MachineBasicBlock. - if (TimePassesIsEnabled) { - NamedRegionTimer T("Instruction Selection", GroupName); - DoInstructionSelection(); - } else { + { + NamedRegionTimer T("Instruction Selection", GroupName, TimePassesIsEnabled); DoInstructionSelection(); } - DEBUG(dbgs() << "Selected selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Selected selection DAG:\n"; CurDAG->dump()); if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName); // Schedule machine code. ScheduleDAGSDNodes *Scheduler = CreateScheduler(); - if (TimePassesIsEnabled) { - NamedRegionTimer T("Instruction Scheduling", GroupName); - Scheduler->Run(CurDAG, BB, BB->end()); - } else { - Scheduler->Run(CurDAG, BB, BB->end()); + { + NamedRegionTimer T("Instruction Scheduling", GroupName, + TimePassesIsEnabled); + Scheduler->Run(CurDAG, FuncInfo->MBB, FuncInfo->InsertPt); } if (ViewSUnitDAGs) Scheduler->viewGraph(); // Emit machine code to BB. This can change 'BB' to the last block being // inserted into. - if (TimePassesIsEnabled) { - NamedRegionTimer T("Instruction Creation", GroupName); - BB = Scheduler->EmitSchedule(); - } else { - BB = Scheduler->EmitSchedule(); + { + NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled); + + FuncInfo->MBB = Scheduler->EmitSchedule(); + FuncInfo->InsertPt = Scheduler->InsertPos; } // Free the scheduler state. - if (TimePassesIsEnabled) { - NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName); - delete Scheduler; - } else { + { + NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName, + TimePassesIsEnabled); delete Scheduler; } // Free the SelectionDAG state, now that we're finished with it. CurDAG->clear(); - - return BB; } void SelectionDAGISel::DoInstructionSelection() { @@ -750,21 +648,22 @@ void SelectionDAGISel::DoInstructionSelection() { /// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and /// do other setup for EH landing-pad blocks. -void SelectionDAGISel::PrepareEHLandingPad(MachineBasicBlock *BB) { +void SelectionDAGISel::PrepareEHLandingPad() { // Add a label to mark the beginning of the landing pad. Deletion of the // landing pad can thus be detected via the MachineModuleInfo. - MCSymbol *Label = MF->getMMI().addLandingPad(BB); + MCSymbol *Label = MF->getMMI().addLandingPad(FuncInfo->MBB); const TargetInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL); - BuildMI(BB, SDB->getCurDebugLoc(), II).addSym(Label); + BuildMI(*FuncInfo->MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) + .addSym(Label); // Mark exception register as live in. unsigned Reg = TLI.getExceptionAddressRegister(); - if (Reg) BB->addLiveIn(Reg); + if (Reg) FuncInfo->MBB->addLiveIn(Reg); // Mark exception selector register as live in. 
Reg = TLI.getExceptionSelectorRegister(); - if (Reg) BB->addLiveIn(Reg); + if (Reg) FuncInfo->MBB->addLiveIn(Reg); // FIXME: Hack around an exception handling flaw (PR1508): the personality // function and list of typeids logically belong to the invoke (or, if you @@ -777,7 +676,7 @@ void SelectionDAGISel::PrepareEHLandingPad(MachineBasicBlock *BB) { // in exceptions not being caught because no typeids are associated with // the invoke. This may not be the only way things can go wrong, but it // is the only way we try to work around for the moment. - const BasicBlock *LLVMBB = BB->getBasicBlock(); + const BasicBlock *LLVMBB = FuncInfo->MBB->getBasicBlock(); const BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator()); if (Br && Br->isUnconditional()) { // Critical edge? @@ -796,83 +695,100 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = 0; if (EnableFastISel) - FastIS = TLI.createFastISel(*MF, FuncInfo->ValueMap, FuncInfo->MBBMap, - FuncInfo->StaticAllocaMap, - FuncInfo->PHINodesToUpdate -#ifndef NDEBUG - , FuncInfo->CatchInfoLost -#endif - ); + FastIS = TLI.createFastISel(*FuncInfo); // Iterate over all basic blocks in the function. for (Function::const_iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { const BasicBlock *LLVMBB = &*I; - MachineBasicBlock *BB = FuncInfo->MBBMap[LLVMBB]; + FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB]; + FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI(); BasicBlock::const_iterator const End = LLVMBB->end(); - BasicBlock::const_iterator BI = Begin; + BasicBlock::const_iterator BI = End; + FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); + + // Setup an EH landing-pad block. + if (FuncInfo->MBB->isLandingPad()) + PrepareEHLandingPad(); + // Lower any arguments needed in this block if this is the entry block. if (LLVMBB == &Fn.getEntryBlock()) LowerArguments(LLVMBB); - // Setup an EH landing-pad block. - if (BB->isLandingPad()) - PrepareEHLandingPad(BB); - // Before doing SelectionDAG ISel, see if FastISel has been requested. if (FastIS) { + FastIS->startNewBlock(); + // Emit code for any incoming arguments. This must happen before // beginning FastISel on the entry block. if (LLVMBB == &Fn.getEntryBlock()) { CurDAG->setRoot(SDB->getControlRoot()); SDB->clear(); - BB = CodeGenAndEmitDAG(BB); + CodeGenAndEmitDAG(); + + // If we inserted any instructions at the beginning, make a note of + // where they are, so we can be sure to emit subsequent instructions + // after them. + if (FuncInfo->InsertPt != FuncInfo->MBB->begin()) + FastIS->setLastLocalValue(llvm::prior(FuncInfo->InsertPt)); + else + FastIS->setLastLocalValue(0); } - FastIS->startNewBlock(BB); + // Do FastISel on as many instructions as possible. - for (; BI != End; ++BI) { + for (; BI != Begin; --BI) { + const Instruction *Inst = llvm::prior(BI); + + // If we no longer require this instruction, skip it. + if (!Inst->mayWriteToMemory() && + !isa<TerminatorInst>(Inst) && + !isa<DbgInfoIntrinsic>(Inst) && + !FuncInfo->isExportedInst(Inst)) + continue; + + // Bottom-up: reset the insert pos at the top, after any local-value + // instructions. + FastIS->recomputeInsertPt(); + // Try to select the instruction with FastISel. - if (FastIS->SelectInstruction(BI)) + if (FastIS->SelectInstruction(Inst)) continue; // Then handle certain instructions as single-LLVM-Instruction blocks. 
- if (isa<CallInst>(BI)) { + if (isa<CallInst>(Inst)) { ++NumFastIselFailures; if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel missed call: "; - BI->dump(); + Inst->dump(); } - if (!BI->getType()->isVoidTy() && !BI->use_empty()) { - unsigned &R = FuncInfo->ValueMap[BI]; + if (!Inst->getType()->isVoidTy() && !Inst->use_empty()) { + unsigned &R = FuncInfo->ValueMap[Inst]; if (!R) - R = FuncInfo->CreateRegForValue(BI); + R = FuncInfo->CreateRegs(Inst->getType()); } bool HadTailCall = false; - BB = SelectBasicBlock(BB, LLVMBB, BI, llvm::next(BI), HadTailCall); + SelectBasicBlock(Inst, BI, HadTailCall); // If the call was emitted as a tail call, we're done with the block. if (HadTailCall) { - BI = End; + --BI; break; } - // If the instruction was codegen'd with multiple blocks, - // inform the FastISel object where to resume inserting. - FastIS->setCurrentBlock(BB); continue; } // Otherwise, give up on FastISel for the rest of the block. // For now, be a little lenient about non-branch terminators. - if (!isa<TerminatorInst>(BI) || isa<BranchInst>(BI)) { + if (!isa<TerminatorInst>(Inst) || isa<BranchInst>(Inst)) { ++NumFastIselFailures; if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel miss: "; - BI->dump(); + Inst->dump(); } if (EnableFastISelAbort) // The "fast" selector couldn't handle something and bailed. @@ -881,17 +797,17 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } break; } + + FastIS->recomputeInsertPt(); } // Run SelectionDAG instruction selection on the remainder of the block // not handled by FastISel. If FastISel is not run, this is the entire // block. - if (BI != End) { - bool HadTailCall; - BB = SelectBasicBlock(BB, LLVMBB, BI, End, HadTailCall); - } + bool HadTailCall; + SelectBasicBlock(Begin, BI, HadTailCall); - FinishBasicBlock(BB); + FinishBasicBlock(); FuncInfo->PHINodesToUpdate.clear(); } @@ -899,11 +815,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } void -SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { +SelectionDAGISel::FinishBasicBlock() { DEBUG(dbgs() << "Total amount of phi nodes to update: " - << FuncInfo->PHINodesToUpdate.size() << "\n"); - DEBUG(for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) + << FuncInfo->PHINodesToUpdate.size() << "\n"; + for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) dbgs() << "Node " << i << " : (" << FuncInfo->PHINodesToUpdate[i].first << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n"); @@ -917,11 +833,11 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first; assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); - if (!BB->isSuccessor(PHI->getParent())) + if (!FuncInfo->MBB->isSuccessor(PHI->getParent())) continue; PHI->addOperand( MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false)); - PHI->addOperand(MachineOperand::CreateMBB(BB)); + PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB)); } return; } @@ -930,33 +846,35 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { // Lower header first, if it wasn't already lowered if (!SDB->BitTestCases[i].Emitted) { // Set the current basic block to the mbb we wish to insert the code into - BB = SDB->BitTestCases[i].Parent; + FuncInfo->MBB = SDB->BitTestCases[i].Parent; + FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code - SDB->visitBitTestHeader(SDB->BitTestCases[i], BB); + 
SDB->visitBitTestHeader(SDB->BitTestCases[i], FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); - BB = CodeGenAndEmitDAG(BB); + CodeGenAndEmitDAG(); } for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) { // Set the current basic block to the mbb we wish to insert the code into - BB = SDB->BitTestCases[i].Cases[j].ThisBB; + FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB; + FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code if (j+1 != ej) SDB->visitBitTestCase(SDB->BitTestCases[i].Cases[j+1].ThisBB, SDB->BitTestCases[i].Reg, SDB->BitTestCases[i].Cases[j], - BB); + FuncInfo->MBB); else SDB->visitBitTestCase(SDB->BitTestCases[i].Default, SDB->BitTestCases[i].Reg, SDB->BitTestCases[i].Cases[j], - BB); + FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); - BB = CodeGenAndEmitDAG(BB); + CodeGenAndEmitDAG(); } // Update PHI Nodes @@ -1001,22 +919,24 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { // Lower header first, if it wasn't already lowered if (!SDB->JTCases[i].first.Emitted) { // Set the current basic block to the mbb we wish to insert the code into - BB = SDB->JTCases[i].first.HeaderBB; + FuncInfo->MBB = SDB->JTCases[i].first.HeaderBB; + FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first, - BB); + FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); - BB = CodeGenAndEmitDAG(BB); + CodeGenAndEmitDAG(); } // Set the current basic block to the mbb we wish to insert the code into - BB = SDB->JTCases[i].second.MBB; + FuncInfo->MBB = SDB->JTCases[i].second.MBB; + FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code SDB->visitJumpTable(SDB->JTCases[i].second); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); - BB = CodeGenAndEmitDAG(BB); + CodeGenAndEmitDAG(); // Update PHI Nodes for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size(); @@ -1034,11 +954,11 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { (MachineOperand::CreateMBB(SDB->JTCases[i].first.HeaderBB)); } // JT BB. Just iterate over successors here - if (BB->isSuccessor(PHIBB)) { + if (FuncInfo->MBB->isSuccessor(PHIBB)) { PHI->addOperand (MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second, false)); - PHI->addOperand(MachineOperand::CreateMBB(BB)); + PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB)); } } } @@ -1050,10 +970,10 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first; assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); - if (BB->isSuccessor(PHI->getParent())) { + if (FuncInfo->MBB->isSuccessor(PHI->getParent())) { PHI->addOperand( MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false)); - PHI->addOperand(MachineOperand::CreateMBB(BB)); + PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB)); } } @@ -1061,7 +981,8 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { // additional DAGs necessary. for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) { // Set the current basic block to the mbb we wish to insert the code into - MachineBasicBlock *ThisBB = BB = SDB->SwitchCases[i].ThisBB; + MachineBasicBlock *ThisBB = FuncInfo->MBB = SDB->SwitchCases[i].ThisBB; + FuncInfo->InsertPt = FuncInfo->MBB->end(); // Determine the unique successors. SmallVector<MachineBasicBlock *, 2> Succs; @@ -1071,21 +992,24 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { // Emit the code. 
Note that this could result in ThisBB being split, so // we need to check for updates. - SDB->visitSwitchCase(SDB->SwitchCases[i], BB); + SDB->visitSwitchCase(SDB->SwitchCases[i], FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); - ThisBB = CodeGenAndEmitDAG(BB); + CodeGenAndEmitDAG(); + ThisBB = FuncInfo->MBB; // Handle any PHI nodes in successors of this chunk, as if we were coming // from the original BB before switch expansion. Note that PHI nodes can // occur multiple times in PHINodesToUpdate. We have to be very careful to // handle them the right number of times. for (unsigned i = 0, e = Succs.size(); i != e; ++i) { - BB = Succs[i]; - // BB may have been removed from the CFG if a branch was constant folded. - if (ThisBB->isSuccessor(BB)) { - for (MachineBasicBlock::iterator Phi = BB->begin(); - Phi != BB->end() && Phi->isPHI(); + FuncInfo->MBB = Succs[i]; + FuncInfo->InsertPt = FuncInfo->MBB->end(); + // FuncInfo->MBB may have been removed from the CFG if a branch was + // constant folded. + if (ThisBB->isSuccessor(FuncInfo->MBB)) { + for (MachineBasicBlock::iterator Phi = FuncInfo->MBB->begin(); + Phi != FuncInfo->MBB->end() && Phi->isPHI(); ++Phi) { // This value for this PHI node is recorded in PHINodesToUpdate. for (unsigned pn = 0; ; ++pn) { @@ -1205,6 +1129,7 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) { Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0 Ops.push_back(InOps[InlineAsm::Op_AsmString]); // 1 Ops.push_back(InOps[InlineAsm::Op_MDNode]); // 2, !srcloc + Ops.push_back(InOps[InlineAsm::Op_IsAlignStack]); // 3 unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size(); if (InOps[e-1].getValueType() == MVT::Flag) @@ -1701,7 +1626,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, SDValue(Res, ResNumResults-1)); if ((EmitNodeInfo & OPFL_FlagOutput) != 0) - --ResNumResults; + --ResNumResults; // Move the chain reference if needed. 
 if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 &&
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 3786bd1..6cae804 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -278,7 +278,7 @@ std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const {
       FlaggedNodes.push_back(N);
     while (!FlaggedNodes.empty()) {
       O << DOTGraphTraits<SelectionDAG*>
-      ::getSimpleNodeLabel(FlaggedNodes.back(), DAG);
+           ::getSimpleNodeLabel(FlaggedNodes.back(), DAG);
       FlaggedNodes.pop_back();
       if (!FlaggedNodes.empty())
         O << "\n ";
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 44a80d3..4f38669 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -20,6 +20,7 @@
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/GlobalVariable.h"
 #include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -261,6 +262,38 @@ static void InitLibcallNames(const char **Names) {
   Names[RTLIB::MEMMOVE] = "memmove";
   Names[RTLIB::MEMSET] = "memset";
   Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
+  Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1";
+  Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2";
+  Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4";
+  Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8";
+  Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1";
+  Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2";
+  Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4";
+  Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8";
+  Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1";
+  Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2";
+  Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4";
+  Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8";
+  Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1";
+  Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2";
+  Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4";
+  Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8";
+  Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1";
+  Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2";
+  Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4";
+  Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8";
+  Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1";
+  Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2";
+  Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4";
+  Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8";
+  Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1";
+  Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2";
+  Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4";
+  Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8";
+  Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1";
+  Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2";
+  Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4";
+  Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8";
 }
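The libcall names in this table must match the __sync_* symbols that libgcc/compiler-rt export, character for character, because codegen emits them as plain external calls whenever a target cannot lower an atomic operation inline; a mismatched entry only surfaces later as an unresolved symbol at link time. Every entry follows the same __sync_<operation>_<width-in-bytes> pattern, so the table can be cross-checked mechanically. A minimal standalone sketch of that naming scheme (syncLibcallName is a hypothetical helper for illustration, not an LLVM API):

    #include <cstdio>
    #include <string>

    // Hypothetical helper (not part of LLVM): rebuilds the names that the
    // RTLIB::SYNC_* table pairs with each enum entry. Each name is the
    // operation plus the access width in bytes (1, 2, 4 or 8).
    static std::string syncLibcallName(const std::string &Op, unsigned Bytes) {
      return "__sync_" + Op + "_" + std::to_string(Bytes);
    }

    int main() {
      const char *Ops[] = { "val_compare_and_swap", "lock_test_and_set",
                            "fetch_and_add", "fetch_and_sub", "fetch_and_and",
                            "fetch_and_or", "fetch_and_xor", "fetch_and_nand" };
      // Prints the 32 expected names; any table entry that differs (for
      // example a stray '-' in place of '_') would show up as an unresolved
      // symbol at link time rather than as a compile error.
      for (const char *Op : Ops)
        for (unsigned Bytes = 1; Bytes <= 8; Bytes *= 2)
          std::printf("%s\n", syncLibcallName(Op, Bytes).c_str());
      return 0;
    }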
/// InitLibcallCallingConvs - Set default libcall CallingConvs. @@ -546,9 +579,9 @@ TargetLowering::TargetLowering(const TargetMachine &tm, SchedPreferenceInfo = Sched::Latency; JumpBufSize = 0; JumpBufAlignment = 0; - IfCvtBlockSizeLimit = 2; - IfCvtDupBlockSizeLimit = 0; PrefLoopAlignment = 0; + MinStackArgumentAlignment = 1; + ShouldFoldAtomicFences = false; InitLibcallNames(LibcallRoutineNames); InitCmpLibcallCCs(CmpLibcallCCs); @@ -578,9 +611,9 @@ bool TargetLowering::canOpTrap(unsigned Op, EVT VT) const { static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, - unsigned &NumIntermediates, - EVT &RegisterVT, - TargetLowering* TLI) { + unsigned &NumIntermediates, + EVT &RegisterVT, + TargetLowering *TLI) { // Figure out the right, legal destination reg to copy into. unsigned NumElts = VT.getVectorNumElements(); MVT EltTy = VT.getVectorElementType(); @@ -610,16 +643,12 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, EVT DestVT = TLI->getRegisterType(NewVT); RegisterVT = DestVT; - if (EVT(DestVT).bitsLT(NewVT)) { - // Value is expanded, e.g. i64 -> i16. + if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); - } else { - // Otherwise, promotion or legal types use the same number of registers as - // the vector decimated to the appropriate level. - return NumVectorRegs; - } - return 1; + // Otherwise, promotion or legal types use the same number of registers as + // the vector decimated to the appropriate level. + return NumVectorRegs; } /// computeRegisterProperties - Once all of the register classes are added, @@ -705,39 +734,39 @@ void TargetLowering::computeRegisterProperties() { for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { MVT VT = (MVT::SimpleValueType)i; - if (!isTypeLegal(VT)) { - MVT IntermediateVT; - EVT RegisterVT; - unsigned NumIntermediates; - NumRegistersForVT[i] = - getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates, - RegisterVT, this); - RegisterTypeForVT[i] = RegisterVT; - - // Determine if there is a legal wider type. - bool IsLegalWiderType = false; - EVT EltVT = VT.getVectorElementType(); - unsigned NElts = VT.getVectorNumElements(); - for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { - EVT SVT = (MVT::SimpleValueType)nVT; - if (isTypeSynthesizable(SVT) && SVT.getVectorElementType() == EltVT && - SVT.getVectorNumElements() > NElts && NElts != 1) { - TransformToType[i] = SVT; - ValueTypeActions.setTypeAction(VT, Promote); - IsLegalWiderType = true; - break; - } + if (isTypeLegal(VT)) continue; + + MVT IntermediateVT; + EVT RegisterVT; + unsigned NumIntermediates; + NumRegistersForVT[i] = + getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates, + RegisterVT, this); + RegisterTypeForVT[i] = RegisterVT; + + // Determine if there is a legal wider type. + bool IsLegalWiderType = false; + EVT EltVT = VT.getVectorElementType(); + unsigned NElts = VT.getVectorNumElements(); + for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + EVT SVT = (MVT::SimpleValueType)nVT; + if (isTypeSynthesizable(SVT) && SVT.getVectorElementType() == EltVT && + SVT.getVectorNumElements() > NElts && NElts != 1) { + TransformToType[i] = SVT; + ValueTypeActions.setTypeAction(VT, Promote); + IsLegalWiderType = true; + break; } - if (!IsLegalWiderType) { - EVT NVT = VT.getPow2VectorType(); - if (NVT == VT) { - // Type is already a power of 2. 
The default action is to split. - TransformToType[i] = MVT::Other; - ValueTypeActions.setTypeAction(VT, Expand); - } else { - TransformToType[i] = NVT; - ValueTypeActions.setTypeAction(VT, Promote); - } + } + if (!IsLegalWiderType) { + EVT NVT = VT.getPow2VectorType(); + if (NVT == VT) { + // Type is already a power of 2. The default action is to split. + TransformToType[i] = MVT::Other; + ValueTypeActions.setTypeAction(VT, Expand); + } else { + TransformToType[i] = NVT; + ValueTypeActions.setTypeAction(VT, Promote); } } } @@ -811,6 +840,65 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, return 1; } +/// Get the EVTs and ArgFlags collections that represent the legalized return +/// type of the given function. This does not require a DAG or a return value, +/// and is suitable for use before any DAGs for the function are constructed. +/// TODO: Move this out of TargetLowering.cpp. +void llvm::GetReturnInfo(const Type* ReturnType, Attributes attr, + SmallVectorImpl<ISD::OutputArg> &Outs, + const TargetLowering &TLI, + SmallVectorImpl<uint64_t> *Offsets) { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, ReturnType, ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) return; + unsigned Offset = 0; + + for (unsigned j = 0, f = NumValues; j != f; ++j) { + EVT VT = ValueVTs[j]; + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + + if (attr & Attribute::SExt) + ExtendKind = ISD::SIGN_EXTEND; + else if (attr & Attribute::ZExt) + ExtendKind = ISD::ZERO_EXTEND; + + // FIXME: C calling convention requires the return type to be promoted to + // at least 32-bit. But this is not necessary for non-C calling + // conventions. The frontend should mark functions whose return values + // require promoting with signext or zeroext attributes. + if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { + EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32); + if (VT.bitsLT(MinVT)) + VT = MinVT; + } + + unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT); + EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT); + unsigned PartSize = TLI.getTargetData()->getTypeAllocSize( + PartVT.getTypeForEVT(ReturnType->getContext())); + + // 'inreg' on function refers to return value + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + if (attr & Attribute::InReg) + Flags.setInReg(); + + // Propagate extension type if any + if (attr & Attribute::SExt) + Flags.setSExt(); + else if (attr & Attribute::ZExt) + Flags.setZExt(); + + for (unsigned i = 0; i < NumParts; ++i) { + Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true)); + if (Offsets) { + Offsets->push_back(Offset); + Offset += PartSize; + } + } + } +} + /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. This is the actual /// alignment, not its logarithm. @@ -1042,7 +1130,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask)) return true; // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; // Output known-1 bits are only known if set in both the LHS & RHS. @@ -1076,7 +1164,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (TLO.ShrinkDemandedConstant(Op, NewMask)) return true; // If the operation can be done in a smaller type, do so. 
- if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; // Output known-0 bits are only known if clear in both the LHS & RHS. @@ -1101,7 +1189,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if ((KnownZero2 & NewMask) == NewMask) return TLO.CombineTo(Op, Op.getOperand(1)); // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; // If all of the unknown bits are known to be zero on one side or the other @@ -1498,13 +1586,17 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; } case ISD::AssertZext: { - EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); - APInt InMask = APInt::getLowBitsSet(BitWidth, - VT.getSizeInBits()); - if (SimplifyDemandedBits(Op.getOperand(0), InMask & NewMask, + // Demand all the bits of the input that are demanded in the output. + // The low bits are obvious; the high bits are demanded because we're + // asserting that they're zero here. + if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero, KnownOne, TLO, Depth+1)) return true; assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + + EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + APInt InMask = APInt::getLowBitsSet(BitWidth, + VT.getSizeInBits()); KnownZero |= ~InMask & NewMask; break; } @@ -1544,7 +1636,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownOne2, TLO, Depth+1)) return true; // See if the operation should be performed at a smaller bit width. - if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; } // FALL THROUGH @@ -2346,7 +2438,6 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{ /// vector. If it is invalid, don't add anything to Ops. void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter, - bool hasMemory, std::vector<SDValue> &Ops, SelectionDAG &DAG) const { switch (ConstraintLetter) { @@ -2384,7 +2475,8 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, if (ConstraintLetter != 'n') { int64_t Offs = GA->getOffset(); if (C) Offs += C->getZExtValue(); - Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), + Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), + C->getDebugLoc(), Op.getValueType(), Offs)); return; } @@ -2507,18 +2599,18 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { /// 'm' over 'r', for example. /// static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, - bool hasMemory, const TargetLowering &TLI, + const TargetLowering &TLI, SDValue Op, SelectionDAG *DAG) { assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options"); unsigned BestIdx = 0; TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown; int BestGenerality = -1; - + // Loop over the options, keeping track of the most general one. for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) { TargetLowering::ConstraintType CType = TLI.getConstraintType(OpInfo.Codes[i]); - + // If this is an 'other' constraint, see if the operand is valid for it. // For example, on X86 we might have an 'rI' constraint. 
If the operand // is an integer in the range [0..31] we want to use I (saving a load // of a register), otherwise it must be registers. @@ -2527,7 +2619,7 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, assert(OpInfo.Codes[i].size() == 1 && "Unhandled multi-letter 'other' constraint"); std::vector<SDValue> ResultOps; - TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i][0], hasMemory, + TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i][0], ResultOps, *DAG); if (!ResultOps.empty()) { BestType = CType; @@ -2536,6 +2628,11 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, } } + // Things with matching constraints can only be registers, per gcc + // documentation. This mainly affects "g" constraints. + if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput()) + continue; + // This constraint letter is more general than the previous one, use it. int Generality = getConstraintGenerality(CType); if (Generality > BestGenerality) { @@ -2554,7 +2651,6 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, /// OpInfo.ConstraintCode and OpInfo.ConstraintType. void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, - bool hasMemory, SelectionDAG *DAG) const { assert(!OpInfo.Codes.empty() && "Must have at least one constraint"); @@ -2563,7 +2659,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, OpInfo.ConstraintCode = OpInfo.Codes[0]; OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode); } else { - ChooseConstraint(OpInfo, hasMemory, *this, Op, DAG); + ChooseConstraint(OpInfo, *this, Op, DAG); } // 'X' matches anything.
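The constraint-selection hunks above drop the hasMemory flag that previously threaded through LowerAsmOperandForConstraint, ChooseConstraint, and ComputeConstraintToUse, and add one rule: an operand with a matching (tied) input may never resolve to a memory constraint, since GCC's inline-asm documentation requires matched operands to be registers. A distilled sketch of the resulting selection policy, with invented names and a simplified constraint taxonomy (this is not the LLVM implementation):

    #include <vector>

    enum ConstraintKind { Other, Register, RegisterClass, Memory };

    // More general constraint kinds score higher, mirroring the preference
    // for 'm' over 'r' when an operand offers both.
    static int generality(ConstraintKind K) {
      switch (K) {
      case Other:         return 0; // target-specific, e.g. 'I' on x86
      case Register:      return 1;
      case RegisterClass: return 2;
      case Memory:        return 3;
      }
      return 0;
    }

    // Pick the most general alternative, but skip memory alternatives
    // whenever the operand is tied to another operand. Alts is assumed
    // non-empty, matching the assert in ChooseConstraint.
    ConstraintKind choose(const std::vector<ConstraintKind> &Alts,
                          bool HasMatchingInput) {
      ConstraintKind Best = Alts.front();
      int BestGen = -1;
      for (ConstraintKind K : Alts) {
        if (K == Memory && HasMatchingInput)
          continue; // matched operands must be registers
        if (generality(K) > BestGen) {
          BestGen = generality(K);
          Best = K;
        }
      }
      return Best;
    }

For an output constraint like "=g" tied to an input, the memory alternative hidden inside 'g' is therefore skipped and the operand is forced into a register, which is exactly the case the new C_Memory check in ChooseConstraint guards against.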