diff options
Diffstat (limited to 'contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp')
-rw-r--r-- | contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 1997 |
1 files changed, 1239 insertions, 758 deletions
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 996c95b..1273120 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -83,24 +83,6 @@ LimitFPPrecision("limit-float-precision", "for some float libcalls"), cl::location(LimitFloatPrecision), cl::init(0)); - -static cl::opt<bool> -EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden, - cl::desc("Enable fast-math-flags for DAG nodes")); - -/// Minimum jump table density for normal functions. -static cl::opt<unsigned> -JumpTableDensity("jump-table-density", cl::init(10), cl::Hidden, - cl::desc("Minimum density for building a jump table in " - "a normal function")); - -/// Minimum jump table density for -Os or -Oz functions. -static cl::opt<unsigned> -OptsizeJumpTableDensity("optsize-jump-table-density", cl::init(40), cl::Hidden, - cl::desc("Minimum density for building a jump table in " - "an optsize function")); - - // Limit the width of DAG chains. This is important in general to prevent // DAG-based analysis from blowing up. For example, alias analysis and // load clustering may not complete in reasonable time. It is difficult to @@ -117,9 +99,31 @@ OptsizeJumpTableDensity("optsize-jump-table-density", cl::init(40), cl::Hidden, // store [4096 x i8] %data, [4096 x i8]* %buffer static const unsigned MaxParallelChains = 64; +// True if the Value passed requires ABI mangling as it is a parameter to a +// function or a return value from a function which is not an intrinsic. +static bool isABIRegCopy(const Value * V) { + const bool IsRetInst = V && isa<ReturnInst>(V); + const bool IsCallInst = V && isa<CallInst>(V); + const bool IsInLineAsm = + IsCallInst && static_cast<const CallInst *>(V)->isInlineAsm(); + const bool IsIndirectFunctionCall = + IsCallInst && !IsInLineAsm && + !static_cast<const CallInst *>(V)->getCalledFunction(); + // It is possible that the call instruction is an inline asm statement or an + // indirect function call in which case the return value of + // getCalledFunction() would be nullptr. + const bool IsInstrinsicCall = + IsCallInst && !IsInLineAsm && !IsIndirectFunctionCall && + static_cast<const CallInst *>(V)->getCalledFunction()->getIntrinsicID() != + Intrinsic::not_intrinsic; + + return IsRetInst || (IsCallInst && (!IsInLineAsm && !IsInstrinsicCall)); +} + static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, - MVT PartVT, EVT ValueVT, const Value *V); + MVT PartVT, EVT ValueVT, const Value *V, + bool IsABIRegCopy); /// getCopyFromParts - Create a value that contains the specified legal parts /// combined into the value they represent. If the parts combine to a type @@ -129,10 +133,11 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V, - Optional<ISD::NodeType> AssertOp = None) { + Optional<ISD::NodeType> AssertOp = None, + bool IsABIRegCopy = false) { if (ValueVT.isVector()) return getCopyFromPartsVector(DAG, DL, Parts, NumParts, - PartVT, ValueVT, V); + PartVT, ValueVT, V, IsABIRegCopy); assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -276,7 +281,8 @@ static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V, /// ValueVT (ISD::AssertSext). static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, - MVT PartVT, EVT ValueVT, const Value *V) { + MVT PartVT, EVT ValueVT, const Value *V, + bool IsABIRegCopy) { assert(ValueVT.isVector() && "Not a vector value"); assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -287,9 +293,18 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, EVT IntermediateVT; MVT RegisterVT; unsigned NumIntermediates; - unsigned NumRegs = - TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, - NumIntermediates, RegisterVT); + unsigned NumRegs; + + if (IsABIRegCopy) { + NumRegs = TLI.getVectorTypeBreakdownForCallingConv( + *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates, + RegisterVT); + } else { + NumRegs = + TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, + NumIntermediates, RegisterVT); + } + assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); @@ -318,9 +333,14 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the // intermediate operands. + EVT BuiltVectorTy = + EVT::getVectorVT(*DAG.getContext(), IntermediateVT.getScalarType(), + (IntermediateVT.isVector() + ? IntermediateVT.getVectorNumElements() * NumParts + : NumIntermediates)); Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, - DL, ValueVT, Ops); + DL, BuiltVectorTy, Ops); } // There is now one part, held in Val. Correct it to match ValueVT. @@ -359,23 +379,40 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, TLI.isTypeLegal(ValueVT)) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); - // Handle cases such as i8 -> <1 x i1> if (ValueVT.getVectorNumElements() != 1) { - diagnosePossiblyInvalidConstraint(*DAG.getContext(), V, - "non-trivial scalar-to-vector conversion"); - return DAG.getUNDEF(ValueVT); + // Certain ABIs require that vectors are passed as integers. For vectors + // are the same size, this is an obvious bitcast. + if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) { + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + } else if (ValueVT.getSizeInBits() < PartEVT.getSizeInBits()) { + // Bitcast Val back the original type and extract the corresponding + // vector we want. + unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits(); + EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(), + ValueVT.getVectorElementType(), Elts); + Val = DAG.getBitcast(WiderVecType, Val); + return DAG.getNode( + ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + + diagnosePossiblyInvalidConstraint( + *DAG.getContext(), V, "non-trivial scalar-to-vector conversion"); + return DAG.getUNDEF(ValueVT); } - if (ValueVT.getVectorNumElements() == 1 && - ValueVT.getVectorElementType() != PartEVT) - Val = DAG.getAnyExtOrTrunc(Val, DL, ValueVT.getScalarType()); + // Handle cases such as i8 -> <1 x i1> + EVT ValueSVT = ValueVT.getVectorElementType(); + if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT) + Val = ValueVT.isFloatingPoint() ? DAG.getFPExtendOrRound(Val, DL, ValueSVT) + : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT); - return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); + return DAG.getBuildVector(ValueVT, DL, Val); } static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl, SDValue Val, SDValue *Parts, unsigned NumParts, - MVT PartVT, const Value *V); + MVT PartVT, const Value *V, bool IsABIRegCopy); /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. If the parts contain more bits than Val, then, for @@ -383,12 +420,14 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl, static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V, - ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { + ISD::NodeType ExtendKind = ISD::ANY_EXTEND, + bool IsABIRegCopy = false) { EVT ValueVT = Val.getValueType(); // Handle the vector case separately. if (ValueVT.isVector()) - return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V); + return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V, + IsABIRegCopy); unsigned PartBits = PartVT.getSizeInBits(); unsigned OrigNumParts = NumParts; @@ -513,7 +552,9 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, /// value split into legal parts. static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, - MVT PartVT, const Value *V) { + MVT PartVT, const Value *V, + bool IsABIRegCopy) { + EVT ValueVT = Val.getValueType(); assert(ValueVT.isVector() && "Not a vector"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -541,7 +582,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, e = PartVT.getVectorNumElements(); i != e; ++i) Ops.push_back(DAG.getUNDEF(ElementVT)); - Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, Ops); + Val = DAG.getBuildVector(PartVT, DL, Ops); // FIXME: Use CONCAT for 2x -> 4x. @@ -554,17 +595,23 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, // Promoted vector extract Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); - } else{ - // Vector -> scalar conversion. - assert(ValueVT.getVectorNumElements() == 1 && - "Only trivial vector-to-scalar conversions should get here!"); - Val = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } else { + if (ValueVT.getVectorNumElements() == 1) { + Val = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); - Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); + } else { + assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() && + "lossy conversion of vector to scalar type"); + EVT IntermediateType = + EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); + Val = DAG.getBitcast(IntermediateType, Val); + Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); + } } + assert(Val.getValueType() == PartVT && "Unexpected vector part value type"); Parts[0] = Val; return; } @@ -573,15 +620,31 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, EVT IntermediateVT; MVT RegisterVT; unsigned NumIntermediates; - unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, - IntermediateVT, - NumIntermediates, RegisterVT); + unsigned NumRegs; + if (IsABIRegCopy) { + NumRegs = TLI.getVectorTypeBreakdownForCallingConv( + *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates, + RegisterVT); + } else { + NumRegs = + TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, + NumIntermediates, RegisterVT); + } unsigned NumElements = ValueVT.getVectorNumElements(); assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); + // Convert the vector to the appropiate type if necessary. + unsigned DestVectorNoElts = + NumIntermediates * + (IntermediateVT.isVector() ? IntermediateVT.getVectorNumElements() : 1); + EVT BuiltVectorTy = EVT::getVectorVT( + *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts); + if (Val.getValueType() != BuiltVectorTy) + Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val); + // Split the vector into intermediate operands. SmallVector<SDValue, 8> Ops(NumIntermediates); for (unsigned i = 0; i != NumIntermediates; ++i) { @@ -614,30 +677,35 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, } } -RegsForValue::RegsForValue() {} +RegsForValue::RegsForValue() { IsABIMangled = false; } RegsForValue::RegsForValue(const SmallVector<unsigned, 4> ®s, MVT regvt, - EVT valuevt) - : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} + EVT valuevt, bool IsABIMangledValue) + : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs), + RegCount(1, regs.size()), IsABIMangled(IsABIMangledValue) {} RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI, - const DataLayout &DL, unsigned Reg, Type *Ty) { + const DataLayout &DL, unsigned Reg, Type *Ty, + bool IsABIMangledValue) { ComputeValueVTs(TLI, DL, Ty, ValueVTs); + IsABIMangled = IsABIMangledValue; + for (EVT ValueVT : ValueVTs) { - unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT); - MVT RegisterVT = TLI.getRegisterType(Context, ValueVT); + unsigned NumRegs = IsABIMangledValue + ? TLI.getNumRegistersForCallingConv(Context, ValueVT) + : TLI.getNumRegisters(Context, ValueVT); + MVT RegisterVT = IsABIMangledValue + ? TLI.getRegisterTypeForCallingConv(Context, ValueVT) + : TLI.getRegisterType(Context, ValueVT); for (unsigned i = 0; i != NumRegs; ++i) Regs.push_back(Reg + i); RegVTs.push_back(RegisterVT); + RegCount.push_back(NumRegs); Reg += NumRegs; } } -/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from -/// this value and returns the result as a ValueVT value. This uses -/// Chain/Flag as the input and updates them for the output Chain/Flag. -/// If the Flag pointer is NULL, no flag is used. SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, const SDLoc &dl, SDValue &Chain, @@ -654,8 +722,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { // Copy the legal parts from the registers. EVT ValueVT = ValueVTs[Value]; - unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT); - MVT RegisterVT = RegVTs[Value]; + unsigned NumRegs = RegCount[Value]; + MVT RegisterVT = IsABIMangled + ? TLI.getRegisterTypeForCallingConv(RegVTs[Value]) + : RegVTs[Value]; Parts.resize(NumRegs); for (unsigned i = 0; i != NumRegs; ++i) { @@ -683,7 +753,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, unsigned RegSize = RegisterVT.getSizeInBits(); unsigned NumSignBits = LOI->NumSignBits; - unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes(); + unsigned NumZeroBits = LOI->Known.countMinLeadingZeros(); if (NumZeroBits == RegSize) { // The current value is a zero. @@ -739,10 +809,6 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values); } -/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the -/// specified value into the registers specified by this object. This uses -/// Chain/Flag as the input and updates them for the output Chain/Flag. -/// If the Flag pointer is NULL, no flag is used. void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, const SDLoc &dl, SDValue &Chain, SDValue *Flag, const Value *V, @@ -754,9 +820,11 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, unsigned NumRegs = Regs.size(); SmallVector<SDValue, 8> Parts(NumRegs); for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT ValueVT = ValueVTs[Value]; - unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT); - MVT RegisterVT = RegVTs[Value]; + unsigned NumParts = RegCount[Value]; + + MVT RegisterVT = IsABIMangled + ? TLI.getRegisterTypeForCallingConv(RegVTs[Value]) + : RegVTs[Value]; if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT)) ExtendKind = ISD::ZERO_EXTEND; @@ -796,9 +864,6 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); } -/// AddInlineAsmOperands - Add this value to the specified inlineasm node -/// operand list. This adds the code marker and includes the number of -/// values added into it. void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, unsigned MatchingIdx, const SDLoc &dl, SelectionDAG &DAG, @@ -840,9 +905,9 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, } } -void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, +void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa, const TargetLibraryInfo *li) { - AA = &aa; + AA = aa; GFI = gfi; LibInfo = li; DL = &DAG.getDataLayout(); @@ -850,12 +915,6 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, LPadToCallSiteMap.clear(); } -/// clear - Clear out the current SelectionDAG and the associated -/// state and prepare this SelectionDAGBuilder object to be used -/// for a new block. This doesn't clear out information about -/// additional blocks that are needed to complete switch lowering -/// or PHI node updating; that information is cleared out as it is -/// consumed. void SelectionDAGBuilder::clear() { NodeMap.clear(); UnusedArgNodeMap.clear(); @@ -867,21 +926,10 @@ void SelectionDAGBuilder::clear() { StatepointLowering.clear(); } -/// clearDanglingDebugInfo - Clear the dangling debug information -/// map. This function is separated from the clear so that debug -/// information that is dangling in a basic block can be properly -/// resolved in a different basic block. This allows the -/// SelectionDAG to resolve dangling debug information attached -/// to PHI nodes. void SelectionDAGBuilder::clearDanglingDebugInfo() { DanglingDebugInfoMap.clear(); } -/// getRoot - Return the current virtual root of the Selection DAG, -/// flushing any PendingLoad items. This must be done before emitting -/// a store or any other node that may need to be ordered after any -/// prior load instructions. -/// SDValue SelectionDAGBuilder::getRoot() { if (PendingLoads.empty()) return DAG.getRoot(); @@ -901,10 +949,6 @@ SDValue SelectionDAGBuilder::getRoot() { return Root; } -/// getControlRoot - Similar to getRoot, but instead of flushing all the -/// PendingLoad items, flush all the PendingExports items. It is necessary -/// to do this before emitting a terminator instruction. -/// SDValue SelectionDAGBuilder::getControlRoot() { SDValue Root = DAG.getRoot(); @@ -937,7 +981,9 @@ void SelectionDAGBuilder::visit(const Instruction &I) { HandlePHINodesInSuccessorBlocks(I.getParent()); } - ++SDNodeOrder; + // Increase the SDNodeOrder if dealing with a non-debug instruction. + if (!isa<DbgInfoIntrinsic>(I)) + ++SDNodeOrder; CurInst = &I; @@ -1001,10 +1047,12 @@ SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) { if (It != FuncInfo.ValueMap.end()) { unsigned InReg = It->second; + RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), - DAG.getDataLayout(), InReg, Ty); + DAG.getDataLayout(), InReg, Ty, isABIRegCopy(V)); SDValue Chain = DAG.getEntryNode(); - Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); + Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, + V); resolveDanglingDebugInfo(V, Result); } @@ -1122,8 +1170,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (isa<ArrayType>(CDS->getType())) return DAG.getMergeValues(Ops, getCurSDLoc()); - return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), - VT, Ops); + return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops); } if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { @@ -1175,7 +1222,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } // Create a BUILD_VECTOR node. - return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops); + return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops); } // If this is a static alloca, generate it as the frameindex instead of @@ -1185,14 +1232,15 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) return DAG.getFrameIndex(SI->second, - TLI.getPointerTy(DAG.getDataLayout())); + TLI.getFrameIndexTy(DAG.getDataLayout())); } // If this is an instruction which fast-isel has deferred, select it now. if (const Instruction *Inst = dyn_cast<Instruction>(V)) { unsigned InReg = FuncInfo.InitializeRegForValue(Inst); + RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg, - Inst->getType()); + Inst->getType(), isABIRegCopy(V)); SDValue Chain = DAG.getEntryNode(); return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); } @@ -1384,7 +1432,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { RetPtr.getValueType(), RetPtr, DAG.getIntPtrConstant(Offsets[i], getCurSDLoc()), - &Flags); + Flags); Chains[i] = DAG.getStore(Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), // FIXME: better loc info would be nice. @@ -1403,16 +1451,16 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { const Function *F = I.getParent()->getParent(); ISD::NodeType ExtendKind = ISD::ANY_EXTEND; - if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, + if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex, Attribute::SExt)) ExtendKind = ISD::SIGN_EXTEND; - else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, + else if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt)) ExtendKind = ISD::ZERO_EXTEND; LLVMContext &Context = F->getContext(); - bool RetInReg = F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, - Attribute::InReg); + bool RetInReg = F->getAttributes().hasAttribute( + AttributeList::ReturnIndex, Attribute::InReg); for (unsigned j = 0; j != NumValues; ++j) { EVT VT = ValueVTs[j]; @@ -1420,12 +1468,12 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind); - unsigned NumParts = TLI.getNumRegisters(Context, VT); - MVT PartVT = TLI.getRegisterType(Context, VT); + unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, VT); + MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, VT); SmallVector<SDValue, 4> Parts(NumParts); getCopyToParts(DAG, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), - &Parts[0], NumParts, PartVT, &I, ExtendKind); + &Parts[0], NumParts, PartVT, &I, ExtendKind, true); // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); @@ -1461,9 +1509,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { true /*isfixed*/, 1 /*origidx*/, 0 /*partOffs*/)); // Create SDNode for the swifterror virtual register. - OutVals.push_back(DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVReg( - FuncInfo.MBB, FuncInfo.SwiftErrorArg), - EVT(TLI.getPointerTy(DL)))); + OutVals.push_back( + DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVRegUseAt( + &I, FuncInfo.MBB, FuncInfo.SwiftErrorArg).first, + EVT(TLI.getPointerTy(DL)))); } bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); @@ -1582,7 +1631,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, BranchProbability TProb, - BranchProbability FProb) { + BranchProbability FProb, + bool InvertCond) { const BasicBlock *BB = CurBB->getBasicBlock(); // If the leaf of the tree is a comparison, merge the condition into @@ -1596,10 +1646,14 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, isExportableFromCurrentBlock(BOp->getOperand(1), BB))) { ISD::CondCode Condition; if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) { - Condition = getICmpCondCode(IC->getPredicate()); + ICmpInst::Predicate Pred = + InvertCond ? IC->getInversePredicate() : IC->getPredicate(); + Condition = getICmpCondCode(Pred); } else { const FCmpInst *FC = cast<FCmpInst>(Cond); - Condition = getFCmpCondCode(FC->getPredicate()); + FCmpInst::Predicate Pred = + InvertCond ? FC->getInversePredicate() : FC->getPredicate(); + Condition = getFCmpCondCode(Pred); if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); } @@ -1612,7 +1666,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, } // Create a CaseBlock record representing this branch. - CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()), + ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ; + CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()), nullptr, TBB, FBB, CurBB, TProb, FProb); SwitchCases.push_back(CB); } @@ -1625,16 +1680,44 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, MachineBasicBlock *SwitchBB, Instruction::BinaryOps Opc, BranchProbability TProb, - BranchProbability FProb) { - // If this node is not part of the or/and tree, emit it as a branch. + BranchProbability FProb, + bool InvertCond) { + // Skip over not part of the tree and remember to invert op and operands at + // next level. + if (BinaryOperator::isNot(Cond) && Cond->hasOneUse()) { + const Value *CondOp = BinaryOperator::getNotArgument(Cond); + if (InBlock(CondOp, CurBB->getBasicBlock())) { + FindMergedConditions(CondOp, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb, + !InvertCond); + return; + } + } + const Instruction *BOp = dyn_cast<Instruction>(Cond); + // Compute the effective opcode for Cond, taking into account whether it needs + // to be inverted, e.g. + // and (not (or A, B)), C + // gets lowered as + // and (and (not A, not B), C) + unsigned BOpc = 0; + if (BOp) { + BOpc = BOp->getOpcode(); + if (InvertCond) { + if (BOpc == Instruction::And) + BOpc = Instruction::Or; + else if (BOpc == Instruction::Or) + BOpc = Instruction::And; + } + } + + // If this node is not part of the or/and tree, emit it as a branch. if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) || - (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() || + BOpc != Opc || !BOp->hasOneUse() || BOp->getParent() != CurBB->getBasicBlock() || !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, - TProb, FProb); + TProb, FProb, InvertCond); return; } @@ -1669,14 +1752,14 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, auto NewFalseProb = TProb / 2 + FProb; // Emit the LHS condition. FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc, - NewTrueProb, NewFalseProb); + NewTrueProb, NewFalseProb, InvertCond); // Normalize A/2 and B to get A/(1+B) and 2B/(1+B). SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb}; BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); // Emit the RHS condition into TmpBB. FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, - Probs[0], Probs[1]); + Probs[0], Probs[1], InvertCond); } else { assert(Opc == Instruction::And && "Unknown merge op!"); // Codegen X & Y as: @@ -1702,14 +1785,14 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, auto NewFalseProb = FProb / 2; // Emit the LHS condition. FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc, - NewTrueProb, NewFalseProb); + NewTrueProb, NewFalseProb, InvertCond); // Normalize A and B/2 to get 2A/(1+A) and B/(1+A). SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2}; BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); // Emit the RHS condition into TmpBB. FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, - Probs[0], Probs[1]); + Probs[0], Probs[1], InvertCond); } } @@ -1793,7 +1876,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode, getEdgeProbability(BrMBB, Succ0MBB), - getEdgeProbability(BrMBB, Succ1MBB)); + getEdgeProbability(BrMBB, Succ1MBB), + /*InvertCond=*/false); // If the compares in later blocks need to use values not currently // exported from this block, export them now. This block should always // be the first entry. @@ -2027,7 +2111,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, Entry.Node = StackSlot; Entry.Ty = FnTy->getParamType(0); if (Fn->hasAttribute(1, Attribute::AttrKind::InReg)) - Entry.isInReg = true; + Entry.IsInReg = true; Args.push_back(Entry); TargetLowering::CallLoweringInfo CLI(DAG); @@ -2581,15 +2665,15 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { Flags.setNoSignedWrap(nsw); Flags.setNoUnsignedWrap(nuw); Flags.setVectorReduction(vec_redux); - if (EnableFMFInDAG) { - Flags.setAllowReciprocal(FMF.allowReciprocal()); - Flags.setNoInfs(FMF.noInfs()); - Flags.setNoNaNs(FMF.noNaNs()); - Flags.setNoSignedZeros(FMF.noSignedZeros()); - Flags.setUnsafeAlgebra(FMF.unsafeAlgebra()); - } + Flags.setAllowReciprocal(FMF.allowReciprocal()); + Flags.setAllowContract(FMF.allowContract()); + Flags.setNoInfs(FMF.noInfs()); + Flags.setNoNaNs(FMF.noNaNs()); + Flags.setNoSignedZeros(FMF.noSignedZeros()); + Flags.setUnsafeAlgebra(FMF.unsafeAlgebra()); + SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), - Op1, Op2, &Flags); + Op1, Op2, Flags); setValue(&I, BinNodeValue); } @@ -2642,7 +2726,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { Flags.setNoSignedWrap(nsw); Flags.setNoUnsignedWrap(nuw); SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2, - &Flags); + Flags); setValue(&I, Res); } @@ -2654,7 +2738,7 @@ void SelectionDAGBuilder::visitSDiv(const User &I) { Flags.setExact(isa<PossiblyExactOperator>(&I) && cast<PossiblyExactOperator>(&I)->isExact()); setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1, - Op2, &Flags)); + Op2, Flags)); } void SelectionDAGBuilder::visitICmp(const User &I) { @@ -2914,7 +2998,7 @@ void SelectionDAGBuilder::visitBitCast(const User &I) { DestVT, N)); // convert types. // Check if the original LLVM IR Operand was a ConstantInt, because getValue() // might fold any kind of constant expression to an integer constant and that - // is not what we are looking for. Only regcognize a bitcast of a genuine + // is not what we are looking for. Only recognize a bitcast of a genuine // constant integer as an opaque constant. else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0))) setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false, @@ -3067,14 +3151,10 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { if (SrcNumElts > MaskNumElts) { // Analyze the access pattern of the vector to see if we can extract - // two subvectors and do the shuffle. The analysis is done by calculating - // the range of elements the mask access on both vectors. - int MinRange[2] = { static_cast<int>(SrcNumElts), - static_cast<int>(SrcNumElts)}; - int MaxRange[2] = {-1, -1}; - - for (unsigned i = 0; i != MaskNumElts; ++i) { - int Idx = Mask[i]; + // two subvectors and do the shuffle. + int StartIdx[2] = { -1, -1 }; // StartIdx to extract from + bool CanExtract = true; + for (int Idx : Mask) { unsigned Input = 0; if (Idx < 0) continue; @@ -3083,41 +3163,28 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { Input = 1; Idx -= SrcNumElts; } - if (Idx > MaxRange[Input]) - MaxRange[Input] = Idx; - if (Idx < MinRange[Input]) - MinRange[Input] = Idx; - } - - // Check if the access is smaller than the vector size and can we find - // a reasonable extract index. - int RangeUse[2] = { -1, -1 }; // 0 = Unused, 1 = Extract, -1 = Can not - // Extract. - int StartIdx[2]; // StartIdx to extract from - for (unsigned Input = 0; Input < 2; ++Input) { - if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) { - RangeUse[Input] = 0; // Unused - StartIdx[Input] = 0; - continue; - } - // Find a good start index that is a multiple of the mask length. Then - // see if the rest of the elements are in range. - StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts; - if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && - StartIdx[Input] + MaskNumElts <= SrcNumElts) - RangeUse[Input] = 1; // Extract from a multiple of the mask length. + // If all the indices come from the same MaskNumElts sized portion of + // the sources we can use extract. Also make sure the extract wouldn't + // extract past the end of the source. + int NewStartIdx = alignDown(Idx, MaskNumElts); + if (NewStartIdx + MaskNumElts > SrcNumElts || + (StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx)) + CanExtract = false; + // Make sure we always update StartIdx as we use it to track if all + // elements are undef. + StartIdx[Input] = NewStartIdx; } - if (RangeUse[0] == 0 && RangeUse[1] == 0) { + if (StartIdx[0] < 0 && StartIdx[1] < 0) { setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used. return; } - if (RangeUse[0] >= 0 && RangeUse[1] >= 0) { + if (CanExtract) { // Extract appropriate subvector and generate a vector shuffle for (unsigned Input = 0; Input < 2; ++Input) { SDValue &Src = Input == 0 ? Src1 : Src2; - if (RangeUse[Input] == 0) + if (StartIdx[Input] < 0) Src = DAG.getUNDEF(VT); else { Src = DAG.getNode( @@ -3128,16 +3195,12 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { } // Calculate new mask. - SmallVector<int, 8> MappedOps; - for (unsigned i = 0; i != MaskNumElts; ++i) { - int Idx = Mask[i]; - if (Idx >= 0) { - if (Idx < (int)SrcNumElts) - Idx -= StartIdx[0]; - else - Idx -= SrcNumElts + StartIdx[1] - MaskNumElts; - } - MappedOps.push_back(Idx); + SmallVector<int, 8> MappedOps(Mask.begin(), Mask.end()); + for (int &Idx : MappedOps) { + if (Idx >= (int)SrcNumElts) + Idx -= SrcNumElts + StartIdx[1] - MaskNumElts; + else if (Idx >= 0) + Idx -= StartIdx[0]; } setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps)); @@ -3151,8 +3214,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { EVT EltVT = VT.getVectorElementType(); EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); SmallVector<SDValue,8> Ops; - for (unsigned i = 0; i != MaskNumElts; ++i) { - int Idx = Mask[i]; + for (int Idx : Mask) { SDValue Res; if (Idx < 0) { @@ -3168,10 +3230,16 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { Ops.push_back(Res); } - setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops)); + setValue(&I, DAG.getBuildVector(VT, DL, Ops)); } -void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { +void SelectionDAGBuilder::visitInsertValue(const User &I) { + ArrayRef<unsigned> Indices; + if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(&I)) + Indices = IV->getIndices(); + else + Indices = cast<ConstantExpr>(&I)->getIndices(); + const Value *Op0 = I.getOperand(0); const Value *Op1 = I.getOperand(1); Type *AggTy = I.getType(); @@ -3179,7 +3247,7 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { bool IntoUndef = isa<UndefValue>(Op0); bool FromUndef = isa<UndefValue>(Op1); - unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); + unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector<EVT, 4> AggValueVTs; @@ -3219,13 +3287,19 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { DAG.getVTList(AggValueVTs), Values)); } -void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { +void SelectionDAGBuilder::visitExtractValue(const User &I) { + ArrayRef<unsigned> Indices; + if (const ExtractValueInst *EV = dyn_cast<ExtractValueInst>(&I)) + Indices = EV->getIndices(); + else + Indices = cast<ConstantExpr>(&I)->getIndices(); + const Value *Op0 = I.getOperand(0); Type *AggTy = Op0->getType(); Type *ValTy = I.getType(); bool OutOfUndef = isa<UndefValue>(Op0); - unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); + unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector<EVT, 4> ValValueVTs; @@ -3281,14 +3355,14 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // N = N + Offset uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field); - // In an inbouds GEP with an offset that is nonnegative even when + // In an inbounds GEP with an offset that is nonnegative even when // interpreted as signed, assume there is no unsigned overflow. SDNodeFlags Flags; if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds()) Flags.setNoUnsignedWrap(true); N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, - DAG.getConstant(Offset, dl, N.getValueType()), &Flags); + DAG.getConstant(Offset, dl, N.getValueType()), Flags); } } else { MVT PtrTy = @@ -3318,7 +3392,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds()) Flags.setNoUnsignedWrap(true); - N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, &Flags); + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags); continue; } @@ -3326,7 +3400,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { SDValue IdxN = getValue(Idx); if (!IdxN.getValueType().isVector() && VectorWidth) { - MVT VT = MVT::getVectorVT(IdxN.getValueType().getSimpleVT(), VectorWidth); + EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorWidth); IdxN = DAG.getSplatBuildVector(VT, dl, IdxN); } @@ -3396,7 +3470,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { Flags.setNoUnsignedWrap(true); AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize, - DAG.getIntPtrConstant(StackAlign - 1, dl), &Flags); + DAG.getIntPtrConstant(StackAlign - 1, dl), Flags); // Mask out the low bits for alignment purposes. AllocSize = DAG.getNode(ISD::AND, dl, @@ -3459,7 +3533,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (isVolatile || NumValues > MaxParallelChains) // Serialize volatile loads with other side effects. Root = getRoot(); - else if (AA->pointsToConstantMemory(MemoryLocation( + else if (AA && AA->pointsToConstantMemory(MemoryLocation( SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); @@ -3500,7 +3574,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { SDValue A = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, DAG.getConstant(Offsets[i], dl, PtrVT), - &Flags); + Flags); auto MMOFlags = MachineMemOperand::MONone; if (isVolatile) MMOFlags |= MachineMemOperand::MOVolatile; @@ -3510,6 +3584,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { MMOFlags |= MachineMemOperand::MOInvariant; if (isDereferenceable) MMOFlags |= MachineMemOperand::MODereferenceable; + MMOFlags |= TLI.getMMOFlags(I); SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A, MachinePointerInfo(SV, Offsets[i]), Alignment, @@ -3533,8 +3608,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { } void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - assert(TLI.supportSwiftError() && + assert(DAG.getTargetLoweringInfo().supportSwiftError() && "call visitStoreToSwiftError when backend supports swifterror"); SmallVector<EVT, 4> ValueVTs; @@ -3547,15 +3621,15 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) { SDValue Src = getValue(SrcV); // Create a virtual register, then update the virtual register. - auto &DL = DAG.getDataLayout(); - const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL)); - unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); + unsigned VReg; bool CreatedVReg; + std::tie(VReg, CreatedVReg) = FuncInfo.getOrCreateSwiftErrorVRegDefAt(&I); // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue // Chain can be getRoot or getControlRoot. SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg, SDValue(Src.getNode(), Src.getResNo())); DAG.setRoot(CopyNode); - FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg); + if (CreatedVReg) + FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg); } void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { @@ -3571,8 +3645,8 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { Type *Ty = I.getType(); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); - assert(!AA->pointsToConstantMemory(MemoryLocation( - SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo)) && + assert((!AA || !AA->pointsToConstantMemory(MemoryLocation( + SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) && "load_from_swift_error should not be constant memory"); SmallVector<EVT, 4> ValueVTs; @@ -3585,7 +3659,8 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT SDValue L = DAG.getCopyFromReg( getRoot(), getCurSDLoc(), - FuncInfo.getOrCreateSwiftErrorVReg(FuncInfo.MBB, SV), ValueVTs[0]); + FuncInfo.getOrCreateSwiftErrorVRegUseAt(&I, FuncInfo.MBB, SV).first, + ValueVTs[0]); setValue(&I, L); } @@ -3639,6 +3714,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { MMOFlags |= MachineMemOperand::MOVolatile; if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr) MMOFlags |= MachineMemOperand::MONonTemporal; + MMOFlags |= TLI.getMMOFlags(I); // An aggregate load cannot wrap around the address space, so offsets to its // parts don't wrap either. @@ -3655,7 +3731,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { ChainI = 0; } SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, - DAG.getConstant(Offsets[i], dl, PtrVT), &Flags); + DAG.getConstant(Offsets[i], dl, PtrVT), Flags); SDValue St = DAG.getStore( Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add, MachinePointerInfo(PtrV, Offsets[i]), Alignment, MMOFlags, AAInfo); @@ -3853,7 +3929,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); // Do not serialize masked loads of constant memory with anything. - bool AddToChain = !AA->pointsToConstantMemory(MemoryLocation( + bool AddToChain = !AA || !AA->pointsToConstantMemory(MemoryLocation( PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo)); SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); @@ -3897,7 +3973,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { bool UniformBase = getUniformBase(BasePtr, Base, Index, this); bool ConstantMemory = false; if (UniformBase && - AA->pointsToConstantMemory(MemoryLocation( + AA && AA->pointsToConstantMemory(MemoryLocation( BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. @@ -3929,7 +4005,7 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering SuccessOrder = I.getSuccessOrdering(); AtomicOrdering FailureOrder = I.getFailureOrdering(); - SynchronizationScope Scope = I.getSynchScope(); + SyncScope::ID SSID = I.getSyncScopeID(); SDValue InChain = getRoot(); @@ -3939,7 +4015,7 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain, getValue(I.getPointerOperand()), getValue(I.getCompareOperand()), getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()), - /*Alignment=*/ 0, SuccessOrder, FailureOrder, Scope); + /*Alignment=*/ 0, SuccessOrder, FailureOrder, SSID); SDValue OutChain = L.getValue(2); @@ -3965,7 +4041,7 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break; } AtomicOrdering Order = I.getOrdering(); - SynchronizationScope Scope = I.getSynchScope(); + SyncScope::ID SSID = I.getSyncScopeID(); SDValue InChain = getRoot(); @@ -3976,7 +4052,7 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { getValue(I.getPointerOperand()), getValue(I.getValOperand()), I.getPointerOperand(), - /* Alignment=*/ 0, Order, Scope); + /* Alignment=*/ 0, Order, SSID); SDValue OutChain = L.getValue(1); @@ -3990,16 +4066,16 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) { SDValue Ops[3]; Ops[0] = getRoot(); Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl, - TLI.getPointerTy(DAG.getDataLayout())); - Ops[2] = DAG.getConstant(I.getSynchScope(), dl, - TLI.getPointerTy(DAG.getDataLayout())); + TLI.getFenceOperandTy(DAG.getDataLayout())); + Ops[2] = DAG.getConstant(I.getSyncScopeID(), dl, + TLI.getFenceOperandTy(DAG.getDataLayout())); DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); } void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering Order = I.getOrdering(); - SynchronizationScope Scope = I.getSynchScope(); + SyncScope::ID SSID = I.getSyncScopeID(); SDValue InChain = getRoot(); @@ -4017,7 +4093,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { VT.getStoreSize(), I.getAlignment() ? I.getAlignment() : DAG.getEVTAlignment(VT), - AAMDNodes(), nullptr, Scope, Order); + AAMDNodes(), nullptr, SSID, Order); InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG); SDValue L = @@ -4034,7 +4110,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering Order = I.getOrdering(); - SynchronizationScope Scope = I.getSynchScope(); + SyncScope::ID SSID = I.getSyncScopeID(); SDValue InChain = getRoot(); @@ -4051,7 +4127,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { getValue(I.getPointerOperand()), getValue(I.getValueOperand()), I.getPointerOperand(), I.getAlignment(), - Order, Scope); + Order, SSID); DAG.setRoot(OutChain); } @@ -4695,7 +4771,7 @@ static unsigned getUnderlyingArgReg(const SDValue &N) { /// At the end of instruction selection, they will be inserted to the entry BB. bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( const Value *V, DILocalVariable *Variable, DIExpression *Expr, - DILocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N) { + DILocation *DL, int64_t Offset, bool IsDbgDeclare, const SDValue &N) { const Argument *Arg = dyn_cast<Argument>(V); if (!Arg) return false; @@ -4709,9 +4785,11 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( if (!Variable->getScope()->getSubprogram()->describes(MF.getFunction())) return false; + bool IsIndirect = false; Optional<MachineOperand> Op; // Some arguments' frame index is recorded during argument lowering. - if (int FI = FuncInfo.getArgumentFrameIndex(Arg)) + int FI = FuncInfo.getArgumentFrameIndex(Arg); + if (FI != INT_MAX) Op = MachineOperand::CreateFI(FI); if (!Op && N.getNode()) { @@ -4722,15 +4800,19 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( if (PR) Reg = PR; } - if (Reg) + if (Reg) { Op = MachineOperand::CreateReg(Reg, false); + IsIndirect = IsDbgDeclare; + } } if (!Op) { // Check if ValueMap has reg number. DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); - if (VMI != FuncInfo.ValueMap.end()) + if (VMI != FuncInfo.ValueMap.end()) { Op = MachineOperand::CreateReg(VMI->second, false); + IsIndirect = IsDbgDeclare; + } } if (!Op && N.getNode()) @@ -4752,7 +4834,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( else FuncInfo.ArgDbgValues.push_back( BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE)) - .addOperand(*Op) + .add(*Op) .addImm(Offset) .addMetadata(Variable) .addMetadata(Expr)); @@ -4764,26 +4846,17 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N, DILocalVariable *Variable, DIExpression *Expr, int64_t Offset, - DebugLoc dl, + const DebugLoc &dl, unsigned DbgSDNodeOrder) { - SDDbgValue *SDV; - auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode()); - if (FISDN && Expr->startsWithDeref()) { + if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) { // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe // stack slot locations as such instead of as indirectly addressed // locations. - ArrayRef<uint64_t> TrailingElements(Expr->elements_begin() + 1, - Expr->elements_end()); - DIExpression *DerefedDIExpr = - DIExpression::get(*DAG.getContext(), TrailingElements); - int FI = FISDN->getIndex(); - SDV = DAG.getFrameIndexDbgValue(Variable, DerefedDIExpr, FI, 0, dl, - DbgSDNodeOrder); - } else { - SDV = DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false, - Offset, dl, DbgSDNodeOrder); + return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), 0, dl, + DbgSDNodeOrder); } - return SDV; + return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false, + Offset, dl, DbgSDNodeOrder); } // VisualStudio defines setjmp as _setjmp @@ -4794,9 +4867,9 @@ SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N, # define setjmp_undefined_for_msvc #endif -/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If -/// we want to emit this as a call to a named external function, return the name -/// otherwise lower it and return null. +/// Lower the call to the specified intrinsic function. If we want to emit this +/// as a call to a named external function, return the name. Otherwise, lower it +/// and return null. const char * SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -4897,11 +4970,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { updateDAGForMaybeTailCall(MM); return nullptr; } - case Intrinsic::memcpy_element_atomic: { - SDValue Dst = getValue(I.getArgOperand(0)); - SDValue Src = getValue(I.getArgOperand(1)); - SDValue NumElements = getValue(I.getArgOperand(2)); - SDValue ElementSize = getValue(I.getArgOperand(3)); + case Intrinsic::memcpy_element_unordered_atomic: { + const ElementUnorderedAtomicMemCpyInst &MI = + cast<ElementUnorderedAtomicMemCpyInst>(I); + SDValue Dst = getValue(MI.getRawDest()); + SDValue Src = getValue(MI.getRawSource()); + SDValue Length = getValue(MI.getLength()); // Emit a library call. TargetLowering::ArgListTy Args; @@ -4912,31 +4986,101 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Entry.Node = Src; Args.push_back(Entry); - - Entry.Ty = I.getArgOperand(2)->getType(); - Entry.Node = NumElements; + + Entry.Ty = MI.getLength()->getType(); + Entry.Node = Length; + Args.push_back(Entry); + + uint64_t ElementSizeConstant = MI.getElementSizeInBytes(); + RTLIB::Libcall LibraryCall = + RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant); + if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported element size"); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee( + TLI.getLibcallCallingConv(LibraryCall), + Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall), + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args)); + + std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); + DAG.setRoot(CallResult.second); + return nullptr; + } + case Intrinsic::memmove_element_unordered_atomic: { + auto &MI = cast<ElementUnorderedAtomicMemMoveInst>(I); + SDValue Dst = getValue(MI.getRawDest()); + SDValue Src = getValue(MI.getRawSource()); + SDValue Length = getValue(MI.getLength()); + + // Emit a library call. + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Entry.Node = Dst; Args.push_back(Entry); - - Entry.Ty = Type::getInt32Ty(*DAG.getContext()); - Entry.Node = ElementSize; + + Entry.Node = Src; Args.push_back(Entry); - uint64_t ElementSizeConstant = - cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); + Entry.Ty = MI.getLength()->getType(); + Entry.Node = Length; + Args.push_back(Entry); + + uint64_t ElementSizeConstant = MI.getElementSizeInBytes(); + RTLIB::Libcall LibraryCall = + RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant); + if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported element size"); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee( + TLI.getLibcallCallingConv(LibraryCall), + Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall), + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args)); + + std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); + DAG.setRoot(CallResult.second); + return nullptr; + } + case Intrinsic::memset_element_unordered_atomic: { + auto &MI = cast<ElementUnorderedAtomicMemSetInst>(I); + SDValue Dst = getValue(MI.getRawDest()); + SDValue Val = getValue(MI.getValue()); + SDValue Length = getValue(MI.getLength()); + + // Emit a library call. + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + + Entry.Ty = Type::getInt8Ty(*DAG.getContext()); + Entry.Node = Val; + Args.push_back(Entry); + + Entry.Ty = MI.getLength()->getType(); + Entry.Node = Length; + Args.push_back(Entry); + + uint64_t ElementSizeConstant = MI.getElementSizeInBytes(); RTLIB::Libcall LibraryCall = - RTLIB::getMEMCPY_ELEMENT_ATOMIC(ElementSizeConstant); + RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant); if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) report_fatal_error("Unsupported element size"); TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(sdl) - .setChain(getRoot()) - .setCallee(TLI.getLibcallCallingConv(LibraryCall), - Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol( - TLI.getLibcallName(LibraryCall), - TLI.getPointerTy(DAG.getDataLayout())), - std::move(Args)); + CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee( + TLI.getLibcallCallingConv(LibraryCall), + Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall), + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args)); std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); DAG.setRoot(CallResult.second); @@ -4960,6 +5104,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } + // Byval arguments with frame indices were already handled after argument + // lowering and before isel. + const auto *Arg = + dyn_cast<Argument>(Address->stripInBoundsConstantOffsets()); + if (Arg && FuncInfo.getArgumentFrameIndex(Arg) != INT_MAX) + return nullptr; + SDValue &N = NodeMap[Address]; if (!N.getNode() && isa<Argument>(Address)) // Check unused arguments map. @@ -4978,8 +5129,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } else if (isa<Argument>(Address)) { // Address is an argument, so try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, - N); + EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, true, N); return nullptr; } else { SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), @@ -4989,22 +5139,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } else { // If Address is an argument then try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, + if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, true, N)) { - // If variable is pinned by a alloca in dominating bb then - // use StaticAllocaMap. - if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) { - if (AI->getParent() != DI.getParent()) { - DenseMap<const AllocaInst*, int>::iterator SI = - FuncInfo.StaticAllocaMap.find(AI); - if (SI != FuncInfo.StaticAllocaMap.end()) { - SDV = DAG.getFrameIndexDbgValue(Variable, Expression, SI->second, - 0, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, nullptr, false); - return nullptr; - } - } - } DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); } } @@ -5026,45 +5162,33 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDV = DAG.getConstantDbgValue(Variable, Expression, V, Offset, dl, SDNodeOrder); DAG.AddDbgValue(SDV, nullptr, false); - } else { - // Do not use getValue() in here; we don't want to generate code at - // this point if it hasn't been done yet. - SDValue N = NodeMap[V]; - if (!N.getNode() && isa<Argument>(V)) - // Check unused arguments map. - N = UnusedArgNodeMap[V]; - if (N.getNode()) { - if (!EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset, - false, N)) { - SDV = getDbgValue(N, Variable, Expression, Offset, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, N.getNode(), false); - } - } else if (!V->use_empty() ) { - // Do not call getValue(V) yet, as we don't want to generate code. - // Remember it for later. - DanglingDebugInfo DDI(&DI, dl, SDNodeOrder); - DanglingDebugInfoMap[V] = DDI; - } else { - // We may expand this to cover more cases. One case where we have no - // data available is an unreferenced parameter. - DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); - } + return nullptr; } - // Build a debug info table entry. - if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V)) - V = BCI->getOperand(0); - const AllocaInst *AI = dyn_cast<AllocaInst>(V); - // Don't handle byval struct arguments or VLAs, for example. - if (!AI) { - DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n"); - DEBUG(dbgs() << " Last seen at:\n " << *V << "\n"); + // Do not use getValue() in here; we don't want to generate code at + // this point if it hasn't been done yet. + SDValue N = NodeMap[V]; + if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map. + N = UnusedArgNodeMap[V]; + if (N.getNode()) { + if (EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset, false, + N)) + return nullptr; + SDV = getDbgValue(N, Variable, Expression, Offset, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, N.getNode(), false); return nullptr; } - DenseMap<const AllocaInst*, int>::iterator SI = - FuncInfo.StaticAllocaMap.find(AI); - if (SI == FuncInfo.StaticAllocaMap.end()) - return nullptr; // VLAs. + + if (!V->use_empty() ) { + // Do not call getValue(V) yet, as we don't want to generate code. + // Remember it for later. + DanglingDebugInfo DDI(&DI, dl, SDNodeOrder); + DanglingDebugInfoMap[V] = DDI; + return nullptr; + } + + DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n"); + DEBUG(dbgs() << " Last seen at:\n " << *V << "\n"); return nullptr; } @@ -5202,7 +5326,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue ShOps[2]; ShOps[0] = ShAmt; ShOps[1] = DAG.getConstant(0, sdl, MVT::i32); - ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps); + ShAmt = DAG.getBuildVector(ShAmtVT, sdl, ShOps); EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, @@ -5301,6 +5425,25 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)))); return nullptr; + case Intrinsic::experimental_constrained_fadd: + case Intrinsic::experimental_constrained_fsub: + case Intrinsic::experimental_constrained_fmul: + case Intrinsic::experimental_constrained_fdiv: + case Intrinsic::experimental_constrained_frem: + case Intrinsic::experimental_constrained_sqrt: + case Intrinsic::experimental_constrained_pow: + case Intrinsic::experimental_constrained_powi: + case Intrinsic::experimental_constrained_sin: + case Intrinsic::experimental_constrained_cos: + case Intrinsic::experimental_constrained_exp: + case Intrinsic::experimental_constrained_exp2: + case Intrinsic::experimental_constrained_log: + case Intrinsic::experimental_constrained_log10: + case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_rint: + case Intrinsic::experimental_constrained_nearbyint: + visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I)); + return nullptr; case Intrinsic::fmuladd: { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && @@ -5537,7 +5680,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::trap: { StringRef TrapFuncName = I.getAttributes() - .getAttribute(AttributeSet::FunctionIndex, "trap-func-name") + .getAttribute(AttributeList::FunctionIndex, "trap-func-name") .getValueAsString(); if (TrapFuncName.empty()) { ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? @@ -5548,7 +5691,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { TargetLowering::ArgListTy Args; TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(sdl).setChain(getRoot()).setCallee( + CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee( CallingConv::C, I.getType(), DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy(DAG.getDataLayout())), @@ -5629,7 +5772,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Ops[2]; Ops[0] = getRoot(); Ops[1] = - DAG.getFrameIndex(FI, TLI.getPointerTy(DAG.getDataLayout()), true); + DAG.getFrameIndex(FI, TLI.getFrameIndexTy(DAG.getDataLayout()), true); unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops); @@ -5690,7 +5833,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { int FI = FuncInfo.StaticAllocaMap[Slot]; MCSymbol *FrameAllocSym = MF.getMMI().getContext().getOrCreateFrameAllocSymbol( - GlobalValue::getRealLinkageName(MF.getName()), Idx); + GlobalValue::dropLLVMManglingEscape(MF.getName()), Idx); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, TII->get(TargetOpcode::LOCAL_ESCAPE)) .addSym(FrameAllocSym) @@ -5711,7 +5854,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { unsigned IdxVal = unsigned(Idx->getLimitedValue(INT_MAX)); MCSymbol *FrameAllocSym = MF.getMMI().getContext().getOrCreateFrameAllocSymbol( - GlobalValue::getRealLinkageName(Fn->getName()), IdxVal); + GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal); // Create a MCSymbol for the label to avoid any target lowering // that would make this PC relative. @@ -5742,13 +5885,142 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, N); return nullptr; } + case Intrinsic::xray_customevent: { + // Here we want to make sure that the intrinsic behaves as if it has a + // specific calling convention, and only for x86_64. + // FIXME: Support other platforms later. + const auto &Triple = DAG.getTarget().getTargetTriple(); + if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux()) + return nullptr; + SDLoc DL = getCurSDLoc(); + SmallVector<SDValue, 8> Ops; + + // We want to say that we always want the arguments in registers. + SDValue LogEntryVal = getValue(I.getArgOperand(0)); + SDValue StrSizeVal = getValue(I.getArgOperand(1)); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue Chain = getRoot(); + Ops.push_back(LogEntryVal); + Ops.push_back(StrSizeVal); + Ops.push_back(Chain); + + // We need to enforce the calling convention for the callsite, so that + // argument ordering is enforced correctly, and that register allocation can + // see that some registers may be assumed clobbered and have to preserve + // them across calls to the intrinsic. + MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHABLE_EVENT_CALL, + DL, NodeTys, Ops); + SDValue patchableNode = SDValue(MN, 0); + DAG.setRoot(patchableNode); + setValue(&I, patchableNode); + return nullptr; + } case Intrinsic::experimental_deoptimize: LowerDeoptimizeCall(&I); return nullptr; + + case Intrinsic::experimental_vector_reduce_fadd: + case Intrinsic::experimental_vector_reduce_fmul: + case Intrinsic::experimental_vector_reduce_add: + case Intrinsic::experimental_vector_reduce_mul: + case Intrinsic::experimental_vector_reduce_and: + case Intrinsic::experimental_vector_reduce_or: + case Intrinsic::experimental_vector_reduce_xor: + case Intrinsic::experimental_vector_reduce_smax: + case Intrinsic::experimental_vector_reduce_smin: + case Intrinsic::experimental_vector_reduce_umax: + case Intrinsic::experimental_vector_reduce_umin: + case Intrinsic::experimental_vector_reduce_fmax: + case Intrinsic::experimental_vector_reduce_fmin: { + visitVectorReduce(I, Intrinsic); + return nullptr; + } + } } +void SelectionDAGBuilder::visitConstrainedFPIntrinsic( + const ConstrainedFPIntrinsic &FPI) { + SDLoc sdl = getCurSDLoc(); + unsigned Opcode; + switch (FPI.getIntrinsicID()) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::experimental_constrained_fadd: + Opcode = ISD::STRICT_FADD; + break; + case Intrinsic::experimental_constrained_fsub: + Opcode = ISD::STRICT_FSUB; + break; + case Intrinsic::experimental_constrained_fmul: + Opcode = ISD::STRICT_FMUL; + break; + case Intrinsic::experimental_constrained_fdiv: + Opcode = ISD::STRICT_FDIV; + break; + case Intrinsic::experimental_constrained_frem: + Opcode = ISD::STRICT_FREM; + break; + case Intrinsic::experimental_constrained_sqrt: + Opcode = ISD::STRICT_FSQRT; + break; + case Intrinsic::experimental_constrained_pow: + Opcode = ISD::STRICT_FPOW; + break; + case Intrinsic::experimental_constrained_powi: + Opcode = ISD::STRICT_FPOWI; + break; + case Intrinsic::experimental_constrained_sin: + Opcode = ISD::STRICT_FSIN; + break; + case Intrinsic::experimental_constrained_cos: + Opcode = ISD::STRICT_FCOS; + break; + case Intrinsic::experimental_constrained_exp: + Opcode = ISD::STRICT_FEXP; + break; + case Intrinsic::experimental_constrained_exp2: + Opcode = ISD::STRICT_FEXP2; + break; + case Intrinsic::experimental_constrained_log: + Opcode = ISD::STRICT_FLOG; + break; + case Intrinsic::experimental_constrained_log10: + Opcode = ISD::STRICT_FLOG10; + break; + case Intrinsic::experimental_constrained_log2: + Opcode = ISD::STRICT_FLOG2; + break; + case Intrinsic::experimental_constrained_rint: + Opcode = ISD::STRICT_FRINT; + break; + case Intrinsic::experimental_constrained_nearbyint: + Opcode = ISD::STRICT_FNEARBYINT; + break; + } + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Chain = getRoot(); + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs); + ValueVTs.push_back(MVT::Other); // Out chain + + SDVTList VTs = DAG.getVTList(ValueVTs); + SDValue Result; + if (FPI.isUnaryOp()) + Result = DAG.getNode(Opcode, sdl, VTs, + { Chain, getValue(FPI.getArgOperand(0)) }); + else + Result = DAG.getNode(Opcode, sdl, VTs, + { Chain, getValue(FPI.getArgOperand(0)), + getValue(FPI.getArgOperand(1)) }); + + assert(Result.getNode()->getNumValues() == 2); + SDValue OutChain = Result.getValue(1); + DAG.setRoot(OutChain); + SDValue FPResult = Result.getValue(0); + setValue(&FPI, FPResult); +} + std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, const BasicBlock *EHPadBB) { @@ -5827,7 +6099,6 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, Type *RetTy = CS.getType(); TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; Args.reserve(CS.arg_size()); const Value *SwiftErrorVal = nullptr; @@ -5843,6 +6114,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { + TargetLowering::ArgListEntry Entry; const Value *V = *i; // Skip empty types @@ -5852,24 +6124,25 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SDValue ArgNode = getValue(V); Entry.Node = ArgNode; Entry.Ty = V->getType(); - // Skip the first return-type Attribute to get to params. - Entry.setAttributes(&CS, i - CS.arg_begin() + 1); + Entry.setAttributes(&CS, i - CS.arg_begin()); // Use swifterror virtual register as input to the call. - if (Entry.isSwiftError && TLI.supportSwiftError()) { + if (Entry.IsSwiftError && TLI.supportSwiftError()) { SwiftErrorVal = V; // We find the virtual register for the actual swifterror argument. // Instead of using the Value, we use the virtual register instead. - Entry.Node = - DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVReg(FuncInfo.MBB, V), - EVT(TLI.getPointerTy(DL))); + Entry.Node = DAG.getRegister(FuncInfo + .getOrCreateSwiftErrorVRegUseAt( + CS.getInstruction(), FuncInfo.MBB, V) + .first, + EVT(TLI.getPointerTy(DL))); } Args.push_back(Entry); // If we have an explicit sret argument that is an Instruction, (i.e., it // might point to function-local memory), we can't meaningfully tail-call. - if (Entry.isSRet && isa<Instruction>(V)) + if (Entry.IsSRet && isa<Instruction>(V)) isTailCall = false; } @@ -5903,38 +6176,29 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, if (SwiftErrorVal && TLI.supportSwiftError()) { // Get the last element of InVals. SDValue Src = CLI.InVals.back(); - const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL)); - unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); + unsigned VReg; bool CreatedVReg; + std::tie(VReg, CreatedVReg) = + FuncInfo.getOrCreateSwiftErrorVRegDefAt(CS.getInstruction()); SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src); // We update the virtual register for the actual swifterror argument. - FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg); + if (CreatedVReg) + FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg); DAG.setRoot(CopyNode); } } -/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the -/// value is equal or not-equal to zero. -static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) { - for (const User *U : V->users()) { - if (const ICmpInst *IC = dyn_cast<ICmpInst>(U)) - if (IC->isEquality()) - if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1))) - if (C->isNullValue()) - continue; - // Unknown instruction. - return false; - } - return true; -} - static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, - Type *LoadTy, SelectionDAGBuilder &Builder) { // Check to see if this load can be trivially constant folded, e.g. if the // input is from a string literal. if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) { // Cast pointer to the type we really want to load. + Type *LoadTy = + Type::getIntNTy(PtrVal->getContext(), LoadVT.getScalarSizeInBits()); + if (LoadVT.isVector()) + LoadTy = VectorType::get(LoadTy, LoadVT.getVectorNumElements()); + LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput), PointerType::getUnqual(LoadTy)); @@ -5949,7 +6213,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, bool ConstantMemory = false; // Do not serialize (non-volatile) loads of constant memory with anything. - if (Builder.AA->pointsToConstantMemory(PtrVal)) { + if (Builder.AA && Builder.AA->pointsToConstantMemory(PtrVal)) { Root = Builder.DAG.getEntryNode(); ConstantMemory = true; } else { @@ -5967,8 +6231,8 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, return LoadVal; } -/// processIntegerCallValue - Record the value for an instruction that -/// produces an integer result, converting the type where necessary. +/// Record the value for an instruction that produces an integer result, +/// converting the type where necessary. void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, SDValue Value, bool IsSigned) { @@ -5981,20 +6245,13 @@ void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, setValue(&I, Value); } -/// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form. -/// If so, return true and lower it, otherwise return false and it will be -/// lowered like a normal call. +/// See if we can lower a memcmp call into an optimized form. If so, return +/// true and lower it. Otherwise return false, and it will be lowered like a +/// normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { - // Verify that the prototype makes sense. int memcmp(void*,void*,size_t) - if (I.getNumArgOperands() != 3) - return false; - const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1); - if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() || - !I.getArgOperand(2)->getType()->isIntegerTy() || - !I.getType()->isIntegerTy()) - return false; - const Value *Size = I.getArgOperand(2); const ConstantInt *CSize = dyn_cast<ConstantInt>(Size); if (CSize && CSize->getZExtValue() == 0) { @@ -6005,11 +6262,9 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { } const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); - std::pair<SDValue, SDValue> Res = - TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(), - getValue(LHS), getValue(RHS), getValue(Size), - MachinePointerInfo(LHS), - MachinePointerInfo(RHS)); + std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemcmp( + DAG, getCurSDLoc(), DAG.getRoot(), getValue(LHS), getValue(RHS), + getValue(Size), MachinePointerInfo(LHS), MachinePointerInfo(RHS)); if (Res.first.getNode()) { processIntegerCallValue(I, Res.first, true); PendingLoads.push_back(Res.second); @@ -6018,88 +6273,79 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 - if (CSize && IsOnlyUsedInZeroEqualityComparison(&I)) { - bool ActuallyDoIt = true; - MVT LoadVT; - Type *LoadTy; - switch (CSize->getZExtValue()) { - default: - LoadVT = MVT::Other; - LoadTy = nullptr; - ActuallyDoIt = false; - break; - case 2: - LoadVT = MVT::i16; - LoadTy = Type::getInt16Ty(CSize->getContext()); - break; - case 4: - LoadVT = MVT::i32; - LoadTy = Type::getInt32Ty(CSize->getContext()); - break; - case 8: - LoadVT = MVT::i64; - LoadTy = Type::getInt64Ty(CSize->getContext()); - break; - /* - case 16: - LoadVT = MVT::v4i32; - LoadTy = Type::getInt32Ty(CSize->getContext()); - LoadTy = VectorType::get(LoadTy, 4); - break; - */ - } - - // This turns into unaligned loads. We only do this if the target natively - // supports the MVT we'll be loading or if it is small enough (<= 4) that - // we'll only produce a small number of byte loads. + if (!CSize || !isOnlyUsedInZeroEqualityComparison(&I)) + return false; - // Require that we can find a legal MVT, and only do this if the target - // supports unaligned loads of that type. Expanding into byte loads would - // bloat the code. + // If the target has a fast compare for the given size, it will return a + // preferred load type for that size. Require that the load VT is legal and + // that the target supports unaligned loads of that type. Otherwise, return + // INVALID. + auto hasFastLoadsAndCompare = [&](unsigned NumBits) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (ActuallyDoIt && CSize->getZExtValue() > 4) { - unsigned DstAS = LHS->getType()->getPointerAddressSpace(); - unsigned SrcAS = RHS->getType()->getPointerAddressSpace(); + MVT LVT = TLI.hasFastEqualityCompare(NumBits); + if (LVT != MVT::INVALID_SIMPLE_VALUE_TYPE) { // TODO: Handle 5 byte compare as 4-byte + 1 byte. // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. // TODO: Check alignment of src and dest ptrs. - if (!TLI.isTypeLegal(LoadVT) || - !TLI.allowsMisalignedMemoryAccesses(LoadVT, SrcAS) || - !TLI.allowsMisalignedMemoryAccesses(LoadVT, DstAS)) - ActuallyDoIt = false; + unsigned DstAS = LHS->getType()->getPointerAddressSpace(); + unsigned SrcAS = RHS->getType()->getPointerAddressSpace(); + if (!TLI.isTypeLegal(LVT) || + !TLI.allowsMisalignedMemoryAccesses(LVT, SrcAS) || + !TLI.allowsMisalignedMemoryAccesses(LVT, DstAS)) + LVT = MVT::INVALID_SIMPLE_VALUE_TYPE; } - if (ActuallyDoIt) { - SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this); - SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this); + return LVT; + }; - SDValue Res = DAG.getSetCC(getCurSDLoc(), MVT::i1, LHSVal, RHSVal, - ISD::SETNE); - processIntegerCallValue(I, Res, false); - return true; - } + // This turns into unaligned loads. We only do this if the target natively + // supports the MVT we'll be loading or if it is small enough (<= 4) that + // we'll only produce a small number of byte loads. + MVT LoadVT; + unsigned NumBitsToCompare = CSize->getZExtValue() * 8; + switch (NumBitsToCompare) { + default: + return false; + case 16: + LoadVT = MVT::i16; + break; + case 32: + LoadVT = MVT::i32; + break; + case 64: + case 128: + case 256: + LoadVT = hasFastLoadsAndCompare(NumBitsToCompare); + break; } + if (LoadVT == MVT::INVALID_SIMPLE_VALUE_TYPE) + return false; - return false; + SDValue LoadL = getMemCmpLoad(LHS, LoadVT, *this); + SDValue LoadR = getMemCmpLoad(RHS, LoadVT, *this); + + // Bitcast to a wide integer type if the loads are vectors. + if (LoadVT.isVector()) { + EVT CmpVT = EVT::getIntegerVT(LHS->getContext(), LoadVT.getSizeInBits()); + LoadL = DAG.getBitcast(CmpVT, LoadL); + LoadR = DAG.getBitcast(CmpVT, LoadR); + } + + SDValue Cmp = DAG.getSetCC(getCurSDLoc(), MVT::i1, LoadL, LoadR, ISD::SETNE); + processIntegerCallValue(I, Cmp, false); + return true; } -/// visitMemChrCall -- See if we can lower a memchr call into an optimized -/// form. If so, return true and lower it, otherwise return false and it -/// will be lowered like a normal call. +/// See if we can lower a memchr call into an optimized form. If so, return +/// true and lower it. Otherwise return false, and it will be lowered like a +/// normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) { - // Verify that the prototype makes sense. void *memchr(void *, int, size_t) - if (I.getNumArgOperands() != 3) - return false; - const Value *Src = I.getArgOperand(0); const Value *Char = I.getArgOperand(1); const Value *Length = I.getArgOperand(2); - if (!Src->getType()->isPointerTy() || - !Char->getType()->isIntegerTy() || - !Length->getType()->isIntegerTy() || - !I.getType()->isPointerTy()) - return false; const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair<SDValue, SDValue> Res = @@ -6115,15 +6361,12 @@ bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) { return false; } -/// -/// visitMemPCpyCall -- lower a mempcpy call as a memcpy followed by code to -/// to adjust the dst pointer by the size of the copied memory. +/// See if we can lower a mempcpy call into an optimized form. If so, return +/// true and lower it. Otherwise return false, and it will be lowered like a +/// normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) { - - // Verify argument count: void *mempcpy(void *, const void *, size_t) - if (I.getNumArgOperands() != 3) - return false; - SDValue Dst = getValue(I.getArgOperand(0)); SDValue Src = getValue(I.getArgOperand(1)); SDValue Size = getValue(I.getArgOperand(2)); @@ -6158,19 +6401,13 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) { return true; } -/// visitStrCpyCall -- See if we can lower a strcpy or stpcpy call into an -/// optimized form. If so, return true and lower it, otherwise return false -/// and it will be lowered like a normal call. +/// See if we can lower a strcpy call into an optimized form. If so, return +/// true and lower it, otherwise return false and it will be lowered like a +/// normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) { - // Verify that the prototype makes sense. char *strcpy(char *, char *) - if (I.getNumArgOperands() != 2) - return false; - const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); - if (!Arg0->getType()->isPointerTy() || - !Arg1->getType()->isPointerTy() || - !I.getType()->isPointerTy()) - return false; const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair<SDValue, SDValue> Res = @@ -6187,19 +6424,13 @@ bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) { return false; } -/// visitStrCmpCall - See if we can lower a call to strcmp in an optimized form. -/// If so, return true and lower it, otherwise return false and it will be -/// lowered like a normal call. +/// See if we can lower a strcmp call into an optimized form. If so, return +/// true and lower it, otherwise return false and it will be lowered like a +/// normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) { - // Verify that the prototype makes sense. int strcmp(void*,void*) - if (I.getNumArgOperands() != 2) - return false; - const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); - if (!Arg0->getType()->isPointerTy() || - !Arg1->getType()->isPointerTy() || - !I.getType()->isIntegerTy()) - return false; const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair<SDValue, SDValue> Res = @@ -6216,17 +6447,13 @@ bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) { return false; } -/// visitStrLenCall -- See if we can lower a strlen call into an optimized -/// form. If so, return true and lower it, otherwise return false and it -/// will be lowered like a normal call. +/// See if we can lower a strlen call into an optimized form. If so, return +/// true and lower it, otherwise return false and it will be lowered like a +/// normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) { - // Verify that the prototype makes sense. size_t strlen(char *) - if (I.getNumArgOperands() != 1) - return false; - const Value *Arg0 = I.getArgOperand(0); - if (!Arg0->getType()->isPointerTy() || !I.getType()->isIntegerTy()) - return false; const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair<SDValue, SDValue> Res = @@ -6241,19 +6468,13 @@ bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) { return false; } -/// visitStrNLenCall -- See if we can lower a strnlen call into an optimized -/// form. If so, return true and lower it, otherwise return false and it -/// will be lowered like a normal call. +/// See if we can lower a strnlen call into an optimized form. If so, return +/// true and lower it, otherwise return false and it will be lowered like a +/// normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) { - // Verify that the prototype makes sense. size_t strnlen(char *, size_t) - if (I.getNumArgOperands() != 2) - return false; - const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); - if (!Arg0->getType()->isPointerTy() || - !Arg1->getType()->isIntegerTy() || - !I.getType()->isIntegerTy()) - return false; const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair<SDValue, SDValue> Res = @@ -6269,16 +6490,15 @@ bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) { return false; } -/// visitUnaryFloatCall - If a call instruction is a unary floating-point -/// operation (as expected), translate it to an SDNode with the specified opcode -/// and return true. +/// See if we can lower a unary floating-point operation into an SDNode with +/// the specified Opcode. If so, return true and lower it, otherwise return +/// false and it will be lowered like a normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, unsigned Opcode) { - // Sanity check that it really is a unary floating-point call. - if (I.getNumArgOperands() != 1 || - !I.getArgOperand(0)->getType()->isFloatingPointTy() || - I.getType() != I.getArgOperand(0)->getType() || - !I.onlyReadsMemory()) + // We already checked this call's prototype; verify it doesn't modify errno. + if (!I.onlyReadsMemory()) return false; SDValue Tmp = getValue(I.getArgOperand(0)); @@ -6286,17 +6506,15 @@ bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, return true; } -/// visitBinaryFloatCall - If a call instruction is a binary floating-point -/// operation (as expected), translate it to an SDNode with the specified opcode -/// and return true. +/// See if we can lower a binary floating-point operation into an SDNode with +/// the specified Opcode. If so, return true and lower it. Otherwise return +/// false, and it will be lowered like a normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I, unsigned Opcode) { - // Sanity check that it really is a binary floating-point call. - if (I.getNumArgOperands() != 2 || - !I.getArgOperand(0)->getType()->isFloatingPointTy() || - I.getType() != I.getArgOperand(0)->getType() || - I.getType() != I.getArgOperand(1)->getType() || - !I.onlyReadsMemory()) + // We already checked this call's prototype; verify it doesn't modify errno. + if (!I.onlyReadsMemory()) return false; SDValue Tmp0 = getValue(I.getArgOperand(0)); @@ -6336,20 +6554,18 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { // Check for well-known libc/libm calls. If the function is internal, it // can't be a library call. Don't do the check if marked as nobuiltin for // some reason. - LibFunc::Func Func; + LibFunc Func; if (!I.isNoBuiltin() && !F->hasLocalLinkage() && F->hasName() && - LibInfo->getLibFunc(F->getName(), Func) && + LibInfo->getLibFunc(*F, Func) && LibInfo->hasOptimizedCodeGen(Func)) { switch (Func) { default: break; - case LibFunc::copysign: - case LibFunc::copysignf: - case LibFunc::copysignl: - if (I.getNumArgOperands() == 2 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.getType() == I.getArgOperand(1)->getType() && - I.onlyReadsMemory()) { + case LibFunc_copysign: + case LibFunc_copysignf: + case LibFunc_copysignl: + // We already checked this call's prototype; verify it doesn't modify + // errno. + if (I.onlyReadsMemory()) { SDValue LHS = getValue(I.getArgOperand(0)); SDValue RHS = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(), @@ -6357,122 +6573,122 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { return; } break; - case LibFunc::fabs: - case LibFunc::fabsf: - case LibFunc::fabsl: + case LibFunc_fabs: + case LibFunc_fabsf: + case LibFunc_fabsl: if (visitUnaryFloatCall(I, ISD::FABS)) return; break; - case LibFunc::fmin: - case LibFunc::fminf: - case LibFunc::fminl: + case LibFunc_fmin: + case LibFunc_fminf: + case LibFunc_fminl: if (visitBinaryFloatCall(I, ISD::FMINNUM)) return; break; - case LibFunc::fmax: - case LibFunc::fmaxf: - case LibFunc::fmaxl: + case LibFunc_fmax: + case LibFunc_fmaxf: + case LibFunc_fmaxl: if (visitBinaryFloatCall(I, ISD::FMAXNUM)) return; break; - case LibFunc::sin: - case LibFunc::sinf: - case LibFunc::sinl: + case LibFunc_sin: + case LibFunc_sinf: + case LibFunc_sinl: if (visitUnaryFloatCall(I, ISD::FSIN)) return; break; - case LibFunc::cos: - case LibFunc::cosf: - case LibFunc::cosl: + case LibFunc_cos: + case LibFunc_cosf: + case LibFunc_cosl: if (visitUnaryFloatCall(I, ISD::FCOS)) return; break; - case LibFunc::sqrt: - case LibFunc::sqrtf: - case LibFunc::sqrtl: - case LibFunc::sqrt_finite: - case LibFunc::sqrtf_finite: - case LibFunc::sqrtl_finite: + case LibFunc_sqrt: + case LibFunc_sqrtf: + case LibFunc_sqrtl: + case LibFunc_sqrt_finite: + case LibFunc_sqrtf_finite: + case LibFunc_sqrtl_finite: if (visitUnaryFloatCall(I, ISD::FSQRT)) return; break; - case LibFunc::floor: - case LibFunc::floorf: - case LibFunc::floorl: + case LibFunc_floor: + case LibFunc_floorf: + case LibFunc_floorl: if (visitUnaryFloatCall(I, ISD::FFLOOR)) return; break; - case LibFunc::nearbyint: - case LibFunc::nearbyintf: - case LibFunc::nearbyintl: + case LibFunc_nearbyint: + case LibFunc_nearbyintf: + case LibFunc_nearbyintl: if (visitUnaryFloatCall(I, ISD::FNEARBYINT)) return; break; - case LibFunc::ceil: - case LibFunc::ceilf: - case LibFunc::ceill: + case LibFunc_ceil: + case LibFunc_ceilf: + case LibFunc_ceill: if (visitUnaryFloatCall(I, ISD::FCEIL)) return; break; - case LibFunc::rint: - case LibFunc::rintf: - case LibFunc::rintl: + case LibFunc_rint: + case LibFunc_rintf: + case LibFunc_rintl: if (visitUnaryFloatCall(I, ISD::FRINT)) return; break; - case LibFunc::round: - case LibFunc::roundf: - case LibFunc::roundl: + case LibFunc_round: + case LibFunc_roundf: + case LibFunc_roundl: if (visitUnaryFloatCall(I, ISD::FROUND)) return; break; - case LibFunc::trunc: - case LibFunc::truncf: - case LibFunc::truncl: + case LibFunc_trunc: + case LibFunc_truncf: + case LibFunc_truncl: if (visitUnaryFloatCall(I, ISD::FTRUNC)) return; break; - case LibFunc::log2: - case LibFunc::log2f: - case LibFunc::log2l: + case LibFunc_log2: + case LibFunc_log2f: + case LibFunc_log2l: if (visitUnaryFloatCall(I, ISD::FLOG2)) return; break; - case LibFunc::exp2: - case LibFunc::exp2f: - case LibFunc::exp2l: + case LibFunc_exp2: + case LibFunc_exp2f: + case LibFunc_exp2l: if (visitUnaryFloatCall(I, ISD::FEXP2)) return; break; - case LibFunc::memcmp: + case LibFunc_memcmp: if (visitMemCmpCall(I)) return; break; - case LibFunc::mempcpy: + case LibFunc_mempcpy: if (visitMemPCpyCall(I)) return; break; - case LibFunc::memchr: + case LibFunc_memchr: if (visitMemChrCall(I)) return; break; - case LibFunc::strcpy: + case LibFunc_strcpy: if (visitStrCpyCall(I, false)) return; break; - case LibFunc::stpcpy: + case LibFunc_stpcpy: if (visitStrCpyCall(I, true)) return; break; - case LibFunc::strcmp: + case LibFunc_strcmp: if (visitStrCmpCall(I)) return; break; - case LibFunc::strlen: + case LibFunc_strlen: if (visitStrLenCall(I)) return; break; - case LibFunc::strnlen: + case LibFunc_strnlen: if (visitStrNLenCall(I)) return; break; @@ -6648,7 +6864,7 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location, unsigned Align = DL.getPrefTypeAlignment(Ty); MachineFunction &MF = DAG.getMachineFunction(); int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false); - SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy(DL)); + SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL)); Chain = DAG.getStore(Chain, Location, OpInfo.CallOperand, StackSlot, MachinePointerInfo::getFixedStack(MF, SSFI)); OpInfo.CallOperand = StackSlot; @@ -6671,12 +6887,12 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, MachineFunction &MF = DAG.getMachineFunction(); SmallVector<unsigned, 4> Regs; + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); // If this is a constraint for a single physreg, or a constraint for a // register class, find it. std::pair<unsigned, const TargetRegisterClass *> PhysReg = - TLI.getRegForInlineAsmConstraint(MF.getSubtarget().getRegisterInfo(), - OpInfo.ConstraintCode, + TLI.getRegForInlineAsmConstraint(&TRI, OpInfo.ConstraintCode, OpInfo.ConstraintVT); unsigned NumRegs = 1; @@ -6684,12 +6900,12 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, // If this is a FP input in an integer register (or visa versa) insert a bit // cast of the input value. More generally, handle any case where the input // value disagrees with the register class we plan to stick this in. - if (OpInfo.Type == InlineAsm::isInput && - PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) { + if (OpInfo.Type == InlineAsm::isInput && PhysReg.second && + !TRI.isTypeLegalForClass(*PhysReg.second, OpInfo.ConstraintVT)) { // Try to convert to the first EVT that the reg class contains. If the // types are identical size, use a bitcast to convert (e.g. two differing // vector types). - MVT RegVT = *PhysReg.second->vt_begin(); + MVT RegVT = *TRI.legalclasstypes_begin(*PhysReg.second); if (RegVT.getSizeInBits() == OpInfo.CallOperand.getValueSizeInBits()) { OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); @@ -6717,12 +6933,12 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, if (unsigned AssignedReg = PhysReg.first) { const TargetRegisterClass *RC = PhysReg.second; if (OpInfo.ConstraintVT == MVT::Other) - ValueVT = *RC->vt_begin(); + ValueVT = *TRI.legalclasstypes_begin(*RC); // Get the actual register value type. This is important, because the user // may have asked for (e.g.) the AX register in i32 type. We need to // remember that AX is actually i16 to get the right extension. - RegVT = *RC->vt_begin(); + RegVT = *TRI.legalclasstypes_begin(*RC); // This is a explicit reference to a physical register. Regs.push_back(AssignedReg); @@ -6748,7 +6964,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, // Otherwise, if this was a reference to an LLVM register class, create vregs // for this reference. if (const TargetRegisterClass *RC = PhysReg.second) { - RegVT = *RC->vt_begin(); + RegVT = *TRI.legalclasstypes_begin(*RC); if (OpInfo.ConstraintVT == MVT::Other) ValueVT = RegVT; @@ -7085,8 +7301,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDLoc dl = getCurSDLoc(); // Use the produced MatchedRegs object to - MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, - Chain, &Flag, CS.getInstruction()); + MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, + CS.getInstruction()); MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, true, OpInfo.getMatchedOperand(), dl, DAG, AsmNodeOperands); @@ -7361,7 +7577,7 @@ void SelectionDAGBuilder::populateCallLoweringInfo( // Populate the argument list. // Attributes for args start at offset 1, after the return attribute. - for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1; + for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs; ArgI != ArgE; ++ArgI) { const Value *V = CS->getOperand(ArgI); @@ -7370,7 +7586,7 @@ void SelectionDAGBuilder::populateCallLoweringInfo( TargetLowering::ArgListEntry Entry; Entry.Node = getValue(V); Entry.Ty = V->getType(); - Entry.setAttributes(&CS, AttrI); + Entry.setAttributes(&CS, ArgIdx); Args.push_back(Entry); } @@ -7411,7 +7627,7 @@ static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx, } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) { const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); Ops.push_back(Builder.DAG.getTargetFrameIndex( - FI->getIndex(), TLI.getPointerTy(Builder.DAG.getDataLayout()))); + FI->getIndex(), TLI.getFrameIndexTy(Builder.DAG.getDataLayout()))); } else Ops.push_back(OpVal); } @@ -7437,11 +7653,11 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { // have to worry about calling conventions and target specific lowering code. // Instead we perform the call lowering right here. // - // chain, flag = CALLSEQ_START(chain, 0) + // chain, flag = CALLSEQ_START(chain, 0, 0) // chain, flag = STACKMAP(id, nbytes, ..., chain, flag) // chain, flag = CALLSEQ_END(chain, 0, 0, flag) // - Chain = DAG.getCALLSEQ_START(getRoot(), NullPtr, DL); + Chain = DAG.getCALLSEQ_START(getRoot(), 0, 0, DL); InFlag = Chain.getValue(1); // Add the <id> and <numBytes> constants. @@ -7631,9 +7847,79 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, FuncInfo.MF->getFrameInfo().setHasPatchPoint(); } -/// Returns an AttributeSet representing the attributes applied to the return +void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, + unsigned Intrinsic) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2; + if (I.getNumArgOperands() > 1) + Op2 = getValue(I.getArgOperand(1)); + SDLoc dl = getCurSDLoc(); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + SDValue Res; + FastMathFlags FMF; + if (isa<FPMathOperator>(I)) + FMF = I.getFastMathFlags(); + SDNodeFlags SDFlags; + SDFlags.setNoNaNs(FMF.noNaNs()); + + switch (Intrinsic) { + case Intrinsic::experimental_vector_reduce_fadd: + if (FMF.unsafeAlgebra()) + Res = DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2); + else + Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2); + break; + case Intrinsic::experimental_vector_reduce_fmul: + if (FMF.unsafeAlgebra()) + Res = DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2); + else + Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2); + break; + case Intrinsic::experimental_vector_reduce_add: + Res = DAG.getNode(ISD::VECREDUCE_ADD, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_mul: + Res = DAG.getNode(ISD::VECREDUCE_MUL, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_and: + Res = DAG.getNode(ISD::VECREDUCE_AND, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_or: + Res = DAG.getNode(ISD::VECREDUCE_OR, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_xor: + Res = DAG.getNode(ISD::VECREDUCE_XOR, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_smax: + Res = DAG.getNode(ISD::VECREDUCE_SMAX, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_smin: + Res = DAG.getNode(ISD::VECREDUCE_SMIN, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_umax: + Res = DAG.getNode(ISD::VECREDUCE_UMAX, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_umin: + Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_fmax: { + Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags); + break; + } + case Intrinsic::experimental_vector_reduce_fmin: { + Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags); + break; + } + default: + llvm_unreachable("Unhandled vector reduce intrinsic"); + } + setValue(&I, Res); +} + +/// Returns an AttributeList representing the attributes applied to the return /// value of the given call. -static AttributeSet getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) { +static AttributeList getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) { SmallVector<Attribute::AttrKind, 2> Attrs; if (CLI.RetSExt) Attrs.push_back(Attribute::SExt); @@ -7642,8 +7928,8 @@ static AttributeSet getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) { if (CLI.IsInReg) Attrs.push_back(Attribute::InReg); - return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex, - Attrs); + return AttributeList::get(CLI.RetTy->getContext(), AttributeList::ReturnIndex, + Attrs); } /// TargetLowering::LowerCallTo - This is the default LowerCallTo @@ -7660,6 +7946,22 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { auto &DL = CLI.DAG.getDataLayout(); ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets); + if (CLI.IsPostTypeLegalization) { + // If we are lowering a libcall after legalization, split the return type. + SmallVector<EVT, 4> OldRetTys = std::move(RetTys); + SmallVector<uint64_t, 4> OldOffsets = std::move(Offsets); + for (size_t i = 0, e = OldRetTys.size(); i != e; ++i) { + EVT RetVT = OldRetTys[i]; + uint64_t Offset = OldOffsets[i]; + MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT); + unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT); + unsigned RegisterVTSize = RegisterVT.getSizeInBits(); + RetTys.append(NumRegs, RegisterVT); + for (unsigned j = 0; j != NumRegs; ++j) + Offsets.push_back(Offset + j * RegisterVTSize); + } + } + SmallVector<ISD::OutputArg, 4> Outs; GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL); @@ -7679,19 +7981,19 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Align, false); Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy); - DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy(DL)); + DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL)); ArgListEntry Entry; Entry.Node = DemoteStackSlot; Entry.Ty = StackSlotPtrType; - Entry.isSExt = false; - Entry.isZExt = false; - Entry.isInReg = false; - Entry.isSRet = true; - Entry.isNest = false; - Entry.isByVal = false; - Entry.isReturned = false; - Entry.isSwiftSelf = false; - Entry.isSwiftError = false; + Entry.IsSExt = false; + Entry.IsZExt = false; + Entry.IsInReg = false; + Entry.IsSRet = true; + Entry.IsNest = false; + Entry.IsByVal = false; + Entry.IsReturned = false; + Entry.IsSwiftSelf = false; + Entry.IsSwiftError = false; Entry.Alignment = Align; CLI.getArgs().insert(CLI.getArgs().begin(), Entry); CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); @@ -7702,8 +8004,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } else { for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + MVT RegisterVT = + getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT); + unsigned NumRegs = + getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags; MyFlags.VT = RegisterVT; @@ -7724,7 +8028,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { ArgListTy &Args = CLI.getArgs(); if (supportSwiftError()) { for (unsigned i = 0, e = Args.size(); i != e; ++i) { - if (Args[i].isSwiftError) { + if (Args[i].IsSwiftError) { ISD::InputArg MyFlags; MyFlags.VT = getPointerTy(DL); MyFlags.ArgVT = EVT(getPointerTy(DL)); @@ -7740,8 +8044,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { for (unsigned i = 0, e = Args.size(); i != e; ++i) { SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs); + // FIXME: Split arguments if CLI.IsPostTypeLegalization Type *FinalType = Args[i].Ty; - if (Args[i].isByVal) + if (Args[i].IsByVal) FinalType = cast<PointerType>(Args[i].Ty)->getElementType(); bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters( FinalType, CLI.CallConv, CLI.IsVarArg); @@ -7752,13 +8057,17 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { SDValue Op = SDValue(Args[i].Node.getNode(), Args[i].Node.getResNo() + Value); ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); - if (Args[i].isZExt) + // Certain targets (such as MIPS), may have a different ABI alignment + // for a type depending on the context. Give the target a chance to + // specify the alignment it wants. + unsigned OriginalAlignment = getABIAlignmentForCallingConv(ArgTy, DL); + + if (Args[i].IsZExt) Flags.setZExt(); - if (Args[i].isSExt) + if (Args[i].IsSExt) Flags.setSExt(); - if (Args[i].isInReg) { + if (Args[i].IsInReg) { // If we are using vectorcall calling convention, a structure that is // passed InReg - is surely an HVA if (CLI.CallConv == CallingConv::X86_VectorCall && @@ -7771,15 +8080,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // Set InReg Flag Flags.setInReg(); } - if (Args[i].isSRet) + if (Args[i].IsSRet) Flags.setSRet(); - if (Args[i].isSwiftSelf) + if (Args[i].IsSwiftSelf) Flags.setSwiftSelf(); - if (Args[i].isSwiftError) + if (Args[i].IsSwiftError) Flags.setSwiftError(); - if (Args[i].isByVal) + if (Args[i].IsByVal) Flags.setByVal(); - if (Args[i].isInAlloca) { + if (Args[i].IsInAlloca) { Flags.setInAlloca(); // Set the byval flag for CCAssignFn callbacks that don't know about // inalloca. This way we can know how many bytes we should've allocated @@ -7788,7 +8097,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // in the various CC lowering callbacks. Flags.setByVal(); } - if (Args[i].isByVal || Args[i].isInAlloca) { + if (Args[i].IsByVal || Args[i].IsInAlloca) { PointerType *Ty = cast<PointerType>(Args[i].Ty); Type *ElementTy = Ty->getElementType(); Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); @@ -7801,24 +8110,25 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { FrameAlign = getByValTypeAlignment(ElementTy, DL); Flags.setByValAlign(FrameAlign); } - if (Args[i].isNest) + if (Args[i].IsNest) Flags.setNest(); if (NeedsRegBlock) Flags.setInConsecutiveRegs(); Flags.setOrigAlign(OriginalAlignment); - MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT); + MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT); + unsigned NumParts = + getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT); SmallVector<SDValue, 4> Parts(NumParts); ISD::NodeType ExtendKind = ISD::ANY_EXTEND; - if (Args[i].isSExt) + if (Args[i].IsSExt) ExtendKind = ISD::SIGN_EXTEND; - else if (Args[i].isZExt) + else if (Args[i].IsZExt) ExtendKind = ISD::ZERO_EXTEND; // Conservatively only handle 'returned' on non-vectors for now - if (Args[i].isReturned && !Op.getValueType().isVector()) { + if (Args[i].IsReturned && !Op.getValueType().isVector()) { assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues && "unexpected use of 'returned'"); // Before passing 'returned' to the target lowering code, ensure that @@ -7832,13 +8142,14 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // parameter extension method is not compatible with the return // extension method if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) || - (ExtendKind != ISD::ANY_EXTEND && - CLI.RetSExt == Args[i].isSExt && CLI.RetZExt == Args[i].isZExt)) - Flags.setReturned(); + (ExtendKind != ISD::ANY_EXTEND && CLI.RetSExt == Args[i].IsSExt && + CLI.RetZExt == Args[i].IsZExt)) + Flags.setReturned(); } getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, - CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind); + CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind, + true); for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 @@ -7916,7 +8227,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { for (unsigned i = 0; i < NumValues; ++i) { SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, CLI.DAG.getConstant(Offsets[i], CLI.DL, - PtrVT), &Flags); + PtrVT), Flags); SDValue L = CLI.DAG.getLoad( RetTys[i], CLI.DL, CLI.Chain, Add, MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(), @@ -7938,12 +8249,14 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { unsigned CurReg = 0; for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + MVT RegisterVT = + getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT); + unsigned NumRegs = + getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT); ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], NumRegs, RegisterVT, VT, nullptr, - AssertOp)); + AssertOp, true)); CurReg += NumRegs; } @@ -7979,8 +8292,11 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + // If this is an InlineAsm we have to match the registers required, not the + // notional registers required by the type. + RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, - V->getType()); + V->getType(), isABIRegCopy(V)); SDValue Chain = DAG.getEntryNode(); ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) == @@ -8010,6 +8326,173 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { return true; } +typedef DenseMap<const Argument *, + std::pair<const AllocaInst *, const StoreInst *>> + ArgCopyElisionMapTy; + +/// Scan the entry block of the function in FuncInfo for arguments that look +/// like copies into a local alloca. Record any copied arguments in +/// ArgCopyElisionCandidates. +static void +findArgumentCopyElisionCandidates(const DataLayout &DL, + FunctionLoweringInfo *FuncInfo, + ArgCopyElisionMapTy &ArgCopyElisionCandidates) { + // Record the state of every static alloca used in the entry block. Argument + // allocas are all used in the entry block, so we need approximately as many + // entries as we have arguments. + enum StaticAllocaInfo { Unknown, Clobbered, Elidable }; + SmallDenseMap<const AllocaInst *, StaticAllocaInfo, 8> StaticAllocas; + unsigned NumArgs = FuncInfo->Fn->arg_size(); + StaticAllocas.reserve(NumArgs * 2); + + auto GetInfoIfStaticAlloca = [&](const Value *V) -> StaticAllocaInfo * { + if (!V) + return nullptr; + V = V->stripPointerCasts(); + const auto *AI = dyn_cast<AllocaInst>(V); + if (!AI || !AI->isStaticAlloca() || !FuncInfo->StaticAllocaMap.count(AI)) + return nullptr; + auto Iter = StaticAllocas.insert({AI, Unknown}); + return &Iter.first->second; + }; + + // Look for stores of arguments to static allocas. Look through bitcasts and + // GEPs to handle type coercions, as long as the alloca is fully initialized + // by the store. Any non-store use of an alloca escapes it and any subsequent + // unanalyzed store might write it. + // FIXME: Handle structs initialized with multiple stores. + for (const Instruction &I : FuncInfo->Fn->getEntryBlock()) { + // Look for stores, and handle non-store uses conservatively. + const auto *SI = dyn_cast<StoreInst>(&I); + if (!SI) { + // We will look through cast uses, so ignore them completely. + if (I.isCast()) + continue; + // Ignore debug info intrinsics, they don't escape or store to allocas. + if (isa<DbgInfoIntrinsic>(I)) + continue; + // This is an unknown instruction. Assume it escapes or writes to all + // static alloca operands. + for (const Use &U : I.operands()) { + if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(U)) + *Info = StaticAllocaInfo::Clobbered; + } + continue; + } + + // If the stored value is a static alloca, mark it as escaped. + if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(SI->getValueOperand())) + *Info = StaticAllocaInfo::Clobbered; + + // Check if the destination is a static alloca. + const Value *Dst = SI->getPointerOperand()->stripPointerCasts(); + StaticAllocaInfo *Info = GetInfoIfStaticAlloca(Dst); + if (!Info) + continue; + const AllocaInst *AI = cast<AllocaInst>(Dst); + + // Skip allocas that have been initialized or clobbered. + if (*Info != StaticAllocaInfo::Unknown) + continue; + + // Check if the stored value is an argument, and that this store fully + // initializes the alloca. Don't elide copies from the same argument twice. + const Value *Val = SI->getValueOperand()->stripPointerCasts(); + const auto *Arg = dyn_cast<Argument>(Val); + if (!Arg || Arg->hasInAllocaAttr() || Arg->hasByValAttr() || + Arg->getType()->isEmptyTy() || + DL.getTypeStoreSize(Arg->getType()) != + DL.getTypeAllocSize(AI->getAllocatedType()) || + ArgCopyElisionCandidates.count(Arg)) { + *Info = StaticAllocaInfo::Clobbered; + continue; + } + + DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI << '\n'); + + // Mark this alloca and store for argument copy elision. + *Info = StaticAllocaInfo::Elidable; + ArgCopyElisionCandidates.insert({Arg, {AI, SI}}); + + // Stop scanning if we've seen all arguments. This will happen early in -O0 + // builds, which is useful, because -O0 builds have large entry blocks and + // many allocas. + if (ArgCopyElisionCandidates.size() == NumArgs) + break; + } +} + +/// Try to elide argument copies from memory into a local alloca. Succeeds if +/// ArgVal is a load from a suitable fixed stack object. +static void tryToElideArgumentCopy( + FunctionLoweringInfo *FuncInfo, SmallVectorImpl<SDValue> &Chains, + DenseMap<int, int> &ArgCopyElisionFrameIndexMap, + SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs, + ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg, + SDValue ArgVal, bool &ArgHasUses) { + // Check if this is a load from a fixed stack object. + auto *LNode = dyn_cast<LoadSDNode>(ArgVal); + if (!LNode) + return; + auto *FINode = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()); + if (!FINode) + return; + + // Check that the fixed stack object is the right size and alignment. + // Look at the alignment that the user wrote on the alloca instead of looking + // at the stack object. + auto ArgCopyIter = ArgCopyElisionCandidates.find(&Arg); + assert(ArgCopyIter != ArgCopyElisionCandidates.end()); + const AllocaInst *AI = ArgCopyIter->second.first; + int FixedIndex = FINode->getIndex(); + int &AllocaIndex = FuncInfo->StaticAllocaMap[AI]; + int OldIndex = AllocaIndex; + MachineFrameInfo &MFI = FuncInfo->MF->getFrameInfo(); + if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) { + DEBUG(dbgs() << " argument copy elision failed due to bad fixed stack " + "object size\n"); + return; + } + unsigned RequiredAlignment = AI->getAlignment(); + if (!RequiredAlignment) { + RequiredAlignment = FuncInfo->MF->getDataLayout().getABITypeAlignment( + AI->getAllocatedType()); + } + if (MFI.getObjectAlignment(FixedIndex) < RequiredAlignment) { + DEBUG(dbgs() << " argument copy elision failed: alignment of alloca " + "greater than stack argument alignment (" + << RequiredAlignment << " vs " + << MFI.getObjectAlignment(FixedIndex) << ")\n"); + return; + } + + // Perform the elision. Delete the old stack object and replace its only use + // in the variable info map. Mark the stack object as mutable. + DEBUG({ + dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n' + << " Replacing frame index " << OldIndex << " with " << FixedIndex + << '\n'; + }); + MFI.RemoveStackObject(OldIndex); + MFI.setIsImmutableObjectIndex(FixedIndex, false); + AllocaIndex = FixedIndex; + ArgCopyElisionFrameIndexMap.insert({OldIndex, FixedIndex}); + Chains.push_back(ArgVal.getValue(1)); + + // Avoid emitting code for the store implementing the copy. + const StoreInst *SI = ArgCopyIter->second.second; + ElidedArgCopyInstrs.insert(SI); + + // Check for uses of the argument again so that we can avoid exporting ArgVal + // if it is't used by anything other than the store. + for (const Value *U : Arg.users()) { + if (U != SI) { + ArgHasUses = true; + break; + } + } +} + void SelectionDAGISel::LowerArguments(const Function &F) { SelectionDAG &DAG = SDB->DAG; SDLoc dl = SDB->getCurSDLoc(); @@ -8032,16 +8515,21 @@ void SelectionDAGISel::LowerArguments(const Function &F) { Ins.push_back(RetArg); } + // Look for stores of arguments to static allocas. Mark such arguments with a + // flag to ask the target to give us the memory location of that argument if + // available. + ArgCopyElisionMapTy ArgCopyElisionCandidates; + findArgumentCopyElisionCandidates(DL, FuncInfo, ArgCopyElisionCandidates); + // Set up the incoming argument description vector. - unsigned Idx = 1; - for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); - I != E; ++I, ++Idx) { + for (const Argument &Arg : F.args()) { + unsigned ArgNo = Arg.getArgNo(); SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs); - bool isArgValueUsed = !I->use_empty(); + ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs); + bool isArgValueUsed = !Arg.use_empty(); unsigned PartBase = 0; - Type *FinalType = I->getType(); - if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) + Type *FinalType = Arg.getType(); + if (Arg.hasAttribute(Attribute::ByVal)) FinalType = cast<PointerType>(FinalType)->getElementType(); bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters( FinalType, F.getCallingConv(), F.isVarArg()); @@ -8050,17 +8538,22 @@ void SelectionDAGISel::LowerArguments(const Function &F) { EVT VT = ValueVTs[Value]; Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); - if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) + // Certain targets (such as MIPS), may have a different ABI alignment + // for a type depending on the context. Give the target a chance to + // specify the alignment it wants. + unsigned OriginalAlignment = + TLI->getABIAlignmentForCallingConv(ArgTy, DL); + + if (Arg.hasAttribute(Attribute::ZExt)) Flags.setZExt(); - if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) + if (Arg.hasAttribute(Attribute::SExt)) Flags.setSExt(); - if (F.getAttributes().hasAttribute(Idx, Attribute::InReg)) { + if (Arg.hasAttribute(Attribute::InReg)) { // If we are using vectorcall calling convention, a structure that is // passed InReg - is surely an HVA if (F.getCallingConv() == CallingConv::X86_VectorCall && - isa<StructType>(I->getType())) { + isa<StructType>(Arg.getType())) { // The first value of a structure is marked if (0 == Value) Flags.setHvaStart(); @@ -8069,15 +8562,15 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Set InReg Flag Flags.setInReg(); } - if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet)) + if (Arg.hasAttribute(Attribute::StructRet)) Flags.setSRet(); - if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftSelf)) + if (Arg.hasAttribute(Attribute::SwiftSelf)) Flags.setSwiftSelf(); - if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftError)) + if (Arg.hasAttribute(Attribute::SwiftError)) Flags.setSwiftError(); - if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) + if (Arg.hasAttribute(Attribute::ByVal)) Flags.setByVal(); - if (F.getAttributes().hasAttribute(Idx, Attribute::InAlloca)) { + if (Arg.hasAttribute(Attribute::InAlloca)) { Flags.setInAlloca(); // Set the byval flag for CCAssignFn callbacks that don't know about // inalloca. This way we can know how many bytes we should've allocated @@ -8088,33 +8581,37 @@ void SelectionDAGISel::LowerArguments(const Function &F) { } if (F.getCallingConv() == CallingConv::X86_INTR) { // IA Interrupt passes frame (1st parameter) by value in the stack. - if (Idx == 1) + if (ArgNo == 0) Flags.setByVal(); } if (Flags.isByVal() || Flags.isInAlloca()) { - PointerType *Ty = cast<PointerType>(I->getType()); + PointerType *Ty = cast<PointerType>(Arg.getType()); Type *ElementTy = Ty->getElementType(); Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. unsigned FrameAlign; - if (F.getParamAlignment(Idx)) - FrameAlign = F.getParamAlignment(Idx); + if (Arg.getParamAlignment()) + FrameAlign = Arg.getParamAlignment(); else FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL); Flags.setByValAlign(FrameAlign); } - if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) + if (Arg.hasAttribute(Attribute::Nest)) Flags.setNest(); if (NeedsRegBlock) Flags.setInConsecutiveRegs(); Flags.setOrigAlign(OriginalAlignment); + if (ArgCopyElisionCandidates.count(&Arg)) + Flags.setCopyElisionCandidate(); - MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); - unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT); + MVT RegisterVT = + TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT); + unsigned NumRegs = + TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed, - Idx-1, PartBase+i*RegisterVT.getStoreSize()); + ArgNo, PartBase+i*RegisterVT.getStoreSize()); if (NumRegs > 1 && i == 0) MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 @@ -8155,7 +8652,6 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Set up the argument values. unsigned i = 0; - Idx = 1; if (!FuncInfo->CanLowerReturn) { // Create a virtual register for the sret pointer, and put in a copy // from the sret argument into it. @@ -8177,49 +8673,63 @@ void SelectionDAGISel::LowerArguments(const Function &F) { DAG.setRoot(NewRoot); // i indexes lowered arguments. Bump it past the hidden sret argument. - // Idx indexes LLVM arguments. Don't touch it. ++i; } - for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; - ++I, ++Idx) { + SmallVector<SDValue, 4> Chains; + DenseMap<int, int> ArgCopyElisionFrameIndexMap; + for (const Argument &Arg : F.args()) { SmallVector<SDValue, 4> ArgValues; SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs); + ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) + continue; + + bool ArgHasUses = !Arg.use_empty(); + + // Elide the copying store if the target loaded this argument from a + // suitable fixed stack object. + if (Ins[i].Flags.isCopyElisionCandidate()) { + tryToElideArgumentCopy(FuncInfo, Chains, ArgCopyElisionFrameIndexMap, + ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg, + InVals[i], ArgHasUses); + } // If this argument is unused then remember its value. It is used to generate // debugging information. bool isSwiftErrorArg = TLI->supportSwiftError() && - F.getAttributes().hasAttribute(Idx, Attribute::SwiftError); - if (I->use_empty() && NumValues && !isSwiftErrorArg) { - SDB->setUnusedArgValue(&*I, InVals[i]); + Arg.hasAttribute(Attribute::SwiftError); + if (!ArgHasUses && !isSwiftErrorArg) { + SDB->setUnusedArgValue(&Arg, InVals[i]); // Also remember any frame index for use in FastISel. if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(InVals[i].getNode())) - FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex()); + FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex()); } for (unsigned Val = 0; Val != NumValues; ++Val) { EVT VT = ValueVTs[Val]; - MVT PartVT = TLI->getRegisterType(*CurDAG->getContext(), VT); - unsigned NumParts = TLI->getNumRegisters(*CurDAG->getContext(), VT); + MVT PartVT = + TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT); + unsigned NumParts = + TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT); // Even an apparant 'unused' swifterror argument needs to be returned. So // we do generate a copy for it that can be used on return from the // function. - if (!I->use_empty() || isSwiftErrorArg) { + if (ArgHasUses || isSwiftErrorArg) { Optional<ISD::NodeType> AssertOp; - if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) + if (Arg.hasAttribute(Attribute::SExt)) AssertOp = ISD::AssertSext; - else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) + else if (Arg.hasAttribute(Attribute::ZExt)) AssertOp = ISD::AssertZext; - ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], - NumParts, PartVT, VT, - nullptr, AssertOp)); + ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts, + PartVT, VT, nullptr, AssertOp, + true)); } i += NumParts; @@ -8232,18 +8742,18 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Note down frame index. if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode())) - FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex()); + FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex()); SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues), SDB->getCurSDLoc()); - SDB->setValue(&*I, Res); + SDB->setValue(&Arg, Res); if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(Res.getOperand(0).getNode())) if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) - FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex()); + FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex()); } // Update the SwiftErrorVRegDefMap. @@ -8263,18 +8773,36 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // uses with vregs. unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) { - FuncInfo->ValueMap[&*I] = Reg; + FuncInfo->ValueMap[&Arg] = Reg; continue; } } - if (!isOnlyUsedInEntryBlock(&*I, TM.Options.EnableFastISel)) { - FuncInfo->InitializeRegForValue(&*I); - SDB->CopyToExportRegsIfNeeded(&*I); + if (!isOnlyUsedInEntryBlock(&Arg, TM.Options.EnableFastISel)) { + FuncInfo->InitializeRegForValue(&Arg); + SDB->CopyToExportRegsIfNeeded(&Arg); } } + if (!Chains.empty()) { + Chains.push_back(NewRoot); + NewRoot = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); + } + + DAG.setRoot(NewRoot); + assert(i == InVals.size() && "Argument register count mismatch!"); + // If any argument copy elisions occurred and we have debug info, update the + // stale frame indices used in the dbg.declare variable info table. + MachineFunction::VariableDbgInfoMapTy &DbgDeclareInfo = MF->getVariableDbgInfo(); + if (!DbgDeclareInfo.empty() && !ArgCopyElisionFrameIndexMap.empty()) { + for (MachineFunction::VariableDbgInfo &VI : DbgDeclareInfo) { + auto I = ArgCopyElisionFrameIndexMap.find(VI.Slot); + if (I != ArgCopyElisionFrameIndexMap.end()) + VI.Slot = I->second; + } + } + // Finally, if the target has anything special to do, allow it to do so. EmitFunctionEntryCode(); } @@ -8402,13 +8930,10 @@ void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) { HasTailCall = true; } -bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters, - const SmallVectorImpl<unsigned> &TotalCases, - unsigned First, unsigned Last, - unsigned Density) const { +uint64_t +SelectionDAGBuilder::getJumpTableRange(const CaseClusterVector &Clusters, + unsigned First, unsigned Last) const { assert(Last >= First); - assert(TotalCases[Last] >= TotalCases[First]); - const APInt &LowCase = Clusters[First].Low->getValue(); const APInt &HighCase = Clusters[Last].High->getValue(); assert(LowCase.getBitWidth() == HighCase.getBitWidth()); @@ -8417,26 +8942,17 @@ bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters, // comparison to lower. We should discriminate against such consecutive ranges // in jump tables. - uint64_t Diff = (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100); - uint64_t Range = Diff + 1; + return (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100) + 1; +} +uint64_t SelectionDAGBuilder::getJumpTableNumCases( + const SmallVectorImpl<unsigned> &TotalCases, unsigned First, + unsigned Last) const { + assert(Last >= First); + assert(TotalCases[Last] >= TotalCases[First]); uint64_t NumCases = TotalCases[Last] - (First == 0 ? 0 : TotalCases[First - 1]); - - assert(NumCases < UINT64_MAX / 100); - assert(Range >= NumCases); - - return NumCases * 100 >= Range * Density; -} - -static inline bool areJTsAllowed(const TargetLowering &TLI, - const SwitchInst *SI) { - const Function *Fn = SI->getParent()->getParent(); - if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true") - return false; - - return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || - TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other); + return NumCases; } bool SelectionDAGBuilder::buildJumpTable(const CaseClusterVector &Clusters, @@ -8475,10 +8991,11 @@ bool SelectionDAGBuilder::buildJumpTable(const CaseClusterVector &Clusters, JTProbs[Clusters[I].MBB] += Clusters[I].Prob; } + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned NumDests = JTProbs.size(); - if (isSuitableForBitTests(NumDests, NumCmps, - Clusters[First].Low->getValue(), - Clusters[Last].High->getValue())) { + if (TLI.isSuitableForBitTests( + NumDests, NumCmps, Clusters[First].Low->getValue(), + Clusters[Last].High->getValue(), DAG.getDataLayout())) { // Clusters[First..Last] should be lowered as bit tests instead. return false; } @@ -8499,7 +9016,6 @@ bool SelectionDAGBuilder::buildJumpTable(const CaseClusterVector &Clusters, } JumpTableMBB->normalizeSuccProbs(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding()) ->createJumpTableIndex(Table); @@ -8528,17 +9044,12 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, #endif const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!areJTsAllowed(TLI, SI)) + if (!TLI.areJTsAllowed(SI->getParent()->getParent())) return; - const bool OptForSize = DefaultMBB->getParent()->getFunction()->optForSize(); - const int64_t N = Clusters.size(); const unsigned MinJumpTableEntries = TLI.getMinimumJumpTableEntries(); const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2; - const unsigned MaxJumpTableSize = - OptForSize || TLI.getMaximumJumpTableSize() == 0 - ? UINT_MAX : TLI.getMaximumJumpTableSize(); if (N < 2 || N < MinJumpTableEntries) return; @@ -8553,15 +9064,12 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, TotalCases[i] += TotalCases[i - 1]; } - const unsigned MinDensity = - OptForSize ? OptsizeJumpTableDensity : JumpTableDensity; - // Cheap case: the whole range may be suitable for jump table. - unsigned JumpTableSize = (Clusters[N - 1].High->getValue() - - Clusters[0].Low->getValue()) - .getLimitedValue(UINT_MAX - 1) + 1; - if (JumpTableSize <= MaxJumpTableSize && - isDense(Clusters, TotalCases, 0, N - 1, MinDensity)) { + uint64_t Range = getJumpTableRange(Clusters,0, N - 1); + uint64_t NumCases = getJumpTableNumCases(TotalCases, 0, N - 1); + assert(NumCases < UINT64_MAX / 100); + assert(Range >= NumCases); + if (TLI.isSuitableForJumpTable(SI, NumCases, Range)) { CaseCluster JTCluster; if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) { Clusters[0] = JTCluster; @@ -8614,11 +9122,11 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, // Search for a solution that results in fewer partitions. for (int64_t j = N - 1; j > i; j--) { // Try building a partition from Clusters[i..j]. - JumpTableSize = (Clusters[j].High->getValue() - - Clusters[i].Low->getValue()) - .getLimitedValue(UINT_MAX - 1) + 1; - if (JumpTableSize <= MaxJumpTableSize && - isDense(Clusters, TotalCases, i, j, MinDensity)) { + uint64_t Range = getJumpTableRange(Clusters, i, j); + uint64_t NumCases = getJumpTableNumCases(TotalCases, i, j); + assert(NumCases < UINT64_MAX / 100); + assert(Range >= NumCases); + if (TLI.isSuitableForJumpTable(SI, NumCases, Range)) { unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); unsigned Score = j == N - 1 ? 0 : PartitionsScore[j + 1]; int64_t NumEntries = j - i + 1; @@ -8662,36 +9170,6 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, Clusters.resize(DstIndex); } -bool SelectionDAGBuilder::rangeFitsInWord(const APInt &Low, const APInt &High) { - // FIXME: Using the pointer type doesn't seem ideal. - uint64_t BW = DAG.getDataLayout().getPointerSizeInBits(); - uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1; - return Range <= BW; -} - -bool SelectionDAGBuilder::isSuitableForBitTests(unsigned NumDests, - unsigned NumCmps, - const APInt &Low, - const APInt &High) { - // FIXME: I don't think NumCmps is the correct metric: a single case and a - // range of cases both require only one branch to lower. Just looking at the - // number of clusters and destinations should be enough to decide whether to - // build bit tests. - - // To lower a range with bit tests, the range must fit the bitwidth of a - // machine word. - if (!rangeFitsInWord(Low, High)) - return false; - - // Decide whether it's profitable to lower this range with bit tests. Each - // destination requires a bit test and branch, and there is an overall range - // check branch. For a small number of clusters, separate comparisons might be - // cheaper, and for many destinations, splitting the range might be better. - return (NumDests == 1 && NumCmps >= 3) || - (NumDests == 2 && NumCmps >= 5) || - (NumDests == 3 && NumCmps >= 6); -} - bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, unsigned First, unsigned Last, const SwitchInst *SI, @@ -8713,16 +9191,17 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, APInt High = Clusters[Last].High->getValue(); assert(Low.slt(High)); - if (!isSuitableForBitTests(NumDests, NumCmps, Low, High)) + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const DataLayout &DL = DAG.getDataLayout(); + if (!TLI.isSuitableForBitTests(NumDests, NumCmps, Low, High, DL)) return false; APInt LowBound; APInt CmpRange; - const int BitWidth = DAG.getTargetLoweringInfo() - .getPointerTy(DAG.getDataLayout()) - .getSizeInBits(); - assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!"); + const int BitWidth = TLI.getPointerTy(DL).getSizeInBits(); + assert(TLI.rangeFitsInWord(Low, High, DL) && + "Case range must fit in bit mask!"); // Check if the clusters cover a contiguous range such that no value in the // range will jump to the default statement. @@ -8812,7 +9291,9 @@ void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters, // If target does not have legal shift left, do not emit bit tests at all. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT PTy = TLI.getPointerTy(DAG.getDataLayout()); + const DataLayout &DL = DAG.getDataLayout(); + + EVT PTy = TLI.getPointerTy(DL); if (!TLI.isOperationLegal(ISD::SHL, PTy)) return; @@ -8843,8 +9324,8 @@ void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters, // Try building a partition from Clusters[i..j]. // Check the range. - if (!rangeFitsInWord(Clusters[i].Low->getValue(), - Clusters[j].High->getValue())) + if (!TLI.rangeFitsInWord(Clusters[i].Low->getValue(), + Clusters[j].High->getValue(), DL)) continue; // Check nbr of destinations and cluster types. |