Diffstat (limited to 'lib/CodeGen/TargetInfo.cpp')
-rw-r--r-- | lib/CodeGen/TargetInfo.cpp | 696
1 file changed, 595 insertions, 101 deletions
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index ffff0d0..7cc63b7 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -17,9 +17,9 @@ #include "CodeGenFunction.h" #include "clang/AST/RecordLayout.h" #include "clang/Frontend/CodeGenOptions.h" -#include "llvm/Type.h" -#include "llvm/DataLayout.h" #include "llvm/ADT/Triple.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Type.h" #include "llvm/Support/raw_ostream.h" using namespace clang; using namespace CodeGen; @@ -37,7 +37,7 @@ static void AssignToArrayRange(CodeGen::CGBuilderTy &Builder, } static bool isAggregateTypeForABI(QualType T) { - return CodeGenFunction::hasAggregateLLVMType(T) || + return !CodeGenFunction::hasScalarEvaluationKind(T) || T->isMemberFunctionPointerType(); } @@ -95,6 +95,7 @@ unsigned TargetCodeGenInfo::getSizeOfUnwindException() const { // x86-32 FreeBSD, Linux, Darwin // PowerPC Linux, Darwin // ARM Darwin (*not* EABI) + // AArch64 Linux return 32; } @@ -173,7 +174,7 @@ static bool hasNonTrivialDestructorOrCopyConstructor(const RecordType *RT) { if (!RD) return false; - return !RD->hasTrivialDestructor() || !RD->hasTrivialCopyConstructor(); + return !RD->hasTrivialDestructor() || RD->hasNonTrivialCopyConstructor(); } /// isRecordWithNonTrivialDestructorOrCopyConstructor - Determine if a type is @@ -266,9 +267,15 @@ static const Type *isSingleElementStruct(QualType T, ASTContext &Context) { } static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) { + // Treat complex types as the element type. + if (const ComplexType *CTy = Ty->getAs<ComplexType>()) + Ty = CTy->getElementType(); + + // Check for a type which we know has a simple scalar argument-passing + // convention without any padding. (We're specifically looking for 32 + // and 64-bit integer and integer-equivalents, float, and double.) if (!Ty->getAs<BuiltinType>() && !Ty->hasPointerRepresentation() && - !Ty->isAnyComplexType() && !Ty->isEnumeralType() && - !Ty->isBlockPointerType()) + !Ty->isEnumeralType() && !Ty->isBlockPointerType()) return false; uint64_t Size = Context.getTypeSize(Ty); @@ -414,6 +421,8 @@ class PNaClTargetCodeGenInfo : public TargetCodeGenInfo { void PNaClABIInfo::computeInfo(CGFunctionInfo &FI) const { FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + // Obtain the initial number of registers available for passing integers + // from the function's regparm attribute. unsigned FreeRegs = FI.getHasRegParm() ? FI.getRegParm() : 0; for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); @@ -426,15 +435,18 @@ llvm::Value *PNaClABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, return 0; } +// \brief Classify argument of given type \p Ty. \p FreeRegs is the number of +// registers available for passing arguments - it can be updated by this +// method. ABIArgInfo PNaClABIInfo::classifyArgumentType(QualType Ty, unsigned &FreeRegs) const { if (isAggregateTypeForABI(Ty)) { - // Records with non trivial destructors/constructors should not be passed - // by value. + // In the PNaCl ABI we always pass records/structures on the stack. The + // byval attribute can be used if the record doesn't have non-trivial + // constructors/destructors. FreeRegs = 0; if (isRecordWithNonTrivialDestructorOrCopyConstructor(Ty)) return ABIArgInfo::getIndirect(0, /*ByVal=*/false); - return ABIArgInfo::getIndirect(0); } @@ -445,14 +457,17 @@ ABIArgInfo PNaClABIInfo::classifyArgumentType(QualType Ty, ABIArgInfo BaseInfo = (Ty->isPromotableIntegerType() ? 
ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); - // Regparm regs hold 32 bits. - unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32; - if (SizeInRegs == 0) return BaseInfo; - if (SizeInRegs > FreeRegs) { + // Figure out how many of the free registers can be occupied by this type. + // regparm registers are 32-bit. + unsigned NumRegsRequired = (getContext().getTypeSize(Ty) + 31) / 32; + if (NumRegsRequired == 0) return BaseInfo; + if (NumRegsRequired > FreeRegs) { + // If this type needs more registers than we have available, no more + // passing in-registers can happen. FreeRegs = 0; return BaseInfo; } - FreeRegs -= SizeInRegs; + FreeRegs -= NumRegsRequired; return BaseInfo.isDirect() ? ABIArgInfo::getDirectInReg(BaseInfo.getCoerceToType()) : ABIArgInfo::getExtendInReg(BaseInfo.getCoerceToType()); @@ -462,6 +477,7 @@ ABIArgInfo PNaClABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); + // In the PNaCl ABI we always return records/structures on the stack. if (isAggregateTypeForABI(RetTy)) return ABIArgInfo::getIndirect(0); @@ -473,11 +489,9 @@ ABIArgInfo PNaClABIInfo::classifyReturnType(QualType RetTy) const { ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } -/// UseX86_MMXType - Return true if this is an MMX type that should use the -/// special x86_mmx type. -bool UseX86_MMXType(llvm::Type *IRType) { - // If the type is an MMX type <2 x i32>, <4 x i16>, or <8 x i8>, use the - // special x86_mmx type. +/// IsX86_MMXType - Return true if this is an MMX type. +bool IsX86_MMXType(llvm::Type *IRType) { + // Return true if the type is an MMX type <2 x i32>, <4 x i16>, or <8 x i8>. return IRType->isVectorTy() && IRType->getPrimitiveSizeInBits() == 64 && cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy() && IRType->getScalarSizeInBits() != 64; @@ -506,7 +520,6 @@ class X86_32ABIInfo : public ABIInfo { bool IsDarwinVectorABI; bool IsSmallStructInRegABI; - bool IsMMXDisabled; bool IsWin32FloatStructABI; unsigned DefaultNumRegisterParameters; @@ -539,18 +552,17 @@ public: virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, CodeGenFunction &CGF) const; - X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool d, bool p, bool m, bool w, + X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool d, bool p, bool w, unsigned r) : ABIInfo(CGT), IsDarwinVectorABI(d), IsSmallStructInRegABI(p), - IsMMXDisabled(m), IsWin32FloatStructABI(w), - DefaultNumRegisterParameters(r) {} + IsWin32FloatStructABI(w), DefaultNumRegisterParameters(r) {} }; class X86_32TargetCodeGenInfo : public TargetCodeGenInfo { public: X86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, - bool d, bool p, bool m, bool w, unsigned r) - :TargetCodeGenInfo(new X86_32ABIInfo(CGT, d, p, m, w, r)) {} + bool d, bool p, bool w, unsigned r) + :TargetCodeGenInfo(new X86_32ABIInfo(CGT, d, p, w, r)) {} void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const; @@ -903,15 +915,8 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, Size)); } - llvm::Type *IRType = CGT.ConvertType(Ty); - if (UseX86_MMXType(IRType)) { - if (IsMMXDisabled) - return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), - 64)); - ABIArgInfo AAI = ABIArgInfo::getDirect(IRType); - AAI.setCoerceToType(llvm::Type::getX86_MMXTy(getVMContext())); - return AAI; - } + if (IsX86_MMXType(CGT.ConvertType(Ty))) + return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), 64)); return ABIArgInfo::getDirect(); } @@ -1013,8 +1018,10 @@ 
void X86_32TargetCodeGenInfo::SetTargetAttributes(const Decl *D, // Now add the 'alignstack' attribute with a value of 16. llvm::AttrBuilder B; B.addStackAlignmentAttr(16); - Fn->addAttribute(llvm::AttrListPtr::FunctionIndex, - llvm::Attributes::get(CGM.getLLVMContext(), B)); + Fn->addAttributes(llvm::AttributeSet::FunctionIndex, + llvm::AttributeSet::get(CGM.getLLVMContext(), + llvm::AttributeSet::FunctionIndex, + B)); } } } @@ -1381,7 +1388,7 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, } else if ((k == BuiltinType::Float || k == BuiltinType::Double) || (k == BuiltinType::LongDouble && getContext().getTargetInfo().getTriple().getOS() == - llvm::Triple::NativeClient)) { + llvm::Triple::NaCl)) { Current = SSE; } else if (k == BuiltinType::LongDouble) { Lo = X87; @@ -1470,7 +1477,7 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, else if (ET == getContext().DoubleTy || (ET == getContext().LongDoubleTy && getContext().getTargetInfo().getTriple().getOS() == - llvm::Triple::NativeClient)) + llvm::Triple::NaCl)) Lo = Hi = SSE; else if (ET == getContext().LongDoubleTy) Current = ComplexX87; @@ -2777,6 +2784,9 @@ PPC64_SVR4_ABIInfo::isPromotableTypeForABI(QualType Ty) const { ABIArgInfo PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const { + if (Ty->isAnyComplexType()) + return ABIArgInfo::getDirect(); + if (isAggregateTypeForABI(Ty)) { // Records with non trivial destructors/constructors should not be passed // by value. @@ -2795,6 +2805,9 @@ PPC64_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); + if (RetTy->isAnyComplexType()) + return ABIArgInfo::getDirect(); + if (isAggregateTypeForABI(RetTy)) return ABIArgInfo::getIndirect(0); @@ -2813,14 +2826,52 @@ llvm::Value *PPC64_SVR4_ABIInfo::EmitVAArg(llvm::Value *VAListAddr, llvm::Value *VAListAddrAsBPP = Builder.CreateBitCast(VAListAddr, BPP, "ap"); llvm::Value *Addr = Builder.CreateLoad(VAListAddrAsBPP, "ap.cur"); - // Update the va_list pointer. + // Update the va_list pointer. The pointer should be bumped by the + // size of the object. We can trust getTypeSize() except for a complex + // type whose base type is smaller than a doubleword. For these, the + // size of the object is 16 bytes; see below for further explanation. unsigned SizeInBytes = CGF.getContext().getTypeSize(Ty) / 8; + QualType BaseTy; + unsigned CplxBaseSize = 0; + + if (const ComplexType *CTy = Ty->getAs<ComplexType>()) { + BaseTy = CTy->getElementType(); + CplxBaseSize = CGF.getContext().getTypeSize(BaseTy) / 8; + if (CplxBaseSize < 8) + SizeInBytes = 16; + } + unsigned Offset = llvm::RoundUpToAlignment(SizeInBytes, 8); llvm::Value *NextAddr = Builder.CreateGEP(Addr, llvm::ConstantInt::get(CGF.Int64Ty, Offset), "ap.next"); Builder.CreateStore(NextAddr, VAListAddrAsBPP); + // If we have a complex type and the base type is smaller than 8 bytes, + // the ABI calls for the real and imaginary parts to be right-adjusted + // in separate doublewords. However, Clang expects us to produce a + // pointer to a structure with the two parts packed tightly. So generate + // loads of the real and imaginary parts relative to the va_list pointer, + // and store them to a temporary structure. 
+ if (CplxBaseSize && CplxBaseSize < 8) { + llvm::Value *RealAddr = Builder.CreatePtrToInt(Addr, CGF.Int64Ty); + llvm::Value *ImagAddr = RealAddr; + RealAddr = Builder.CreateAdd(RealAddr, Builder.getInt64(8 - CplxBaseSize)); + ImagAddr = Builder.CreateAdd(ImagAddr, Builder.getInt64(16 - CplxBaseSize)); + llvm::Type *PBaseTy = llvm::PointerType::getUnqual(CGF.ConvertType(BaseTy)); + RealAddr = Builder.CreateIntToPtr(RealAddr, PBaseTy); + ImagAddr = Builder.CreateIntToPtr(ImagAddr, PBaseTy); + llvm::Value *Real = Builder.CreateLoad(RealAddr, false, ".vareal"); + llvm::Value *Imag = Builder.CreateLoad(ImagAddr, false, ".vaimag"); + llvm::Value *Ptr = CGF.CreateTempAlloca(CGT.ConvertTypeForMem(Ty), + "vacplx"); + llvm::Value *RealPtr = Builder.CreateStructGEP(Ptr, 0, ".real"); + llvm::Value *ImagPtr = Builder.CreateStructGEP(Ptr, 1, ".imag"); + Builder.CreateStore(Real, RealPtr, false); + Builder.CreateStore(Imag, ImagPtr, false); + return Ptr; + } + // If the argument is smaller than 8 bytes, it is right-adjusted in // its doubleword slot. Adjust the pointer to pick it up from the // correct offset. @@ -2908,7 +2959,9 @@ private: ABIKind Kind; public: - ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind) : ABIInfo(CGT), Kind(_Kind) {} + ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind) : ABIInfo(CGT), Kind(_Kind) { + setRuntimeCC(); + } bool isEABI() const { StringRef Env = @@ -2930,6 +2983,10 @@ private: virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, CodeGenFunction &CGF) const; + + llvm::CallingConv::ID getLLVMDefaultCC() const; + llvm::CallingConv::ID getABIDefaultCC() const; + void setRuntimeCC(); }; class ARMTargetCodeGenInfo : public TargetCodeGenInfo { @@ -2999,32 +3056,41 @@ void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const { if (FI.getCallingConvention() != llvm::CallingConv::C) return; - // Calling convention as default by an ABI. - llvm::CallingConv::ID DefaultCC; + llvm::CallingConv::ID cc = getRuntimeCC(); + if (cc != llvm::CallingConv::C) + FI.setEffectiveCallingConvention(cc); +} + +/// Return the default calling convention that LLVM will use. +llvm::CallingConv::ID ARMABIInfo::getLLVMDefaultCC() const { + // The default calling convention that LLVM will infer. if (getContext().getTargetInfo().getTriple().getEnvironmentName()=="gnueabihf") - DefaultCC = llvm::CallingConv::ARM_AAPCS_VFP; + return llvm::CallingConv::ARM_AAPCS_VFP; else if (isEABI()) - DefaultCC = llvm::CallingConv::ARM_AAPCS; + return llvm::CallingConv::ARM_AAPCS; else - DefaultCC = llvm::CallingConv::ARM_APCS; + return llvm::CallingConv::ARM_APCS; +} - // If user did not ask for specific calling convention explicitly (e.g. via - // pcs attribute), set effective calling convention if it's different than ABI - // default. +/// Return the calling convention that our ABI would like us to use +/// as the C calling convention. 
+llvm::CallingConv::ID ARMABIInfo::getABIDefaultCC() const { switch (getABIKind()) { - case APCS: - if (DefaultCC != llvm::CallingConv::ARM_APCS) - FI.setEffectiveCallingConvention(llvm::CallingConv::ARM_APCS); - break; - case AAPCS: - if (DefaultCC != llvm::CallingConv::ARM_AAPCS) - FI.setEffectiveCallingConvention(llvm::CallingConv::ARM_AAPCS); - break; - case AAPCS_VFP: - if (DefaultCC != llvm::CallingConv::ARM_AAPCS_VFP) - FI.setEffectiveCallingConvention(llvm::CallingConv::ARM_AAPCS_VFP); - break; + case APCS: return llvm::CallingConv::ARM_APCS; + case AAPCS: return llvm::CallingConv::ARM_AAPCS; + case AAPCS_VFP: return llvm::CallingConv::ARM_AAPCS_VFP; } + llvm_unreachable("bad ABI kind"); +} + +void ARMABIInfo::setRuntimeCC() { + assert(getRuntimeCC() == llvm::CallingConv::C); + + // Don't muddy up the IR with a ton of explicit annotations if + // they'd just match what LLVM will infer from the triple. + llvm::CallingConv::ID abiCC = getABIDefaultCC(); + if (abiCC != getLLVMDefaultCC()) + RuntimeCC = abiCC; } /// isHomogeneousAggregate - Return true if a type is an AAPCS-VFP homogeneous @@ -3539,6 +3605,420 @@ llvm::Value *NaClARMABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, } //===----------------------------------------------------------------------===// +// AArch64 ABI Implementation +//===----------------------------------------------------------------------===// + +namespace { + +class AArch64ABIInfo : public ABIInfo { +public: + AArch64ABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {} + +private: + // The AArch64 PCS is explicit about return types and argument types being + // handled identically, so we don't need to draw a distinction between + // Argument and Return classification. + ABIArgInfo classifyGenericType(QualType Ty, int &FreeIntRegs, + int &FreeVFPRegs) const; + + ABIArgInfo tryUseRegs(QualType Ty, int &FreeRegs, int RegsNeeded, bool IsInt, + llvm::Type *DirectTy = 0) const; + + virtual void computeInfo(CGFunctionInfo &FI) const; + + virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, + CodeGenFunction &CGF) const; +}; + +class AArch64TargetCodeGenInfo : public TargetCodeGenInfo { +public: + AArch64TargetCodeGenInfo(CodeGenTypes &CGT) + :TargetCodeGenInfo(new AArch64ABIInfo(CGT)) {} + + const AArch64ABIInfo &getABIInfo() const { + return static_cast<const AArch64ABIInfo&>(TargetCodeGenInfo::getABIInfo()); + } + + int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const { + return 31; + } + + bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, + llvm::Value *Address) const { + // 0-31 are x0-x30 and sp: 8 bytes each + llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8); + AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 31); + + // 64-95 are v0-v31: 16 bytes each + llvm::Value *Sixteen8 = llvm::ConstantInt::get(CGF.Int8Ty, 16); + AssignToArrayRange(CGF.Builder, Address, Sixteen8, 64, 95); + + return false; + } + +}; + +} + +void AArch64ABIInfo::computeInfo(CGFunctionInfo &FI) const { + int FreeIntRegs = 8, FreeVFPRegs = 8; + + FI.getReturnInfo() = classifyGenericType(FI.getReturnType(), + FreeIntRegs, FreeVFPRegs); + + FreeIntRegs = FreeVFPRegs = 8; + for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); + it != ie; ++it) { + it->info = classifyGenericType(it->type, FreeIntRegs, FreeVFPRegs); + + } +} + +ABIArgInfo +AArch64ABIInfo::tryUseRegs(QualType Ty, int &FreeRegs, int RegsNeeded, + bool IsInt, llvm::Type *DirectTy) const { + if (FreeRegs >= RegsNeeded) { + FreeRegs -= RegsNeeded; + return 
ABIArgInfo::getDirect(DirectTy); + } + + llvm::Type *Padding = 0; + + // We need padding so that later arguments don't get filled in anyway. That + // wouldn't happen if only ByVal arguments followed in the same category, but + // a large structure will simply seem to be a pointer as far as LLVM is + // concerned. + if (FreeRegs > 0) { + if (IsInt) + Padding = llvm::Type::getInt64Ty(getVMContext()); + else + Padding = llvm::Type::getFloatTy(getVMContext()); + + // Either [N x i64] or [N x float]. + Padding = llvm::ArrayType::get(Padding, FreeRegs); + FreeRegs = 0; + } + + return ABIArgInfo::getIndirect(getContext().getTypeAlign(Ty) / 8, + /*IsByVal=*/ true, /*Realign=*/ false, + Padding); +} + + +ABIArgInfo AArch64ABIInfo::classifyGenericType(QualType Ty, + int &FreeIntRegs, + int &FreeVFPRegs) const { + // Can only occurs for return, but harmless otherwise. + if (Ty->isVoidType()) + return ABIArgInfo::getIgnore(); + + // Large vector types should be returned via memory. There's no such concept + // in the ABI, but they'd be over 16 bytes anyway so no matter how they're + // classified they'd go into memory (see B.3). + if (Ty->isVectorType() && getContext().getTypeSize(Ty) > 128) { + if (FreeIntRegs > 0) + --FreeIntRegs; + return ABIArgInfo::getIndirect(0, /*ByVal=*/false); + } + + // All non-aggregate LLVM types have a concrete ABI representation so they can + // be passed directly. After this block we're guaranteed to be in a + // complicated case. + if (!isAggregateTypeForABI(Ty)) { + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = Ty->getAs<EnumType>()) + Ty = EnumTy->getDecl()->getIntegerType(); + + if (Ty->isFloatingType() || Ty->isVectorType()) + return tryUseRegs(Ty, FreeVFPRegs, /*RegsNeeded=*/ 1, /*IsInt=*/ false); + + assert(getContext().getTypeSize(Ty) <= 128 && + "unexpectedly large scalar type"); + + int RegsNeeded = getContext().getTypeSize(Ty) > 64 ? 2 : 1; + + // If the type may need padding registers to ensure "alignment", we must be + // careful when this is accounted for. Increasing the effective size covers + // all cases. + if (getContext().getTypeAlign(Ty) == 128) + RegsNeeded += FreeIntRegs % 2 != 0; + + return tryUseRegs(Ty, FreeIntRegs, RegsNeeded, /*IsInt=*/ true); + } + + // Structures with either a non-trivial destructor or a non-trivial + // copy constructor are always indirect. + if (isRecordWithNonTrivialDestructorOrCopyConstructor(Ty)) { + if (FreeIntRegs > 0) + --FreeIntRegs; + return ABIArgInfo::getIndirect(0, /*ByVal=*/false); + } + + if (isEmptyRecord(getContext(), Ty, true)) { + if (!getContext().getLangOpts().CPlusPlus) { + // Empty structs outside C++ mode are a GNU extension, so no ABI can + // possibly tell us what to do. It turns out (I believe) that GCC ignores + // the object for parameter-passsing purposes. + return ABIArgInfo::getIgnore(); + } + + // The combination of C++98 9p5 (sizeof(struct) != 0) and the pseudocode + // description of va_arg in the PCS require that an empty struct does + // actually occupy space for parameter-passing. I'm hoping for a + // clarification giving an explicit paragraph to point to in future. + return tryUseRegs(Ty, FreeIntRegs, /*RegsNeeded=*/ 1, /*IsInt=*/ true, + llvm::Type::getInt8Ty(getVMContext())); + } + + // Homogeneous vector aggregates get passed in registers or on the stack. 
+ const Type *Base = 0; + uint64_t NumMembers = 0; + if (isHomogeneousAggregate(Ty, Base, getContext(), &NumMembers)) { + assert(Base && "Base class should be set for homogeneous aggregate"); + // Homogeneous aggregates are passed and returned directly. + return tryUseRegs(Ty, FreeVFPRegs, /*RegsNeeded=*/ NumMembers, + /*IsInt=*/ false); + } + + uint64_t Size = getContext().getTypeSize(Ty); + if (Size <= 128) { + // Small structs can use the same direct type whether they're in registers + // or on the stack. + llvm::Type *BaseTy; + unsigned NumBases; + int SizeInRegs = (Size + 63) / 64; + + if (getContext().getTypeAlign(Ty) == 128) { + BaseTy = llvm::Type::getIntNTy(getVMContext(), 128); + NumBases = 1; + + // If the type may need padding registers to ensure "alignment", we must + // be careful when this is accounted for. Increasing the effective size + // covers all cases. + SizeInRegs += FreeIntRegs % 2 != 0; + } else { + BaseTy = llvm::Type::getInt64Ty(getVMContext()); + NumBases = SizeInRegs; + } + llvm::Type *DirectTy = llvm::ArrayType::get(BaseTy, NumBases); + + return tryUseRegs(Ty, FreeIntRegs, /*RegsNeeded=*/ SizeInRegs, + /*IsInt=*/ true, DirectTy); + } + + // If the aggregate is > 16 bytes, it's passed and returned indirectly. In + // LLVM terms the return uses an "sret" pointer, but that's handled elsewhere. + --FreeIntRegs; + return ABIArgInfo::getIndirect(0, /* byVal = */ false); +} + +llvm::Value *AArch64ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, + CodeGenFunction &CGF) const { + // The AArch64 va_list type and handling is specified in the Procedure Call + // Standard, section B.4: + // + // struct { + // void *__stack; + // void *__gr_top; + // void *__vr_top; + // int __gr_offs; + // int __vr_offs; + // }; + + assert(!CGF.CGM.getDataLayout().isBigEndian() + && "va_arg not implemented for big-endian AArch64"); + + int FreeIntRegs = 8, FreeVFPRegs = 8; + Ty = CGF.getContext().getCanonicalType(Ty); + ABIArgInfo AI = classifyGenericType(Ty, FreeIntRegs, FreeVFPRegs); + + llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock("vaarg.maybe_reg"); + llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg"); + llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack"); + llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end"); + + llvm::Value *reg_offs_p = 0, *reg_offs = 0; + int reg_top_index; + int RegSize; + if (FreeIntRegs < 8) { + assert(FreeVFPRegs == 8 && "Arguments never split between int & VFP regs"); + // 3 is the field number of __gr_offs + reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, "gr_offs_p"); + reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs"); + reg_top_index = 1; // field number for __gr_top + RegSize = 8 * (8 - FreeIntRegs); + } else { + assert(FreeVFPRegs < 8 && "Argument must go in VFP or int regs"); + // 4 is the field number of __vr_offs. + reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, "vr_offs_p"); + reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs"); + reg_top_index = 2; // field number for __vr_top + RegSize = 16 * (8 - FreeVFPRegs); + } + + //======================================= + // Find out where argument was passed + //======================================= + + // If reg_offs >= 0 we're already using the stack for this type of + // argument. We don't want to keep updating reg_offs (in case it overflows, + // though anyone passing 2GB of arguments, each at most 16 bytes, deserves + // whatever they get). 
+ llvm::Value *UsingStack = 0; + UsingStack = CGF.Builder.CreateICmpSGE(reg_offs, + llvm::ConstantInt::get(CGF.Int32Ty, 0)); + + CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, MaybeRegBlock); + + // Otherwise, at least some kind of argument could go in these registers, the + // quesiton is whether this particular type is too big. + CGF.EmitBlock(MaybeRegBlock); + + // Integer arguments may need to correct register alignment (for example a + // "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this case we + // align __gr_offs to calculate the potential address. + if (FreeIntRegs < 8 && AI.isDirect() && getContext().getTypeAlign(Ty) > 64) { + int Align = getContext().getTypeAlign(Ty) / 8; + + reg_offs = CGF.Builder.CreateAdd(reg_offs, + llvm::ConstantInt::get(CGF.Int32Ty, Align - 1), + "align_regoffs"); + reg_offs = CGF.Builder.CreateAnd(reg_offs, + llvm::ConstantInt::get(CGF.Int32Ty, -Align), + "aligned_regoffs"); + } + + // Update the gr_offs/vr_offs pointer for next call to va_arg on this va_list. + llvm::Value *NewOffset = 0; + NewOffset = CGF.Builder.CreateAdd(reg_offs, + llvm::ConstantInt::get(CGF.Int32Ty, RegSize), + "new_reg_offs"); + CGF.Builder.CreateStore(NewOffset, reg_offs_p); + + // Now we're in a position to decide whether this argument really was in + // registers or not. + llvm::Value *InRegs = 0; + InRegs = CGF.Builder.CreateICmpSLE(NewOffset, + llvm::ConstantInt::get(CGF.Int32Ty, 0), + "inreg"); + + CGF.Builder.CreateCondBr(InRegs, InRegBlock, OnStackBlock); + + //======================================= + // Argument was in registers + //======================================= + + // Now we emit the code for if the argument was originally passed in + // registers. First start the appropriate block: + CGF.EmitBlock(InRegBlock); + + llvm::Value *reg_top_p = 0, *reg_top = 0; + reg_top_p = CGF.Builder.CreateStructGEP(VAListAddr, reg_top_index, "reg_top_p"); + reg_top = CGF.Builder.CreateLoad(reg_top_p, "reg_top"); + llvm::Value *BaseAddr = CGF.Builder.CreateGEP(reg_top, reg_offs); + llvm::Value *RegAddr = 0; + llvm::Type *MemTy = llvm::PointerType::getUnqual(CGF.ConvertTypeForMem(Ty)); + + if (!AI.isDirect()) { + // If it's been passed indirectly (actually a struct), whatever we find from + // stored registers or on the stack will actually be a struct **. + MemTy = llvm::PointerType::getUnqual(MemTy); + } + + const Type *Base = 0; + uint64_t NumMembers; + if (isHomogeneousAggregate(Ty, Base, getContext(), &NumMembers) + && NumMembers > 1) { + // Homogeneous aggregates passed in registers will have their elements split + // and stored 16-bytes apart regardless of size (they're notionally in qN, + // qN+1, ...). We reload and store into a temporary local variable + // contiguously. 
+ assert(AI.isDirect() && "Homogeneous aggregates should be passed directly"); + llvm::Type *BaseTy = CGF.ConvertType(QualType(Base, 0)); + llvm::Type *HFATy = llvm::ArrayType::get(BaseTy, NumMembers); + llvm::Value *Tmp = CGF.CreateTempAlloca(HFATy); + + for (unsigned i = 0; i < NumMembers; ++i) { + llvm::Value *BaseOffset = llvm::ConstantInt::get(CGF.Int32Ty, 16 * i); + llvm::Value *LoadAddr = CGF.Builder.CreateGEP(BaseAddr, BaseOffset); + LoadAddr = CGF.Builder.CreateBitCast(LoadAddr, + llvm::PointerType::getUnqual(BaseTy)); + llvm::Value *StoreAddr = CGF.Builder.CreateStructGEP(Tmp, i); + + llvm::Value *Elem = CGF.Builder.CreateLoad(LoadAddr); + CGF.Builder.CreateStore(Elem, StoreAddr); + } + + RegAddr = CGF.Builder.CreateBitCast(Tmp, MemTy); + } else { + // Otherwise the object is contiguous in memory + RegAddr = CGF.Builder.CreateBitCast(BaseAddr, MemTy); + } + + CGF.EmitBranch(ContBlock); + + //======================================= + // Argument was on the stack + //======================================= + CGF.EmitBlock(OnStackBlock); + + llvm::Value *stack_p = 0, *OnStackAddr = 0; + stack_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "stack_p"); + OnStackAddr = CGF.Builder.CreateLoad(stack_p, "stack"); + + // Again, stack arguments may need realigmnent. In this case both integer and + // floating-point ones might be affected. + if (AI.isDirect() && getContext().getTypeAlign(Ty) > 64) { + int Align = getContext().getTypeAlign(Ty) / 8; + + OnStackAddr = CGF.Builder.CreatePtrToInt(OnStackAddr, CGF.Int64Ty); + + OnStackAddr = CGF.Builder.CreateAdd(OnStackAddr, + llvm::ConstantInt::get(CGF.Int64Ty, Align - 1), + "align_stack"); + OnStackAddr = CGF.Builder.CreateAnd(OnStackAddr, + llvm::ConstantInt::get(CGF.Int64Ty, -Align), + "align_stack"); + + OnStackAddr = CGF.Builder.CreateIntToPtr(OnStackAddr, CGF.Int8PtrTy); + } + + uint64_t StackSize; + if (AI.isDirect()) + StackSize = getContext().getTypeSize(Ty) / 8; + else + StackSize = 8; + + // All stack slots are 8 bytes + StackSize = llvm::RoundUpToAlignment(StackSize, 8); + + llvm::Value *StackSizeC = llvm::ConstantInt::get(CGF.Int32Ty, StackSize); + llvm::Value *NewStack = CGF.Builder.CreateGEP(OnStackAddr, StackSizeC, + "new_stack"); + + // Write the new value of __stack for the next call to va_arg + CGF.Builder.CreateStore(NewStack, stack_p); + + OnStackAddr = CGF.Builder.CreateBitCast(OnStackAddr, MemTy); + + CGF.EmitBranch(ContBlock); + + //======================================= + // Tidy up + //======================================= + CGF.EmitBlock(ContBlock); + + llvm::PHINode *ResAddr = CGF.Builder.CreatePHI(MemTy, 2, "vaarg.addr"); + ResAddr->addIncoming(RegAddr, InRegBlock); + ResAddr->addIncoming(OnStackAddr, OnStackBlock); + + if (AI.isDirect()) + return ResAddr; + + return CGF.Builder.CreateLoad(ResAddr, "vaarg.addr"); +} + +//===----------------------------------------------------------------------===// // NVPTX ABI Implementation //===----------------------------------------------------------------------===// @@ -3563,6 +4043,8 @@ public: virtual void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const; +private: + static void addKernelMetadata(llvm::Function *F); }; ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const { @@ -3590,25 +4072,7 @@ void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const { if (FI.getCallingConvention() != llvm::CallingConv::C) return; - // Calling convention as default by an ABI. 
- // We're still using the PTX_Kernel/PTX_Device calling conventions here, - // but we should switch to NVVM metadata later on. - llvm::CallingConv::ID DefaultCC; - const LangOptions &LangOpts = getContext().getLangOpts(); - if (LangOpts.OpenCL || LangOpts.CUDA) { - // If we are in OpenCL or CUDA mode, then default to device functions - DefaultCC = llvm::CallingConv::PTX_Device; - } else { - // If we are in standard C/C++ mode, use the triple to decide on the default - StringRef Env = - getContext().getTargetInfo().getTriple().getEnvironmentName(); - if (Env == "device") - DefaultCC = llvm::CallingConv::PTX_Device; - else - DefaultCC = llvm::CallingConv::PTX_Kernel; - } - FI.setEffectiveCallingConvention(DefaultCC); - + FI.setEffectiveCallingConvention(getRuntimeCC()); } llvm::Value *NVPTXABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, @@ -3626,26 +4090,43 @@ SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, // Perform special handling in OpenCL mode if (M.getLangOpts().OpenCL) { - // Use OpenCL function attributes to set proper calling conventions + // Use OpenCL function attributes to check for kernel functions // By default, all functions are device functions if (FD->hasAttr<OpenCLKernelAttr>()) { - // OpenCL __kernel functions get a kernel calling convention - F->setCallingConv(llvm::CallingConv::PTX_Kernel); + // OpenCL __kernel functions get kernel metadata + addKernelMetadata(F); // And kernel functions are not subject to inlining - F->addFnAttr(llvm::Attributes::NoInline); + F->addFnAttr(llvm::Attribute::NoInline); } } // Perform special handling in CUDA mode. if (M.getLangOpts().CUDA) { - // CUDA __global__ functions get a kernel calling convention. Since + // CUDA __global__ functions get a kernel metadata entry. Since // __global__ functions cannot be called from the device, we do not // need to set the noinline attribute. if (FD->getAttr<CUDAGlobalAttr>()) - F->setCallingConv(llvm::CallingConv::PTX_Kernel); + addKernelMetadata(F); } } +void NVPTXTargetCodeGenInfo::addKernelMetadata(llvm::Function *F) { + llvm::Module *M = F->getParent(); + llvm::LLVMContext &Ctx = M->getContext(); + + // Get "nvvm.annotations" metadata node + llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations"); + + // Create !{<func-ref>, metadata !"kernel", i32 1} node + llvm::SmallVector<llvm::Value *, 3> MDVals; + MDVals.push_back(F); + MDVals.push_back(llvm::MDString::get(Ctx, "kernel")); + MDVals.push_back(llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1)); + + // Append metadata to nvvm.annotations + MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); +} + } //===----------------------------------------------------------------------===// @@ -3748,7 +4229,7 @@ void MBlazeTargetCodeGenInfo::SetTargetAttributes(const Decl *D, F->setCallingConv(CC); // Step 2: Add attributes goodness. - F->addFnAttr(llvm::Attributes::NoInline); + F->addFnAttr(llvm::Attribute::NoInline); } // Step 3: Emit _interrupt_handler alias. @@ -3786,12 +4267,12 @@ void MSP430TargetCodeGenInfo::SetTargetAttributes(const Decl *D, F->setCallingConv(llvm::CallingConv::MSP430_INTR); // Step 2: Add attributes goodness. - F->addFnAttr(llvm::Attributes::NoInline); + F->addFnAttr(llvm::Attribute::NoInline); // Step 3: Emit ISR vector alias. 
- unsigned Num = attr->getNumber() + 0xffe0; + unsigned Num = attr->getNumber() / 2; new llvm::GlobalAlias(GV->getType(), llvm::Function::ExternalLinkage, - "vector_" + Twine::utohexstr(Num), + "__isr_" + Twine(Num), GV, &M.getModule()); } } @@ -3834,6 +4315,19 @@ public: return 29; } + void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const { + const FunctionDecl *FD = dyn_cast<FunctionDecl>(D); + if (!FD) return; + llvm::Function *Fn = cast<llvm::Function>(GV); + if (FD->hasAttr<Mips16Attr>()) { + Fn->addFnAttr("mips16"); + } + else if (FD->hasAttr<NoMips16Attr>()) { + Fn->addFnAttr("nomips16"); + } + } + bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const; @@ -3963,7 +4457,8 @@ MipsABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset) const { if (Ty->isPromotableIntegerType()) return ABIArgInfo::getExtend(); - return ABIArgInfo::getDirect(0, 0, getPaddingType(Align, OrigOffset)); + return ABIArgInfo::getDirect(0, 0, + IsO32 ? 0 : getPaddingType(Align, OrigOffset)); } llvm::Type* @@ -4143,7 +4638,7 @@ void TCETargetCodeGenInfo::SetTargetAttributes(const Decl *D, if (M.getLangOpts().OpenCL) { if (FD->hasAttr<OpenCLKernelAttr>()) { // OpenCL C Kernel functions are not subject to inlining - F->addFnAttr(llvm::Attributes::NoInline); + F->addFnAttr(llvm::Attribute::NoInline); if (FD->hasAttr<ReqdWorkGroupSizeAttr>()) { @@ -4337,6 +4832,9 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { case llvm::Triple::mips64el: return *(TheTargetCodeGenInfo = new MIPSTargetCodeGenInfo(Types, false)); + case llvm::Triple::aarch64: + return *(TheTargetCodeGenInfo = new AArch64TargetCodeGenInfo(Types)); + case llvm::Triple::arm: case llvm::Triple::thumb: { @@ -4348,7 +4846,7 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { Kind = ARMABIInfo::AAPCS_VFP; switch (Triple.getOS()) { - case llvm::Triple::NativeClient: + case llvm::Triple::NaCl: return *(TheTargetCodeGenInfo = new NaClARMTargetCodeGenInfo(Types, Kind)); default: @@ -4379,11 +4877,9 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { return *(TheTargetCodeGenInfo = new TCETargetCodeGenInfo(Types)); case llvm::Triple::x86: { - bool DisableMMX = strcmp(getContext().getTargetInfo().getABI(), "no-mmx") == 0; - if (Triple.isOSDarwin()) return *(TheTargetCodeGenInfo = - new X86_32TargetCodeGenInfo(Types, true, true, DisableMMX, false, + new X86_32TargetCodeGenInfo(Types, true, true, false, CodeGenOpts.NumRegisterParameters)); switch (Triple.getOS()) { @@ -4395,19 +4891,17 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { case llvm::Triple::OpenBSD: case llvm::Triple::Bitrig: return *(TheTargetCodeGenInfo = - new X86_32TargetCodeGenInfo(Types, false, true, DisableMMX, - false, + new X86_32TargetCodeGenInfo(Types, false, true, false, CodeGenOpts.NumRegisterParameters)); case llvm::Triple::Win32: return *(TheTargetCodeGenInfo = - new X86_32TargetCodeGenInfo(Types, false, true, DisableMMX, true, + new X86_32TargetCodeGenInfo(Types, false, true, true, CodeGenOpts.NumRegisterParameters)); default: return *(TheTargetCodeGenInfo = - new X86_32TargetCodeGenInfo(Types, false, false, DisableMMX, - false, + new X86_32TargetCodeGenInfo(Types, false, false, false, CodeGenOpts.NumRegisterParameters)); } } @@ -4420,7 +4914,7 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { case llvm::Triple::MinGW32: case llvm::Triple::Cygwin: return *(TheTargetCodeGenInfo = new WinX86_64TargetCodeGenInfo(Types)); 
- case llvm::Triple::NativeClient: + case llvm::Triple::NaCl: return *(TheTargetCodeGenInfo = new NaClX86_64TargetCodeGenInfo(Types, HasAVX)); default: return *(TheTargetCodeGenInfo = new X86_64TargetCodeGenInfo(Types,