Diffstat (limited to 'lib/CodeGen/CGBuiltin.cpp')
-rw-r--r--   lib/CodeGen/CGBuiltin.cpp   719
1 file changed, 521 insertions(+), 198 deletions(-)
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 635e342..272baac 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "CodeGenFunction.h"
+#include "CGCXXABI.h"
 #include "CGObjCRuntime.h"
 #include "CodeGenModule.h"
 #include "TargetInfo.h"
@@ -21,9 +22,11 @@
 #include "clang/Basic/TargetInfo.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/CallSite.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Intrinsics.h"
+#include <sstream>
 
 using namespace clang;
 using namespace CodeGen;
@@ -156,6 +159,27 @@ static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
   return Call;
 }
 
+/// Emit the computation of the sign bit for a floating point value. Returns
+/// the i1 sign bit value.
+static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
+  LLVMContext &C = CGF.CGM.getLLVMContext();
+
+  llvm::Type *Ty = V->getType();
+  int Width = Ty->getPrimitiveSizeInBits();
+  llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
+  V = CGF.Builder.CreateBitCast(V, IntTy);
+  if (Ty->isPPC_FP128Ty()) {
+    // The higher-order double comes first, and so we need to truncate the
+    // pair to extract the overall sign. The order of the pair is the same
+    // in both little- and big-Endian modes.
+    Width >>= 1;
+    IntTy = llvm::IntegerType::get(C, Width);
+    V = CGF.Builder.CreateTrunc(V, IntTy);
+  }
+  Value *Zero = llvm::Constant::getNullValue(IntTy);
+  return CGF.Builder.CreateICmpSLT(V, Zero);
+}
+
 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn,
                               const CallExpr *E, llvm::Value *calleeValue) {
   return CGF.EmitCall(E->getCallee()->getType(), calleeValue, E,
@@ -181,7 +205,7 @@ static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
          "arguments have the same integer width?)");
 
   llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
-  llvm::Value *Tmp = CGF.Builder.CreateCall2(Callee, X, Y);
+  llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
   Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
   return CGF.Builder.CreateExtractValue(Tmp, 0);
 }
@@ -230,8 +254,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
     DstPtr = Builder.CreateBitCast(DstPtr, Type);
     SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
-    return RValue::get(Builder.CreateCall2(CGM.getIntrinsic(Intrinsic::vacopy),
-                                           DstPtr, SrcPtr));
+    return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
+                                          {DstPtr, SrcPtr}));
   }
   case Builtin::BI__builtin_abs:
   case Builtin::BI__builtin_labs:
@@ -309,7 +333,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
 
     llvm::Type *ResultType = ConvertType(E->getType());
     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
-    Value *Result = Builder.CreateCall2(F, ArgValue, ZeroUndef);
+    Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
     if (Result->getType() != ResultType)
       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                      "cast");
@@ -326,7 +350,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
 
     llvm::Type *ResultType = ConvertType(E->getType());
     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
-    Value *Result = Builder.CreateCall2(F, ArgValue, ZeroUndef);
+    Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
     if (Result->getType() != ResultType)
       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                      "cast");
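
Note: most of this commit is a mechanical migration — the fixed-arity IRBuilder helpers (CreateCall2/CreateCall3/CreateCall4) were removed upstream in favor of a single CreateCall overload that takes an ArrayRef of operands. A minimal sketch of the new call shape against the IRBuilder API of this era (the function name is illustrative):

    #include "llvm/IR/IRBuilder.h"

    // A braced initializer list converts implicitly to ArrayRef<Value *>,
    // so call sites simply wrap their operands in braces.
    llvm::CallInst *emitTwoArgCall(llvm::IRBuilder<> &Builder, llvm::Value *F,
                                   llvm::Value *X, llvm::Value *Y) {
      // Previously: Builder.CreateCall2(F, X, Y);
      return Builder.CreateCall(F, {X, Y});
    }
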
"cast"); @@ -342,9 +366,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); - Value *Tmp = Builder.CreateAdd(Builder.CreateCall2(F, ArgValue, - Builder.getTrue()), - llvm::ConstantInt::get(ArgType, 1)); + Value *Tmp = + Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}), + llvm::ConstantInt::get(ArgType, 1)); Value *Zero = llvm::Constant::getNullValue(ArgType); Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero"); Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs"); @@ -389,11 +413,16 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); - Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType); Value *ExpectedValue = EmitScalarExpr(E->getArg(1)); + // Don't generate llvm.expect on -O0 as the backend won't use it for + // anything. + // Note, we still IRGen ExpectedValue because it could have side-effects. + if (CGM.getCodeGenOpts().OptimizationLevel == 0) + return RValue::get(ArgValue); - Value *Result = Builder.CreateCall2(FnExpect, ArgValue, ExpectedValue, - "expval"); + Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType); + Value *Result = + Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval"); return RValue::get(Result); } case Builtin::BI__builtin_assume_aligned: { @@ -444,7 +473,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // FIXME: Get right address space. llvm::Type *Tys[] = { ResType, Builder.getInt8PtrTy(0) }; Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys); - return RValue::get(Builder.CreateCall2(F, EmitScalarExpr(E->getArg(0)),CI)); + return RValue::get( + Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0)), CI})); } case Builtin::BI__builtin_prefetch: { Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0)); @@ -455,25 +485,25 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, llvm::ConstantInt::get(Int32Ty, 3); Value *Data = llvm::ConstantInt::get(Int32Ty, 1); Value *F = CGM.getIntrinsic(Intrinsic::prefetch); - return RValue::get(Builder.CreateCall4(F, Address, RW, Locality, Data)); + return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data})); } case Builtin::BI__builtin_readcyclecounter: { Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); - return RValue::get(Builder.CreateCall(F)); + return RValue::get(Builder.CreateCall(F, {})); } case Builtin::BI__builtin___clear_cache: { Value *Begin = EmitScalarExpr(E->getArg(0)); Value *End = EmitScalarExpr(E->getArg(1)); Value *F = CGM.getIntrinsic(Intrinsic::clear_cache); - return RValue::get(Builder.CreateCall2(F, Begin, End)); + return RValue::get(Builder.CreateCall(F, {Begin, End})); } case Builtin::BI__builtin_trap: { Value *F = CGM.getIntrinsic(Intrinsic::trap); - return RValue::get(Builder.CreateCall(F)); + return RValue::get(Builder.CreateCall(F, {})); } case Builtin::BI__debugbreak: { Value *F = CGM.getIntrinsic(Intrinsic::debugtrap); - return RValue::get(Builder.CreateCall(F)); + return RValue::get(Builder.CreateCall(F, {})); } case Builtin::BI__builtin_unreachable: { if (SanOpts.has(SanitizerKind::Unreachable)) { @@ -498,7 +528,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Value *Exponent = EmitScalarExpr(E->getArg(1)); llvm::Type *ArgType = Base->getType(); Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType); - return 
@@ -498,7 +528,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
     Value *Exponent = EmitScalarExpr(E->getArg(1));
     llvm::Type *ArgType = Base->getType();
     Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
-    return RValue::get(Builder.CreateCall2(F, Base, Exponent));
+    return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
   }
 
   case Builtin::BI__builtin_isgreater:
@@ -551,8 +581,22 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
   }
 
-  // TODO: BI__builtin_isinf_sign
-  //   isinf_sign(x) -> isinf(x) ? (signbit(x) ? -1 : 1) : 0
+  case Builtin::BI__builtin_isinf_sign: {
+    // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
+    Value *Arg = EmitScalarExpr(E->getArg(0));
+    Value *AbsArg = EmitFAbs(*this, Arg);
+    Value *IsInf = Builder.CreateFCmpOEQ(
+        AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
+    Value *IsNeg = EmitSignBit(*this, Arg);
+
+    llvm::Type *IntTy = ConvertType(E->getType());
+    Value *Zero = Constant::getNullValue(IntTy);
+    Value *One = ConstantInt::get(IntTy, 1);
+    Value *NegativeOne = ConstantInt::get(IntTy, -1);
+    Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
+    Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
+    return RValue::get(Result);
+  }
 
   case Builtin::BI__builtin_isnormal: {
     // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
@@ -815,7 +859,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
     Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
                                 ? Intrinsic::eh_return_i32
                                 : Intrinsic::eh_return_i64);
-    Builder.CreateCall2(F, Int, Ptr);
+    Builder.CreateCall(F, {Int, Ptr});
     Builder.CreateUnreachable();
 
     // We do need to preserve an insertion point.
@@ -825,7 +869,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
   }
   case Builtin::BI__builtin_unwind_init: {
     Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
-    return RValue::get(Builder.CreateCall(F));
+    return RValue::get(Builder.CreateCall(F, {}));
   }
   case Builtin::BI__builtin_extend_pointer: {
     // Extends a pointer to the size of an _Unwind_Word, which is
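
Note: the new __builtin_isinf_sign lowering above tests fabs(x) == infinity and selects -1/+1 on the sign bit. A portable C++ model of the same select chain (illustrative only; the actual lowering happens in IR):

    #include <cmath>
    #include <limits>

    // Mirrors: isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
    int isinf_sign_model(double x) {
      bool is_inf = std::fabs(x) == std::numeric_limits<double>::infinity();
      int sign = std::signbit(x) ? -1 : 1;
      return is_inf ? sign : 0;
    }
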
@@ -864,7 +908,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
 
     // Store the stack pointer to the setjmp buffer.
     Value *StackAddr =
-        Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
+        Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave), {});
     Value *StackSaveSlot =
         Builder.CreateGEP(Buf, ConstantInt::get(Int32Ty, 2));
     Builder.CreateStore(StackAddr, StackSaveSlot);
@@ -1357,6 +1401,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
     return RValue::get(Builder.CreateCall(F, Arg0));
   }
 
+  case Builtin::BI__builtin_pow:
+  case Builtin::BI__builtin_powf:
+  case Builtin::BI__builtin_powl:
   case Builtin::BIpow:
   case Builtin::BIpowf:
   case Builtin::BIpowl: {
@@ -1367,7 +1414,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
     Value *Exponent = EmitScalarExpr(E->getArg(1));
     llvm::Type *ArgType = Base->getType();
     Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
-    return RValue::get(Builder.CreateCall2(F, Base, Exponent));
+    return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
   }
 
   case Builtin::BIfma:
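
Note: the next hunk deletes the open-coded signbit lowering in favor of the EmitSignBit helper introduced at the top of this patch. The helper's strategy — bitcast the float to a same-width integer and compare signed-less-than zero — corresponds to this scalar model (ordinary IEEE-754 doubles; the PPC ppc_fp128 truncation case has no portable analogue here):

    #include <cstdint>
    #include <cstring>

    // Mirrors EmitSignBit: reinterpret the bits and test whether the value is
    // negative as a signed integer, i.e. whether its top (sign) bit is set.
    bool sign_bit_model(double v) {
      std::uint64_t bits;
      std::memcpy(&bits, &v, sizeof bits);          // the bitcast
      return static_cast<std::int64_t>(bits) < 0;   // the ICmpSLT against zero
    }
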
- ArgWidth >>= 1; - ArgIntTy = llvm::IntegerType::get(C, ArgWidth); - BCArg = Builder.CreateTrunc(BCArg, ArgIntTy); - } - Value *ZeroCmp = llvm::Constant::getNullValue(ArgIntTy); - Value *Result = Builder.CreateICmpSLT(BCArg, ZeroCmp); - return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType()))); + return RValue::get( + Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))), + ConvertType(E->getType()))); } case Builtin::BI__builtin_annotation: { llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0)); @@ -1650,6 +1682,76 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Builder.CreateAlignedLoad(IntToPtr, /*Align=*/4, /*isVolatile=*/true); return RValue::get(Load); } + + case Builtin::BI__exception_code: + case Builtin::BI_exception_code: + return RValue::get(EmitSEHExceptionCode()); + case Builtin::BI__exception_info: + case Builtin::BI_exception_info: + return RValue::get(EmitSEHExceptionInfo()); + case Builtin::BI__abnormal_termination: + case Builtin::BI_abnormal_termination: + return RValue::get(EmitSEHAbnormalTermination()); + case Builtin::BI_setjmpex: { + if (getTarget().getTriple().isOSMSVCRT()) { + llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; + llvm::AttributeSet ReturnsTwiceAttr = + AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex, + llvm::Attribute::ReturnsTwice); + llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction( + llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), + "_setjmpex", ReturnsTwiceAttr); + llvm::Value *Buf = Builder.CreateBitOrPointerCast( + EmitScalarExpr(E->getArg(0)), Int8PtrTy); + llvm::Value *FrameAddr = + Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), + ConstantInt::get(Int32Ty, 0)); + llvm::Value *Args[] = {Buf, FrameAddr}; + llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args); + CS.setAttributes(ReturnsTwiceAttr); + return RValue::get(CS.getInstruction()); + } + break; + } + case Builtin::BI_setjmp: { + if (getTarget().getTriple().isOSMSVCRT()) { + llvm::AttributeSet ReturnsTwiceAttr = + AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex, + llvm::Attribute::ReturnsTwice); + llvm::Value *Buf = Builder.CreateBitOrPointerCast( + EmitScalarExpr(E->getArg(0)), Int8PtrTy); + llvm::CallSite CS; + if (getTarget().getTriple().getArch() == llvm::Triple::x86) { + llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy}; + llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction( + llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true), + "_setjmp3", ReturnsTwiceAttr); + llvm::Value *Count = ConstantInt::get(IntTy, 0); + llvm::Value *Args[] = {Buf, Count}; + CS = EmitRuntimeCallOrInvoke(SetJmp3, Args); + } else { + llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; + llvm::Constant *SetJmp = CGM.CreateRuntimeFunction( + llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), + "_setjmp", ReturnsTwiceAttr); + llvm::Value *FrameAddr = + Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), + ConstantInt::get(Int32Ty, 0)); + llvm::Value *Args[] = {Buf, FrameAddr}; + CS = EmitRuntimeCallOrInvoke(SetJmp, Args); + } + CS.setAttributes(ReturnsTwiceAttr); + return RValue::get(CS.getInstruction()); + } + break; + } + + case Builtin::BI__GetExceptionInfo: { + if (llvm::GlobalVariable *GV = + CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType())) + return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy)); + break; + } } // If this is an alias for a lib function (e.g. 
@@ -1764,6 +1866,8 @@ Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
   case llvm::Triple::r600:
   case llvm::Triple::amdgcn:
     return EmitR600BuiltinExpr(BuiltinID, E);
+  case llvm::Triple::systemz:
+    return EmitSystemZBuiltinExpr(BuiltinID, E);
   default:
     return nullptr;
   }
@@ -2534,7 +2638,7 @@ Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
   // Return type.
   SmallVector<llvm::Type *, 3> Tys;
   if (Modifier & AddRetType) {
-    llvm::Type *Ty = ConvertType(E->getCallReturnType());
+    llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
     if (Modifier & VectorizeRetType)
       Ty = llvm::VectorType::get(
           Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
@@ -2812,7 +2916,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
 
     // NEON intrinsic puts accumulator first, unlike the LLVM fma.
-    return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
+    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
   }
   case NEON::BI__builtin_neon_vld1_v:
   case NEON::BI__builtin_neon_vld1q_v:
@@ -2825,7 +2929,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
   case NEON::BI__builtin_neon_vld4_v:
   case NEON::BI__builtin_neon_vld4q_v: {
     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Ty);
-    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, NameHint);
+    Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
     return Builder.CreateStore(Ops[1], Ops[0]);
@@ -3004,7 +3108,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
       Indices.push_back(Builder.getInt32(i+vi));
       Indices.push_back(Builder.getInt32(i+e+vi));
     }
-    Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
+    Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
     SV = llvm::ConstantVector::get(Indices);
     SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
     SV = Builder.CreateStore(SV, Addr);
@@ -3032,7 +3136,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
     for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
       Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
 
-    Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
+    Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
     SV = llvm::ConstantVector::get(Indices);
     SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
     SV = Builder.CreateStore(SV, Addr);
@@ -3052,7 +3156,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
       Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
      Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
     }
-    Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
+    Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
     SV = llvm::ConstantVector::get(Indices);
     SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
     SV = Builder.CreateStore(SV, Addr);
@@ -3185,7 +3289,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
                       : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue),
                                        "", /*SideEffects=*/true);
 
-    return Builder.CreateCall(Emit);
+    return Builder.CreateCall(Emit, {});
   }
 
   if (BuiltinID == ARM::BI__builtin_arm_dbg) {
@@ -3202,7 +3306,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
     Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
 
     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
-    return Builder.CreateCall4(F, Address, RW, Locality, IsData);
+    return Builder.CreateCall(F, {Address, RW, Locality, IsData});
   }
 
   if (BuiltinID == ARM::BI__builtin_arm_rbit) {
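
Note: the CreateConstInBoundsGEP1_32 calls in the vtrn/vuzp/vzip hunks above gain a leading type argument, part of LLVM's move toward GEPs carrying an explicit element type rather than deriving it from the pointer operand. A minimal sketch against the IRBuilder API of this era (names illustrative):

    #include "llvm/IR/IRBuilder.h"

    llvm::Value *gepExample(llvm::IRBuilder<> &Builder, llvm::Type *Ty,
                            llvm::Value *Ptr, unsigned Idx) {
      // Previously the element type was recovered from Ptr's pointee type:
      //   Builder.CreateConstInBoundsGEP1_32(Ptr, Idx);
      // Now it is spelled out at the call site:
      return Builder.CreateConstInBoundsGEP1_32(Ty, Ptr, Idx);
    }
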
@@ -3300,7 +3404,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
                                          Int8PtrTy);
-    return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "strexd");
+    return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
   }
 
   if (BuiltinID == ARM::BI__builtin_arm_strex ||
@@ -3324,12 +3428,12 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
                                      ? Intrinsic::arm_stlex
                                      : Intrinsic::arm_strex,
                                    StoreAddr->getType());
-    return Builder.CreateCall2(F, StoreVal, StoreAddr, "strex");
+    return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
   }
 
   if (BuiltinID == ARM::BI__builtin_arm_clrex) {
     Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
-    return Builder.CreateCall(F);
+    return Builder.CreateCall(F, {});
   }
 
   // CRC32
@@ -3365,13 +3469,13 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
       Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
 
       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
-      Value *Res = Builder.CreateCall2(F, Arg0, Arg1a);
-      return Builder.CreateCall2(F, Res, Arg1b);
+      Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
+      return Builder.CreateCall(F, {Res, Arg1b});
     } else {
       Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
 
       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
-      return Builder.CreateCall2(F, Arg0, Arg1);
+      return Builder.CreateCall(F, {Arg0, Arg1});
     }
   }
@@ -3547,7 +3651,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
     // Load the value as a one-element vector.
     Ty = llvm::VectorType::get(VTy->getElementType(), 1);
     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty);
-    Value *Ld = Builder.CreateCall2(F, Ops[0], Align);
+    Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
     // Combine them.
     SmallVector<Constant*, 2> Indices;
     Indices.push_back(ConstantInt::get(Int32Ty, 1-Lane));
@@ -3582,7 +3686,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
     default: llvm_unreachable("unknown vld_dup intrinsic?");
     }
     Function *F = CGM.getIntrinsic(Int, Ty);
-    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup");
+    Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
     return Builder.CreateStore(Ops[1], Ops[0]);
@@ -3651,7 +3755,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
-    Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]);
+    Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
    return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
   case NEON::BI__builtin_neon_vsri_n_v:
   case NEON::BI__builtin_neon_vsriq_n_v:
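
Note: in the ARM CRC32 hunk above, a 64-bit data operand is fed through two 32-bit CRC steps (low word, then high word), because the ARM instructions only accept 32-bit data. A scalar model of that split; crc32_step is a hypothetical stand-in for the arm_crc32w intrinsic:

    #include <cstdint>

    // Hypothetical stand-in for one 32-bit CRC instruction.
    std::uint32_t crc32_step(std::uint32_t Acc, std::uint32_t Data);

    // Mirrors the lowering: crc(acc, lo32(x)), then crc(result, hi32(x)).
    std::uint32_t crc32_64(std::uint32_t Acc, std::uint64_t Data) {
      std::uint32_t Lo = static_cast<std::uint32_t>(Data);
      std::uint32_t Hi = static_cast<std::uint32_t>(Data >> 32);
      return crc32_step(crc32_step(Acc, Lo), Hi);
    }
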
@@ -3979,7 +4083,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
     // PLDL3STRM or PLDL2STRM.
     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
-    return Builder.CreateCall4(F, Address, RW, Locality, IsData);
+    return Builder.CreateCall(F, {Address, RW, Locality, IsData});
   }
 
   if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
@@ -4074,9 +4178,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
                                          Int8PtrTy);
-    return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "stxp");
-  } else if (BuiltinID == AArch64::BI__builtin_arm_strex ||
-             BuiltinID == AArch64::BI__builtin_arm_stlex) {
+    return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
+  }
+
+  if (BuiltinID == AArch64::BI__builtin_arm_strex ||
+      BuiltinID == AArch64::BI__builtin_arm_stlex) {
     Value *StoreVal = EmitScalarExpr(E->getArg(0));
     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
 
@@ -4096,12 +4202,12 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
                                        ? Intrinsic::aarch64_stlxr
                                        : Intrinsic::aarch64_stxr,
                                      StoreAddr->getType());
-    return Builder.CreateCall2(F, StoreVal, StoreAddr, "stxr");
+    return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
   }
 
   if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
-    return Builder.CreateCall(F);
+    return Builder.CreateCall(F, {});
   }
 
   // CRC32
@@ -4133,7 +4239,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
     Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
 
-    return Builder.CreateCall2(F, Arg0, Arg1);
+    return Builder.CreateCall(F, {Arg0, Arg1});
   }
 
   llvm::SmallVector<Value*, 4> Ops;
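
Note: the stxr/stlxr lowering above returns the instruction's status word (0 on success). At the source level the exclusive builtins are usually paired in a retry loop; a hedged sketch using clang's __builtin_arm_ldrex/__builtin_arm_strex (compiles only for ARM/AArch64 targets):

    // Atomically increment *p with an exclusive load/store pair.
    void atomic_inc(int *p) {
      int failed;
      do {
        int v = __builtin_arm_ldrex(p);          // exclusive load
        failed = __builtin_arm_strex(v + 1, p);  // 0 on success, 1 on loss
      } while (failed);
    }
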
@@ -4248,36 +4354,36 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
   case NEON::BI__builtin_neon_vceqzs_f32:
     Ops.push_back(EmitScalarExpr(E->getArg(0)));
     return EmitAArch64CompareBuiltinExpr(
-        Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OEQ,
-        ICmpInst::ICMP_EQ, "vceqz");
+        Ops[0], ConvertType(E->getCallReturnType(getContext())),
+        ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
   case NEON::BI__builtin_neon_vcgezd_s64:
   case NEON::BI__builtin_neon_vcgezd_f64:
   case NEON::BI__builtin_neon_vcgezs_f32:
     Ops.push_back(EmitScalarExpr(E->getArg(0)));
     return EmitAArch64CompareBuiltinExpr(
-        Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OGE,
-        ICmpInst::ICMP_SGE, "vcgez");
+        Ops[0], ConvertType(E->getCallReturnType(getContext())),
+        ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
   case NEON::BI__builtin_neon_vclezd_s64:
   case NEON::BI__builtin_neon_vclezd_f64:
   case NEON::BI__builtin_neon_vclezs_f32:
     Ops.push_back(EmitScalarExpr(E->getArg(0)));
     return EmitAArch64CompareBuiltinExpr(
-        Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OLE,
-        ICmpInst::ICMP_SLE, "vclez");
+        Ops[0], ConvertType(E->getCallReturnType(getContext())),
+        ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
   case NEON::BI__builtin_neon_vcgtzd_s64:
   case NEON::BI__builtin_neon_vcgtzd_f64:
   case NEON::BI__builtin_neon_vcgtzs_f32:
     Ops.push_back(EmitScalarExpr(E->getArg(0)));
     return EmitAArch64CompareBuiltinExpr(
-        Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OGT,
-        ICmpInst::ICMP_SGT, "vcgtz");
+        Ops[0], ConvertType(E->getCallReturnType(getContext())),
+        ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
   case NEON::BI__builtin_neon_vcltzd_s64:
   case NEON::BI__builtin_neon_vcltzd_f64:
   case NEON::BI__builtin_neon_vcltzs_f32:
     Ops.push_back(EmitScalarExpr(E->getArg(0)));
     return EmitAArch64CompareBuiltinExpr(
-        Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OLT,
-        ICmpInst::ICMP_SLT, "vcltz");
+        Ops[0], ConvertType(E->getCallReturnType(getContext())),
+        ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
 
   case NEON::BI__builtin_neon_vceqzd_u64: {
     llvm::Type *Ty = llvm::Type::getInt64Ty(getLLVMContext());
@@ -4528,8 +4634,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
                                : Intrinsic::aarch64_neon_srshl;
     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
     Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
-    Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Int64Ty), Ops[1],
-                                 Builder.CreateSExt(Ops[2], Int64Ty));
+    Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
                                {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
     return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
   }
   case NEON::BI__builtin_neon_vshld_n_s64:
@@ -4699,7 +4805,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
       Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
       Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
       Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
-      Value *Result = Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
+      Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
      return Builder.CreateBitCast(Result, Ty);
     }
     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
@@ -4713,7 +4819,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
                                                cast<ConstantInt>(Ops[3]));
     Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
 
-    return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
+    return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
   }
   case NEON::BI__builtin_neon_vfmaq_laneq_v: {
     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
@@ -4722,17 +4828,17 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
     Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
-    return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
+    return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
   }
   case NEON::BI__builtin_neon_vfmas_lane_f32:
   case NEON::BI__builtin_neon_vfmas_laneq_f32:
   case NEON::BI__builtin_neon_vfmad_lane_f64:
   case NEON::BI__builtin_neon_vfmad_laneq_f64: {
     Ops.push_back(EmitScalarExpr(E->getArg(3)));
-    llvm::Type *Ty = ConvertType(E->getCallReturnType());
+    llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
     Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
-    return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
+    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
   }
   case NEON::BI__builtin_neon_vfms_v:
   case NEON::BI__builtin_neon_vfmsq_v: {  // Only used for FP types
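
Note: the vfma-by-lane cases above splat one multiplicand from a vector lane and then emit llvm.fma with the accumulator as the last operand (NEON's builtin puts it first). The per-element arithmetic, as a scalar C++ model using std::fma:

    #include <cmath>
    #include <cstddef>

    // acc[i] += a[i] * b[lane], one fused multiply-add per element.
    void fma_by_lane(double *acc, const double *a, const double *b,
                     std::size_t n, std::size_t lane) {
      const double m = b[lane];  // the splatted lane
      for (std::size_t i = 0; i != n; ++i)
        acc[i] = std::fma(a[i], m, acc[i]);  // llvm.fma takes the accumulator
                                             // last; NEON names it first
    }
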
@@ -5667,7 +5773,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
       Indices.push_back(ConstantInt::get(Int32Ty, i+vi));
       Indices.push_back(ConstantInt::get(Int32Ty, i+e+vi));
     }
-    Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
+    Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
     SV = llvm::ConstantVector::get(Indices);
     SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
     SV = Builder.CreateStore(SV, Addr);
@@ -5686,7 +5792,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
       Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
 
-    Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
+    Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
     SV = llvm::ConstantVector::get(Indices);
     SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
     SV = Builder.CreateStore(SV, Addr);
@@ -5706,7 +5812,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
       Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
       Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
     }
-    Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
+    Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
     SV = llvm::ConstantVector::get(Indices);
     SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
     SV = Builder.CreateStore(SV, Addr);
@@ -5817,7 +5923,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     Value *Locality = EmitScalarExpr(E->getArg(1));
     Value *Data = ConstantInt::get(Int32Ty, 1);
     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
-    return Builder.CreateCall4(F, Address, RW, Locality, Data);
+    return Builder.CreateCall(F, {Address, RW, Locality, Data});
   }
   case X86::BI__builtin_ia32_vec_init_v8qi:
   case X86::BI__builtin_ia32_vec_init_v4hi:
@@ -5856,104 +5962,95 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
     return Builder.CreateStore(Ops[1], Ops[0]);
   }
-  case X86::BI__builtin_ia32_palignr: {
-    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
-
-    // If palignr is shifting the pair of input vectors less than 9 bytes,
-    // emit a shuffle instruction.
-    if (shiftVal <= 8) {
-      SmallVector<llvm::Constant*, 8> Indices;
-      for (unsigned i = 0; i != 8; ++i)
-        Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));
-
-      Value* SV = llvm::ConstantVector::get(Indices);
-      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
+  case X86::BI__builtin_ia32_palignr128:
+  case X86::BI__builtin_ia32_palignr256: {
+    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
+
+    unsigned NumElts =
+      cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
+    assert(NumElts % 16 == 0);
+    unsigned NumLanes = NumElts / 16;
+    unsigned NumLaneElts = NumElts / NumLanes;
+
+    // If palignr is shifting the pair of vectors more than the size of two
+    // lanes, emit zero.
+    if (ShiftVal >= (2 * NumLaneElts))
+      return llvm::Constant::getNullValue(ConvertType(E->getType()));
+
+    // If palignr is shifting the pair of input vectors more than one lane,
+    // but less than two lanes, convert to shifting in zeroes.
+    if (ShiftVal > NumLaneElts) {
+      ShiftVal -= NumLaneElts;
+      Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
     }
 
-    // If palignr is shifting the pair of input vectors more than 8 but less
-    // than 16 bytes, emit a logical right shift of the destination.
-    if (shiftVal < 16) {
-      // MMX has these as 1 x i64 vectors for some odd optimization reasons.
-      llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 1);
-
-      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
-      Ops[1] = llvm::ConstantInt::get(VecTy, (shiftVal-8) * 8);
-
-      // create i32 constant
-      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_mmx_psrl_q);
-      return Builder.CreateCall(F, makeArrayRef(Ops.data(), 2), "palignr");
+    SmallVector<llvm::Constant*, 32> Indices;
+    // 256-bit palignr operates on 128-bit lanes so we need to handle that
+    for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+      for (unsigned i = 0; i != NumLaneElts; ++i) {
+        unsigned Idx = ShiftVal + i;
+        if (Idx >= NumLaneElts)
+          Idx += NumElts - NumLaneElts; // End of lane, switch operand.
+        Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx + l));
+      }
     }
 
-    // If palignr is shifting the pair of vectors more than 16 bytes, emit zero.
-    return llvm::Constant::getNullValue(ConvertType(E->getType()));
+    Value* SV = llvm::ConstantVector::get(Indices);
+    return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
   }
-  case X86::BI__builtin_ia32_palignr128: {
-    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
+  case X86::BI__builtin_ia32_pslldqi256: {
+    // Shift value is in bits so divide by 8.
+    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3;
 
-    // If palignr is shifting the pair of input vectors less than 17 bytes,
-    // emit a shuffle instruction.
-    if (shiftVal <= 16) {
-      SmallVector<llvm::Constant*, 16> Indices;
-      for (unsigned i = 0; i != 16; ++i)
-        Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));
+    // If pslldq is shifting the vector more than 15 bytes, emit zero.
+    if (shiftVal >= 16)
+      return llvm::Constant::getNullValue(ConvertType(E->getType()));
 
-      Value* SV = llvm::ConstantVector::get(Indices);
-      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
+    SmallVector<llvm::Constant*, 32> Indices;
+    // 256-bit pslldq operates on 128-bit lanes so we need to handle that
+    for (unsigned l = 0; l != 32; l += 16) {
+      for (unsigned i = 0; i != 16; ++i) {
+        unsigned Idx = 32 + i - shiftVal;
+        if (Idx < 32) Idx -= 16; // end of lane, switch operand.
+        Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx + l));
+      }
     }
 
-    // If palignr is shifting the pair of input vectors more than 16 but less
-    // than 32 bytes, emit a logical right shift of the destination.
-    if (shiftVal < 32) {
-      llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
-
-      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
-      Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);
-
-      // create i32 constant
-      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_psrl_dq);
-      return Builder.CreateCall(F, makeArrayRef(Ops.data(), 2), "palignr");
-    }
+    llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32);
+    Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
+    Value *Zero = llvm::Constant::getNullValue(VecTy);
 
-    // If palignr is shifting the pair of vectors more than 32 bytes, emit zero.
-    return llvm::Constant::getNullValue(ConvertType(E->getType()));
-  }
-  case X86::BI__builtin_ia32_palignr256: {
-    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
+    Value *SV = llvm::ConstantVector::get(Indices);
+    SV = Builder.CreateShuffleVector(Zero, Ops[0], SV, "pslldq");
+    llvm::Type *ResultType = ConvertType(E->getType());
+    return Builder.CreateBitCast(SV, ResultType, "cast");
+  }
+  case X86::BI__builtin_ia32_psrldqi256: {
+    // Shift value is in bits so divide by 8.
+    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3;
 
-    // If palignr is shifting the pair of input vectors less than 17 bytes,
-    // emit a shuffle instruction.
-    if (shiftVal <= 16) {
-      SmallVector<llvm::Constant*, 32> Indices;
-      // 256-bit palignr operates on 128-bit lanes so we need to handle that
-      for (unsigned l = 0; l != 2; ++l) {
-        unsigned LaneStart = l * 16;
-        unsigned LaneEnd = (l+1) * 16;
-        for (unsigned i = 0; i != 16; ++i) {
-          unsigned Idx = shiftVal + i + LaneStart;
-          if (Idx >= LaneEnd) Idx += 16; // end of lane, switch operand
-          Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx));
-        }
+    // If psrldq is shifting the vector more than 15 bytes, emit zero.
+    if (shiftVal >= 16)
+      return llvm::Constant::getNullValue(ConvertType(E->getType()));
+
+    SmallVector<llvm::Constant*, 32> Indices;
+    // 256-bit psrldq operates on 128-bit lanes so we need to handle that
+    for (unsigned l = 0; l != 32; l += 16) {
+      for (unsigned i = 0; i != 16; ++i) {
+        unsigned Idx = i + shiftVal;
+        if (Idx >= 16) Idx += 16; // end of lane, switch operand.
+        Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx + l));
      }
-
-      Value* SV = llvm::ConstantVector::get(Indices);
-      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
     }
 
-    // If palignr is shifting the pair of input vectors more than 16 but less
-    // than 32 bytes, emit a logical right shift of the destination.
-    if (shiftVal < 32) {
-      llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 4);
-
-      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
-      Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);
-
-      // create i32 constant
-      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_avx2_psrl_dq);
-      return Builder.CreateCall(F, makeArrayRef(Ops.data(), 2), "palignr");
-    }
+    llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32);
+    Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
+    Value *Zero = llvm::Constant::getNullValue(VecTy);
 
-    // If palignr is shifting the pair of vectors more than 32 bytes, emit zero.
-    return llvm::Constant::getNullValue(ConvertType(E->getType()));
+    Value *SV = llvm::ConstantVector::get(Indices);
+    SV = Builder.CreateShuffleVector(Ops[0], Zero, SV, "psrldq");
+    llvm::Type *ResultType = ConvertType(E->getType());
+    return Builder.CreateBitCast(SV, ResultType, "cast");
   }
   case X86::BI__builtin_ia32_movntps:
   case X86::BI__builtin_ia32_movntps256:
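
Note: after the rework above, palignr (and the pslldq/psrldq cases) no longer fall back to target-specific shift intrinsics; every byte shift becomes a single shufflevector whose index list is computed per 128-bit lane. A runnable model of the palignr index computation, mirroring the loop in the hunk:

    #include <cstdio>
    #include <vector>

    // Mirrors the palignr128/palignr256 lowering: per 128-bit lane, take
    // NumLaneElts bytes of the concatenated pair starting at ShiftVal.
    std::vector<unsigned> palignrIndices(unsigned ShiftVal, unsigned NumElts) {
      const unsigned NumLaneElts = 16; // palignr works within 128-bit lanes
      std::vector<unsigned> Indices;
      for (unsigned l = 0; l != NumElts; l += NumLaneElts)
        for (unsigned i = 0; i != NumLaneElts; ++i) {
          unsigned Idx = ShiftVal + i;
          if (Idx >= NumLaneElts)
            Idx += NumElts - NumLaneElts; // past lane end: second operand
          Indices.push_back(Idx + l);
        }
      return Indices;
    }

    int main() {
      // For a 128-bit palignr by 4: indices 4..15 select Ops[1], and
      // 16..19 select the low bytes of Ops[0].
      for (unsigned i : palignrIndices(/*ShiftVal=*/4, /*NumElts=*/16))
        std::printf("%u ", i);
      std::printf("\n");
    }
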
+ if (shiftVal >= 16) + return llvm::Constant::getNullValue(ConvertType(E->getType())); + + SmallVector<llvm::Constant*, 32> Indices; + // 256-bit psrldq operates on 128-bit lanes so we need to handle that + for (unsigned l = 0; l != 32; l += 16) { + for (unsigned i = 0; i != 16; ++i) { + unsigned Idx = i + shiftVal; + if (Idx >= 16) Idx += 16; // end of lane, switch operand. + Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx + l)); } - - Value* SV = llvm::ConstantVector::get(Indices); - return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr"); } - // If palignr is shifting the pair of input vectors more than 16 but less - // than 32 bytes, emit a logical right shift of the destination. - if (shiftVal < 32) { - llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 4); - - Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); - Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8); - - // create i32 constant - llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_avx2_psrl_dq); - return Builder.CreateCall(F, makeArrayRef(Ops.data(), 2), "palignr"); - } + llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32); + Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); + Value *Zero = llvm::Constant::getNullValue(VecTy); - // If palignr is shifting the pair of vectors more than 32 bytes, emit zero. - return llvm::Constant::getNullValue(ConvertType(E->getType())); + Value *SV = llvm::ConstantVector::get(Indices); + SV = Builder.CreateShuffleVector(Ops[0], Zero, SV, "psrldq"); + llvm::Type *ResultType = ConvertType(E->getType()); + return Builder.CreateBitCast(SV, ResultType, "cast"); } case X86::BI__builtin_ia32_movntps: case X86::BI__builtin_ia32_movntps256: @@ -5987,20 +6084,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // 3DNow! 
   case X86::BI__builtin_ia32_pswapdsf:
   case X86::BI__builtin_ia32_pswapdsi: {
-    const char *name;
-    Intrinsic::ID ID;
-    switch(BuiltinID) {
-    default: llvm_unreachable("Unsupported intrinsic!");
-    case X86::BI__builtin_ia32_pswapdsf:
-    case X86::BI__builtin_ia32_pswapdsi:
-      name = "pswapd";
-      ID = Intrinsic::x86_3dnowa_pswapd;
-      break;
-    }
     llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
     Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
-    llvm::Function *F = CGM.getIntrinsic(ID);
-    return Builder.CreateCall(F, Ops, name);
+    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
+    return Builder.CreateCall(F, Ops, "pswapd");
   }
   case X86::BI__builtin_ia32_rdrand16_step:
   case X86::BI__builtin_ia32_rdrand32_step:
@@ -6031,17 +6118,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
       break;
     }
 
-    Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
+    Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {});
     Builder.CreateStore(Builder.CreateExtractValue(Call, 0), Ops[0]);
     return Builder.CreateExtractValue(Call, 1);
   }
-  // AVX2 broadcast
-  case X86::BI__builtin_ia32_vbroadcastsi256: {
-    Value *VecTmp = CreateMemTemp(E->getArg(0)->getType());
-    Builder.CreateStore(Ops[0], VecTmp);
-    Value *F = CGM.getIntrinsic(Intrinsic::x86_avx2_vbroadcasti128);
-    return Builder.CreateCall(F, Builder.CreateBitCast(VecTmp, Int8PtrTy));
-  }
   // SSE comparison intrisics
   case X86::BI__builtin_ia32_cmpeqps:
   case X86::BI__builtin_ia32_cmpltps:
@@ -6318,7 +6398,7 @@ static Value *emitTernaryFPBuiltin(CodeGenFunction &CGF,
   llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
 
   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
-  return CGF.Builder.CreateCall3(F, Src0, Src1, Src2);
+  return CGF.Builder.CreateCall(F, {Src0, Src1, Src2});
 }
 
 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
@@ -6329,7 +6409,7 @@ static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
 
   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
-  return CGF.Builder.CreateCall2(F, Src0, Src1);
+  return CGF.Builder.CreateCall(F, {Src0, Src1});
 }
 
 Value *CodeGenFunction::EmitR600BuiltinExpr(unsigned BuiltinID,
@@ -6350,7 +6430,7 @@ Value *CodeGenFunction::EmitR600BuiltinExpr(unsigned BuiltinID,
     llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::AMDGPU_div_scale,
                                            X->getType());
-    llvm::Value *Tmp = Builder.CreateCall3(Callee, X, Y, Z);
+    llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
     llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
     llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
@@ -6373,7 +6453,7 @@ Value *CodeGenFunction::EmitR600BuiltinExpr(unsigned BuiltinID,
     llvm::Value *F = CGM.getIntrinsic(Intrinsic::AMDGPU_div_fmas,
                                       Src0->getType());
     llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
-    return Builder.CreateCall4(F, Src0, Src1, Src2, Src3ToBool);
+    return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
   }
   case R600::BI__builtin_amdgpu_div_fixup:
   case R600::BI__builtin_amdgpu_div_fixupf:
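
Note: the rdrand/rdseed handling above follows the {value, success-flag} aggregate pattern — extractvalue 0 is stored through the pointer operand and extractvalue 1 (the carry flag) is returned. A hedged usage sketch of the corresponding builtin (x86 only; needs -mrdrnd):

    // Returns true when the hardware produced a random value.
    bool get_hw_random(unsigned int *out) {
      // Lowered to llvm.x86.rdrand.32; the value goes to *out and the
      // success flag becomes the return value.
      return __builtin_ia32_rdrand32_step(out);
    }
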
+static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, + unsigned IntrinsicID, + const CallExpr *E) { + unsigned NumArgs = E->getNumArgs() - 1; + SmallVector<Value *, 8> Args(NumArgs); + for (unsigned I = 0; I < NumArgs; ++I) + Args[I] = CGF.EmitScalarExpr(E->getArg(I)); + Value *CCPtr = CGF.EmitScalarExpr(E->getArg(NumArgs)); + Value *F = CGF.CGM.getIntrinsic(IntrinsicID); + Value *Call = CGF.Builder.CreateCall(F, Args); + Value *CC = CGF.Builder.CreateExtractValue(Call, 1); + CGF.Builder.CreateStore(CC, CCPtr); + return CGF.Builder.CreateExtractValue(Call, 0); +} + +Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, + const CallExpr *E) { + switch (BuiltinID) { + case SystemZ::BI__builtin_tbegin: { + Value *TDB = EmitScalarExpr(E->getArg(0)); + Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); + Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin); + return Builder.CreateCall(F, {TDB, Control}); + } + case SystemZ::BI__builtin_tbegin_nofloat: { + Value *TDB = EmitScalarExpr(E->getArg(0)); + Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); + Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat); + return Builder.CreateCall(F, {TDB, Control}); + } + case SystemZ::BI__builtin_tbeginc: { + Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy); + Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08); + Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc); + return Builder.CreateCall(F, {TDB, Control}); + } + case SystemZ::BI__builtin_tabort: { + Value *Data = EmitScalarExpr(E->getArg(0)); + Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort); + return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort")); + } + case SystemZ::BI__builtin_non_tx_store: { + Value *Address = EmitScalarExpr(E->getArg(0)); + Value *Data = EmitScalarExpr(E->getArg(1)); + Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg); + return Builder.CreateCall(F, {Data, Address}); + } + + // Vector builtins. Note that most vector builtins are mapped automatically + // to target-specific LLVM intrinsics. The ones handled specially here can + // be represented via standard LLVM IR, which is preferable to enable common + // LLVM optimizations. 
+  // Vector builtins.  Note that most vector builtins are mapped automatically
+  // to target-specific LLVM intrinsics.  The ones handled specially here can
+  // be represented via standard LLVM IR, which is preferable to enable common
+  // LLVM optimizations.
+
+  case SystemZ::BI__builtin_s390_vpopctb:
+  case SystemZ::BI__builtin_s390_vpopcth:
+  case SystemZ::BI__builtin_s390_vpopctf:
+  case SystemZ::BI__builtin_s390_vpopctg: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    Value *X = EmitScalarExpr(E->getArg(0));
+    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
+    return Builder.CreateCall(F, X);
+  }
+
+  case SystemZ::BI__builtin_s390_vclzb:
+  case SystemZ::BI__builtin_s390_vclzh:
+  case SystemZ::BI__builtin_s390_vclzf:
+  case SystemZ::BI__builtin_s390_vclzg: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    Value *X = EmitScalarExpr(E->getArg(0));
+    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
+    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
+    return Builder.CreateCall(F, {X, Undef});
+  }
+
+  case SystemZ::BI__builtin_s390_vctzb:
+  case SystemZ::BI__builtin_s390_vctzh:
+  case SystemZ::BI__builtin_s390_vctzf:
+  case SystemZ::BI__builtin_s390_vctzg: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    Value *X = EmitScalarExpr(E->getArg(0));
+    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
+    Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
+    return Builder.CreateCall(F, {X, Undef});
+  }
+
+  case SystemZ::BI__builtin_s390_vfsqdb: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    Value *X = EmitScalarExpr(E->getArg(0));
+    Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
+    return Builder.CreateCall(F, X);
+  }
+  case SystemZ::BI__builtin_s390_vfmadb: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    Value *X = EmitScalarExpr(E->getArg(0));
+    Value *Y = EmitScalarExpr(E->getArg(1));
+    Value *Z = EmitScalarExpr(E->getArg(2));
+    Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
+    return Builder.CreateCall(F, {X, Y, Z});
+  }
+  case SystemZ::BI__builtin_s390_vfmsdb: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    Value *X = EmitScalarExpr(E->getArg(0));
+    Value *Y = EmitScalarExpr(E->getArg(1));
+    Value *Z = EmitScalarExpr(E->getArg(2));
+    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
+    Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
+    return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
+  }
+  case SystemZ::BI__builtin_s390_vflpdb: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    Value *X = EmitScalarExpr(E->getArg(0));
+    Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
+    return Builder.CreateCall(F, X);
+  }
+  case SystemZ::BI__builtin_s390_vflndb: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    Value *X = EmitScalarExpr(E->getArg(0));
+    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
+    Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
+    return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
+  }
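
Note: several of the cases above map SystemZ double-vector operations onto standard IR — vfmsdb becomes fma(x, y, -z) and vflndb becomes -fabs(x) — so ordinary LLVM optimizations apply. The per-element arithmetic, as a scalar C++ model:

    #include <cmath>

    // vfmsdb: fused multiply-subtract, emitted as llvm.fma(x, y, negated z).
    double vfms_model(double x, double y, double z) {
      return std::fma(x, y, -z);
    }

    // vflndb: "load negative", emitted as the negation of llvm.fabs(x).
    double vfln_model(double x) {
      return -std::fabs(x);
    }
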
+  case SystemZ::BI__builtin_s390_vfidb: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    Value *X = EmitScalarExpr(E->getArg(0));
+    // Constant-fold the M4 and M5 mask arguments.
+    llvm::APSInt M4, M5;
+    bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
+    bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
+    assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
+    (void)IsConstM4; (void)IsConstM5;
+    // Check whether this instance of vfidb can be represented via a LLVM
+    // standard intrinsic.  We only support some combinations of M4 and M5.
+    Intrinsic::ID ID = Intrinsic::not_intrinsic;
+    switch (M4.getZExtValue()) {
+    default: break;
+    case 0:  // IEEE-inexact exception allowed
+      switch (M5.getZExtValue()) {
+      default: break;
+      case 0: ID = Intrinsic::rint; break;
+      }
+      break;
+    case 4:  // IEEE-inexact exception suppressed
+      switch (M5.getZExtValue()) {
+      default: break;
+      case 0: ID = Intrinsic::nearbyint; break;
+      case 1: ID = Intrinsic::round; break;
+      case 5: ID = Intrinsic::trunc; break;
+      case 6: ID = Intrinsic::ceil; break;
+      case 7: ID = Intrinsic::floor; break;
+      }
+      break;
+    }
+    if (ID != Intrinsic::not_intrinsic) {
+      Function *F = CGM.getIntrinsic(ID, ResultType);
+      return Builder.CreateCall(F, X);
+    }
+    Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb);
+    Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
+    Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
+    return Builder.CreateCall(F, {X, M4Value, M5Value});
+  }
+
+  // Vector intrisincs that output the post-instruction CC value.
+
+#define INTRINSIC_WITH_CC(NAME) \
+  case SystemZ::BI__builtin_##NAME: \
+    return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
+
+  INTRINSIC_WITH_CC(s390_vpkshs);
+  INTRINSIC_WITH_CC(s390_vpksfs);
+  INTRINSIC_WITH_CC(s390_vpksgs);
+
+  INTRINSIC_WITH_CC(s390_vpklshs);
+  INTRINSIC_WITH_CC(s390_vpklsfs);
+  INTRINSIC_WITH_CC(s390_vpklsgs);
+
+  INTRINSIC_WITH_CC(s390_vceqbs);
+  INTRINSIC_WITH_CC(s390_vceqhs);
+  INTRINSIC_WITH_CC(s390_vceqfs);
+  INTRINSIC_WITH_CC(s390_vceqgs);
+
+  INTRINSIC_WITH_CC(s390_vchbs);
+  INTRINSIC_WITH_CC(s390_vchhs);
+  INTRINSIC_WITH_CC(s390_vchfs);
+  INTRINSIC_WITH_CC(s390_vchgs);
+
+  INTRINSIC_WITH_CC(s390_vchlbs);
+  INTRINSIC_WITH_CC(s390_vchlhs);
+  INTRINSIC_WITH_CC(s390_vchlfs);
+  INTRINSIC_WITH_CC(s390_vchlgs);
+
+  INTRINSIC_WITH_CC(s390_vfaebs);
+  INTRINSIC_WITH_CC(s390_vfaehs);
+  INTRINSIC_WITH_CC(s390_vfaefs);
+
+  INTRINSIC_WITH_CC(s390_vfaezbs);
+  INTRINSIC_WITH_CC(s390_vfaezhs);
+  INTRINSIC_WITH_CC(s390_vfaezfs);
+
+  INTRINSIC_WITH_CC(s390_vfeebs);
+  INTRINSIC_WITH_CC(s390_vfeehs);
+  INTRINSIC_WITH_CC(s390_vfeefs);
+
+  INTRINSIC_WITH_CC(s390_vfeezbs);
+  INTRINSIC_WITH_CC(s390_vfeezhs);
+  INTRINSIC_WITH_CC(s390_vfeezfs);
+
+  INTRINSIC_WITH_CC(s390_vfenebs);
+  INTRINSIC_WITH_CC(s390_vfenehs);
+  INTRINSIC_WITH_CC(s390_vfenefs);
+
+  INTRINSIC_WITH_CC(s390_vfenezbs);
+  INTRINSIC_WITH_CC(s390_vfenezhs);
+  INTRINSIC_WITH_CC(s390_vfenezfs);
+
+  INTRINSIC_WITH_CC(s390_vistrbs);
+  INTRINSIC_WITH_CC(s390_vistrhs);
+  INTRINSIC_WITH_CC(s390_vistrfs);
+
+  INTRINSIC_WITH_CC(s390_vstrcbs);
+  INTRINSIC_WITH_CC(s390_vstrchs);
+  INTRINSIC_WITH_CC(s390_vstrcfs);
+
+  INTRINSIC_WITH_CC(s390_vstrczbs);
+  INTRINSIC_WITH_CC(s390_vstrczhs);
+  INTRINSIC_WITH_CC(s390_vstrczfs);
+
+  INTRINSIC_WITH_CC(s390_vfcedbs);
+  INTRINSIC_WITH_CC(s390_vfchdbs);
+  INTRINSIC_WITH_CC(s390_vfchedbs);
+
+  INTRINSIC_WITH_CC(s390_vftcidb);
+
+#undef INTRINSIC_WITH_CC
+
+  default:
+    return nullptr;
+  }
+}
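
Note: each INTRINSIC_WITH_CC entry above expands to one switch case that routes through EmitSystemZIntrinsicWithCC. Per that helper's contract, the final source-level argument is a pointer to an int receiving the condition code, while the IR-level intrinsic returns a {result, cc} pair. A hedged source-level sketch (SystemZ vector extension; the vector typedef is a local illustration):

    typedef __attribute__((vector_size(16))) signed char vschar;

    // Compare two byte vectors; cc reports whether all/some/no elements
    // matched. Lowered to llvm.s390.vceqbs: extractvalue 1 is stored through
    // cc, extractvalue 0 is returned as the element-wise result mask.
    vschar compare_bytes(vschar a, vschar b, int *cc) {
      return __builtin_s390_vceqbs(a, b, cc);
    }
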