diff options
Diffstat (limited to 'contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp')
-rw-r--r-- | contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp | 530 |
1 files changed, 420 insertions, 110 deletions
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp index 4f68b34..635e342 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp @@ -20,7 +20,9 @@ #include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/CGFunctionInfo.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/InlineAsm.h" #include "llvm/IR/Intrinsics.h" using namespace clang; @@ -113,7 +115,8 @@ static RValue EmitBinaryAtomic(CodeGenFunction &CGF, static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, - Instruction::BinaryOps Op) { + Instruction::BinaryOps Op, + bool Invert = false) { QualType T = E->getType(); assert(E->getArg(0)->getType()->isPointerType()); assert(CGF.getContext().hasSameUnqualifiedType(T, @@ -138,36 +141,25 @@ static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1], llvm::SequentiallyConsistent); Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]); + if (Invert) + Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result, + llvm::ConstantInt::get(IntType, -1)); Result = EmitFromInt(CGF, Result, T, ValueType); return RValue::get(Result); } -/// EmitFAbs - Emit a call to fabs/fabsf/fabsl, depending on the type of ValTy, -/// which must be a scalar floating point type. -static Value *EmitFAbs(CodeGenFunction &CGF, Value *V, QualType ValTy) { - const BuiltinType *ValTyP = ValTy->getAs<BuiltinType>(); - assert(ValTyP && "isn't scalar fp type!"); - - StringRef FnName; - switch (ValTyP->getKind()) { - default: llvm_unreachable("Isn't a scalar fp type!"); - case BuiltinType::Float: FnName = "fabsf"; break; - case BuiltinType::Double: FnName = "fabs"; break; - case BuiltinType::LongDouble: FnName = "fabsl"; break; - } - - // The prototype is something that takes and returns whatever V's type is. - llvm::FunctionType *FT = llvm::FunctionType::get(V->getType(), V->getType(), - false); - llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction(FT, FnName); - - return CGF.EmitNounwindRuntimeCall(Fn, V, "abs"); +/// EmitFAbs - Emit a call to @llvm.fabs(). +static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) { + Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); + llvm::CallInst *Call = CGF.Builder.CreateCall(F, V); + Call->setDoesNotAccessMemory(); + return Call; } static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn, const CallExpr *E, llvm::Value *calleeValue) { - return CGF.EmitCall(E->getCallee()->getType(), calleeValue, E->getLocStart(), - ReturnValueSlot(), E->arg_begin(), E->arg_end(), Fn); + return CGF.EmitCall(E->getCallee()->getType(), calleeValue, E, + ReturnValueSlot(), Fn); } /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.* @@ -195,7 +187,8 @@ static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, } RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, - unsigned BuiltinID, const CallExpr *E) { + unsigned BuiltinID, const CallExpr *E, + ReturnValueSlot ReturnValue) { // See if we can constant fold this builtin. If so, don't emit it at all. Expr::EvalResult Result; if (E->EvaluateAsRValue(Result, CGM.getContext()) && @@ -255,6 +248,21 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(Result); } + case Builtin::BI__builtin_fabs: + case Builtin::BI__builtin_fabsf: + case Builtin::BI__builtin_fabsl: { + Value *Arg1 = EmitScalarExpr(E->getArg(0)); + Value *Result = EmitFAbs(*this, Arg1); + return RValue::get(Result); + } + case Builtin::BI__builtin_fmod: + case Builtin::BI__builtin_fmodf: + case Builtin::BI__builtin_fmodl: { + Value *Arg1 = EmitScalarExpr(E->getArg(0)); + Value *Arg2 = EmitScalarExpr(E->getArg(1)); + Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod"); + return RValue::get(Result); + } case Builtin::BI__builtin_conj: case Builtin::BI__builtin_conjf: @@ -388,6 +396,27 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, "expval"); return RValue::get(Result); } + case Builtin::BI__builtin_assume_aligned: { + Value *PtrValue = EmitScalarExpr(E->getArg(0)); + Value *OffsetValue = + (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr; + + Value *AlignmentValue = EmitScalarExpr(E->getArg(1)); + ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue); + unsigned Alignment = (unsigned) AlignmentCI->getZExtValue(); + + EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue); + return RValue::get(PtrValue); + } + case Builtin::BI__assume: + case Builtin::BI__builtin_assume: { + if (E->getArg(0)->HasSideEffects(getContext())) + return RValue::get(nullptr); + + Value *ArgValue = EmitScalarExpr(E->getArg(0)); + Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume); + return RValue::get(Builder.CreateCall(FnAssume, ArgValue)); + } case Builtin::BI__builtin_bswap16: case Builtin::BI__builtin_bswap32: case Builtin::BI__builtin_bswap64: { @@ -447,11 +476,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(Builder.CreateCall(F)); } case Builtin::BI__builtin_unreachable: { - if (SanOpts->Unreachable) { + if (SanOpts.has(SanitizerKind::Unreachable)) { SanitizerScope SanScope(this); - EmitCheck(Builder.getFalse(), "builtin_unreachable", - EmitCheckSourceLocation(E->getExprLoc()), - ArrayRef<llvm::Value *>(), CRK_Unrecoverable); + EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()), + SanitizerKind::Unreachable), + "builtin_unreachable", EmitCheckSourceLocation(E->getExprLoc()), + None); } else Builder.CreateUnreachable(); @@ -515,7 +545,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__builtin_isinf: { // isinf(x) --> fabs(x) == infinity Value *V = EmitScalarExpr(E->getArg(0)); - V = EmitFAbs(*this, V, E->getArg(0)->getType()); + V = EmitFAbs(*this, V); V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf"); return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); @@ -529,7 +559,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Value *V = EmitScalarExpr(E->getArg(0)); Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq"); - Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType()); + Value *Abs = EmitFAbs(*this, V); Value *IsLessThanInf = Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf"); APFloat Smallest = APFloat::getSmallestNormalized( @@ -547,7 +577,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Value *V = EmitScalarExpr(E->getArg(0)); Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq"); - Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType()); + Value *Abs = EmitFAbs(*this, V); Value *IsNotInf = Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf"); @@ -586,7 +616,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // if (fabs(V) == infinity) return FP_INFINITY Builder.SetInsertPoint(NotNan); - Value *VAbs = EmitFAbs(*this, V, E->getArg(5)->getType()); + Value *VAbs = EmitFAbs(*this, V); Value *IsInf = Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()), "isinf"); @@ -864,11 +894,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__sync_fetch_and_or: case Builtin::BI__sync_fetch_and_and: case Builtin::BI__sync_fetch_and_xor: + case Builtin::BI__sync_fetch_and_nand: case Builtin::BI__sync_add_and_fetch: case Builtin::BI__sync_sub_and_fetch: case Builtin::BI__sync_and_and_fetch: case Builtin::BI__sync_or_and_fetch: case Builtin::BI__sync_xor_and_fetch: + case Builtin::BI__sync_nand_and_fetch: case Builtin::BI__sync_val_compare_and_swap: case Builtin::BI__sync_bool_compare_and_swap: case Builtin::BI__sync_lock_test_and_set: @@ -905,6 +937,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__sync_fetch_and_xor_8: case Builtin::BI__sync_fetch_and_xor_16: return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E); + case Builtin::BI__sync_fetch_and_nand_1: + case Builtin::BI__sync_fetch_and_nand_2: + case Builtin::BI__sync_fetch_and_nand_4: + case Builtin::BI__sync_fetch_and_nand_8: + case Builtin::BI__sync_fetch_and_nand_16: + return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E); // Clang extensions: not overloaded yet. case Builtin::BI__sync_fetch_and_min: @@ -951,6 +989,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__sync_xor_and_fetch_16: return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E, llvm::Instruction::Xor); + case Builtin::BI__sync_nand_and_fetch_1: + case Builtin::BI__sync_nand_and_fetch_2: + case Builtin::BI__sync_nand_and_fetch_4: + case Builtin::BI__sync_nand_and_fetch_8: + case Builtin::BI__sync_nand_and_fetch_16: + return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E, + llvm::Instruction::And, true); case Builtin::BI__sync_val_compare_and_swap_1: case Builtin::BI__sync_val_compare_and_swap_2: @@ -1347,11 +1392,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Value *Arg = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgTy = Arg->getType(); - if (ArgTy->isPPC_FP128Ty()) - break; // FIXME: I'm not sure what the right implementation is here. int ArgWidth = ArgTy->getPrimitiveSizeInBits(); llvm::Type *ArgIntTy = llvm::IntegerType::get(C, ArgWidth); Value *BCArg = Builder.CreateBitCast(Arg, ArgIntTy); + if (ArgTy->isPPC_FP128Ty()) { + // The higher-order double comes first, and so we need to truncate the + // pair to extract the overall sign. The order of the pair is the same + // in both little- and big-Endian modes. + ArgWidth >>= 1; + ArgIntTy = llvm::IntegerType::get(C, ArgWidth); + BCArg = Builder.CreateTrunc(BCArg, ArgIntTy); + } Value *ZeroCmp = llvm::Constant::getNullValue(ArgIntTy); Value *Result = Builder.CreateICmpSLT(BCArg, ZeroCmp); return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType()))); @@ -1518,9 +1569,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__noop: // __noop always evaluates to an integer literal zero. return RValue::get(ConstantInt::get(IntTy, 0)); - case Builtin::BI__assume: - // Until LLVM supports assumptions at the IR level, this becomes nothing. - return RValue::get(nullptr); + case Builtin::BI__builtin_call_with_static_chain: { + const CallExpr *Call = cast<CallExpr>(E->getArg(0)); + const Expr *Chain = E->getArg(1); + return EmitCall(Call->getCallee()->getType(), + EmitScalarExpr(Call->getCallee()), Call, ReturnValue, + Call->getCalleeDecl(), EmitScalarExpr(Chain)); + } case Builtin::BI_InterlockedExchange: case Builtin::BI_InterlockedExchangePointer: return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); @@ -1587,6 +1642,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, RMWI->setVolatile(true); return RValue::get(RMWI); } + case Builtin::BI__readfsdword: { + Value *IntToPtr = + Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), + llvm::PointerType::get(CGM.Int32Ty, 257)); + LoadInst *Load = + Builder.CreateAlignedLoad(IntToPtr, /*Align=*/4, /*isVolatile=*/true); + return RValue::get(Load); + } } // If this is an alias for a lib function (e.g. __builtin_sin), emit @@ -1690,8 +1753,6 @@ Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, return EmitARMBuiltinExpr(BuiltinID, E); case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: - case llvm::Triple::arm64: - case llvm::Triple::arm64_be: return EmitAArch64BuiltinExpr(BuiltinID, E); case llvm::Triple::x86: case llvm::Triple::x86_64: @@ -1701,6 +1762,7 @@ Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, case llvm::Triple::ppc64le: return EmitPPCBuiltinExpr(BuiltinID, E); case llvm::Triple::r600: + case llvm::Triple::amdgcn: return EmitR600BuiltinExpr(BuiltinID, E); default: return nullptr; @@ -2005,8 +2067,12 @@ static NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0), NEONMAP1(vld4q_v, arm_neon_vld4, 0), NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), + NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType), + NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType), NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), + NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType), + NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType), NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), NEONMAP0(vmovl_v), NEONMAP0(vmovn_v), @@ -2042,6 +2108,8 @@ static NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts), NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), + NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0), + NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0), NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType), @@ -2051,8 +2119,22 @@ static NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType), NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), + NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType), + NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType), + NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType), + NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType), + NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType), + NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType), + NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType), + NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType), + NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType), + NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType), + NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType), + NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType), NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), + NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts), + NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts), NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0), NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0), NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType), @@ -2173,6 +2255,8 @@ static NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts), NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts), NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts), + NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0), + NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0), NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType), @@ -2184,6 +2268,8 @@ static NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), + NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), + NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType), @@ -2828,6 +2914,10 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vqshlq_n_v: return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n", 1, false); + case NEON::BI__builtin_neon_vqshlu_n_v: + case NEON::BI__builtin_neon_vqshluq_n_v: + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n", + 1, false); case NEON::BI__builtin_neon_vrecpe_v: case NEON::BI__builtin_neon_vrecpeq_v: case NEON::BI__builtin_neon_vrsqrte_v: @@ -2835,6 +2925,10 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint); + case NEON::BI__builtin_neon_vrshr_n_v: + case NEON::BI__builtin_neon_vrshrq_n_v: + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", + 1, true); case NEON::BI__builtin_neon_vshl_n_v: case NEON::BI__builtin_neon_vshlq_n_v: Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false); @@ -3039,39 +3133,76 @@ static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops, return CGF.EmitNeonCall(TblF, TblOps, Name); } -Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, - const CallExpr *E) { - unsigned HintID = static_cast<unsigned>(-1); +Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) { switch (BuiltinID) { - default: break; + default: + return nullptr; case ARM::BI__builtin_arm_nop: - HintID = 0; - break; + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint), + llvm::ConstantInt::get(Int32Ty, 0)); case ARM::BI__builtin_arm_yield: case ARM::BI__yield: - HintID = 1; - break; + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint), + llvm::ConstantInt::get(Int32Ty, 1)); case ARM::BI__builtin_arm_wfe: case ARM::BI__wfe: - HintID = 2; - break; + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint), + llvm::ConstantInt::get(Int32Ty, 2)); case ARM::BI__builtin_arm_wfi: case ARM::BI__wfi: - HintID = 3; - break; + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint), + llvm::ConstantInt::get(Int32Ty, 3)); case ARM::BI__builtin_arm_sev: case ARM::BI__sev: - HintID = 4; - break; + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint), + llvm::ConstantInt::get(Int32Ty, 4)); case ARM::BI__builtin_arm_sevl: case ARM::BI__sevl: - HintID = 5; - break; + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint), + llvm::ConstantInt::get(Int32Ty, 5)); } +} - if (HintID != static_cast<unsigned>(-1)) { - Function *F = CGM.getIntrinsic(Intrinsic::arm_hint); - return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID)); +Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, + const CallExpr *E) { + if (auto Hint = GetValueForARMHint(BuiltinID)) + return Hint; + + if (BuiltinID == ARM::BI__emit) { + bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb; + llvm::FunctionType *FTy = + llvm::FunctionType::get(VoidTy, /*Variadic=*/false); + + APSInt Value; + if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext())) + llvm_unreachable("Sema will ensure that the parameter is constant"); + + uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue(); + + llvm::InlineAsm *Emit = + IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "", + /*SideEffects=*/true) + : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "", + /*SideEffects=*/true); + + return Builder.CreateCall(Emit); + } + + if (BuiltinID == ARM::BI__builtin_arm_dbg) { + Value *Option = EmitScalarExpr(E->getArg(0)); + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option); + } + + if (BuiltinID == ARM::BI__builtin_arm_prefetch) { + Value *Address = EmitScalarExpr(E->getArg(0)); + Value *RW = EmitScalarExpr(E->getArg(1)); + Value *IsData = EmitScalarExpr(E->getArg(2)); + + // Locality is not supported on ARM target + Value *Locality = llvm::ConstantInt::get(Int32Ty, 3); + + Value *F = CGM.getIntrinsic(Intrinsic::prefetch); + return Builder.CreateCall4(F, Address, RW, Locality, IsData); } if (BuiltinID == ARM::BI__builtin_arm_rbit) { @@ -3157,7 +3288,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd); - llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, NULL); + llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, nullptr); Value *Tmp = CreateMemTemp(E->getArg(0)->getType()); Value *Val = EmitScalarExpr(E->getArg(0)); @@ -3393,7 +3524,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, // Many NEON builtins have identical semantics and uses in ARM and // AArch64. Emit these in a single function. - ArrayRef<NeonIntrinsicInfo> IntrinsicMap(ARMSIMDIntrinsicMap); + auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap); const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted); if (Builtin) @@ -3500,10 +3631,6 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vqrshrun_n_v: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty), Ops, "vqrshrun_n", 1, true); - case NEON::BI__builtin_neon_vqshlu_n_v: - case NEON::BI__builtin_neon_vqshluq_n_v: - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftsu, Ty), - Ops, "vqshlu", 1, false); case NEON::BI__builtin_neon_vqshrn_n_v: Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n", @@ -3518,10 +3645,6 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vrshrn_n_v: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty), Ops, "vrshrn_n", 1, true); - case NEON::BI__builtin_neon_vrshr_n_v: - case NEON::BI__builtin_neon_vrshrq_n_v: - Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 1, true); case NEON::BI__builtin_neon_vrsra_n_v: case NEON::BI__builtin_neon_vrsraq_n_v: Ops[0] = Builder.CreateBitCast(Ops[0], Ty); @@ -3836,6 +3959,29 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID)); } + if (BuiltinID == AArch64::BI__builtin_arm_prefetch) { + Value *Address = EmitScalarExpr(E->getArg(0)); + Value *RW = EmitScalarExpr(E->getArg(1)); + Value *CacheLevel = EmitScalarExpr(E->getArg(2)); + Value *RetentionPolicy = EmitScalarExpr(E->getArg(3)); + Value *IsData = EmitScalarExpr(E->getArg(4)); + + Value *Locality = nullptr; + if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) { + // Temporal fetch, needs to convert cache level to locality. + Locality = llvm::ConstantInt::get(Int32Ty, + -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3); + } else { + // Streaming fetch. + Locality = llvm::ConstantInt::get(Int32Ty, 0); + } + + // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify + // PLDL3STRM or PLDL2STRM. + Value *F = CGM.getIntrinsic(Intrinsic::prefetch); + return Builder.CreateCall4(F, Address, RW, Locality, IsData); + } + if (BuiltinID == AArch64::BI__builtin_arm_rbit) { assert((getContext().getTypeSize(E->getType()) == 32) && "rbit of unusual size!"); @@ -3913,7 +4059,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp); - llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, NULL); + llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, nullptr); Value *One = llvm::ConstantInt::get(Int32Ty, 1); Value *Tmp = Builder.CreateAlloca(ConvertType(E->getArg(0)->getType()), @@ -3994,7 +4140,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) Ops.push_back(EmitScalarExpr(E->getArg(i))); - ArrayRef<NeonIntrinsicInfo> SISDMap(AArch64SISDIntrinsicMap); + auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap); const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted); @@ -4675,38 +4821,19 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, f64Type), Ops, "vrecps"); } - case NEON::BI__builtin_neon_vrshr_n_v: - case NEON::BI__builtin_neon_vrshrq_n_v: - // FIXME: this can be shared with 32-bit ARM, but not AArch64 at the - // moment. After the final merge it should be added to - // EmitCommonNeonBuiltinExpr. - Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 1, true); - case NEON::BI__builtin_neon_vqshlu_n_v: - case NEON::BI__builtin_neon_vqshluq_n_v: - // FIXME: AArch64 and ARM use different intrinsics for this, but are - // essentially compatible. It should be in EmitCommonNeonBuiltinExpr after - // the final merge. - Int = Intrinsic::aarch64_neon_sqshlu; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n", 1, false); case NEON::BI__builtin_neon_vqshrun_n_v: - // FIXME: as above Int = Intrinsic::aarch64_neon_sqshrun; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n"); case NEON::BI__builtin_neon_vqrshrun_n_v: - // FIXME: and again. Int = Intrinsic::aarch64_neon_sqrshrun; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n"); case NEON::BI__builtin_neon_vqshrn_n_v: - // FIXME: guess Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n"); case NEON::BI__builtin_neon_vrshrn_n_v: - // FIXME: there might be a pattern here. Int = Intrinsic::aarch64_neon_rshrn; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n"); case NEON::BI__builtin_neon_vqrshrn_n_v: - // FIXME: another one Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); case NEON::BI__builtin_neon_vrnda_v: @@ -5435,8 +5562,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[2] = Builder.CreateBitCast(Ops[2], Ty); Ops[3] = Builder.CreateZExt(Ops[3], llvm::IntegerType::get(getLLVMContext(), 64)); - Ops[1] = Builder.CreateCall(F, - ArrayRef<Value*>(Ops).slice(1), "vld2_lane"); + Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane"); Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); return Builder.CreateStore(Ops[1], Ops[0]); @@ -5452,8 +5578,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[3] = Builder.CreateBitCast(Ops[3], Ty); Ops[4] = Builder.CreateZExt(Ops[4], llvm::IntegerType::get(getLLVMContext(), 64)); - Ops[1] = Builder.CreateCall(F, - ArrayRef<Value*>(Ops).slice(1), "vld3_lane"); + Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane"); Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); return Builder.CreateStore(Ops[1], Ops[0]); @@ -5470,8 +5595,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[4] = Builder.CreateBitCast(Ops[4], Ty); Ops[5] = Builder.CreateZExt(Ops[5], llvm::IntegerType::get(getLLVMContext(), 64)); - Ops[1] = Builder.CreateCall(F, - ArrayRef<Value*>(Ops).slice(1), "vld4_lane"); + Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane"); Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); return Builder.CreateStore(Ops[1], Ops[0]); @@ -5757,7 +5881,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // create i32 constant llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_mmx_psrl_q); - return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr"); + return Builder.CreateCall(F, makeArrayRef(Ops.data(), 2), "palignr"); } // If palignr is shifting the pair of vectors more than 16 bytes, emit zero. @@ -5787,7 +5911,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // create i32 constant llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_psrl_dq); - return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr"); + return Builder.CreateCall(F, makeArrayRef(Ops.data(), 2), "palignr"); } // If palignr is shifting the pair of vectors more than 32 bytes, emit zero. @@ -5825,7 +5949,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // create i32 constant llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_avx2_psrl_dq); - return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr"); + return Builder.CreateCall(F, makeArrayRef(Ops.data(), 2), "palignr"); } // If palignr is shifting the pair of vectors more than 32 bytes, emit zero. @@ -5839,8 +5963,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_movntdq256: case X86::BI__builtin_ia32_movnti: case X86::BI__builtin_ia32_movnti64: { - llvm::MDNode *Node = llvm::MDNode::get(getLLVMContext(), - Builder.getInt32(1)); + llvm::MDNode *Node = llvm::MDNode::get( + getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); // Convert the type of the pointer to a pointer to the stored type. Value *BC = Builder.CreateBitCast(Ops[0], @@ -5863,8 +5987,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // 3DNow! case X86::BI__builtin_ia32_pswapdsf: case X86::BI__builtin_ia32_pswapdsi: { - const char *name = nullptr; - Intrinsic::ID ID = Intrinsic::not_intrinsic; + const char *name; + Intrinsic::ID ID; switch(BuiltinID) { default: llvm_unreachable("Unsupported intrinsic!"); case X86::BI__builtin_ia32_pswapdsf: @@ -5918,6 +6042,154 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Value *F = CGM.getIntrinsic(Intrinsic::x86_avx2_vbroadcasti128); return Builder.CreateCall(F, Builder.CreateBitCast(VecTmp, Int8PtrTy)); } + // SSE comparison intrisics + case X86::BI__builtin_ia32_cmpeqps: + case X86::BI__builtin_ia32_cmpltps: + case X86::BI__builtin_ia32_cmpleps: + case X86::BI__builtin_ia32_cmpunordps: + case X86::BI__builtin_ia32_cmpneqps: + case X86::BI__builtin_ia32_cmpnltps: + case X86::BI__builtin_ia32_cmpnleps: + case X86::BI__builtin_ia32_cmpordps: + case X86::BI__builtin_ia32_cmpeqss: + case X86::BI__builtin_ia32_cmpltss: + case X86::BI__builtin_ia32_cmpless: + case X86::BI__builtin_ia32_cmpunordss: + case X86::BI__builtin_ia32_cmpneqss: + case X86::BI__builtin_ia32_cmpnltss: + case X86::BI__builtin_ia32_cmpnless: + case X86::BI__builtin_ia32_cmpordss: + case X86::BI__builtin_ia32_cmpeqpd: + case X86::BI__builtin_ia32_cmpltpd: + case X86::BI__builtin_ia32_cmplepd: + case X86::BI__builtin_ia32_cmpunordpd: + case X86::BI__builtin_ia32_cmpneqpd: + case X86::BI__builtin_ia32_cmpnltpd: + case X86::BI__builtin_ia32_cmpnlepd: + case X86::BI__builtin_ia32_cmpordpd: + case X86::BI__builtin_ia32_cmpeqsd: + case X86::BI__builtin_ia32_cmpltsd: + case X86::BI__builtin_ia32_cmplesd: + case X86::BI__builtin_ia32_cmpunordsd: + case X86::BI__builtin_ia32_cmpneqsd: + case X86::BI__builtin_ia32_cmpnltsd: + case X86::BI__builtin_ia32_cmpnlesd: + case X86::BI__builtin_ia32_cmpordsd: + // These exist so that the builtin that takes an immediate can be bounds + // checked by clang to avoid passing bad immediates to the backend. Since + // AVX has a larger immediate than SSE we would need separate builtins to + // do the different bounds checking. Rather than create a clang specific + // SSE only builtin, this implements eight separate builtins to match gcc + // implementation. + + // Choose the immediate. + unsigned Imm; + switch (BuiltinID) { + default: llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_cmpeqps: + case X86::BI__builtin_ia32_cmpeqss: + case X86::BI__builtin_ia32_cmpeqpd: + case X86::BI__builtin_ia32_cmpeqsd: + Imm = 0; + break; + case X86::BI__builtin_ia32_cmpltps: + case X86::BI__builtin_ia32_cmpltss: + case X86::BI__builtin_ia32_cmpltpd: + case X86::BI__builtin_ia32_cmpltsd: + Imm = 1; + break; + case X86::BI__builtin_ia32_cmpleps: + case X86::BI__builtin_ia32_cmpless: + case X86::BI__builtin_ia32_cmplepd: + case X86::BI__builtin_ia32_cmplesd: + Imm = 2; + break; + case X86::BI__builtin_ia32_cmpunordps: + case X86::BI__builtin_ia32_cmpunordss: + case X86::BI__builtin_ia32_cmpunordpd: + case X86::BI__builtin_ia32_cmpunordsd: + Imm = 3; + break; + case X86::BI__builtin_ia32_cmpneqps: + case X86::BI__builtin_ia32_cmpneqss: + case X86::BI__builtin_ia32_cmpneqpd: + case X86::BI__builtin_ia32_cmpneqsd: + Imm = 4; + break; + case X86::BI__builtin_ia32_cmpnltps: + case X86::BI__builtin_ia32_cmpnltss: + case X86::BI__builtin_ia32_cmpnltpd: + case X86::BI__builtin_ia32_cmpnltsd: + Imm = 5; + break; + case X86::BI__builtin_ia32_cmpnleps: + case X86::BI__builtin_ia32_cmpnless: + case X86::BI__builtin_ia32_cmpnlepd: + case X86::BI__builtin_ia32_cmpnlesd: + Imm = 6; + break; + case X86::BI__builtin_ia32_cmpordps: + case X86::BI__builtin_ia32_cmpordss: + case X86::BI__builtin_ia32_cmpordpd: + case X86::BI__builtin_ia32_cmpordsd: + Imm = 7; + break; + } + + // Choose the intrinsic ID. + const char *name; + Intrinsic::ID ID; + switch (BuiltinID) { + default: llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_cmpeqps: + case X86::BI__builtin_ia32_cmpltps: + case X86::BI__builtin_ia32_cmpleps: + case X86::BI__builtin_ia32_cmpunordps: + case X86::BI__builtin_ia32_cmpneqps: + case X86::BI__builtin_ia32_cmpnltps: + case X86::BI__builtin_ia32_cmpnleps: + case X86::BI__builtin_ia32_cmpordps: + name = "cmpps"; + ID = Intrinsic::x86_sse_cmp_ps; + break; + case X86::BI__builtin_ia32_cmpeqss: + case X86::BI__builtin_ia32_cmpltss: + case X86::BI__builtin_ia32_cmpless: + case X86::BI__builtin_ia32_cmpunordss: + case X86::BI__builtin_ia32_cmpneqss: + case X86::BI__builtin_ia32_cmpnltss: + case X86::BI__builtin_ia32_cmpnless: + case X86::BI__builtin_ia32_cmpordss: + name = "cmpss"; + ID = Intrinsic::x86_sse_cmp_ss; + break; + case X86::BI__builtin_ia32_cmpeqpd: + case X86::BI__builtin_ia32_cmpltpd: + case X86::BI__builtin_ia32_cmplepd: + case X86::BI__builtin_ia32_cmpunordpd: + case X86::BI__builtin_ia32_cmpneqpd: + case X86::BI__builtin_ia32_cmpnltpd: + case X86::BI__builtin_ia32_cmpnlepd: + case X86::BI__builtin_ia32_cmpordpd: + name = "cmppd"; + ID = Intrinsic::x86_sse2_cmp_pd; + break; + case X86::BI__builtin_ia32_cmpeqsd: + case X86::BI__builtin_ia32_cmpltsd: + case X86::BI__builtin_ia32_cmplesd: + case X86::BI__builtin_ia32_cmpunordsd: + case X86::BI__builtin_ia32_cmpneqsd: + case X86::BI__builtin_ia32_cmpnltsd: + case X86::BI__builtin_ia32_cmpnlesd: + case X86::BI__builtin_ia32_cmpordsd: + name = "cmpsd"; + ID = Intrinsic::x86_sse2_cmp_sd; + break; + } + + Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm)); + llvm::Function *F = CGM.getIntrinsic(ID); + return Builder.CreateCall(F, Ops, name); } } @@ -5942,6 +6214,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, case PPC::BI__builtin_altivec_lvewx: case PPC::BI__builtin_altivec_lvsl: case PPC::BI__builtin_altivec_lvsr: + case PPC::BI__builtin_vsx_lxvd2x: + case PPC::BI__builtin_vsx_lxvw4x: { Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); @@ -5971,6 +6245,12 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, case PPC::BI__builtin_altivec_lvsr: ID = Intrinsic::ppc_altivec_lvsr; break; + case PPC::BI__builtin_vsx_lxvd2x: + ID = Intrinsic::ppc_vsx_lxvd2x; + break; + case PPC::BI__builtin_vsx_lxvw4x: + ID = Intrinsic::ppc_vsx_lxvw4x; + break; } llvm::Function *F = CGM.getIntrinsic(ID); return Builder.CreateCall(F, Ops, ""); @@ -5982,6 +6262,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, case PPC::BI__builtin_altivec_stvebx: case PPC::BI__builtin_altivec_stvehx: case PPC::BI__builtin_altivec_stvewx: + case PPC::BI__builtin_vsx_stxvd2x: + case PPC::BI__builtin_vsx_stxvw4x: { Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy); Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]); @@ -6004,6 +6286,12 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, case PPC::BI__builtin_altivec_stvewx: ID = Intrinsic::ppc_altivec_stvewx; break; + case PPC::BI__builtin_vsx_stxvd2x: + ID = Intrinsic::ppc_vsx_stxvd2x; + break; + case PPC::BI__builtin_vsx_stxvw4x: + ID = Intrinsic::ppc_vsx_stxvw4x; + break; } llvm::Function *F = CGM.getIntrinsic(ID); return Builder.CreateCall(F, Ops, ""); @@ -6033,6 +6321,17 @@ static Value *emitTernaryFPBuiltin(CodeGenFunction &CGF, return CGF.Builder.CreateCall3(F, Src0, Src1, Src2); } +// Emit an intrinsic that has 1 float or double operand, and 1 integer. +static Value *emitFPIntBuiltin(CodeGenFunction &CGF, + const CallExpr *E, + unsigned IntrinsicID) { + llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + + Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + return CGF.Builder.CreateCall2(F, Src0, Src1); +} + Value *CodeGenFunction::EmitR600BuiltinExpr(unsigned BuiltinID, const CallExpr *E) { switch (BuiltinID) { @@ -6065,18 +6364,23 @@ Value *CodeGenFunction::EmitR600BuiltinExpr(unsigned BuiltinID, return Result; } case R600::BI__builtin_amdgpu_div_fmas: - case R600::BI__builtin_amdgpu_div_fmasf: - return emitTernaryFPBuiltin(*this, E, Intrinsic::AMDGPU_div_fmas); + case R600::BI__builtin_amdgpu_div_fmasf: { + llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); + llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); + llvm::Value *Src3 = EmitScalarExpr(E->getArg(3)); + + llvm::Value *F = CGM.getIntrinsic(Intrinsic::AMDGPU_div_fmas, + Src0->getType()); + llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3); + return Builder.CreateCall4(F, Src0, Src1, Src2, Src3ToBool); + } case R600::BI__builtin_amdgpu_div_fixup: case R600::BI__builtin_amdgpu_div_fixupf: return emitTernaryFPBuiltin(*this, E, Intrinsic::AMDGPU_div_fixup); case R600::BI__builtin_amdgpu_trig_preop: - case R600::BI__builtin_amdgpu_trig_preopf: { - Value *Src0 = EmitScalarExpr(E->getArg(0)); - Value *Src1 = EmitScalarExpr(E->getArg(1)); - Value *F = CGM.getIntrinsic(Intrinsic::AMDGPU_trig_preop, Src0->getType()); - return Builder.CreateCall2(F, Src0, Src1); - } + case R600::BI__builtin_amdgpu_trig_preopf: + return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_trig_preop); case R600::BI__builtin_amdgpu_rcp: case R600::BI__builtin_amdgpu_rcpf: return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rcp); @@ -6086,6 +6390,12 @@ Value *CodeGenFunction::EmitR600BuiltinExpr(unsigned BuiltinID, case R600::BI__builtin_amdgpu_rsq_clamped: case R600::BI__builtin_amdgpu_rsq_clampedf: return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq_clamped); + case R600::BI__builtin_amdgpu_ldexp: + case R600::BI__builtin_amdgpu_ldexpf: + return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_ldexp); + case R600::BI__builtin_amdgpu_class: + case R600::BI__builtin_amdgpu_classf: + return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_class); default: return nullptr; } |