diff options
Diffstat (limited to 'contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp')
-rw-r--r-- | contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp | 1191 |
1 files changed, 1013 insertions, 178 deletions
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp index a5fc531..b3d02f1 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp @@ -11,13 +11,15 @@ // //===----------------------------------------------------------------------===// -#include "CodeGenFunction.h" #include "CGCXXABI.h" #include "CGObjCRuntime.h" +#include "CGOpenCLRuntime.h" +#include "CodeGenFunction.h" #include "CodeGenModule.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" +#include "clang/Analysis/Analyses/OSLog.h" #include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/CGFunctionInfo.h" @@ -33,10 +35,15 @@ using namespace clang; using namespace CodeGen; using namespace llvm; +static +int64_t clamp(int64_t Value, int64_t Low, int64_t High) { + return std::min(High, std::max(Low, Value)); +} + /// getBuiltinLibFunction - Given a builtin id for a function like /// "__builtin_fabsf", return a Function* for "fabsf". -llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, - unsigned BuiltinID) { +llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, + unsigned BuiltinID) { assert(Context.BuiltinInfo.isLibFunction(BuiltinID)); // Get the name, skip over the __builtin_ prefix (if necessary). @@ -302,10 +309,10 @@ static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) { return CGF.Builder.CreateICmpSLT(V, Zero); } -static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn, - const CallExpr *E, llvm::Value *calleeValue) { - return CGF.EmitCall(E->getCallee()->getType(), calleeValue, E, - ReturnValueSlot(), Fn); +static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, + const CallExpr *E, llvm::Constant *calleeValue) { + CGCallee callee = CGCallee::forDirect(calleeValue, FD); + return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot()); } /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.* @@ -462,6 +469,119 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, return Builder.CreateCall(F, {EmitScalarExpr(E), CI}); } +// Many of MSVC builtins are on both x64 and ARM; to avoid repeating code, we +// handle them here. +enum class CodeGenFunction::MSVCIntrin { + _BitScanForward, + _BitScanReverse, + _InterlockedAnd, + _InterlockedDecrement, + _InterlockedExchange, + _InterlockedExchangeAdd, + _InterlockedExchangeSub, + _InterlockedIncrement, + _InterlockedOr, + _InterlockedXor, +}; + +Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, + const CallExpr *E) { + switch (BuiltinID) { + case MSVCIntrin::_BitScanForward: + case MSVCIntrin::_BitScanReverse: { + Value *ArgValue = EmitScalarExpr(E->getArg(1)); + + llvm::Type *ArgType = ArgValue->getType(); + llvm::Type *IndexType = + EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType(); + llvm::Type *ResultType = ConvertType(E->getType()); + + Value *ArgZero = llvm::Constant::getNullValue(ArgType); + Value *ResZero = llvm::Constant::getNullValue(ResultType); + Value *ResOne = llvm::ConstantInt::get(ResultType, 1); + + BasicBlock *Begin = Builder.GetInsertBlock(); + BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn); + Builder.SetInsertPoint(End); + PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result"); + + Builder.SetInsertPoint(Begin); + Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero); + BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn); + Builder.CreateCondBr(IsZero, End, NotZero); + Result->addIncoming(ResZero, Begin); + + Builder.SetInsertPoint(NotZero); + Address IndexAddress = EmitPointerWithAlignment(E->getArg(0)); + + if (BuiltinID == MSVCIntrin::_BitScanForward) { + Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); + Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); + ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); + Builder.CreateStore(ZeroCount, IndexAddress, false); + } else { + unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); + Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1); + + Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); + Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); + ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); + Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount); + Builder.CreateStore(Index, IndexAddress, false); + } + Builder.CreateBr(End); + Result->addIncoming(ResOne, NotZero); + + Builder.SetInsertPoint(End); + return Result; + } + case MSVCIntrin::_InterlockedAnd: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E); + case MSVCIntrin::_InterlockedExchange: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E); + case MSVCIntrin::_InterlockedExchangeAdd: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E); + case MSVCIntrin::_InterlockedExchangeSub: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E); + case MSVCIntrin::_InterlockedOr: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E); + case MSVCIntrin::_InterlockedXor: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E); + + case MSVCIntrin::_InterlockedDecrement: { + llvm::Type *IntTy = ConvertType(E->getType()); + AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( + AtomicRMWInst::Sub, + EmitScalarExpr(E->getArg(0)), + ConstantInt::get(IntTy, 1), + llvm::AtomicOrdering::SequentiallyConsistent); + return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1)); + } + case MSVCIntrin::_InterlockedIncrement: { + llvm::Type *IntTy = ConvertType(E->getType()); + AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( + AtomicRMWInst::Add, + EmitScalarExpr(E->getArg(0)), + ConstantInt::get(IntTy, 1), + llvm::AtomicOrdering::SequentiallyConsistent); + return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1)); + } + } + llvm_unreachable("Incorrect MSVC intrinsic!"); +} + +namespace { +// ARC cleanup for __builtin_os_log_format +struct CallObjCArcUse final : EHScopeStack::Cleanup { + CallObjCArcUse(llvm::Value *object) : object(object) {} + llvm::Value *object; + + void Emit(CodeGenFunction &CGF, Flags flags) override { + CGF.EmitARCIntrinsicUse(object); + } +}; +} + RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue) { @@ -681,6 +801,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, "cast"); return RValue::get(Result); } + case Builtin::BI__popcnt16: + case Builtin::BI__popcnt: + case Builtin::BI__popcnt64: case Builtin::BI__builtin_popcount: case Builtin::BI__builtin_popcountl: case Builtin::BI__builtin_popcountll: { @@ -696,6 +819,58 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, "cast"); return RValue::get(Result); } + case Builtin::BI_rotr8: + case Builtin::BI_rotr16: + case Builtin::BI_rotr: + case Builtin::BI_lrotr: + case Builtin::BI_rotr64: { + Value *Val = EmitScalarExpr(E->getArg(0)); + Value *Shift = EmitScalarExpr(E->getArg(1)); + + llvm::Type *ArgType = Val->getType(); + Shift = Builder.CreateIntCast(Shift, ArgType, false); + unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); + Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth); + Value *ArgZero = llvm::Constant::getNullValue(ArgType); + + Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1); + Shift = Builder.CreateAnd(Shift, Mask); + Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift); + + Value *RightShifted = Builder.CreateLShr(Val, Shift); + Value *LeftShifted = Builder.CreateShl(Val, LeftShift); + Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted); + + Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero); + Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated); + return RValue::get(Result); + } + case Builtin::BI_rotl8: + case Builtin::BI_rotl16: + case Builtin::BI_rotl: + case Builtin::BI_lrotl: + case Builtin::BI_rotl64: { + Value *Val = EmitScalarExpr(E->getArg(0)); + Value *Shift = EmitScalarExpr(E->getArg(1)); + + llvm::Type *ArgType = Val->getType(); + Shift = Builder.CreateIntCast(Shift, ArgType, false); + unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); + Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth); + Value *ArgZero = llvm::Constant::getNullValue(ArgType); + + Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1); + Shift = Builder.CreateAnd(Shift, Mask); + Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift); + + Value *LeftShifted = Builder.CreateShl(Val, Shift); + Value *RightShifted = Builder.CreateLShr(Val, RightShift); + Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted); + + Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero); + Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated); + return RValue::get(Result); + } case Builtin::BI__builtin_unpredictable: { // Always return the argument of __builtin_unpredictable. LLVM does not // handle this builtin. Metadata for this builtin should be added directly @@ -789,8 +964,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, SanitizerScope SanScope(this); EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()), SanitizerKind::Unreachable), - "builtin_unreachable", EmitCheckSourceLocation(E->getExprLoc()), - None); + SanitizerHandler::BuiltinUnreachable, + EmitCheckSourceLocation(E->getExprLoc()), None); } else Builder.CreateUnreachable(); @@ -851,6 +1026,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); } + case Builtin::BIfinite: + case Builtin::BI__finite: + case Builtin::BIfinitef: + case Builtin::BI__finitef: + case Builtin::BIfinitel: + case Builtin::BI__finitel: case Builtin::BI__builtin_isinf: case Builtin::BI__builtin_isfinite: { // isinf(x) --> fabs(x) == infinity @@ -963,8 +1144,29 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI_alloca: case Builtin::BI__builtin_alloca: { Value *Size = EmitScalarExpr(E->getArg(0)); - return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size)); + const TargetInfo &TI = getContext().getTargetInfo(); + // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__. + unsigned SuitableAlignmentInBytes = + CGM.getContext() + .toCharUnitsFromBits(TI.getSuitableAlign()) + .getQuantity(); + AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); + AI->setAlignment(SuitableAlignmentInBytes); + return RValue::get(AI); + } + + case Builtin::BI__builtin_alloca_with_align: { + Value *Size = EmitScalarExpr(E->getArg(0)); + Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1)); + auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue); + unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue(); + unsigned AlignmentInBytes = + CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity(); + AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); + AI->setAlignment(AlignmentInBytes); + return RValue::get(AI); } + case Builtin::BIbzero: case Builtin::BI__builtin_bzero: { Address Dest = EmitPointerWithAlignment(E->getArg(0)); @@ -987,6 +1189,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(Dest.getPointer()); } + case Builtin::BI__builtin_char_memchr: + BuiltinID = Builtin::BI__builtin_memchr; + break; + case Builtin::BI__builtin___memcpy_chk: { // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2. llvm::APSInt Size, DstSize; @@ -1085,6 +1291,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); return RValue::get(Builder.CreateCall(F, Depth)); } + case Builtin::BI_ReturnAddress: { + Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); + return RValue::get(Builder.CreateCall(F, Builder.getInt32(0))); + } case Builtin::BI__builtin_frame_address: { Value *Depth = CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this); @@ -1390,7 +1600,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args); llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName); - return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args); + return EmitCall(FuncInfo, CGCallee::forDirect(Func), + ReturnValueSlot(), Args); } case Builtin::BI__atomic_test_and_set: { @@ -1905,12 +2116,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, const CallExpr *Call = cast<CallExpr>(E->getArg(0)); const Expr *Chain = E->getArg(1); return EmitCall(Call->getCallee()->getType(), - EmitScalarExpr(Call->getCallee()), Call, ReturnValue, - Call->getCalleeDecl(), EmitScalarExpr(Chain)); + EmitCallee(Call->getCallee()), Call, ReturnValue, + EmitScalarExpr(Chain)); } + case Builtin::BI_InterlockedExchange8: + case Builtin::BI_InterlockedExchange16: case Builtin::BI_InterlockedExchange: case Builtin::BI_InterlockedExchangePointer: - return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); + return RValue::get( + EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E)); case Builtin::BI_InterlockedCompareExchangePointer: { llvm::Type *RTy; llvm::IntegerType *IntType = @@ -1938,7 +2152,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, 0), RTy)); } - case Builtin::BI_InterlockedCompareExchange: { + case Builtin::BI_InterlockedCompareExchange8: + case Builtin::BI_InterlockedCompareExchange16: + case Builtin::BI_InterlockedCompareExchange: + case Builtin::BI_InterlockedCompareExchange64: { AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg( EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(2)), @@ -1948,42 +2165,44 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, CXI->setVolatile(true); return RValue::get(Builder.CreateExtractValue(CXI, 0)); } - case Builtin::BI_InterlockedIncrement: { - llvm::Type *IntTy = ConvertType(E->getType()); - AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( - AtomicRMWInst::Add, - EmitScalarExpr(E->getArg(0)), - ConstantInt::get(IntTy, 1), - llvm::AtomicOrdering::SequentiallyConsistent); - RMWI->setVolatile(true); - return RValue::get(Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1))); - } - case Builtin::BI_InterlockedDecrement: { - llvm::Type *IntTy = ConvertType(E->getType()); - AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( - AtomicRMWInst::Sub, - EmitScalarExpr(E->getArg(0)), - ConstantInt::get(IntTy, 1), - llvm::AtomicOrdering::SequentiallyConsistent); - RMWI->setVolatile(true); - return RValue::get(Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1))); - } - case Builtin::BI_InterlockedExchangeAdd: { - AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( - AtomicRMWInst::Add, - EmitScalarExpr(E->getArg(0)), - EmitScalarExpr(E->getArg(1)), - llvm::AtomicOrdering::SequentiallyConsistent); - RMWI->setVolatile(true); - return RValue::get(RMWI); - } + case Builtin::BI_InterlockedIncrement16: + case Builtin::BI_InterlockedIncrement: + return RValue::get( + EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E)); + case Builtin::BI_InterlockedDecrement16: + case Builtin::BI_InterlockedDecrement: + return RValue::get( + EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E)); + case Builtin::BI_InterlockedAnd8: + case Builtin::BI_InterlockedAnd16: + case Builtin::BI_InterlockedAnd: + return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E)); + case Builtin::BI_InterlockedExchangeAdd8: + case Builtin::BI_InterlockedExchangeAdd16: + case Builtin::BI_InterlockedExchangeAdd: + return RValue::get( + EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E)); + case Builtin::BI_InterlockedExchangeSub8: + case Builtin::BI_InterlockedExchangeSub16: + case Builtin::BI_InterlockedExchangeSub: + return RValue::get( + EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E)); + case Builtin::BI_InterlockedOr8: + case Builtin::BI_InterlockedOr16: + case Builtin::BI_InterlockedOr: + return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E)); + case Builtin::BI_InterlockedXor8: + case Builtin::BI_InterlockedXor16: + case Builtin::BI_InterlockedXor: + return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E)); case Builtin::BI__readfsdword: { llvm::Type *IntTy = ConvertType(E->getType()); Value *IntToPtr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), llvm::PointerType::get(IntTy, 257)); - LoadInst *Load = - Builder.CreateDefaultAlignedLoad(IntToPtr, /*isVolatile=*/true); + LoadInst *Load = Builder.CreateAlignedLoad( + IntTy, IntToPtr, getContext().getTypeAlignInChars(E->getType())); + Load->setVolatile(true); return RValue::get(Load); } @@ -2004,7 +2223,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, llvm::Attribute::ReturnsTwice); llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), - "_setjmpex", ReturnsTwiceAttr); + "_setjmpex", ReturnsTwiceAttr, /*Local=*/true); llvm::Value *Buf = Builder.CreateBitOrPointerCast( EmitScalarExpr(E->getArg(0)), Int8PtrTy); llvm::Value *FrameAddr = @@ -2029,7 +2248,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy}; llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true), - "_setjmp3", ReturnsTwiceAttr); + "_setjmp3", ReturnsTwiceAttr, /*Local=*/true); llvm::Value *Count = ConstantInt::get(IntTy, 0); llvm::Value *Args[] = {Buf, Count}; CS = EmitRuntimeCallOrInvoke(SetJmp3, Args); @@ -2037,7 +2256,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; llvm::Constant *SetJmp = CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), - "_setjmp", ReturnsTwiceAttr); + "_setjmp", ReturnsTwiceAttr, /*Local=*/true); llvm::Value *FrameAddr = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), ConstantInt::get(Int32Ty, 0)); @@ -2057,11 +2276,47 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, break; } + case Builtin::BI__builtin_coro_size: { + auto & Context = getContext(); + auto SizeTy = Context.getSizeType(); + auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy)); + Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T); + return RValue::get(Builder.CreateCall(F)); + } + + case Builtin::BI__builtin_coro_id: + return EmitCoroutineIntrinsic(E, Intrinsic::coro_id); + case Builtin::BI__builtin_coro_promise: + return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise); + case Builtin::BI__builtin_coro_resume: + return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume); + case Builtin::BI__builtin_coro_frame: + return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame); + case Builtin::BI__builtin_coro_free: + return EmitCoroutineIntrinsic(E, Intrinsic::coro_free); + case Builtin::BI__builtin_coro_destroy: + return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy); + case Builtin::BI__builtin_coro_done: + return EmitCoroutineIntrinsic(E, Intrinsic::coro_done); + case Builtin::BI__builtin_coro_alloc: + return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc); + case Builtin::BI__builtin_coro_begin: + return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin); + case Builtin::BI__builtin_coro_end: + return EmitCoroutineIntrinsic(E, Intrinsic::coro_end); + case Builtin::BI__builtin_coro_suspend: + return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend); + case Builtin::BI__builtin_coro_param: + return EmitCoroutineIntrinsic(E, Intrinsic::coro_param); + // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions case Builtin::BIread_pipe: case Builtin::BIwrite_pipe: { Value *Arg0 = EmitScalarExpr(E->getArg(0)), *Arg1 = EmitScalarExpr(E->getArg(1)); + CGOpenCLRuntime OpenCLRT(CGM); + Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); + Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); // Type of the generic packet parameter. unsigned GenericAS = @@ -2075,19 +2330,21 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, : "__write_pipe_2"; // Creating a generic function type to be able to call with any builtin or // user defined type. - llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy}; + llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty}; llvm::FunctionType *FTy = llvm::FunctionType::get( Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy); - return RValue::get(Builder.CreateCall( - CGM.CreateRuntimeFunction(FTy, Name), {Arg0, BCast})); + return RValue::get( + Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), + {Arg0, BCast, PacketSize, PacketAlign})); } else { assert(4 == E->getNumArgs() && "Illegal number of parameters to pipe function"); const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4" : "__write_pipe_4"; - llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy}; + llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy, + Int32Ty, Int32Ty}; Value *Arg2 = EmitScalarExpr(E->getArg(2)), *Arg3 = EmitScalarExpr(E->getArg(3)); llvm::FunctionType *FTy = llvm::FunctionType::get( @@ -2098,7 +2355,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, if (Arg2->getType() != Int32Ty) Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty); return RValue::get(Builder.CreateCall( - CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1, Arg2, BCast})); + CGM.CreateRuntimeFunction(FTy, Name), + {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign})); } } // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write @@ -2127,9 +2385,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Value *Arg0 = EmitScalarExpr(E->getArg(0)), *Arg1 = EmitScalarExpr(E->getArg(1)); llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy); + CGOpenCLRuntime OpenCLRT(CGM); + Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); + Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); // Building the generic function prototype. - llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty}; + llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty}; llvm::FunctionType *FTy = llvm::FunctionType::get( ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false); // We know the second argument is an integer type, but we may need to cast @@ -2137,7 +2398,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, if (Arg1->getType() != Int32Ty) Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty); return RValue::get( - Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1})); + Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), + {Arg0, Arg1, PacketSize, PacketAlign})); } // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write // functions @@ -2163,15 +2425,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Value *Arg0 = EmitScalarExpr(E->getArg(0)), *Arg1 = EmitScalarExpr(E->getArg(1)); + CGOpenCLRuntime OpenCLRT(CGM); + Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); + Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); // Building the generic function prototype. - llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType()}; + llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty}; llvm::FunctionType *FTy = llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()), llvm::ArrayRef<llvm::Type *>(ArgTys), false); return RValue::get( - Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1})); + Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), + {Arg0, Arg1, PacketSize, PacketAlign})); } // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions case Builtin::BIget_pipe_num_packets: @@ -2184,12 +2450,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // Building the generic function prototype. Value *Arg0 = EmitScalarExpr(E->getArg(0)); - llvm::Type *ArgTys[] = {Arg0->getType()}; + CGOpenCLRuntime OpenCLRT(CGM); + Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); + Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); + llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty}; llvm::FunctionType *FTy = llvm::FunctionType::get( Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); - return RValue::get( - Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0})); + return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), + {Arg0, PacketSize, PacketAlign})); } // OpenCL v2.0 s6.13.9 - Address space qualifier functions. @@ -2258,17 +2527,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, Int8PtrTy, IntTy}; - // Add the variadics. - for (unsigned I = 4; I < NumArgs; ++I) { - llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I)); - unsigned TypeSizeInBytes = - getContext() - .getTypeSizeInChars(E->getArg(I)->getType()) - .getQuantity(); - Args.push_back(TypeSizeInBytes < 4 - ? Builder.CreateZExt(ArgSize, Int32Ty) - : ArgSize); - } + // Each of the following arguments specifies the size of the corresponding + // argument passed to the enqueued block. + for (unsigned I = 4/*Position of the first size arg*/; I < NumArgs; ++I) + Args.push_back( + Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy)); llvm::FunctionType *FTy = llvm::FunctionType::get( Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true); @@ -2279,29 +2542,26 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // Any calls now have event arguments passed. if (NumArgs >= 7) { llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy); - unsigned AS4 = - E->getArg(4)->getType()->isArrayType() - ? E->getArg(4)->getType().getAddressSpace() - : E->getArg(4)->getType()->getPointeeType().getAddressSpace(); - llvm::Type *EventPtrAS4Ty = - EventTy->getPointerTo(CGM.getContext().getTargetAddressSpace(AS4)); - unsigned AS5 = - E->getArg(5)->getType()->getPointeeType().getAddressSpace(); - llvm::Type *EventPtrAS5Ty = - EventTy->getPointerTo(CGM.getContext().getTargetAddressSpace(AS5)); - - llvm::Value *NumEvents = EmitScalarExpr(E->getArg(3)); + llvm::Type *EventPtrTy = EventTy->getPointerTo( + CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); + + llvm::Value *NumEvents = + Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty); llvm::Value *EventList = E->getArg(4)->getType()->isArrayType() ? EmitArrayToPointerDecay(E->getArg(4)).getPointer() : EmitScalarExpr(E->getArg(4)); llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5)); + // Convert to generic address space. + EventList = Builder.CreatePointerCast(EventList, EventPtrTy); + ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy); llvm::Value *Block = Builder.CreateBitCast(EmitScalarExpr(E->getArg(6)), Int8PtrTy); - std::vector<llvm::Type *> ArgTys = { - QueueTy, Int32Ty, RangeTy, Int32Ty, - EventPtrAS4Ty, EventPtrAS5Ty, Int8PtrTy}; + std::vector<llvm::Type *> ArgTys = {QueueTy, Int32Ty, RangeTy, + Int32Ty, EventPtrTy, EventPtrTy, + Int8PtrTy}; + std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents, EventList, ClkEvent, Block}; @@ -2320,17 +2580,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, ArgTys.push_back(Int32Ty); Name = "__enqueue_kernel_events_vaargs"; - // Add the variadics. - for (unsigned I = 7; I < NumArgs; ++I) { - llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I)); - unsigned TypeSizeInBytes = - getContext() - .getTypeSizeInChars(E->getArg(I)->getType()) - .getQuantity(); - Args.push_back(TypeSizeInBytes < 4 - ? Builder.CreateZExt(ArgSize, Int32Ty) - : ArgSize); - } + // Each of the following arguments specifies the size of the corresponding + // argument passed to the enqueued block. + for (unsigned I = 7/*Position of the first size arg*/; I < NumArgs; ++I) + Args.push_back( + Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy)); + llvm::FunctionType *FTy = llvm::FunctionType::get( Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true); return RValue::get( @@ -2373,6 +2628,76 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // Fall through - it's already mapped to the intrinsic by GCCBuiltin. break; } + case Builtin::BI__builtin_os_log_format: { + assert(E->getNumArgs() >= 2 && + "__builtin_os_log_format takes at least 2 arguments"); + analyze_os_log::OSLogBufferLayout Layout; + analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout); + Address BufAddr = EmitPointerWithAlignment(E->getArg(0)); + // Ignore argument 1, the format string. It is not currently used. + CharUnits Offset; + Builder.CreateStore( + Builder.getInt8(Layout.getSummaryByte()), + Builder.CreateConstByteGEP(BufAddr, Offset++, "summary")); + Builder.CreateStore( + Builder.getInt8(Layout.getNumArgsByte()), + Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs")); + + llvm::SmallVector<llvm::Value *, 4> RetainableOperands; + for (const auto &Item : Layout.Items) { + Builder.CreateStore( + Builder.getInt8(Item.getDescriptorByte()), + Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor")); + Builder.CreateStore( + Builder.getInt8(Item.getSizeByte()), + Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize")); + Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset); + if (const Expr *TheExpr = Item.getExpr()) { + Addr = Builder.CreateElementBitCast( + Addr, ConvertTypeForMem(TheExpr->getType())); + // Check if this is a retainable type. + if (TheExpr->getType()->isObjCRetainableType()) { + assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar && + "Only scalar can be a ObjC retainable type"); + llvm::Value *SV = EmitScalarExpr(TheExpr, /*Ignore*/ false); + RValue RV = RValue::get(SV); + LValue LV = MakeAddrLValue(Addr, TheExpr->getType()); + EmitStoreThroughLValue(RV, LV); + // Check if the object is constant, if not, save it in + // RetainableOperands. + if (!isa<Constant>(SV)) + RetainableOperands.push_back(SV); + } else { + EmitAnyExprToMem(TheExpr, Addr, Qualifiers(), /*isInit*/ true); + } + } else { + Addr = Builder.CreateElementBitCast(Addr, Int32Ty); + Builder.CreateStore( + Builder.getInt32(Item.getConstValue().getQuantity()), Addr); + } + Offset += Item.size(); + } + + // Push a clang.arc.use cleanup for each object in RetainableOperands. The + // cleanup will cause the use to appear after the final log call, keeping + // the object valid while it’s held in the log buffer. Note that if there’s + // a release cleanup on the object, it will already be active; since + // cleanups are emitted in reverse order, the use will occur before the + // object is released. + if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount && + CGM.getCodeGenOpts().OptimizationLevel != 0) + for (llvm::Value *object : RetainableOperands) + pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), object); + + return RValue::get(BufAddr.getPointer()); + } + + case Builtin::BI__builtin_os_log_format_buffer_size: { + analyze_os_log::OSLogBufferLayout Layout; + analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout); + return RValue::get(ConstantInt::get(ConvertType(E->getType()), + Layout.size().getQuantity())); + } } // If this is an alias for a lib function (e.g. __builtin_sin), emit @@ -2385,7 +2710,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // If this is a predefined lib function (e.g. malloc), emit the call // using exactly the normal call path. if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID)) - return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee())); + return emitLibraryCall(*this, FD, E, + cast<llvm::Constant>(EmitScalarExpr(E->getCallee()))); // Check that a call to a target specific builtin has the correct target // features. @@ -2397,14 +2723,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // See if we have a target specific intrinsic. const char *Name = getContext().BuiltinInfo.getName(BuiltinID); Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic; - if (const char *Prefix = - llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch())) { - IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name); + StringRef Prefix = + llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch()); + if (!Prefix.empty()) { + IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name); // NOTE we dont need to perform a compatibility flag check here since the // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the // MS builtins via ALL_MS_LANGUAGES and are filtered earlier. if (IntrinsicID == Intrinsic::not_intrinsic) - IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix, Name); + IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name); } if (IntrinsicID != Intrinsic::not_intrinsic) { @@ -3871,7 +4198,7 @@ static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, if (SysReg.empty()) { const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts(); - SysReg = cast<StringLiteral>(SysRegStrExpr)->getString(); + SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString(); } llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) }; @@ -3995,9 +4322,9 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, } if (BuiltinID == ARM::BI__builtin_arm_rbit) { - return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_rbit), - EmitScalarExpr(E->getArg(0)), - "rbit"); + llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); } if (BuiltinID == ARM::BI__clear_cache) { @@ -4120,19 +4447,21 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, QualType Ty = E->getType(); llvm::Type *RealResTy = ConvertType(Ty); - llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(), - getContext().getTypeSize(Ty)); - LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo()); + llvm::Type *PtrTy = llvm::IntegerType::get( + getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo(); + LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy); Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex, - LoadAddr->getType()); + PtrTy); Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex"); if (RealResTy->isPointerTy()) return Builder.CreateIntToPtr(Val, RealResTy); else { + llvm::Type *IntResTy = llvm::IntegerType::get( + getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy)); Val = Builder.CreateTruncOrBitCast(Val, IntResTy); return Builder.CreateBitCast(Val, RealResTy); } @@ -4173,7 +4502,10 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, if (StoreVal->getType()->isPointerTy()) StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty); else { - StoreVal = Builder.CreateBitCast(StoreVal, StoreTy); + llvm::Type *IntTy = llvm::IntegerType::get( + getLLVMContext(), + CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType())); + StoreVal = Builder.CreateBitCast(StoreVal, IntTy); StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty); } @@ -4184,6 +4516,41 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex"); } + switch (BuiltinID) { + case ARM::BI__iso_volatile_load8: + case ARM::BI__iso_volatile_load16: + case ARM::BI__iso_volatile_load32: + case ARM::BI__iso_volatile_load64: { + Value *Ptr = EmitScalarExpr(E->getArg(0)); + QualType ElTy = E->getArg(0)->getType()->getPointeeType(); + CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy); + llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), + LoadSize.getQuantity() * 8); + Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); + llvm::LoadInst *Load = + Builder.CreateAlignedLoad(Ptr, LoadSize); + Load->setVolatile(true); + return Load; + } + case ARM::BI__iso_volatile_store8: + case ARM::BI__iso_volatile_store16: + case ARM::BI__iso_volatile_store32: + case ARM::BI__iso_volatile_store64: { + Value *Ptr = EmitScalarExpr(E->getArg(0)); + Value *Value = EmitScalarExpr(E->getArg(1)); + QualType ElTy = E->getArg(0)->getType()->getPointeeType(); + CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); + llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), + StoreSize.getQuantity() * 8); + Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); + llvm::StoreInst *Store = + Builder.CreateAlignedStore(Value, Ptr, + StoreSize); + Store->setVolatile(true); + return Store; + } + } + if (BuiltinID == ARM::BI__builtin_arm_clrex) { Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex); return Builder.CreateCall(F); @@ -4397,6 +4764,29 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0], Ops[3], Ops[4], Ops[5]}); } + case ARM::BI_BitScanForward: + case ARM::BI_BitScanForward64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E); + case ARM::BI_BitScanReverse: + case ARM::BI_BitScanReverse64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E); + + case ARM::BI_InterlockedAnd64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E); + case ARM::BI_InterlockedExchange64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E); + case ARM::BI_InterlockedExchangeAdd64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E); + case ARM::BI_InterlockedExchangeSub64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E); + case ARM::BI_InterlockedOr64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E); + case ARM::BI_InterlockedXor64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E); + case ARM::BI_InterlockedDecrement64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); + case ARM::BI_InterlockedIncrement64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); } // Get the last argument, which specifies the vector type. @@ -4840,14 +5230,14 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, "rbit of unusual size!"); llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); return Builder.CreateCall( - CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit"); + CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); } if (BuiltinID == AArch64::BI__builtin_arm_rbit64) { assert((getContext().getTypeSize(E->getType()) == 64) && "rbit of unusual size!"); llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); return Builder.CreateCall( - CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit"); + CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); } if (BuiltinID == AArch64::BI__clear_cache) { @@ -4889,19 +5279,21 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, QualType Ty = E->getType(); llvm::Type *RealResTy = ConvertType(Ty); - llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(), - getContext().getTypeSize(Ty)); - LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo()); + llvm::Type *PtrTy = llvm::IntegerType::get( + getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo(); + LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy); Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr, - LoadAddr->getType()); + PtrTy); Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr"); if (RealResTy->isPointerTy()) return Builder.CreateIntToPtr(Val, RealResTy); + llvm::Type *IntResTy = llvm::IntegerType::get( + getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy)); Val = Builder.CreateTruncOrBitCast(Val, IntResTy); return Builder.CreateBitCast(Val, RealResTy); } @@ -4940,7 +5332,10 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, if (StoreVal->getType()->isPointerTy()) StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty); else { - StoreVal = Builder.CreateBitCast(StoreVal, StoreTy); + llvm::Type *IntTy = llvm::IntegerType::get( + getLLVMContext(), + CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType())); + StoreVal = Builder.CreateBitCast(StoreVal, IntTy); StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty); } @@ -5065,9 +5460,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, switch (BuiltinID) { default: break; case NEON::BI__builtin_neon_vldrq_p128: { - llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128); + llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128); + llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0); Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy); - return Builder.CreateDefaultAlignedLoad(Ptr); + return Builder.CreateAlignedLoad(Int128Ty, Ptr, + CharUnits::fromQuantity(16)); } case NEON::BI__builtin_neon_vstrq_p128: { llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128); @@ -6240,27 +6637,37 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, ""); } case NEON::BI__builtin_neon_vld1_v: - case NEON::BI__builtin_neon_vld1q_v: + case NEON::BI__builtin_neon_vld1q_v: { Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); - return Builder.CreateDefaultAlignedLoad(Ops[0]); + auto Alignment = CharUnits::fromQuantity( + BuiltinID == NEON::BI__builtin_neon_vld1_v ? 8 : 16); + return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment); + } case NEON::BI__builtin_neon_vst1_v: case NEON::BI__builtin_neon_vst1q_v: Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); Ops[1] = Builder.CreateBitCast(Ops[1], VTy); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); case NEON::BI__builtin_neon_vld1_lane_v: - case NEON::BI__builtin_neon_vld1q_lane_v: + case NEON::BI__builtin_neon_vld1q_lane_v: { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ty = llvm::PointerType::getUnqual(VTy->getElementType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]); + auto Alignment = CharUnits::fromQuantity( + BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16); + Ops[0] = + Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment); return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane"); + } case NEON::BI__builtin_neon_vld1_dup_v: case NEON::BI__builtin_neon_vld1q_dup_v: { Value *V = UndefValue::get(Ty); Ty = llvm::PointerType::getUnqual(VTy->getElementType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]); + auto Alignment = CharUnits::fromQuantity( + BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 8 : 16); + Ops[0] = + Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment); llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI); return EmitNeonSplat(Ops[0], CI); @@ -6620,6 +7027,26 @@ static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]); } +static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF, + SmallVectorImpl<Value *> &Ops, + llvm::Type *DstTy, + unsigned SrcSizeInBits, + unsigned Align) { + // Load the subvector. + Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align); + + // Create broadcast mask. + unsigned NumDstElts = DstTy->getVectorNumElements(); + unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits(); + + SmallVector<uint32_t, 8> Mask; + for (unsigned i = 0; i != NumDstElts; i += NumSrcElts) + for (unsigned j = 0; j != NumSrcElts; ++j) + Mask.push_back(j); + + return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst"); +} + static Value *EmitX86Select(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1) { @@ -6676,6 +7103,18 @@ static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, std::max(NumElts, 8U))); } +static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred, + ArrayRef<Value *> Ops) { + Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); + Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]); + + if (Ops.size() == 2) + return Res; + + assert(Ops.size() == 4); + return EmitX86Select(CGF, Ops[3], Res, Ops[2]); +} + Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E) { if (BuiltinID == X86::BI__builtin_ms_va_start || @@ -6860,6 +7299,25 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Value *F = CGM.getIntrinsic(Intrinsic::prefetch); return Builder.CreateCall(F, {Address, RW, Locality, Data}); } + case X86::BI_mm_clflush: { + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush), + Ops[0]); + } + case X86::BI_mm_lfence: { + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence)); + } + case X86::BI_mm_mfence: { + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence)); + } + case X86::BI_mm_sfence: { + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence)); + } + case X86::BI_mm_pause: { + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause)); + } + case X86::BI__rdtsc: { + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc)); + } case X86::BI__builtin_ia32_undef128: case X86::BI__builtin_ia32_undef256: case X86::BI__builtin_ia32_undef512: @@ -6872,12 +7330,14 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_vec_ext_v2si: return Builder.CreateExtractElement(Ops[0], llvm::ConstantInt::get(Ops[1]->getType(), 0)); + case X86::BI_mm_setcsr: case X86::BI__builtin_ia32_ldmxcsr: { Address Tmp = CreateMemTemp(E->getArg(0)->getType()); Builder.CreateStore(Ops[0], Tmp); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr), Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy)); } + case X86::BI_mm_getcsr: case X86::BI__builtin_ia32_stmxcsr: { Address Tmp = CreateMemTemp(E->getType()); Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr), @@ -6944,6 +7404,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_storeups512_mask: return EmitX86MaskedStore(*this, Ops, 1); + case X86::BI__builtin_ia32_storess128_mask: + case X86::BI__builtin_ia32_storesd128_mask: { + return EmitX86MaskedStore(*this, Ops, 16); + } case X86::BI__builtin_ia32_movdqa32store128_mask: case X86::BI__builtin_ia32_movdqa64store128_mask: case X86::BI__builtin_ia32_storeaps128_mask: @@ -6980,6 +7444,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_loaddqudi512_mask: return EmitX86MaskedLoad(*this, Ops, 1); + case X86::BI__builtin_ia32_loadss128_mask: + case X86::BI__builtin_ia32_loadsd128_mask: + return EmitX86MaskedLoad(*this, Ops, 16); + case X86::BI__builtin_ia32_loadaps128_mask: case X86::BI__builtin_ia32_loadaps256_mask: case X86::BI__builtin_ia32_loadaps512_mask: @@ -6996,6 +7464,13 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity(); return EmitX86MaskedLoad(*this, Ops, Align); } + + case X86::BI__builtin_ia32_vbroadcastf128_pd256: + case X86::BI__builtin_ia32_vbroadcastf128_ps256: { + llvm::Type *DstTy = ConvertType(E->getType()); + return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1); + } + case X86::BI__builtin_ia32_storehps: case X86::BI__builtin_ia32_storelps: { llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty); @@ -7015,8 +7490,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } case X86::BI__builtin_ia32_palignr128: case X86::BI__builtin_ia32_palignr256: - case X86::BI__builtin_ia32_palignr128_mask: - case X86::BI__builtin_ia32_palignr256_mask: case X86::BI__builtin_ia32_palignr512_mask: { unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); @@ -7059,36 +7532,26 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } case X86::BI__builtin_ia32_movnti: - case X86::BI__builtin_ia32_movnti64: { - llvm::MDNode *Node = llvm::MDNode::get( - getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); - - // Convert the type of the pointer to a pointer to the stored type. - Value *BC = Builder.CreateBitCast(Ops[0], - llvm::PointerType::getUnqual(Ops[1]->getType()), - "cast"); - StoreInst *SI = Builder.CreateDefaultAlignedStore(Ops[1], BC); - SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); - - // No alignment for scalar intrinsic store. - SI->setAlignment(1); - return SI; - } + case X86::BI__builtin_ia32_movnti64: case X86::BI__builtin_ia32_movntsd: case X86::BI__builtin_ia32_movntss: { llvm::MDNode *Node = llvm::MDNode::get( getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); + Value *Ptr = Ops[0]; + Value *Src = Ops[1]; + // Extract the 0'th element of the source vector. - Value *Scl = Builder.CreateExtractElement(Ops[1], (uint64_t)0, "extract"); + if (BuiltinID == X86::BI__builtin_ia32_movntsd || + BuiltinID == X86::BI__builtin_ia32_movntss) + Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract"); // Convert the type of the pointer to a pointer to the stored type. - Value *BC = Builder.CreateBitCast(Ops[0], - llvm::PointerType::getUnqual(Scl->getType()), - "cast"); + Value *BC = Builder.CreateBitCast( + Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast"); // Unaligned nontemporal store of the scalar value. - StoreInst *SI = Builder.CreateDefaultAlignedStore(Scl, BC); + StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC); SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); SI->setAlignment(1); return SI; @@ -7182,43 +7645,58 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Ops[1]); } - // TODO: Handle 64/512-bit vector widths of min/max. case X86::BI__builtin_ia32_pmaxsb128: case X86::BI__builtin_ia32_pmaxsw128: case X86::BI__builtin_ia32_pmaxsd128: + case X86::BI__builtin_ia32_pmaxsq128_mask: case X86::BI__builtin_ia32_pmaxsb256: case X86::BI__builtin_ia32_pmaxsw256: - case X86::BI__builtin_ia32_pmaxsd256: { - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Ops[1]); - return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); - } + case X86::BI__builtin_ia32_pmaxsd256: + case X86::BI__builtin_ia32_pmaxsq256_mask: + case X86::BI__builtin_ia32_pmaxsb512_mask: + case X86::BI__builtin_ia32_pmaxsw512_mask: + case X86::BI__builtin_ia32_pmaxsd512_mask: + case X86::BI__builtin_ia32_pmaxsq512_mask: + return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops); case X86::BI__builtin_ia32_pmaxub128: case X86::BI__builtin_ia32_pmaxuw128: case X86::BI__builtin_ia32_pmaxud128: + case X86::BI__builtin_ia32_pmaxuq128_mask: case X86::BI__builtin_ia32_pmaxub256: case X86::BI__builtin_ia32_pmaxuw256: - case X86::BI__builtin_ia32_pmaxud256: { - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, Ops[0], Ops[1]); - return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); - } + case X86::BI__builtin_ia32_pmaxud256: + case X86::BI__builtin_ia32_pmaxuq256_mask: + case X86::BI__builtin_ia32_pmaxub512_mask: + case X86::BI__builtin_ia32_pmaxuw512_mask: + case X86::BI__builtin_ia32_pmaxud512_mask: + case X86::BI__builtin_ia32_pmaxuq512_mask: + return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops); case X86::BI__builtin_ia32_pminsb128: case X86::BI__builtin_ia32_pminsw128: case X86::BI__builtin_ia32_pminsd128: + case X86::BI__builtin_ia32_pminsq128_mask: case X86::BI__builtin_ia32_pminsb256: case X86::BI__builtin_ia32_pminsw256: - case X86::BI__builtin_ia32_pminsd256: { - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SLT, Ops[0], Ops[1]); - return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); - } + case X86::BI__builtin_ia32_pminsd256: + case X86::BI__builtin_ia32_pminsq256_mask: + case X86::BI__builtin_ia32_pminsb512_mask: + case X86::BI__builtin_ia32_pminsw512_mask: + case X86::BI__builtin_ia32_pminsd512_mask: + case X86::BI__builtin_ia32_pminsq512_mask: + return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops); case X86::BI__builtin_ia32_pminub128: case X86::BI__builtin_ia32_pminuw128: case X86::BI__builtin_ia32_pminud128: + case X86::BI__builtin_ia32_pminuq128_mask: case X86::BI__builtin_ia32_pminub256: case X86::BI__builtin_ia32_pminuw256: - case X86::BI__builtin_ia32_pminud256: { - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, Ops[0], Ops[1]); - return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); - } + case X86::BI__builtin_ia32_pminud256: + case X86::BI__builtin_ia32_pminuq256_mask: + case X86::BI__builtin_ia32_pminub512_mask: + case X86::BI__builtin_ia32_pminuw512_mask: + case X86::BI__builtin_ia32_pminud512_mask: + case X86::BI__builtin_ia32_pminuq512_mask: + return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops); // 3DNow! case X86::BI__builtin_ia32_pswapdsf: @@ -7363,6 +7841,87 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6); case X86::BI__builtin_ia32_cmpordsd: return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7); + + case X86::BI__emul: + case X86::BI__emulu: { + llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64); + bool isSigned = (BuiltinID == X86::BI__emul); + Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned); + Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned); + return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned); + } + case X86::BI__mulh: + case X86::BI__umulh: + case X86::BI_mul128: + case X86::BI_umul128: { + llvm::Type *ResType = ConvertType(E->getType()); + llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); + + bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128); + Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned); + Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned); + + Value *MulResult, *HigherBits; + if (IsSigned) { + MulResult = Builder.CreateNSWMul(LHS, RHS); + HigherBits = Builder.CreateAShr(MulResult, 64); + } else { + MulResult = Builder.CreateNUWMul(LHS, RHS); + HigherBits = Builder.CreateLShr(MulResult, 64); + } + HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned); + + if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh) + return HigherBits; + + Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2)); + Builder.CreateStore(HigherBits, HighBitsAddress); + return Builder.CreateIntCast(MulResult, ResType, IsSigned); + } + + case X86::BI__faststorefence: { + return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, + llvm::CrossThread); + } + case X86::BI_ReadWriteBarrier: + case X86::BI_ReadBarrier: + case X86::BI_WriteBarrier: { + return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, + llvm::SingleThread); + } + case X86::BI_BitScanForward: + case X86::BI_BitScanForward64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E); + case X86::BI_BitScanReverse: + case X86::BI_BitScanReverse64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E); + + case X86::BI_InterlockedAnd64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E); + case X86::BI_InterlockedExchange64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E); + case X86::BI_InterlockedExchangeAdd64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E); + case X86::BI_InterlockedExchangeSub64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E); + case X86::BI_InterlockedOr64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E); + case X86::BI_InterlockedXor64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E); + case X86::BI_InterlockedDecrement64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); + case X86::BI_InterlockedIncrement64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); + + case X86::BI_AddressOfReturnAddress: { + Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); + return Builder.CreateCall(F); + } + case X86::BI__stosb: { + // We treat __stosb as a volatile memset - it may not generate "rep stosb" + // instruction, but it will create a memset that won't be optimized away. + return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true); + } } } @@ -7384,7 +7943,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, case PPC::BI__builtin_ppc_get_timebase: return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter)); - // vec_ld, vec_lvsl, vec_lvsr + // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr case PPC::BI__builtin_altivec_lvx: case PPC::BI__builtin_altivec_lvxl: case PPC::BI__builtin_altivec_lvebx: @@ -7394,11 +7953,19 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, case PPC::BI__builtin_altivec_lvsr: case PPC::BI__builtin_vsx_lxvd2x: case PPC::BI__builtin_vsx_lxvw4x: + case PPC::BI__builtin_vsx_lxvd2x_be: + case PPC::BI__builtin_vsx_lxvw4x_be: + case PPC::BI__builtin_vsx_lxvl: + case PPC::BI__builtin_vsx_lxvll: { - Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); - - Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]); - Ops.pop_back(); + if(BuiltinID == PPC::BI__builtin_vsx_lxvl || + BuiltinID == PPC::BI__builtin_vsx_lxvll){ + Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy); + }else { + Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); + Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]); + Ops.pop_back(); + } switch (BuiltinID) { default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!"); @@ -7429,12 +7996,24 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, case PPC::BI__builtin_vsx_lxvw4x: ID = Intrinsic::ppc_vsx_lxvw4x; break; + case PPC::BI__builtin_vsx_lxvd2x_be: + ID = Intrinsic::ppc_vsx_lxvd2x_be; + break; + case PPC::BI__builtin_vsx_lxvw4x_be: + ID = Intrinsic::ppc_vsx_lxvw4x_be; + break; + case PPC::BI__builtin_vsx_lxvl: + ID = Intrinsic::ppc_vsx_lxvl; + break; + case PPC::BI__builtin_vsx_lxvll: + ID = Intrinsic::ppc_vsx_lxvll; + break; } llvm::Function *F = CGM.getIntrinsic(ID); return Builder.CreateCall(F, Ops, ""); } - // vec_st + // vec_st, vec_xst_be case PPC::BI__builtin_altivec_stvx: case PPC::BI__builtin_altivec_stvxl: case PPC::BI__builtin_altivec_stvebx: @@ -7442,10 +8021,19 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, case PPC::BI__builtin_altivec_stvewx: case PPC::BI__builtin_vsx_stxvd2x: case PPC::BI__builtin_vsx_stxvw4x: + case PPC::BI__builtin_vsx_stxvd2x_be: + case PPC::BI__builtin_vsx_stxvw4x_be: + case PPC::BI__builtin_vsx_stxvl: + case PPC::BI__builtin_vsx_stxvll: { - Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy); - Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]); - Ops.pop_back(); + if(BuiltinID == PPC::BI__builtin_vsx_stxvl || + BuiltinID == PPC::BI__builtin_vsx_stxvll ){ + Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); + }else { + Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy); + Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]); + Ops.pop_back(); + } switch (BuiltinID) { default: llvm_unreachable("Unsupported st intrinsic!"); @@ -7470,6 +8058,18 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, case PPC::BI__builtin_vsx_stxvw4x: ID = Intrinsic::ppc_vsx_stxvw4x; break; + case PPC::BI__builtin_vsx_stxvd2x_be: + ID = Intrinsic::ppc_vsx_stxvd2x_be; + break; + case PPC::BI__builtin_vsx_stxvw4x_be: + ID = Intrinsic::ppc_vsx_stxvw4x_be; + break; + case PPC::BI__builtin_vsx_stxvl: + ID = Intrinsic::ppc_vsx_stxvl; + break; + case PPC::BI__builtin_vsx_stxvll: + ID = Intrinsic::ppc_vsx_stxvll; + break; } llvm::Function *F = CGM.getIntrinsic(ID); return Builder.CreateCall(F, Ops, ""); @@ -7494,6 +8094,25 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType); return Builder.CreateCall(F, {X, Undef}); } + case PPC::BI__builtin_altivec_vctzb: + case PPC::BI__builtin_altivec_vctzh: + case PPC::BI__builtin_altivec_vctzw: + case PPC::BI__builtin_altivec_vctzd: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); + Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType); + return Builder.CreateCall(F, {X, Undef}); + } + case PPC::BI__builtin_altivec_vpopcntb: + case PPC::BI__builtin_altivec_vpopcnth: + case PPC::BI__builtin_altivec_vpopcntw: + case PPC::BI__builtin_altivec_vpopcntd: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); + return Builder.CreateCall(F, X); + } // Copy sign case PPC::BI__builtin_vsx_xvcpsgnsp: case PPC::BI__builtin_vsx_xvcpsgndp: { @@ -7581,6 +8200,85 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, llvm_unreachable("Unknown FMA operation"); return nullptr; // Suppress no-return warning } + + case PPC::BI__builtin_vsx_insertword: { + llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw); + + // Third argument is a compile time constant int. It must be clamped to + // to the range [0, 12]. + ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); + assert(ArgCI && + "Third arg to xxinsertw intrinsic must be constant integer"); + const int64_t MaxIndex = 12; + int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex); + + // The builtin semantics don't exactly match the xxinsertw instructions + // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the + // word from the first argument, and inserts it in the second argument. The + // instruction extracts the word from its second input register and inserts + // it into its first input register, so swap the first and second arguments. + std::swap(Ops[0], Ops[1]); + + // Need to cast the second argument from a vector of unsigned int to a + // vector of long long. + Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); + + if (getTarget().isLittleEndian()) { + // Create a shuffle mask of (1, 0) + Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1), + ConstantInt::get(Int32Ty, 0) + }; + Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); + + // Reverse the double words in the vector we will extract from. + Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); + Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask); + + // Reverse the index. + Index = MaxIndex - Index; + } + + // Intrinsic expects the first arg to be a vector of int. + Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4)); + Ops[2] = ConstantInt::getSigned(Int32Ty, Index); + return Builder.CreateCall(F, Ops); + } + + case PPC::BI__builtin_vsx_extractuword: { + llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw); + + // Intrinsic expects the first argument to be a vector of doublewords. + Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); + + // The second argument is a compile time constant int that needs to + // be clamped to the range [0, 12]. + ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]); + assert(ArgCI && + "Second Arg to xxextractuw intrinsic must be a constant integer!"); + const int64_t MaxIndex = 12; + int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex); + + if (getTarget().isLittleEndian()) { + // Reverse the index. + Index = MaxIndex - Index; + Ops[1] = ConstantInt::getSigned(Int32Ty, Index); + + // Emit the call, then reverse the double words of the results vector. + Value *Call = Builder.CreateCall(F, Ops); + + // Create a shuffle mask of (1, 0) + Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1), + ConstantInt::get(Int32Ty, 0) + }; + Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); + + Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask); + return ShuffleCall; + } else { + Ops[1] = ConstantInt::getSigned(Int32Ty, Index); + return Builder.CreateCall(F, Ops); + } + } } } @@ -7625,45 +8323,73 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3); return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool}); } + + case AMDGPU::BI__builtin_amdgcn_ds_swizzle: + return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle); case AMDGPU::BI__builtin_amdgcn_div_fixup: case AMDGPU::BI__builtin_amdgcn_div_fixupf: + case AMDGPU::BI__builtin_amdgcn_div_fixuph: return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup); case AMDGPU::BI__builtin_amdgcn_trig_preop: case AMDGPU::BI__builtin_amdgcn_trig_preopf: return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop); case AMDGPU::BI__builtin_amdgcn_rcp: case AMDGPU::BI__builtin_amdgcn_rcpf: + case AMDGPU::BI__builtin_amdgcn_rcph: return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp); case AMDGPU::BI__builtin_amdgcn_rsq: case AMDGPU::BI__builtin_amdgcn_rsqf: + case AMDGPU::BI__builtin_amdgcn_rsqh: return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq); case AMDGPU::BI__builtin_amdgcn_rsq_clamp: case AMDGPU::BI__builtin_amdgcn_rsq_clampf: return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp); case AMDGPU::BI__builtin_amdgcn_sinf: + case AMDGPU::BI__builtin_amdgcn_sinh: return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin); case AMDGPU::BI__builtin_amdgcn_cosf: + case AMDGPU::BI__builtin_amdgcn_cosh: return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos); case AMDGPU::BI__builtin_amdgcn_log_clampf: return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp); case AMDGPU::BI__builtin_amdgcn_ldexp: case AMDGPU::BI__builtin_amdgcn_ldexpf: + case AMDGPU::BI__builtin_amdgcn_ldexph: return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp); case AMDGPU::BI__builtin_amdgcn_frexp_mant: - case AMDGPU::BI__builtin_amdgcn_frexp_mantf: { + case AMDGPU::BI__builtin_amdgcn_frexp_mantf: + case AMDGPU::BI__builtin_amdgcn_frexp_manth: return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant); - } case AMDGPU::BI__builtin_amdgcn_frexp_exp: case AMDGPU::BI__builtin_amdgcn_frexp_expf: { - return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_exp); + Value *Src0 = EmitScalarExpr(E->getArg(0)); + Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, + { Builder.getInt32Ty(), Src0->getType() }); + return Builder.CreateCall(F, Src0); + } + case AMDGPU::BI__builtin_amdgcn_frexp_exph: { + Value *Src0 = EmitScalarExpr(E->getArg(0)); + Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, + { Builder.getInt16Ty(), Src0->getType() }); + return Builder.CreateCall(F, Src0); } case AMDGPU::BI__builtin_amdgcn_fract: case AMDGPU::BI__builtin_amdgcn_fractf: + case AMDGPU::BI__builtin_amdgcn_fracth: return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract); case AMDGPU::BI__builtin_amdgcn_lerp: return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp); + case AMDGPU::BI__builtin_amdgcn_uicmp: + case AMDGPU::BI__builtin_amdgcn_uicmpl: + case AMDGPU::BI__builtin_amdgcn_sicmp: + case AMDGPU::BI__builtin_amdgcn_sicmpl: + return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp); + case AMDGPU::BI__builtin_amdgcn_fcmp: + case AMDGPU::BI__builtin_amdgcn_fcmpf: + return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp); case AMDGPU::BI__builtin_amdgcn_class: case AMDGPU::BI__builtin_amdgcn_classf: + case AMDGPU::BI__builtin_amdgcn_classh: return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class); case AMDGPU::BI__builtin_amdgcn_read_exec: { @@ -7951,7 +8677,13 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, Ptr->getType()}), {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())}); }; - + auto MakeScopedAtomic = [&](unsigned IntrinsicID) { + Value *Ptr = EmitScalarExpr(E->getArg(0)); + return Builder.CreateCall( + CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(), + Ptr->getType()}), + {Ptr, EmitScalarExpr(E->getArg(1))}); + }; switch (BuiltinID) { case NVPTX::BI__nvvm_atom_add_gen_i: case NVPTX::BI__nvvm_atom_add_gen_l: @@ -8070,6 +8802,109 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__nvvm_ldg_d: case NVPTX::BI__nvvm_ldg_d2: return MakeLdg(Intrinsic::nvvm_ldg_global_f); + + case NVPTX::BI__nvvm_atom_cta_add_gen_i: + case NVPTX::BI__nvvm_atom_cta_add_gen_l: + case NVPTX::BI__nvvm_atom_cta_add_gen_ll: + return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta); + case NVPTX::BI__nvvm_atom_sys_add_gen_i: + case NVPTX::BI__nvvm_atom_sys_add_gen_l: + case NVPTX::BI__nvvm_atom_sys_add_gen_ll: + return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys); + case NVPTX::BI__nvvm_atom_cta_add_gen_f: + case NVPTX::BI__nvvm_atom_cta_add_gen_d: + return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta); + case NVPTX::BI__nvvm_atom_sys_add_gen_f: + case NVPTX::BI__nvvm_atom_sys_add_gen_d: + return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys); + case NVPTX::BI__nvvm_atom_cta_xchg_gen_i: + case NVPTX::BI__nvvm_atom_cta_xchg_gen_l: + case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll: + return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta); + case NVPTX::BI__nvvm_atom_sys_xchg_gen_i: + case NVPTX::BI__nvvm_atom_sys_xchg_gen_l: + case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll: + return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys); + case NVPTX::BI__nvvm_atom_cta_max_gen_i: + case NVPTX::BI__nvvm_atom_cta_max_gen_ui: + case NVPTX::BI__nvvm_atom_cta_max_gen_l: + case NVPTX::BI__nvvm_atom_cta_max_gen_ul: + case NVPTX::BI__nvvm_atom_cta_max_gen_ll: + case NVPTX::BI__nvvm_atom_cta_max_gen_ull: + return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta); + case NVPTX::BI__nvvm_atom_sys_max_gen_i: + case NVPTX::BI__nvvm_atom_sys_max_gen_ui: + case NVPTX::BI__nvvm_atom_sys_max_gen_l: + case NVPTX::BI__nvvm_atom_sys_max_gen_ul: + case NVPTX::BI__nvvm_atom_sys_max_gen_ll: + case NVPTX::BI__nvvm_atom_sys_max_gen_ull: + return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys); + case NVPTX::BI__nvvm_atom_cta_min_gen_i: + case NVPTX::BI__nvvm_atom_cta_min_gen_ui: + case NVPTX::BI__nvvm_atom_cta_min_gen_l: + case NVPTX::BI__nvvm_atom_cta_min_gen_ul: + case NVPTX::BI__nvvm_atom_cta_min_gen_ll: + case NVPTX::BI__nvvm_atom_cta_min_gen_ull: + return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta); + case NVPTX::BI__nvvm_atom_sys_min_gen_i: + case NVPTX::BI__nvvm_atom_sys_min_gen_ui: + case NVPTX::BI__nvvm_atom_sys_min_gen_l: + case NVPTX::BI__nvvm_atom_sys_min_gen_ul: + case NVPTX::BI__nvvm_atom_sys_min_gen_ll: + case NVPTX::BI__nvvm_atom_sys_min_gen_ull: + return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys); + case NVPTX::BI__nvvm_atom_cta_inc_gen_ui: + return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta); + case NVPTX::BI__nvvm_atom_cta_dec_gen_ui: + return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta); + case NVPTX::BI__nvvm_atom_sys_inc_gen_ui: + return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys); + case NVPTX::BI__nvvm_atom_sys_dec_gen_ui: + return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys); + case NVPTX::BI__nvvm_atom_cta_and_gen_i: + case NVPTX::BI__nvvm_atom_cta_and_gen_l: + case NVPTX::BI__nvvm_atom_cta_and_gen_ll: + return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta); + case NVPTX::BI__nvvm_atom_sys_and_gen_i: + case NVPTX::BI__nvvm_atom_sys_and_gen_l: + case NVPTX::BI__nvvm_atom_sys_and_gen_ll: + return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys); + case NVPTX::BI__nvvm_atom_cta_or_gen_i: + case NVPTX::BI__nvvm_atom_cta_or_gen_l: + case NVPTX::BI__nvvm_atom_cta_or_gen_ll: + return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta); + case NVPTX::BI__nvvm_atom_sys_or_gen_i: + case NVPTX::BI__nvvm_atom_sys_or_gen_l: + case NVPTX::BI__nvvm_atom_sys_or_gen_ll: + return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys); + case NVPTX::BI__nvvm_atom_cta_xor_gen_i: + case NVPTX::BI__nvvm_atom_cta_xor_gen_l: + case NVPTX::BI__nvvm_atom_cta_xor_gen_ll: + return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta); + case NVPTX::BI__nvvm_atom_sys_xor_gen_i: + case NVPTX::BI__nvvm_atom_sys_xor_gen_l: + case NVPTX::BI__nvvm_atom_sys_xor_gen_ll: + return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys); + case NVPTX::BI__nvvm_atom_cta_cas_gen_i: + case NVPTX::BI__nvvm_atom_cta_cas_gen_l: + case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: { + Value *Ptr = EmitScalarExpr(E->getArg(0)); + return Builder.CreateCall( + CGM.getIntrinsic( + Intrinsic::nvvm_atomic_cas_gen_i_cta, + {Ptr->getType()->getPointerElementType(), Ptr->getType()}), + {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); + } + case NVPTX::BI__nvvm_atom_sys_cas_gen_i: + case NVPTX::BI__nvvm_atom_sys_cas_gen_l: + case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: { + Value *Ptr = EmitScalarExpr(E->getArg(0)); + return Builder.CreateCall( + CGM.getIntrinsic( + Intrinsic::nvvm_atomic_cas_gen_i_sys, + {Ptr->getType()->getPointerElementType(), Ptr->getType()}), + {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); + } default: return nullptr; } |