Diffstat (limited to 'contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp')
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp | 1666
1 file changed, 1236 insertions(+), 430 deletions(-)
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp
index 787ac53..a5fc531 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/MDBuilder.h"
#include <sstream>
using namespace clang;
@@ -105,9 +106,8 @@ static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
llvm::Type *ValueType = Args[1]->getType();
Args[1] = EmitToInt(CGF, Args[1], T, IntType);
- llvm::Value *Result =
- CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
- llvm::SequentiallyConsistent);
+ llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
+ Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
return EmitFromInt(CGF, Result, T, ValueType);
}
@@ -167,9 +167,8 @@ static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
Args[1] = EmitToInt(CGF, Args[1], T, IntType);
Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
- llvm::Value *Result =
- CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
- llvm::SequentiallyConsistent);
+ llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
+ Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
if (Invert)
Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
@@ -206,9 +205,9 @@ static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
Args[1] = EmitToInt(CGF, Args[1], T, IntType);
Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
- Value *Pair = CGF.Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
- llvm::SequentiallyConsistent,
- llvm::SequentiallyConsistent);
+ Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
+ Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
+ llvm::AtomicOrdering::SequentiallyConsistent);
if (ReturnBool)
// Extract boolean success flag and zext it to int.
return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
@@ -219,6 +218,51 @@ static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
ValueType);
}
+// Emit a simple mangled intrinsic that has 1 argument and a return type
+// matching the argument type.
+static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
+ const CallExpr *E,
+ unsigned IntrinsicID) {
+ llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
+
+ Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
+ return CGF.Builder.CreateCall(F, Src0);
+}
+
+// Emit an intrinsic that has 2 operands of the same type as its result.
+static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
+ const CallExpr *E,
+ unsigned IntrinsicID) {
+ llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
+ llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
+
+ Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
+ return CGF.Builder.CreateCall(F, { Src0, Src1 });
+}
+
+// Emit an intrinsic that has 3 operands of the same type as its result.
+static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
+ const CallExpr *E,
+ unsigned IntrinsicID) {
+ llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
+ llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
+ llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
+
+ Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
+ return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
+}
+
+// Emit an intrinsic that has 1 float or double operand, and 1 integer.
+static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
+ const CallExpr *E,
+ unsigned IntrinsicID) {
+ llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
+ llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
+
+ Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
+ return CGF.Builder.CreateCall(F, {Src0, Src1});
+}
+
/// EmitFAbs - Emit a call to @llvm.fabs().
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
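The helpers added above collapse the per-builtin switch cases that follow into one-line calls; CGM.getIntrinsic mangles the chosen intrinsic on the argument type, so one helper serves float, double, and long double. A user-level sketch (not part of the patch) of the operand shapes involved; __builtin_fma is assumed here as the canonical three-operand example, since its dispatch is outside the hunks shown:

    #include <cstdio>

    int main() {
      double a = -2.5, b = 3.0;
      // One operand, result of the same type (emitUnaryBuiltin path).
      std::printf("fabs(%g)         = %g\n", a, __builtin_fabs(a));
      // Two operands of the result type (emitBinaryBuiltin path).
      std::printf("copysign(%g, %g) = %g\n", b, a, __builtin_copysign(b, a));
      // Three operands of the result type; __builtin_fma is an assumed example,
      // its routing through emitTernaryBuiltin is not shown in this diff.
      std::printf("fma(%g, %g, 1)   = %g\n", a, b, __builtin_fma(a, b, 1.0));
      return 0;
    }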
@@ -248,8 +292,8 @@ static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
if (CGF.getTarget().isBigEndian()) {
Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
V = CGF.Builder.CreateLShr(V, ShiftCst);
- }
- // We are truncating value in order to extract the higher-order
+ }
+ // We are truncating value in order to extract the higher-order
// double, which we will be using to extract the sign from.
IntTy = llvm::IntegerType::get(C, Width);
V = CGF.Builder.CreateTrunc(V, IntTy);
@@ -288,6 +332,17 @@ static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
return CGF.Builder.CreateExtractValue(Tmp, 0);
}
+static Value *emitRangedBuiltin(CodeGenFunction &CGF,
+ unsigned IntrinsicID,
+ int low, int high) {
+ llvm::MDBuilder MDHelper(CGF.getLLVMContext());
+ llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
+ Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
+ llvm::Instruction *Call = CGF.Builder.CreateCall(F);
+ Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
+ return Call;
+}
+
namespace {
struct WidthAndSignedness {
unsigned Width;
@@ -465,9 +520,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__builtin_fabs:
case Builtin::BI__builtin_fabsf:
case Builtin::BI__builtin_fabsl: {
- Value *Arg1 = EmitScalarExpr(E->getArg(0));
- Value *Result = EmitFAbs(*this, Arg1);
- return RValue::get(Result);
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
}
case Builtin::BI__builtin_fmod:
case Builtin::BI__builtin_fmodf:
@@ -477,7 +530,51 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
return RValue::get(Result);
}
-
+ case Builtin::BI__builtin_copysign:
+ case Builtin::BI__builtin_copysignf:
+ case Builtin::BI__builtin_copysignl: {
+ return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
+ }
+ case Builtin::BI__builtin_ceil:
+ case Builtin::BI__builtin_ceilf:
+ case Builtin::BI__builtin_ceill: {
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
+ }
+ case Builtin::BI__builtin_floor:
+ case Builtin::BI__builtin_floorf:
+ case Builtin::BI__builtin_floorl: {
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
+ }
+ case Builtin::BI__builtin_trunc:
+ case Builtin::BI__builtin_truncf:
+ case Builtin::BI__builtin_truncl: {
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
+ }
+ case Builtin::BI__builtin_rint:
+ case Builtin::BI__builtin_rintf:
+ case Builtin::BI__builtin_rintl: {
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
+ }
+ case Builtin::BI__builtin_nearbyint:
+ case Builtin::BI__builtin_nearbyintf:
+ case Builtin::BI__builtin_nearbyintl: {
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
+ }
+ case Builtin::BI__builtin_round:
+ case Builtin::BI__builtin_roundf:
+ case Builtin::BI__builtin_roundl: {
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
+ }
+ case Builtin::BI__builtin_fmin:
+ case Builtin::BI__builtin_fminf:
+ case Builtin::BI__builtin_fminl: {
+ return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
+ }
+ case Builtin::BI__builtin_fmax:
+ case Builtin::BI__builtin_fmaxf:
+ case Builtin::BI__builtin_fmaxl: {
+ return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
+ }
case Builtin::BI__builtin_conj:
case Builtin::BI__builtin_conjf:
case Builtin::BI__builtin_conjl: {
@@ -645,10 +742,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__builtin_bswap16:
case Builtin::BI__builtin_bswap32:
case Builtin::BI__builtin_bswap64: {
- Value *ArgValue = EmitScalarExpr(E->getArg(0));
- llvm::Type *ArgType = ArgValue->getType();
- Value *F = CGM.getIntrinsic(Intrinsic::bswap, ArgType);
- return RValue::get(Builder.CreateCall(F, ArgValue));
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
+ }
+ case Builtin::BI__builtin_bitreverse8:
+ case Builtin::BI__builtin_bitreverse16:
+ case Builtin::BI__builtin_bitreverse32:
+ case Builtin::BI__builtin_bitreverse64: {
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
}
case Builtin::BI__builtin_object_size: {
unsigned Type =
@@ -751,13 +851,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
}
- case Builtin::BI__builtin_isinf: {
- // isinf(x) --> fabs(x) == infinity
+ case Builtin::BI__builtin_isinf:
+ case Builtin::BI__builtin_isfinite: {
+ // isinf(x) --> fabs(x) == infinity
+ // isfinite(x) --> fabs(x) != infinity
+ // x != NaN via the ordered compare in either case.
Value *V = EmitScalarExpr(E->getArg(0));
- V = EmitFAbs(*this, V);
-
- V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf");
- return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
+ Value *Fabs = EmitFAbs(*this, V);
+ Constant *Infinity = ConstantFP::getInfinity(V->getType());
+ CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
+ ? CmpInst::FCMP_OEQ
+ : CmpInst::FCMP_ONE;
+ Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
+ return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
}
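A quick standalone check of the folding described in the comment above (a sketch, not part of the patch): because the compare against infinity is ordered, NaN fails both tests automatically and no separate x == x check is needed. fabs(x) < inf is used below as the C++ spelling of the ordered "not equal to infinity" compare, since |x| can never exceed infinity.

    #include <cmath>
    #include <cstdio>
    #include <limits>

    int main() {
      const double inf = std::numeric_limits<double>::infinity();
      const double nan = std::numeric_limits<double>::quiet_NaN();
      const double xs[] = {1.5, -0.0, inf, -inf, nan};
      for (double x : xs) {
        bool is_inf = std::fabs(x) == inf;  // FCMP_OEQ: false for NaN
        bool is_fin = std::fabs(x) <  inf;  // ordered compare: false for NaN and +/-inf
        std::printf("x=%-5g isinf=%d isfinite=%d\n", x, (int)is_inf, (int)is_fin);
      }
      return 0;
    }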
case Builtin::BI__builtin_isinf_sign: {
@@ -795,19 +901,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
}
- case Builtin::BI__builtin_isfinite: {
- // isfinite(x) --> x == x && fabs(x) != infinity;
- Value *V = EmitScalarExpr(E->getArg(0));
- Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
-
- Value *Abs = EmitFAbs(*this, V);
- Value *IsNotInf =
- Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
-
- V = Builder.CreateAnd(Eq, IsNotInf, "and");
- return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
- }
-
case Builtin::BI__builtin_fpclassify: {
Value *V = EmitScalarExpr(E->getArg(5));
llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
@@ -1258,7 +1351,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
llvm::StoreInst *Store =
Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
StoreSize);
- Store->setAtomic(llvm::Release);
+ Store->setAtomic(llvm::AtomicOrdering::Release);
return RValue::get(nullptr);
}
@@ -1270,7 +1363,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
// any way to safely use it... but in practice, it mostly works
// to use it with non-atomic loads and stores to get acquire/release
// semantics.
- Builder.CreateFence(llvm::SequentiallyConsistent);
+ Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
return RValue::get(nullptr);
}
@@ -1294,9 +1387,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
getContext().VoidPtrTy);
const CGFunctionInfo &FuncInfo =
- CGM.getTypes().arrangeFreeFunctionCall(E->getType(), Args,
- FunctionType::ExtInfo(),
- RequiredArgs::All);
+ CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
@@ -1320,30 +1411,27 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
switch (ord) {
case 0: // memory_order_relaxed
default: // invalid order
- Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
- Ptr, NewVal,
- llvm::Monotonic);
+ Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
+ llvm::AtomicOrdering::Monotonic);
break;
- case 1: // memory_order_consume
- case 2: // memory_order_acquire
- Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
- Ptr, NewVal,
- llvm::Acquire);
+ case 1: // memory_order_consume
+ case 2: // memory_order_acquire
+ Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
+ llvm::AtomicOrdering::Acquire);
break;
- case 3: // memory_order_release
- Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
- Ptr, NewVal,
- llvm::Release);
+ case 3: // memory_order_release
+ Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
+ llvm::AtomicOrdering::Release);
break;
- case 4: // memory_order_acq_rel
- Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
- Ptr, NewVal,
- llvm::AcquireRelease);
+ case 4: // memory_order_acq_rel
+
+ Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
+ llvm::AtomicOrdering::AcquireRelease);
break;
- case 5: // memory_order_seq_cst
- Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
- Ptr, NewVal,
- llvm::SequentiallyConsistent);
+ case 5: // memory_order_seq_cst
+ Result = Builder.CreateAtomicRMW(
+ llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
+ llvm::AtomicOrdering::SequentiallyConsistent);
break;
}
Result->setVolatile(Volatile);
@@ -1360,9 +1448,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
createBasicBlock("seqcst", CurFn)
};
llvm::AtomicOrdering Orders[5] = {
- llvm::Monotonic, llvm::Acquire, llvm::Release,
- llvm::AcquireRelease, llvm::SequentiallyConsistent
- };
+ llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
+ llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
+ llvm::AtomicOrdering::SequentiallyConsistent};
Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
@@ -1406,13 +1494,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
switch (ord) {
case 0: // memory_order_relaxed
default: // invalid order
- Store->setOrdering(llvm::Monotonic);
+ Store->setOrdering(llvm::AtomicOrdering::Monotonic);
break;
case 3: // memory_order_release
- Store->setOrdering(llvm::Release);
+ Store->setOrdering(llvm::AtomicOrdering::Release);
break;
case 5: // memory_order_seq_cst
- Store->setOrdering(llvm::SequentiallyConsistent);
+ Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
break;
}
return RValue::get(nullptr);
@@ -1426,8 +1514,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
createBasicBlock("seqcst", CurFn)
};
llvm::AtomicOrdering Orders[3] = {
- llvm::Monotonic, llvm::Release, llvm::SequentiallyConsistent
- };
+ llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
+ llvm::AtomicOrdering::SequentiallyConsistent};
Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
@@ -1466,16 +1554,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
break;
case 1: // memory_order_consume
case 2: // memory_order_acquire
- Builder.CreateFence(llvm::Acquire, Scope);
+ Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
break;
case 3: // memory_order_release
- Builder.CreateFence(llvm::Release, Scope);
+ Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
break;
case 4: // memory_order_acq_rel
- Builder.CreateFence(llvm::AcquireRelease, Scope);
+ Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
break;
case 5: // memory_order_seq_cst
- Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
+ Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
+ Scope);
break;
}
return RValue::get(nullptr);
@@ -1492,23 +1581,23 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
Builder.SetInsertPoint(AcquireBB);
- Builder.CreateFence(llvm::Acquire, Scope);
+ Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32(1), AcquireBB);
SI->addCase(Builder.getInt32(2), AcquireBB);
Builder.SetInsertPoint(ReleaseBB);
- Builder.CreateFence(llvm::Release, Scope);
+ Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32(3), ReleaseBB);
Builder.SetInsertPoint(AcqRelBB);
- Builder.CreateFence(llvm::AcquireRelease, Scope);
+ Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32(4), AcqRelBB);
Builder.SetInsertPoint(SeqCstBB);
- Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
+ Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, Scope);
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32(5), SeqCstBB);
@@ -1794,7 +1883,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
break;
}
-
+
llvm::Value *Carry;
llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
Builder.CreateStore(Sum, SumOutPtr);
@@ -1839,9 +1928,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
llvm::Value *Comparand =
Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
- auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
- SequentiallyConsistent,
- SequentiallyConsistent);
+ auto Result =
+ Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
+ AtomicOrdering::SequentiallyConsistent,
+ AtomicOrdering::SequentiallyConsistent);
Result->setVolatile(true);
return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
@@ -1853,44 +1943,47 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
EmitScalarExpr(E->getArg(0)),
EmitScalarExpr(E->getArg(2)),
EmitScalarExpr(E->getArg(1)),
- SequentiallyConsistent,
- SequentiallyConsistent);
+ AtomicOrdering::SequentiallyConsistent,
+ AtomicOrdering::SequentiallyConsistent);
CXI->setVolatile(true);
return RValue::get(Builder.CreateExtractValue(CXI, 0));
}
case Builtin::BI_InterlockedIncrement: {
+ llvm::Type *IntTy = ConvertType(E->getType());
AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
AtomicRMWInst::Add,
EmitScalarExpr(E->getArg(0)),
- ConstantInt::get(Int32Ty, 1),
- llvm::SequentiallyConsistent);
+ ConstantInt::get(IntTy, 1),
+ llvm::AtomicOrdering::SequentiallyConsistent);
RMWI->setVolatile(true);
- return RValue::get(Builder.CreateAdd(RMWI, ConstantInt::get(Int32Ty, 1)));
+ return RValue::get(Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1)));
}
case Builtin::BI_InterlockedDecrement: {
+ llvm::Type *IntTy = ConvertType(E->getType());
AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
AtomicRMWInst::Sub,
EmitScalarExpr(E->getArg(0)),
- ConstantInt::get(Int32Ty, 1),
- llvm::SequentiallyConsistent);
+ ConstantInt::get(IntTy, 1),
+ llvm::AtomicOrdering::SequentiallyConsistent);
RMWI->setVolatile(true);
- return RValue::get(Builder.CreateSub(RMWI, ConstantInt::get(Int32Ty, 1)));
+ return RValue::get(Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1)));
}
case Builtin::BI_InterlockedExchangeAdd: {
AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
AtomicRMWInst::Add,
EmitScalarExpr(E->getArg(0)),
EmitScalarExpr(E->getArg(1)),
- llvm::SequentiallyConsistent);
+ llvm::AtomicOrdering::SequentiallyConsistent);
RMWI->setVolatile(true);
return RValue::get(RMWI);
}
case Builtin::BI__readfsdword: {
+ llvm::Type *IntTy = ConvertType(E->getType());
Value *IntToPtr =
Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
- llvm::PointerType::get(CGM.Int32Ty, 257));
+ llvm::PointerType::get(IntTy, 257));
LoadInst *Load =
- Builder.CreateAlignedLoad(IntToPtr, /*Align=*/4, /*isVolatile=*/true);
+ Builder.CreateDefaultAlignedLoad(IntToPtr, /*isVolatile=*/true);
return RValue::get(Load);
}
@@ -1963,6 +2056,323 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
break;
}
+
+ // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
+ case Builtin::BIread_pipe:
+ case Builtin::BIwrite_pipe: {
+ Value *Arg0 = EmitScalarExpr(E->getArg(0)),
+ *Arg1 = EmitScalarExpr(E->getArg(1));
+
+ // Type of the generic packet parameter.
+ unsigned GenericAS =
+ getContext().getTargetAddressSpace(LangAS::opencl_generic);
+ llvm::Type *I8PTy = llvm::PointerType::get(
+ llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
+
+ // Testing which overloaded version we should generate the call for.
+ if (2U == E->getNumArgs()) {
+ const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
+ : "__write_pipe_2";
+ // Creating a generic function type to be able to call with any builtin or
+ // user defined type.
+ llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy};
+ llvm::FunctionType *FTy = llvm::FunctionType::get(
+ Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
+ Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
+ return RValue::get(Builder.CreateCall(
+ CGM.CreateRuntimeFunction(FTy, Name), {Arg0, BCast}));
+ } else {
+ assert(4 == E->getNumArgs() &&
+ "Illegal number of parameters to pipe function");
+ const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
+ : "__write_pipe_4";
+
+ llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy};
+ Value *Arg2 = EmitScalarExpr(E->getArg(2)),
+ *Arg3 = EmitScalarExpr(E->getArg(3));
+ llvm::FunctionType *FTy = llvm::FunctionType::get(
+ Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
+ Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
+ // We know the third argument is an integer type, but we may need to cast
+ // it to i32.
+ if (Arg2->getType() != Int32Ty)
+ Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
+ return RValue::get(Builder.CreateCall(
+ CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1, Arg2, BCast}));
+ }
+ }
+ // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write
+ // functions
+ case Builtin::BIreserve_read_pipe:
+ case Builtin::BIreserve_write_pipe:
+ case Builtin::BIwork_group_reserve_read_pipe:
+ case Builtin::BIwork_group_reserve_write_pipe:
+ case Builtin::BIsub_group_reserve_read_pipe:
+ case Builtin::BIsub_group_reserve_write_pipe: {
+ // Composing the mangled name for the function.
+ const char *Name;
+ if (BuiltinID == Builtin::BIreserve_read_pipe)
+ Name = "__reserve_read_pipe";
+ else if (BuiltinID == Builtin::BIreserve_write_pipe)
+ Name = "__reserve_write_pipe";
+ else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
+ Name = "__work_group_reserve_read_pipe";
+ else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
+ Name = "__work_group_reserve_write_pipe";
+ else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
+ Name = "__sub_group_reserve_read_pipe";
+ else
+ Name = "__sub_group_reserve_write_pipe";
+
+ Value *Arg0 = EmitScalarExpr(E->getArg(0)),
+ *Arg1 = EmitScalarExpr(E->getArg(1));
+ llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
+
+ // Building the generic function prototype.
+ llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty};
+ llvm::FunctionType *FTy = llvm::FunctionType::get(
+ ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
+ // We know the second argument is an integer type, but we may need to cast
+ // it to i32.
+ if (Arg1->getType() != Int32Ty)
+ Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
+ return RValue::get(
+ Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1}));
+ }
+ // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
+ // functions
+ case Builtin::BIcommit_read_pipe:
+ case Builtin::BIcommit_write_pipe:
+ case Builtin::BIwork_group_commit_read_pipe:
+ case Builtin::BIwork_group_commit_write_pipe:
+ case Builtin::BIsub_group_commit_read_pipe:
+ case Builtin::BIsub_group_commit_write_pipe: {
+ const char *Name;
+ if (BuiltinID == Builtin::BIcommit_read_pipe)
+ Name = "__commit_read_pipe";
+ else if (BuiltinID == Builtin::BIcommit_write_pipe)
+ Name = "__commit_write_pipe";
+ else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
+ Name = "__work_group_commit_read_pipe";
+ else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
+ Name = "__work_group_commit_write_pipe";
+ else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
+ Name = "__sub_group_commit_read_pipe";
+ else
+ Name = "__sub_group_commit_write_pipe";
+
+ Value *Arg0 = EmitScalarExpr(E->getArg(0)),
+ *Arg1 = EmitScalarExpr(E->getArg(1));
+
+ // Building the generic function prototype.
+ llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType()};
+ llvm::FunctionType *FTy =
+ llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
+ llvm::ArrayRef<llvm::Type *>(ArgTys), false);
+
+ return RValue::get(
+ Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1}));
+ }
+ // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
+ case Builtin::BIget_pipe_num_packets:
+ case Builtin::BIget_pipe_max_packets: {
+ const char *Name;
+ if (BuiltinID == Builtin::BIget_pipe_num_packets)
+ Name = "__get_pipe_num_packets";
+ else
+ Name = "__get_pipe_max_packets";
+
+ // Building the generic function prototype.
+ Value *Arg0 = EmitScalarExpr(E->getArg(0));
+ llvm::Type *ArgTys[] = {Arg0->getType()};
+ llvm::FunctionType *FTy = llvm::FunctionType::get(
+ Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
+
+ return RValue::get(
+ Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0}));
+ }
+
+ // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
+ case Builtin::BIto_global:
+ case Builtin::BIto_local:
+ case Builtin::BIto_private: {
+ auto Arg0 = EmitScalarExpr(E->getArg(0));
+ auto NewArgT = llvm::PointerType::get(Int8Ty,
+ CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
+ auto NewRetT = llvm::PointerType::get(Int8Ty,
+ CGM.getContext().getTargetAddressSpace(
+ E->getType()->getPointeeType().getAddressSpace()));
+ auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
+ llvm::Value *NewArg;
+ if (Arg0->getType()->getPointerAddressSpace() !=
+ NewArgT->getPointerAddressSpace())
+ NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
+ else
+ NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
+ auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
+ auto NewCall =
+ Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
+ return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
+ ConvertType(E->getType())));
+ }
+
+ // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
+ // It contains four different overload formats specified in Table 6.13.17.1.
+ case Builtin::BIenqueue_kernel: {
+ StringRef Name; // Generated function call name
+ unsigned NumArgs = E->getNumArgs();
+
+ llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
+ llvm::Type *RangeTy = ConvertType(getContext().OCLNDRangeTy);
+
+ llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
+ llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
+ llvm::Value *Range = EmitScalarExpr(E->getArg(2));
+
+ if (NumArgs == 4) {
+ // The most basic form of the call with parameters:
+ // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
+ Name = "__enqueue_kernel_basic";
+ llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, Int8PtrTy};
+ llvm::FunctionType *FTy = llvm::FunctionType::get(
+ Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false);
+
+ llvm::Value *Block =
+ Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy);
+
+ return RValue::get(Builder.CreateCall(
+ CGM.CreateRuntimeFunction(FTy, Name), {Queue, Flags, Range, Block}));
+ }
+ assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
+
+ // Could have events and/or vaargs.
+ if (E->getArg(3)->getType()->isBlockPointerType()) {
+ // No events passed, but has variadic arguments.
+ Name = "__enqueue_kernel_vaargs";
+ llvm::Value *Block =
+ Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy);
+ // Create a vector of the arguments, as well as a constant value to
+ // express to the runtime the number of variadic arguments.
+ std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block,
+ ConstantInt::get(IntTy, NumArgs - 4)};
+ std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, Int8PtrTy,
+ IntTy};
+
+ // Add the variadics.
+ for (unsigned I = 4; I < NumArgs; ++I) {
+ llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I));
+ unsigned TypeSizeInBytes =
+ getContext()
+ .getTypeSizeInChars(E->getArg(I)->getType())
+ .getQuantity();
+ Args.push_back(TypeSizeInBytes < 4
+ ? Builder.CreateZExt(ArgSize, Int32Ty)
+ : ArgSize);
+ }
+
+ llvm::FunctionType *FTy = llvm::FunctionType::get(
+ Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
+ return RValue::get(
+ Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
+ llvm::ArrayRef<llvm::Value *>(Args)));
+ }
+ // Any calls now have event arguments passed.
+ if (NumArgs >= 7) {
+ llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
+ unsigned AS4 =
+ E->getArg(4)->getType()->isArrayType()
+ ? E->getArg(4)->getType().getAddressSpace()
+ : E->getArg(4)->getType()->getPointeeType().getAddressSpace();
+ llvm::Type *EventPtrAS4Ty =
+ EventTy->getPointerTo(CGM.getContext().getTargetAddressSpace(AS4));
+ unsigned AS5 =
+ E->getArg(5)->getType()->getPointeeType().getAddressSpace();
+ llvm::Type *EventPtrAS5Ty =
+ EventTy->getPointerTo(CGM.getContext().getTargetAddressSpace(AS5));
+
+ llvm::Value *NumEvents = EmitScalarExpr(E->getArg(3));
+ llvm::Value *EventList =
+ E->getArg(4)->getType()->isArrayType()
+ ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
+ : EmitScalarExpr(E->getArg(4));
+ llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
+ llvm::Value *Block =
+ Builder.CreateBitCast(EmitScalarExpr(E->getArg(6)), Int8PtrTy);
+
+ std::vector<llvm::Type *> ArgTys = {
+ QueueTy, Int32Ty, RangeTy, Int32Ty,
+ EventPtrAS4Ty, EventPtrAS5Ty, Int8PtrTy};
+ std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents,
+ EventList, ClkEvent, Block};
+
+ if (NumArgs == 7) {
+ // Has events but no variadics.
+ Name = "__enqueue_kernel_basic_events";
+ llvm::FunctionType *FTy = llvm::FunctionType::get(
+ Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
+ return RValue::get(
+ Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
+ llvm::ArrayRef<llvm::Value *>(Args)));
+ }
+ // Has event info and variadics
+ // Pass the number of variadics to the runtime function too.
+ Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
+ ArgTys.push_back(Int32Ty);
+ Name = "__enqueue_kernel_events_vaargs";
+
+ // Add the variadics.
+ for (unsigned I = 7; I < NumArgs; ++I) {
+ llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I));
+ unsigned TypeSizeInBytes =
+ getContext()
+ .getTypeSizeInChars(E->getArg(I)->getType())
+ .getQuantity();
+ Args.push_back(TypeSizeInBytes < 4
+ ? Builder.CreateZExt(ArgSize, Int32Ty)
+ : ArgSize);
+ }
+ llvm::FunctionType *FTy = llvm::FunctionType::get(
+ Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
+ return RValue::get(
+ Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
+ llvm::ArrayRef<llvm::Value *>(Args)));
+ }
+ }
+ // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
+ // parameter.
+ case Builtin::BIget_kernel_work_group_size: {
+ Value *Arg = EmitScalarExpr(E->getArg(0));
+ Arg = Builder.CreateBitCast(Arg, Int8PtrTy);
+ return RValue::get(
+ Builder.CreateCall(CGM.CreateRuntimeFunction(
+ llvm::FunctionType::get(IntTy, Int8PtrTy, false),
+ "__get_kernel_work_group_size_impl"),
+ Arg));
+ }
+ case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
+ Value *Arg = EmitScalarExpr(E->getArg(0));
+ Arg = Builder.CreateBitCast(Arg, Int8PtrTy);
+ return RValue::get(Builder.CreateCall(
+ CGM.CreateRuntimeFunction(
+ llvm::FunctionType::get(IntTy, Int8PtrTy, false),
+ "__get_kernel_preferred_work_group_multiple_impl"),
+ Arg));
+ }
+ case Builtin::BIprintf:
+ if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice)
+ return EmitCUDADevicePrintfCallExpr(E, ReturnValue);
+ break;
+ case Builtin::BI__builtin_canonicalize:
+ case Builtin::BI__builtin_canonicalizef:
+ case Builtin::BI__builtin_canonicalizel:
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
+
+ case Builtin::BI__builtin_thread_pointer: {
+ if (!getContext().getTargetInfo().isTLSSupported())
+ CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
+ // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
+ break;
+ }
}
// If this is an alias for a lib function (e.g. __builtin_sin), emit
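Rough shape of the OpenCL runtime entry points the new cases above call into, reconstructed from the FunctionType objects built in this hunk. The C parameter types below are a simplification (pipes, reserve IDs, queues, ndranges, and blocks are opaque target types in the real ABI), so treat these declarations as an illustration rather than the actual library interface:

    // Illustrative prototypes only; argument types are simplified to void*/int.
    extern "C" {
    int   __read_pipe_2(void *pipe, void *packet);                  // read_pipe(p, &x)
    int   __write_pipe_4(void *pipe, void *reserve_id, int index, void *packet);
    void *__reserve_read_pipe(void *pipe, int num_packets);         // returns a reserve_id_t
    void  __commit_write_pipe(void *pipe, void *reserve_id);
    int   __get_pipe_num_packets(void *pipe);
    void *__to_global(void *generic_ptr);                           // address-space cast helper
    int   __enqueue_kernel_basic(void *queue, int flags, void *ndrange, void *block);
    int   __get_kernel_work_group_size_impl(void *block);
    }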
@@ -2155,7 +2565,7 @@ static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
}
Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
- unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements();
+ unsigned nElts = V->getType()->getVectorNumElements();
Value* SV = llvm::ConstantVector::getSplat(nElts, C);
return Builder.CreateShuffleVector(V, V, SV, "lane");
}
@@ -3073,14 +3483,13 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
case NEON::BI__builtin_neon_vext_v:
case NEON::BI__builtin_neon_vextq_v: {
int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
- SmallVector<Constant*, 16> Indices;
+ SmallVector<uint32_t, 16> Indices;
for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
- Indices.push_back(ConstantInt::get(Int32Ty, i+CV));
+ Indices.push_back(i+CV);
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Value *SV = llvm::ConstantVector::get(Indices);
- return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext");
+ return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
}
case NEON::BI__builtin_neon_vfma_v:
case NEON::BI__builtin_neon_vfmaq_v: {
@@ -3278,14 +3687,13 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
Value *SV = nullptr;
for (unsigned vi = 0; vi != 2; ++vi) {
- SmallVector<Constant*, 16> Indices;
+ SmallVector<uint32_t, 16> Indices;
for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
- Indices.push_back(Builder.getInt32(i+vi));
- Indices.push_back(Builder.getInt32(i+e+vi));
+ Indices.push_back(i+vi);
+ Indices.push_back(i+e+vi);
}
Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
- SV = llvm::ConstantVector::get(Indices);
- SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
+ SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
SV = Builder.CreateDefaultAlignedStore(SV, Addr);
}
return SV;
@@ -3307,13 +3715,12 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
Value *SV = nullptr;
for (unsigned vi = 0; vi != 2; ++vi) {
- SmallVector<Constant*, 16> Indices;
+ SmallVector<uint32_t, 16> Indices;
for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
- Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
+ Indices.push_back(2*i+vi);
Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
- SV = llvm::ConstantVector::get(Indices);
- SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
+ SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
SV = Builder.CreateDefaultAlignedStore(SV, Addr);
}
return SV;
@@ -3326,14 +3733,13 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
Value *SV = nullptr;
for (unsigned vi = 0; vi != 2; ++vi) {
- SmallVector<Constant*, 16> Indices;
+ SmallVector<uint32_t, 16> Indices;
for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
- Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
- Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
+ Indices.push_back((i + vi*e) >> 1);
+ Indices.push_back(((i + vi*e) >> 1)+e);
}
Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
- SV = llvm::ConstantVector::get(Indices);
- SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
+ SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
SV = Builder.CreateDefaultAlignedStore(SV, Addr);
}
return SV;
@@ -3381,19 +3787,19 @@ static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
if (ExtOp)
TblOps.push_back(ExtOp);
- // Build a vector containing sequential number like (0, 1, 2, ..., 15)
- SmallVector<Constant*, 16> Indices;
+ // Build a vector containing sequential number like (0, 1, 2, ..., 15)
+ SmallVector<uint32_t, 16> Indices;
llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
- Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i));
- Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i+1));
+ Indices.push_back(2*i);
+ Indices.push_back(2*i+1);
}
- Value *SV = llvm::ConstantVector::get(Indices);
int PairPos = 0, End = Ops.size() - 1;
while (PairPos < End) {
TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
- Ops[PairPos+1], SV, Name));
+ Ops[PairPos+1], Indices,
+ Name));
PairPos += 2;
}
@@ -3402,13 +3808,13 @@ static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
if (PairPos == End) {
Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
- ZeroTbl, SV, Name));
+ ZeroTbl, Indices, Name));
}
Function *TblF;
TblOps.push_back(IndexOp);
TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
-
+
return CGF.EmitNeonCall(TblF, TblOps, Name);
}
@@ -3452,7 +3858,9 @@ Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
const CallExpr *E,
llvm::Type *RegisterType,
- llvm::Type *ValueType, bool IsRead) {
+ llvm::Type *ValueType,
+ bool IsRead,
+ StringRef SysReg = "") {
// write and register intrinsics only support 32 and 64 bit operations.
assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
&& "Unsupported size for register.");
@@ -3461,8 +3869,10 @@ static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
CodeGen::CodeGenModule &CGM = CGF.CGM;
LLVMContext &Context = CGM.getLLVMContext();
- const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
- StringRef SysReg = cast<StringLiteral>(SysRegStrExpr)->getString();
+ if (SysReg.empty()) {
+ const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
+ SysReg = cast<StringLiteral>(SysRegStrExpr)->getString();
+ }
llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
@@ -3602,6 +4012,74 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
}
+ if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
+ BuiltinID == ARM::BI__builtin_arm_mcrr2) {
+ Function *F;
+
+ switch (BuiltinID) {
+ default: llvm_unreachable("unexpected builtin");
+ case ARM::BI__builtin_arm_mcrr:
+ F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
+ break;
+ case ARM::BI__builtin_arm_mcrr2:
+ F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
+ break;
+ }
+
+ // MCRR{2} instruction has 5 operands but
+ // the intrinsic has 4 because Rt and Rt2
+ // are represented as a single unsigned 64
+ // bit integer in the intrinsic definition
+ // but internally it's represented as 2 32
+ // bit integers.
+
+ Value *Coproc = EmitScalarExpr(E->getArg(0));
+ Value *Opc1 = EmitScalarExpr(E->getArg(1));
+ Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
+ Value *CRm = EmitScalarExpr(E->getArg(3));
+
+ Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
+ Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
+ Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
+ Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
+
+ return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
+ }
+
+ if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
+ BuiltinID == ARM::BI__builtin_arm_mrrc2) {
+ Function *F;
+
+ switch (BuiltinID) {
+ default: llvm_unreachable("unexpected builtin");
+ case ARM::BI__builtin_arm_mrrc:
+ F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
+ break;
+ case ARM::BI__builtin_arm_mrrc2:
+ F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
+ break;
+ }
+
+ Value *Coproc = EmitScalarExpr(E->getArg(0));
+ Value *Opc1 = EmitScalarExpr(E->getArg(1));
+ Value *CRm = EmitScalarExpr(E->getArg(2));
+ Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
+
+ // Returns an unsigned 64 bit integer, represented
+ // as two 32 bit integers.
+
+ Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
+ Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
+ Rt = Builder.CreateZExt(Rt, Int64Ty);
+ Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
+
+ Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
+ RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
+ RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
+
+ return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
+ }
+
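The packing that the two comment blocks above describe, shown on plain integers (a sketch, not part of the patch): MCRR receives Rt/Rt2 as the two halves of a single 64-bit builtin argument, and MRRC's pair of 32-bit results is reassembled into one 64-bit value by zext, shift, and OR.

    #include <cassert>
    #include <cstdint>

    int main() {
      // MCRR direction: split the single 64-bit builtin argument into Rt/Rt2.
      uint64_t rt_and_rt2 = 0x1122334455667788ULL;
      uint32_t lo = static_cast<uint32_t>(rt_and_rt2);        // TruncOrBitCast
      uint32_t hi = static_cast<uint32_t>(rt_and_rt2 >> 32);  // LShr 32, then trunc
      assert(lo == 0x55667788u && hi == 0x11223344u);

      // MRRC direction: zext both halves, shift one up, and OR them together.
      uint64_t rebuilt = (static_cast<uint64_t>(hi) << 32) | lo;
      assert(rebuilt == rt_and_rt2);
      return 0;
    }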
if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
((BuiltinID == ARM::BI__builtin_arm_ldrex ||
BuiltinID == ARM::BI__builtin_arm_ldaex) &&
@@ -3914,7 +4392,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
// the first argument, but the LLVM intrinsic expects it as the third one.
case ARM::BI_MoveToCoprocessor:
case ARM::BI_MoveToCoprocessor2: {
- Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
+ Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
Ops[3], Ops[4], Ops[5]});
@@ -4478,11 +4956,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(F);
}
- if (BuiltinID == AArch64::BI__builtin_thread_pointer) {
- Function *F = CGM.getIntrinsic(Intrinsic::aarch64_thread_pointer);
- return Builder.CreateCall(F);
- }
-
// CRC32
Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
switch (BuiltinID) {
@@ -5150,22 +5623,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
}
- case NEON::BI__builtin_neon_vfms_v:
- case NEON::BI__builtin_neon_vfmsq_v: { // Only used for FP types
- // FIXME: probably remove when we no longer support aarch64_simd.h
- // (arm_neon.h delegates to vfma).
-
- // The ARM builtins (and instructions) have the addend as the first
- // operand, but the 'fma' intrinsics have it last. Swap it around here.
- Value *Subtrahend = Ops[0];
- Value *Multiplicand = Ops[2];
- Ops[0] = Multiplicand;
- Ops[2] = Subtrahend;
- Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
- Ops[1] = Builder.CreateFNeg(Ops[1]);
- Int = Intrinsic::fma;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmls");
- }
case NEON::BI__builtin_neon_vmull_v:
// FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
@@ -5988,14 +6445,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Value *SV = nullptr;
for (unsigned vi = 0; vi != 2; ++vi) {
- SmallVector<Constant*, 16> Indices;
+ SmallVector<uint32_t, 16> Indices;
for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
- Indices.push_back(ConstantInt::get(Int32Ty, i+vi));
- Indices.push_back(ConstantInt::get(Int32Ty, i+e+vi));
+ Indices.push_back(i+vi);
+ Indices.push_back(i+e+vi);
}
Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
- SV = llvm::ConstantVector::get(Indices);
- SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
+ SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
SV = Builder.CreateDefaultAlignedStore(SV, Addr);
}
return SV;
@@ -6008,13 +6464,12 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Value *SV = nullptr;
for (unsigned vi = 0; vi != 2; ++vi) {
- SmallVector<Constant*, 16> Indices;
+ SmallVector<uint32_t, 16> Indices;
for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
- Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
+ Indices.push_back(2*i+vi);
Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
- SV = llvm::ConstantVector::get(Indices);
- SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
+ SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
SV = Builder.CreateDefaultAlignedStore(SV, Addr);
}
return SV;
@@ -6027,14 +6482,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Value *SV = nullptr;
for (unsigned vi = 0; vi != 2; ++vi) {
- SmallVector<Constant*, 16> Indices;
+ SmallVector<uint32_t, 16> Indices;
for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
- Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
- Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
+ Indices.push_back((i + vi*e) >> 1);
+ Indices.push_back(((i + vi*e) >> 1)+e);
}
Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
- SV = llvm::ConstantVector::get(Indices);
- SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
+ SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
SV = Builder.CreateDefaultAlignedStore(SV, Addr);
}
return SV;
@@ -6110,6 +6564,118 @@ BuildVector(ArrayRef<llvm::Value*> Ops) {
return Result;
}
+// Convert the mask from an integer type to a vector of i1.
+static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
+ unsigned NumElts) {
+
+ llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
+ cast<IntegerType>(Mask->getType())->getBitWidth());
+ Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
+
+ // If we have less than 8 elements, then the starting mask was an i8 and
+ // we need to extract down to the right number of elements.
+ if (NumElts < 8) {
+ uint32_t Indices[4];
+ for (unsigned i = 0; i != NumElts; ++i)
+ Indices[i] = i;
+ MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
+ makeArrayRef(Indices, NumElts),
+ "extract");
+ }
+ return MaskVec;
+}
+
+static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
+ SmallVectorImpl<Value *> &Ops,
+ unsigned Align) {
+ // Cast the pointer to right type.
+ Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
+ llvm::PointerType::getUnqual(Ops[1]->getType()));
+
+ // If the mask is all ones just emit a regular store.
+ if (const auto *C = dyn_cast<Constant>(Ops[2]))
+ if (C->isAllOnesValue())
+ return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align);
+
+ Value *MaskVec = getMaskVecValue(CGF, Ops[2],
+ Ops[1]->getType()->getVectorNumElements());
+
+ return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec);
+}
+
+static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
+ SmallVectorImpl<Value *> &Ops, unsigned Align) {
+ // Cast the pointer to right type.
+ Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
+ llvm::PointerType::getUnqual(Ops[1]->getType()));
+
+ // If the mask is all ones just emit a regular store.
+ if (const auto *C = dyn_cast<Constant>(Ops[2]))
+ if (C->isAllOnesValue())
+ return CGF.Builder.CreateAlignedLoad(Ops[0], Align);
+
+ Value *MaskVec = getMaskVecValue(CGF, Ops[2],
+ Ops[1]->getType()->getVectorNumElements());
+
+ return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
+}
+
+static Value *EmitX86Select(CodeGenFunction &CGF,
+ Value *Mask, Value *Op0, Value *Op1) {
+
+ // If the mask is all ones just return first argument.
+ if (const auto *C = dyn_cast<Constant>(Mask))
+ if (C->isAllOnesValue())
+ return Op0;
+
+ Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());
+
+ return CGF.Builder.CreateSelect(Mask, Op0, Op1);
+}
+
+static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
+ bool Signed, SmallVectorImpl<Value *> &Ops) {
+ unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ Value *Cmp;
+
+ if (CC == 3) {
+ Cmp = Constant::getNullValue(
+ llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
+ } else if (CC == 7) {
+ Cmp = Constant::getAllOnesValue(
+ llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
+ } else {
+ ICmpInst::Predicate Pred;
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case 0: Pred = ICmpInst::ICMP_EQ; break;
+ case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
+ case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
+ case 4: Pred = ICmpInst::ICMP_NE; break;
+ case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
+ case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
+ }
+ Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
+ }
+
+ const auto *C = dyn_cast<Constant>(Ops.back());
+ if (!C || !C->isAllOnesValue())
+ Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts));
+
+ if (NumElts < 8) {
+ uint32_t Indices[8];
+ for (unsigned i = 0; i != NumElts; ++i)
+ Indices[i] = i;
+ for (unsigned i = NumElts; i != 8; ++i)
+ Indices[i] = i % NumElts + NumElts;
+ Cmp = CGF.Builder.CreateShuffleVector(
+ Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
+ }
+ return CGF.Builder.CreateBitCast(Cmp,
+ IntegerType::get(CGF.getLLVMContext(),
+ std::max(NumElts, 8U)));
+}
+
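A scalar model of the mask plumbing above (a sketch, not part of the patch): the incoming integer mask is reinterpreted as one bit per vector element, ANDed with the compare result unless it is all-ones, and results narrower than eight elements are padded back out with zero bits before being returned as an i8.

    #include <cstdint>
    #include <cstdio>

    int main() {
      const unsigned NumElts = 4;    // e.g. a 128-bit vector of 32-bit elements
      uint8_t in_mask = 0x0B;        // caller's __mmask8; only the low 4 bits matter
      uint8_t cmp     = 0x07;        // pretend elements 0..2 compared true

      uint8_t k = cmp & in_mask;     // CreateAnd(Cmp, MaskVec)
      k &= (1u << NumElts) - 1;      // positions NumElts..7 are filled from a zero vector
      std::printf("result mask = 0x%02X\n", k);   // prints 0x03
      return 0;
    }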
Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
if (BuiltinID == X86::BI__builtin_ms_va_start ||
@@ -6160,6 +6726,31 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
}
+ // These exist so that the builtin that takes an immediate can be bounds
+ // checked by clang to avoid passing bad immediates to the backend. Since
+ // AVX has a larger immediate than SSE we would need separate builtins to
+ // do the different bounds checking. Rather than create a clang specific
+ // SSE only builtin, this implements eight separate builtins to match gcc
+ // implementation.
+ auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
+ Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
+ llvm::Function *F = CGM.getIntrinsic(ID);
+ return Builder.CreateCall(F, Ops);
+ };
+
+ // For the vector forms of FP comparisons, translate the builtins directly to
+ // IR.
+ // TODO: The builtins could be removed if the SSE header files used vector
+ // extension comparisons directly (vector ordered/unordered may need
+ // additional support via __builtin_isnan()).
+ auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
+ Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
+ llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
+ llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
+ Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
+ return Builder.CreateBitCast(Sext, FPVecTy);
+ };
+
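Of the two lambdas above, getCmpIntrinsicCall just appends the fixed immediate and calls the matching SSE/AVX cmp intrinsic, while getVectorFCmpIR emits plain IR. A scalar model of what that IR does per lane (a sketch, not part of the patch): the i1 compare result is sign-extended to the lane's integer width and bitcast back, giving the all-ones / all-zero FP mask the SSE cmp*ps/pd forms define.

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      float a = 1.0f, b = 2.0f;
      int32_t lane = -static_cast<int32_t>(a < b);   // sext i1 -> i32: 0xFFFFFFFF or 0
      float mask;
      std::memcpy(&mask, &lane, sizeof mask);        // bitcast back to the FP lane type
      std::printf("lane bits = 0x%08X\n", static_cast<uint32_t>(lane));  // 0xFFFFFFFF
      (void)mask;  // all-ones bit pattern, i.e. a NaN when viewed as float
      return 0;
    }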
switch (BuiltinID) {
default: return nullptr;
case X86::BI__builtin_cpu_supports: {
@@ -6188,6 +6779,16 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
AVX512F,
BMI,
BMI2,
+ AES,
+ PCLMUL,
+ AVX512VL,
+ AVX512BW,
+ AVX512DQ,
+ AVX512CD,
+ AVX512ER,
+ AVX512PF,
+ AVX512VBMI,
+ AVX512IFMA,
MAX
};
@@ -6198,6 +6799,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
.Case("sse", X86Features::SSE)
.Case("sse2", X86Features::SSE2)
.Case("sse3", X86Features::SSE3)
+ .Case("ssse3", X86Features::SSSE3)
.Case("sse4.1", X86Features::SSE4_1)
.Case("sse4.2", X86Features::SSE4_2)
.Case("avx", X86Features::AVX)
@@ -6209,6 +6811,16 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
.Case("avx512f", X86Features::AVX512F)
.Case("bmi", X86Features::BMI)
.Case("bmi2", X86Features::BMI2)
+ .Case("aes", X86Features::AES)
+ .Case("pclmul", X86Features::PCLMUL)
+ .Case("avx512vl", X86Features::AVX512VL)
+ .Case("avx512bw", X86Features::AVX512BW)
+ .Case("avx512dq", X86Features::AVX512DQ)
+ .Case("avx512cd", X86Features::AVX512CD)
+ .Case("avx512er", X86Features::AVX512ER)
+ .Case("avx512pf", X86Features::AVX512PF)
+ .Case("avx512vbmi", X86Features::AVX512VBMI)
+ .Case("avx512ifma", X86Features::AVX512IFMA)
.Default(X86Features::MAX);
assert(Feature != X86Features::MAX && "Invalid feature!");
@@ -6237,7 +6849,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
// Check the value of the bit corresponding to the feature requested.
Value *Bitset = Builder.CreateAnd(
- Features, llvm::ConstantInt::get(Int32Ty, 1 << Feature));
+ Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature));
return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
}
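A side note on the 1ULL change above (an inference, not stated in the patch): ConstantInt::get takes a uint64_t, and writing the shift with an unsigned 64-bit literal keeps it well defined even if the feature enum eventually grows past bit 31, where a plain int shift would run into the sign bit.

    #include <cstdint>
    #include <cstdio>

    int main() {
      unsigned feature = 31;               // hypothetical future feature index
      // uint64_t bad = 1 << feature;      // int shift into the sign bit
      uint64_t ok = 1ULL << feature;       // stays well defined up to bit 63
      std::printf("bit %u -> 0x%016llX\n", feature,
                  static_cast<unsigned long long>(ok));
      return 0;
    }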
case X86::BI_mm_prefetch: {
@@ -6312,6 +6924,78 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Ops.push_back(Mlo);
return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
}
+ case X86::BI__builtin_ia32_storedqudi128_mask:
+ case X86::BI__builtin_ia32_storedqusi128_mask:
+ case X86::BI__builtin_ia32_storedquhi128_mask:
+ case X86::BI__builtin_ia32_storedquqi128_mask:
+ case X86::BI__builtin_ia32_storeupd128_mask:
+ case X86::BI__builtin_ia32_storeups128_mask:
+ case X86::BI__builtin_ia32_storedqudi256_mask:
+ case X86::BI__builtin_ia32_storedqusi256_mask:
+ case X86::BI__builtin_ia32_storedquhi256_mask:
+ case X86::BI__builtin_ia32_storedquqi256_mask:
+ case X86::BI__builtin_ia32_storeupd256_mask:
+ case X86::BI__builtin_ia32_storeups256_mask:
+ case X86::BI__builtin_ia32_storedqudi512_mask:
+ case X86::BI__builtin_ia32_storedqusi512_mask:
+ case X86::BI__builtin_ia32_storedquhi512_mask:
+ case X86::BI__builtin_ia32_storedquqi512_mask:
+ case X86::BI__builtin_ia32_storeupd512_mask:
+ case X86::BI__builtin_ia32_storeups512_mask:
+ return EmitX86MaskedStore(*this, Ops, 1);
+
+ case X86::BI__builtin_ia32_movdqa32store128_mask:
+ case X86::BI__builtin_ia32_movdqa64store128_mask:
+ case X86::BI__builtin_ia32_storeaps128_mask:
+ case X86::BI__builtin_ia32_storeapd128_mask:
+ case X86::BI__builtin_ia32_movdqa32store256_mask:
+ case X86::BI__builtin_ia32_movdqa64store256_mask:
+ case X86::BI__builtin_ia32_storeaps256_mask:
+ case X86::BI__builtin_ia32_storeapd256_mask:
+ case X86::BI__builtin_ia32_movdqa32store512_mask:
+ case X86::BI__builtin_ia32_movdqa64store512_mask:
+ case X86::BI__builtin_ia32_storeaps512_mask:
+ case X86::BI__builtin_ia32_storeapd512_mask: {
+ unsigned Align =
+ getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
+ return EmitX86MaskedStore(*this, Ops, Align);
+ }
+ case X86::BI__builtin_ia32_loadups128_mask:
+ case X86::BI__builtin_ia32_loadups256_mask:
+ case X86::BI__builtin_ia32_loadups512_mask:
+ case X86::BI__builtin_ia32_loadupd128_mask:
+ case X86::BI__builtin_ia32_loadupd256_mask:
+ case X86::BI__builtin_ia32_loadupd512_mask:
+ case X86::BI__builtin_ia32_loaddquqi128_mask:
+ case X86::BI__builtin_ia32_loaddquqi256_mask:
+ case X86::BI__builtin_ia32_loaddquqi512_mask:
+ case X86::BI__builtin_ia32_loaddquhi128_mask:
+ case X86::BI__builtin_ia32_loaddquhi256_mask:
+ case X86::BI__builtin_ia32_loaddquhi512_mask:
+ case X86::BI__builtin_ia32_loaddqusi128_mask:
+ case X86::BI__builtin_ia32_loaddqusi256_mask:
+ case X86::BI__builtin_ia32_loaddqusi512_mask:
+ case X86::BI__builtin_ia32_loaddqudi128_mask:
+ case X86::BI__builtin_ia32_loaddqudi256_mask:
+ case X86::BI__builtin_ia32_loaddqudi512_mask:
+ return EmitX86MaskedLoad(*this, Ops, 1);
+
+ case X86::BI__builtin_ia32_loadaps128_mask:
+ case X86::BI__builtin_ia32_loadaps256_mask:
+ case X86::BI__builtin_ia32_loadaps512_mask:
+ case X86::BI__builtin_ia32_loadapd128_mask:
+ case X86::BI__builtin_ia32_loadapd256_mask:
+ case X86::BI__builtin_ia32_loadapd512_mask:
+ case X86::BI__builtin_ia32_movdqa32load128_mask:
+ case X86::BI__builtin_ia32_movdqa32load256_mask:
+ case X86::BI__builtin_ia32_movdqa32load512_mask:
+ case X86::BI__builtin_ia32_movdqa64load128_mask:
+ case X86::BI__builtin_ia32_movdqa64load256_mask:
+ case X86::BI__builtin_ia32_movdqa64load512_mask: {
+ unsigned Align =
+ getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
+ return EmitX86MaskedLoad(*this, Ops, Align);
+ }
case X86::BI__builtin_ia32_storehps:
case X86::BI__builtin_ia32_storelps: {
llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
@@ -6330,103 +7014,50 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
}
case X86::BI__builtin_ia32_palignr128:
- case X86::BI__builtin_ia32_palignr256: {
+ case X86::BI__builtin_ia32_palignr256:
+ case X86::BI__builtin_ia32_palignr128_mask:
+ case X86::BI__builtin_ia32_palignr256_mask:
+ case X86::BI__builtin_ia32_palignr512_mask: {
unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
- unsigned NumElts =
- cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
+ unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
assert(NumElts % 16 == 0);
- unsigned NumLanes = NumElts / 16;
- unsigned NumLaneElts = NumElts / NumLanes;
// If palignr is shifting the pair of vectors more than the size of two
// lanes, emit zero.
- if (ShiftVal >= (2 * NumLaneElts))
+ if (ShiftVal >= 32)
return llvm::Constant::getNullValue(ConvertType(E->getType()));
// If palignr is shifting the pair of input vectors more than one lane,
// but less than two lanes, convert to shifting in zeroes.
- if (ShiftVal > NumLaneElts) {
- ShiftVal -= NumLaneElts;
+ if (ShiftVal > 16) {
+ ShiftVal -= 16;
Ops[1] = Ops[0];
Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
}
- uint32_t Indices[32];
+ uint32_t Indices[64];
    // palignr operates on 128-bit lanes, so build the shuffle one 16-byte lane at a time.
- for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
- for (unsigned i = 0; i != NumLaneElts; ++i) {
+ for (unsigned l = 0; l != NumElts; l += 16) {
+ for (unsigned i = 0; i != 16; ++i) {
unsigned Idx = ShiftVal + i;
- if (Idx >= NumLaneElts)
- Idx += NumElts - NumLaneElts; // End of lane, switch operand.
+ if (Idx >= 16)
+ Idx += NumElts - 16; // End of lane, switch operand.
Indices[l + i] = Idx + l;
}
}
- Value *SV = llvm::ConstantDataVector::get(getLLVMContext(),
- makeArrayRef(Indices, NumElts));
- return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
- }
- case X86::BI__builtin_ia32_pslldqi256: {
- // Shift value is in bits so divide by 8.
- unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3;
+ Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0],
+ makeArrayRef(Indices, NumElts),
+ "palignr");
- // If pslldq is shifting the vector more than 15 bytes, emit zero.
- if (shiftVal >= 16)
- return llvm::Constant::getNullValue(ConvertType(E->getType()));
-
- uint32_t Indices[32];
- // 256-bit pslldq operates on 128-bit lanes so we need to handle that
- for (unsigned l = 0; l != 32; l += 16) {
- for (unsigned i = 0; i != 16; ++i) {
- unsigned Idx = 32 + i - shiftVal;
- if (Idx < 32) Idx -= 16; // end of lane, switch operand.
- Indices[l + i] = Idx + l;
- }
- }
-
- llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32);
- Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
- Value *Zero = llvm::Constant::getNullValue(VecTy);
+ // If this isn't a masked builtin, just return the align operation.
+ if (Ops.size() == 3)
+ return Align;
- Value *SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
- SV = Builder.CreateShuffleVector(Zero, Ops[0], SV, "pslldq");
- llvm::Type *ResultType = ConvertType(E->getType());
- return Builder.CreateBitCast(SV, ResultType, "cast");
+ return EmitX86Select(*this, Ops[4], Align, Ops[3]);
}
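The index computation above is easy to check in isolation. The copy below assumes ShiftVal has already been reduced by the two early-outs (>= 32 returns zero, > 16 shifts in zeroes), exactly as in the case itself.

    #include <cstdint>
    #include <vector>

    // Per-128-bit-lane shuffle indices for palignr, mirroring the loop above.
    std::vector<uint32_t> palignrIndices(unsigned ShiftVal, unsigned NumElts) {
      std::vector<uint32_t> Indices(NumElts);
      for (unsigned L = 0; L != NumElts; L += 16) {
        for (unsigned I = 0; I != 16; ++I) {
          unsigned Idx = ShiftVal + I;
          if (Idx >= 16)
            Idx += NumElts - 16; // End of lane, switch operand.
          Indices[L + I] = Idx + L;
        }
      }
      return Indices;
    }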
- case X86::BI__builtin_ia32_psrldqi256: {
- // Shift value is in bits so divide by 8.
- unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3;
-
- // If psrldq is shifting the vector more than 15 bytes, emit zero.
- if (shiftVal >= 16)
- return llvm::Constant::getNullValue(ConvertType(E->getType()));
- uint32_t Indices[32];
- // 256-bit psrldq operates on 128-bit lanes so we need to handle that
- for (unsigned l = 0; l != 32; l += 16) {
- for (unsigned i = 0; i != 16; ++i) {
- unsigned Idx = i + shiftVal;
- if (Idx >= 16) Idx += 16; // end of lane, switch operand.
- Indices[l + i] = Idx + l;
- }
- }
-
- llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32);
- Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
- Value *Zero = llvm::Constant::getNullValue(VecTy);
-
- Value *SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
- SV = Builder.CreateShuffleVector(Ops[0], Zero, SV, "psrldq");
- llvm::Type *ResultType = ConvertType(E->getType());
- return Builder.CreateBitCast(SV, ResultType, "cast");
- }
- case X86::BI__builtin_ia32_movntps:
- case X86::BI__builtin_ia32_movntps256:
- case X86::BI__builtin_ia32_movntpd:
- case X86::BI__builtin_ia32_movntpd256:
- case X86::BI__builtin_ia32_movntdq:
- case X86::BI__builtin_ia32_movntdq256:
case X86::BI__builtin_ia32_movnti:
case X86::BI__builtin_ia32_movnti64: {
llvm::MDNode *Node = llvm::MDNode::get(
@@ -6439,17 +7070,156 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
StoreInst *SI = Builder.CreateDefaultAlignedStore(Ops[1], BC);
SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
- // If the operand is an integer, we can't assume alignment. Otherwise,
- // assume natural alignment.
- QualType ArgTy = E->getArg(1)->getType();
- unsigned Align;
- if (ArgTy->isIntegerType())
- Align = 1;
- else
- Align = getContext().getTypeSizeInChars(ArgTy).getQuantity();
- SI->setAlignment(Align);
+ // No alignment for scalar intrinsic store.
+ SI->setAlignment(1);
+ return SI;
+ }
+ case X86::BI__builtin_ia32_movntsd:
+ case X86::BI__builtin_ia32_movntss: {
+ llvm::MDNode *Node = llvm::MDNode::get(
+ getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
+
+ // Extract the 0'th element of the source vector.
+ Value *Scl = Builder.CreateExtractElement(Ops[1], (uint64_t)0, "extract");
+
+ // Convert the type of the pointer to a pointer to the stored type.
+ Value *BC = Builder.CreateBitCast(Ops[0],
+ llvm::PointerType::getUnqual(Scl->getType()),
+ "cast");
+
+ // Unaligned nontemporal store of the scalar value.
+ StoreInst *SI = Builder.CreateDefaultAlignedStore(Scl, BC);
+ SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
+ SI->setAlignment(1);
return SI;
}
+
+ case X86::BI__builtin_ia32_selectb_128:
+ case X86::BI__builtin_ia32_selectb_256:
+ case X86::BI__builtin_ia32_selectb_512:
+ case X86::BI__builtin_ia32_selectw_128:
+ case X86::BI__builtin_ia32_selectw_256:
+ case X86::BI__builtin_ia32_selectw_512:
+ case X86::BI__builtin_ia32_selectd_128:
+ case X86::BI__builtin_ia32_selectd_256:
+ case X86::BI__builtin_ia32_selectd_512:
+ case X86::BI__builtin_ia32_selectq_128:
+ case X86::BI__builtin_ia32_selectq_256:
+ case X86::BI__builtin_ia32_selectq_512:
+ case X86::BI__builtin_ia32_selectps_128:
+ case X86::BI__builtin_ia32_selectps_256:
+ case X86::BI__builtin_ia32_selectps_512:
+ case X86::BI__builtin_ia32_selectpd_128:
+ case X86::BI__builtin_ia32_selectpd_256:
+ case X86::BI__builtin_ia32_selectpd_512:
+ return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
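A minimal sketch of how a helper along the lines of EmitX86Select can be written against the IRBuilder, assuming an integer k-mask whose bit width equals the lane count; the actual helper is defined elsewhere in this file and may differ (for example for masks wider than the lane count).

    // Sketch only: reinterpret the k-mask as <N x i1> and emit a vector select.
    static llvm::Value *emitSelectSketch(CodeGenFunction &CGF, llvm::Value *Mask,
                                         llvm::Value *Op0, llvm::Value *Op1) {
      unsigned NumElts = Op0->getType()->getVectorNumElements();
      llvm::Type *MaskTy =
          llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts);
      Mask = CGF.Builder.CreateBitCast(Mask, MaskTy, "maskcast");
      return CGF.Builder.CreateSelect(Mask, Op0, Op1, "select");
    }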
+ case X86::BI__builtin_ia32_pcmpeqb128_mask:
+ case X86::BI__builtin_ia32_pcmpeqb256_mask:
+ case X86::BI__builtin_ia32_pcmpeqb512_mask:
+ case X86::BI__builtin_ia32_pcmpeqw128_mask:
+ case X86::BI__builtin_ia32_pcmpeqw256_mask:
+ case X86::BI__builtin_ia32_pcmpeqw512_mask:
+ case X86::BI__builtin_ia32_pcmpeqd128_mask:
+ case X86::BI__builtin_ia32_pcmpeqd256_mask:
+ case X86::BI__builtin_ia32_pcmpeqd512_mask:
+ case X86::BI__builtin_ia32_pcmpeqq128_mask:
+ case X86::BI__builtin_ia32_pcmpeqq256_mask:
+ case X86::BI__builtin_ia32_pcmpeqq512_mask:
+ return EmitX86MaskedCompare(*this, 0, false, Ops);
+ case X86::BI__builtin_ia32_pcmpgtb128_mask:
+ case X86::BI__builtin_ia32_pcmpgtb256_mask:
+ case X86::BI__builtin_ia32_pcmpgtb512_mask:
+ case X86::BI__builtin_ia32_pcmpgtw128_mask:
+ case X86::BI__builtin_ia32_pcmpgtw256_mask:
+ case X86::BI__builtin_ia32_pcmpgtw512_mask:
+ case X86::BI__builtin_ia32_pcmpgtd128_mask:
+ case X86::BI__builtin_ia32_pcmpgtd256_mask:
+ case X86::BI__builtin_ia32_pcmpgtd512_mask:
+ case X86::BI__builtin_ia32_pcmpgtq128_mask:
+ case X86::BI__builtin_ia32_pcmpgtq256_mask:
+ case X86::BI__builtin_ia32_pcmpgtq512_mask:
+ return EmitX86MaskedCompare(*this, 6, true, Ops);
+ case X86::BI__builtin_ia32_cmpb128_mask:
+ case X86::BI__builtin_ia32_cmpb256_mask:
+ case X86::BI__builtin_ia32_cmpb512_mask:
+ case X86::BI__builtin_ia32_cmpw128_mask:
+ case X86::BI__builtin_ia32_cmpw256_mask:
+ case X86::BI__builtin_ia32_cmpw512_mask:
+ case X86::BI__builtin_ia32_cmpd128_mask:
+ case X86::BI__builtin_ia32_cmpd256_mask:
+ case X86::BI__builtin_ia32_cmpd512_mask:
+ case X86::BI__builtin_ia32_cmpq128_mask:
+ case X86::BI__builtin_ia32_cmpq256_mask:
+ case X86::BI__builtin_ia32_cmpq512_mask: {
+ unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
+ return EmitX86MaskedCompare(*this, CC, true, Ops);
+ }
+ case X86::BI__builtin_ia32_ucmpb128_mask:
+ case X86::BI__builtin_ia32_ucmpb256_mask:
+ case X86::BI__builtin_ia32_ucmpb512_mask:
+ case X86::BI__builtin_ia32_ucmpw128_mask:
+ case X86::BI__builtin_ia32_ucmpw256_mask:
+ case X86::BI__builtin_ia32_ucmpw512_mask:
+ case X86::BI__builtin_ia32_ucmpd128_mask:
+ case X86::BI__builtin_ia32_ucmpd256_mask:
+ case X86::BI__builtin_ia32_ucmpd512_mask:
+ case X86::BI__builtin_ia32_ucmpq128_mask:
+ case X86::BI__builtin_ia32_ucmpq256_mask:
+ case X86::BI__builtin_ia32_ucmpq512_mask: {
+ unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
+ return EmitX86MaskedCompare(*this, CC, false, Ops);
+ }
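The *_mask compare builtins produce a k-mask rather than a vector: bit I reflects the comparison of lane I. The pcmpeq*/pcmpgt* cases fix the condition code to 0 (equal) and 6 (greater-than), while cmp*/ucmp* take it from the immediate, with the boolean argument selecting signed versus unsigned ordering. An element-wise model of the equality form, assuming 32-bit lanes:

    #include <cstddef>
    #include <cstdint>

    // Bit I of the result is set when lane I of A equals lane I of B.
    uint16_t maskedCmpEq32(const int32_t *A, const int32_t *B, size_t NumElts) {
      uint16_t Mask = 0;
      for (size_t I = 0; I != NumElts; ++I)
        if (A[I] == B[I])
          Mask |= uint16_t(1u << I);
      return Mask;
    }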
+
+ case X86::BI__builtin_ia32_vplzcntd_128_mask:
+ case X86::BI__builtin_ia32_vplzcntd_256_mask:
+ case X86::BI__builtin_ia32_vplzcntd_512_mask:
+ case X86::BI__builtin_ia32_vplzcntq_128_mask:
+ case X86::BI__builtin_ia32_vplzcntq_256_mask:
+ case X86::BI__builtin_ia32_vplzcntq_512_mask: {
+ Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
+ return EmitX86Select(*this, Ops[2],
+ Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}),
+ Ops[1]);
+ }
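The second operand of llvm.ctlz is the is_zero_undef flag; passing Builder.getInt1(false) requests a fully defined result, so a zero input yields the bit width, matching vplzcnt semantics before the result is merged through the mask. A scalar model of that defined-at-zero behaviour:

    #include <cstdint>

    // Leading-zero count with a defined result for zero input (32 when X == 0).
    unsigned lzcnt32(uint32_t X) {
      unsigned N = 0;
      for (uint32_t Bit = 1u << 31; Bit != 0 && !(X & Bit); Bit >>= 1)
        ++N;
      return N;
    }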
+
+ // TODO: Handle 64/512-bit vector widths of min/max.
+ case X86::BI__builtin_ia32_pmaxsb128:
+ case X86::BI__builtin_ia32_pmaxsw128:
+ case X86::BI__builtin_ia32_pmaxsd128:
+ case X86::BI__builtin_ia32_pmaxsb256:
+ case X86::BI__builtin_ia32_pmaxsw256:
+ case X86::BI__builtin_ia32_pmaxsd256: {
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Ops[1]);
+ return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
+ }
+ case X86::BI__builtin_ia32_pmaxub128:
+ case X86::BI__builtin_ia32_pmaxuw128:
+ case X86::BI__builtin_ia32_pmaxud128:
+ case X86::BI__builtin_ia32_pmaxub256:
+ case X86::BI__builtin_ia32_pmaxuw256:
+ case X86::BI__builtin_ia32_pmaxud256: {
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, Ops[0], Ops[1]);
+ return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
+ }
+ case X86::BI__builtin_ia32_pminsb128:
+ case X86::BI__builtin_ia32_pminsw128:
+ case X86::BI__builtin_ia32_pminsd128:
+ case X86::BI__builtin_ia32_pminsb256:
+ case X86::BI__builtin_ia32_pminsw256:
+ case X86::BI__builtin_ia32_pminsd256: {
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SLT, Ops[0], Ops[1]);
+ return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
+ }
+ case X86::BI__builtin_ia32_pminub128:
+ case X86::BI__builtin_ia32_pminuw128:
+ case X86::BI__builtin_ia32_pminud128:
+ case X86::BI__builtin_ia32_pminub256:
+ case X86::BI__builtin_ia32_pminuw256:
+ case X86::BI__builtin_ia32_pminud256: {
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, Ops[0], Ops[1]);
+ return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
+ }
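These integer min/max builtins are lowered to a compare plus select rather than a target intrinsic; the scalar equivalents of the two flavours are simply:

    #include <cstdint>

    // Scalar forms of the icmp+select idiom used above.
    static inline int32_t smax32(int32_t A, int32_t B) {
      return A > B ? A : B; // ICMP_SGT + select
    }
    static inline uint32_t umin32(uint32_t A, uint32_t B) {
      return A < B ? A : B; // ICMP_ULT + select
    }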
+
// 3DNow!
case X86::BI__builtin_ia32_pswapdsf:
case X86::BI__builtin_ia32_pswapdsi: {
@@ -6492,154 +7262,107 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Ops[0]);
return Builder.CreateExtractValue(Call, 1);
}
- // SSE comparison intrisics
+
+ // SSE packed comparison intrinsics
case X86::BI__builtin_ia32_cmpeqps:
+ case X86::BI__builtin_ia32_cmpeqpd:
+ return getVectorFCmpIR(CmpInst::FCMP_OEQ);
case X86::BI__builtin_ia32_cmpltps:
+ case X86::BI__builtin_ia32_cmpltpd:
+ return getVectorFCmpIR(CmpInst::FCMP_OLT);
case X86::BI__builtin_ia32_cmpleps:
+ case X86::BI__builtin_ia32_cmplepd:
+ return getVectorFCmpIR(CmpInst::FCMP_OLE);
case X86::BI__builtin_ia32_cmpunordps:
+ case X86::BI__builtin_ia32_cmpunordpd:
+ return getVectorFCmpIR(CmpInst::FCMP_UNO);
case X86::BI__builtin_ia32_cmpneqps:
+ case X86::BI__builtin_ia32_cmpneqpd:
+ return getVectorFCmpIR(CmpInst::FCMP_UNE);
case X86::BI__builtin_ia32_cmpnltps:
+ case X86::BI__builtin_ia32_cmpnltpd:
+ return getVectorFCmpIR(CmpInst::FCMP_UGE);
case X86::BI__builtin_ia32_cmpnleps:
+ case X86::BI__builtin_ia32_cmpnlepd:
+ return getVectorFCmpIR(CmpInst::FCMP_UGT);
case X86::BI__builtin_ia32_cmpordps:
+ case X86::BI__builtin_ia32_cmpordpd:
+ return getVectorFCmpIR(CmpInst::FCMP_ORD);
+ case X86::BI__builtin_ia32_cmpps:
+ case X86::BI__builtin_ia32_cmpps256:
+ case X86::BI__builtin_ia32_cmppd:
+ case X86::BI__builtin_ia32_cmppd256: {
+ unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
+ // If this is one of the SSE immediates, we can use native IR.
+ if (CC < 8) {
+ FCmpInst::Predicate Pred;
+ switch (CC) {
+ case 0: Pred = FCmpInst::FCMP_OEQ; break;
+ case 1: Pred = FCmpInst::FCMP_OLT; break;
+ case 2: Pred = FCmpInst::FCMP_OLE; break;
+ case 3: Pred = FCmpInst::FCMP_UNO; break;
+ case 4: Pred = FCmpInst::FCMP_UNE; break;
+ case 5: Pred = FCmpInst::FCMP_UGE; break;
+ case 6: Pred = FCmpInst::FCMP_UGT; break;
+ case 7: Pred = FCmpInst::FCMP_ORD; break;
+ }
+ return getVectorFCmpIR(Pred);
+ }
+
+ // We can't handle 8-31 immediates with native IR; use the intrinsic.
+ Intrinsic::ID ID;
+ switch (BuiltinID) {
+ default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_cmpps:
+ ID = Intrinsic::x86_sse_cmp_ps;
+ break;
+ case X86::BI__builtin_ia32_cmpps256:
+ ID = Intrinsic::x86_avx_cmp_ps_256;
+ break;
+ case X86::BI__builtin_ia32_cmppd:
+ ID = Intrinsic::x86_sse2_cmp_pd;
+ break;
+ case X86::BI__builtin_ia32_cmppd256:
+ ID = Intrinsic::x86_avx_cmp_pd_256;
+ break;
+ }
+
+ return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
+ }
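For reference, the immediate-to-predicate mapping in the switch above as a small lookup; immediates 8-31 (the additional AVX encodings) are not representable as a single fcmp and go through the target intrinsics, exactly as the case does.

    #include "llvm/IR/InstrTypes.h"
    #include <cassert>

    // The eight classic SSE comparison immediates and their IR predicates.
    static llvm::CmpInst::Predicate ssePredFromImm(unsigned CC) {
      assert(CC < 8 && "immediates 8-31 need the target intrinsic");
      static const llvm::CmpInst::Predicate Preds[8] = {
          llvm::CmpInst::FCMP_OEQ, llvm::CmpInst::FCMP_OLT,
          llvm::CmpInst::FCMP_OLE, llvm::CmpInst::FCMP_UNO,
          llvm::CmpInst::FCMP_UNE, llvm::CmpInst::FCMP_UGE,
          llvm::CmpInst::FCMP_UGT, llvm::CmpInst::FCMP_ORD};
      return Preds[CC];
    }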
+
+ // SSE scalar comparison intrinsics
case X86::BI__builtin_ia32_cmpeqss:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
case X86::BI__builtin_ia32_cmpltss:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
case X86::BI__builtin_ia32_cmpless:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
case X86::BI__builtin_ia32_cmpunordss:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
case X86::BI__builtin_ia32_cmpneqss:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
case X86::BI__builtin_ia32_cmpnltss:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
case X86::BI__builtin_ia32_cmpnless:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
case X86::BI__builtin_ia32_cmpordss:
- case X86::BI__builtin_ia32_cmpeqpd:
- case X86::BI__builtin_ia32_cmpltpd:
- case X86::BI__builtin_ia32_cmplepd:
- case X86::BI__builtin_ia32_cmpunordpd:
- case X86::BI__builtin_ia32_cmpneqpd:
- case X86::BI__builtin_ia32_cmpnltpd:
- case X86::BI__builtin_ia32_cmpnlepd:
- case X86::BI__builtin_ia32_cmpordpd:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
case X86::BI__builtin_ia32_cmpeqsd:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
case X86::BI__builtin_ia32_cmpltsd:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
case X86::BI__builtin_ia32_cmplesd:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
case X86::BI__builtin_ia32_cmpunordsd:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
case X86::BI__builtin_ia32_cmpneqsd:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
case X86::BI__builtin_ia32_cmpnltsd:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
case X86::BI__builtin_ia32_cmpnlesd:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
case X86::BI__builtin_ia32_cmpordsd:
- // These exist so that the builtin that takes an immediate can be bounds
- // checked by clang to avoid passing bad immediates to the backend. Since
- // AVX has a larger immediate than SSE we would need separate builtins to
- // do the different bounds checking. Rather than create a clang specific
- // SSE only builtin, this implements eight separate builtins to match gcc
- // implementation.
-
- // Choose the immediate.
- unsigned Imm;
- switch (BuiltinID) {
- default: llvm_unreachable("Unsupported intrinsic!");
- case X86::BI__builtin_ia32_cmpeqps:
- case X86::BI__builtin_ia32_cmpeqss:
- case X86::BI__builtin_ia32_cmpeqpd:
- case X86::BI__builtin_ia32_cmpeqsd:
- Imm = 0;
- break;
- case X86::BI__builtin_ia32_cmpltps:
- case X86::BI__builtin_ia32_cmpltss:
- case X86::BI__builtin_ia32_cmpltpd:
- case X86::BI__builtin_ia32_cmpltsd:
- Imm = 1;
- break;
- case X86::BI__builtin_ia32_cmpleps:
- case X86::BI__builtin_ia32_cmpless:
- case X86::BI__builtin_ia32_cmplepd:
- case X86::BI__builtin_ia32_cmplesd:
- Imm = 2;
- break;
- case X86::BI__builtin_ia32_cmpunordps:
- case X86::BI__builtin_ia32_cmpunordss:
- case X86::BI__builtin_ia32_cmpunordpd:
- case X86::BI__builtin_ia32_cmpunordsd:
- Imm = 3;
- break;
- case X86::BI__builtin_ia32_cmpneqps:
- case X86::BI__builtin_ia32_cmpneqss:
- case X86::BI__builtin_ia32_cmpneqpd:
- case X86::BI__builtin_ia32_cmpneqsd:
- Imm = 4;
- break;
- case X86::BI__builtin_ia32_cmpnltps:
- case X86::BI__builtin_ia32_cmpnltss:
- case X86::BI__builtin_ia32_cmpnltpd:
- case X86::BI__builtin_ia32_cmpnltsd:
- Imm = 5;
- break;
- case X86::BI__builtin_ia32_cmpnleps:
- case X86::BI__builtin_ia32_cmpnless:
- case X86::BI__builtin_ia32_cmpnlepd:
- case X86::BI__builtin_ia32_cmpnlesd:
- Imm = 6;
- break;
- case X86::BI__builtin_ia32_cmpordps:
- case X86::BI__builtin_ia32_cmpordss:
- case X86::BI__builtin_ia32_cmpordpd:
- case X86::BI__builtin_ia32_cmpordsd:
- Imm = 7;
- break;
- }
-
- // Choose the intrinsic ID.
- const char *name;
- Intrinsic::ID ID;
- switch (BuiltinID) {
- default: llvm_unreachable("Unsupported intrinsic!");
- case X86::BI__builtin_ia32_cmpeqps:
- case X86::BI__builtin_ia32_cmpltps:
- case X86::BI__builtin_ia32_cmpleps:
- case X86::BI__builtin_ia32_cmpunordps:
- case X86::BI__builtin_ia32_cmpneqps:
- case X86::BI__builtin_ia32_cmpnltps:
- case X86::BI__builtin_ia32_cmpnleps:
- case X86::BI__builtin_ia32_cmpordps:
- name = "cmpps";
- ID = Intrinsic::x86_sse_cmp_ps;
- break;
- case X86::BI__builtin_ia32_cmpeqss:
- case X86::BI__builtin_ia32_cmpltss:
- case X86::BI__builtin_ia32_cmpless:
- case X86::BI__builtin_ia32_cmpunordss:
- case X86::BI__builtin_ia32_cmpneqss:
- case X86::BI__builtin_ia32_cmpnltss:
- case X86::BI__builtin_ia32_cmpnless:
- case X86::BI__builtin_ia32_cmpordss:
- name = "cmpss";
- ID = Intrinsic::x86_sse_cmp_ss;
- break;
- case X86::BI__builtin_ia32_cmpeqpd:
- case X86::BI__builtin_ia32_cmpltpd:
- case X86::BI__builtin_ia32_cmplepd:
- case X86::BI__builtin_ia32_cmpunordpd:
- case X86::BI__builtin_ia32_cmpneqpd:
- case X86::BI__builtin_ia32_cmpnltpd:
- case X86::BI__builtin_ia32_cmpnlepd:
- case X86::BI__builtin_ia32_cmpordpd:
- name = "cmppd";
- ID = Intrinsic::x86_sse2_cmp_pd;
- break;
- case X86::BI__builtin_ia32_cmpeqsd:
- case X86::BI__builtin_ia32_cmpltsd:
- case X86::BI__builtin_ia32_cmplesd:
- case X86::BI__builtin_ia32_cmpunordsd:
- case X86::BI__builtin_ia32_cmpneqsd:
- case X86::BI__builtin_ia32_cmpnltsd:
- case X86::BI__builtin_ia32_cmpnlesd:
- case X86::BI__builtin_ia32_cmpordsd:
- name = "cmpsd";
- ID = Intrinsic::x86_sse2_cmp_sd;
- break;
- }
-
- Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
- llvm::Function *F = CGM.getIntrinsic(ID);
- return Builder.CreateCall(F, Ops, name);
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
}
}
@@ -6812,6 +7535,16 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
return Builder.CreateCall(F, X);
}
+
+ // Absolute value
+ case PPC::BI__builtin_vsx_xvabsdp:
+ case PPC::BI__builtin_vsx_xvabssp: {
+ llvm::Type *ResultType = ConvertType(E->getType());
+ Value *X = EmitScalarExpr(E->getArg(0));
+ llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
+ return Builder.CreateCall(F, X);
+ }
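At the source level these builtins are normally reached through the vec_abs overloads in altivec.h rather than called directly. A hedged usage sketch; the overload and its VSX requirement are assumptions about the header, not something this hunk shows.

    #include <altivec.h>

    // vec_abs on a VSX vector double is expected to reach
    // __builtin_vsx_xvabsdp and hence the llvm.fabs call emitted above.
    __vector double vectorAbs(__vector double V) {
      return vec_abs(V);
    }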
+
// FMA variations
case PPC::BI__builtin_vsx_xvmaddadp:
case PPC::BI__builtin_vsx_xvmaddasp:
@@ -6851,44 +7584,11 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
}
}
-// Emit an intrinsic that has 1 float or double.
-static Value *emitUnaryFPBuiltin(CodeGenFunction &CGF,
- const CallExpr *E,
- unsigned IntrinsicID) {
- llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
-
- Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
- return CGF.Builder.CreateCall(F, Src0);
-}
-
-// Emit an intrinsic that has 3 float or double operands.
-static Value *emitTernaryFPBuiltin(CodeGenFunction &CGF,
- const CallExpr *E,
- unsigned IntrinsicID) {
- llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
- llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
- llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
-
- Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
- return CGF.Builder.CreateCall(F, {Src0, Src1, Src2});
-}
-
-// Emit an intrinsic that has 1 float or double operand, and 1 integer.
-static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
- const CallExpr *E,
- unsigned IntrinsicID) {
- llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
- llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
-
- Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
- return CGF.Builder.CreateCall(F, {Src0, Src1});
-}
-
Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
switch (BuiltinID) {
- case AMDGPU::BI__builtin_amdgpu_div_scale:
- case AMDGPU::BI__builtin_amdgpu_div_scalef: {
+ case AMDGPU::BI__builtin_amdgcn_div_scale:
+ case AMDGPU::BI__builtin_amdgcn_div_scalef: {
// Translate from the intrinsic's struct return to the builtin's out
// argument.
@@ -6898,7 +7598,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
llvm::Value *Y = EmitScalarExpr(E->getArg(1));
llvm::Value *Z = EmitScalarExpr(E->getArg(2));
- llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::AMDGPU_div_scale,
+ llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
X->getType());
llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
@@ -6913,40 +7613,85 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
Builder.CreateStore(FlagExt, FlagOutPtr);
return Result;
}
- case AMDGPU::BI__builtin_amdgpu_div_fmas:
- case AMDGPU::BI__builtin_amdgpu_div_fmasf: {
+ case AMDGPU::BI__builtin_amdgcn_div_fmas:
+ case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
- llvm::Value *F = CGM.getIntrinsic(Intrinsic::AMDGPU_div_fmas,
+ llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
Src0->getType());
llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
}
- case AMDGPU::BI__builtin_amdgpu_div_fixup:
- case AMDGPU::BI__builtin_amdgpu_div_fixupf:
- return emitTernaryFPBuiltin(*this, E, Intrinsic::AMDGPU_div_fixup);
- case AMDGPU::BI__builtin_amdgpu_trig_preop:
- case AMDGPU::BI__builtin_amdgpu_trig_preopf:
- return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_trig_preop);
- case AMDGPU::BI__builtin_amdgpu_rcp:
- case AMDGPU::BI__builtin_amdgpu_rcpf:
- return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rcp);
- case AMDGPU::BI__builtin_amdgpu_rsq:
- case AMDGPU::BI__builtin_amdgpu_rsqf:
- return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq);
- case AMDGPU::BI__builtin_amdgpu_rsq_clamped:
- case AMDGPU::BI__builtin_amdgpu_rsq_clampedf:
- return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq_clamped);
- case AMDGPU::BI__builtin_amdgpu_ldexp:
- case AMDGPU::BI__builtin_amdgpu_ldexpf:
- return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_ldexp);
- case AMDGPU::BI__builtin_amdgpu_class:
- case AMDGPU::BI__builtin_amdgpu_classf:
- return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_class);
- default:
+ case AMDGPU::BI__builtin_amdgcn_div_fixup:
+ case AMDGPU::BI__builtin_amdgcn_div_fixupf:
+ return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
+ case AMDGPU::BI__builtin_amdgcn_trig_preop:
+ case AMDGPU::BI__builtin_amdgcn_trig_preopf:
+ return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
+ case AMDGPU::BI__builtin_amdgcn_rcp:
+ case AMDGPU::BI__builtin_amdgcn_rcpf:
+ return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
+ case AMDGPU::BI__builtin_amdgcn_rsq:
+ case AMDGPU::BI__builtin_amdgcn_rsqf:
+ return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
+ case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
+ case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
+ return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
+ case AMDGPU::BI__builtin_amdgcn_sinf:
+ return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
+ case AMDGPU::BI__builtin_amdgcn_cosf:
+ return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
+ case AMDGPU::BI__builtin_amdgcn_log_clampf:
+ return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
+ case AMDGPU::BI__builtin_amdgcn_ldexp:
+ case AMDGPU::BI__builtin_amdgcn_ldexpf:
+ return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
+ case AMDGPU::BI__builtin_amdgcn_frexp_mant:
+ case AMDGPU::BI__builtin_amdgcn_frexp_mantf: {
+ return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
+ }
+ case AMDGPU::BI__builtin_amdgcn_frexp_exp:
+ case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
+ return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_exp);
+ }
+ case AMDGPU::BI__builtin_amdgcn_fract:
+ case AMDGPU::BI__builtin_amdgcn_fractf:
+ return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
+ case AMDGPU::BI__builtin_amdgcn_lerp:
+ return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
+ case AMDGPU::BI__builtin_amdgcn_class:
+ case AMDGPU::BI__builtin_amdgcn_classf:
+ return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
+
+ case AMDGPU::BI__builtin_amdgcn_read_exec: {
+ CallInst *CI = cast<CallInst>(
+ EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
+ CI->setConvergent();
+ return CI;
+ }
+
+ // amdgcn workitem
+ case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
+ return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
+ case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
+ return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
+ case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
+ return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
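emitRangedBuiltin (defined elsewhere in this file) is handed the half-open range [0, 1024) for the workitem-id builtins here and for the r600 read_tidig cases just below, so the optimizer knows the id is bounded. A minimal sketch of how such a range can be attached, assuming the MDBuilder route; the real helper may be structured differently.

    // Sketch only: call the nullary intrinsic and attach !range [Low, High).
    static llvm::Value *emitRangedSketch(CodeGenFunction &CGF,
                                         unsigned IntrinsicID, int Low, int High) {
      llvm::Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
      llvm::CallInst *Call = CGF.Builder.CreateCall(F);
      llvm::MDBuilder MDHelper(CGF.getLLVMContext());
      llvm::MDNode *RNode = MDHelper.createRange(llvm::APInt(32, Low),
                                                 llvm::APInt(32, High));
      Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
      return Call;
    }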
+
+ // r600 intrinsics
+ case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
+ case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
+ return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
+ case AMDGPU::BI__builtin_r600_read_tidig_x:
+ return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
+ case AMDGPU::BI__builtin_r600_read_tidig_y:
+ return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
+ case AMDGPU::BI__builtin_r600_read_tidig_z:
+ return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
+ default:
return nullptr;
}
}
@@ -7196,6 +7941,17 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
+ auto MakeLdg = [&](unsigned IntrinsicID) {
+ Value *Ptr = EmitScalarExpr(E->getArg(0));
+ AlignmentSource AlignSource;
+ clang::CharUnits Align =
+ getNaturalPointeeTypeAlignment(E->getArg(0)->getType(), &AlignSource);
+ return Builder.CreateCall(
+ CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
+ Ptr->getType()}),
+ {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
+ };
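The MakeLdg lambda above serves the __nvvm_ldg_* cases further down: it overloads the intrinsic on the pointee and pointer types and passes the pointee's natural alignment as an explicit i32 operand. A hedged user-level sketch follows; the exact builtin prototype is an assumption, while the lowering to llvm.nvvm.ldg.global.* with an alignment operand is what the lambda shows.

    // Sketch only: __nvvm_ldg_i is assumed to take a pointer to read-only
    // global data and return the loaded int; codegen turns the call into
    // llvm.nvvm.ldg.global.i carrying the natural alignment of 'int'.
    int ldgExample(const int *P) {
      return __nvvm_ldg_i(P);
    }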
+
switch (BuiltinID) {
case NVPTX::BI__nvvm_atom_add_gen_i:
case NVPTX::BI__nvvm_atom_add_gen_l:
@@ -7264,6 +8020,56 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(FnALAF32, {Ptr, Val});
}
+ case NVPTX::BI__nvvm_atom_inc_gen_ui: {
+ Value *Ptr = EmitScalarExpr(E->getArg(0));
+ Value *Val = EmitScalarExpr(E->getArg(1));
+ Value *FnALI32 =
+ CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
+ return Builder.CreateCall(FnALI32, {Ptr, Val});
+ }
+
+ case NVPTX::BI__nvvm_atom_dec_gen_ui: {
+ Value *Ptr = EmitScalarExpr(E->getArg(0));
+ Value *Val = EmitScalarExpr(E->getArg(1));
+ Value *FnALD32 =
+ CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
+ return Builder.CreateCall(FnALD32, {Ptr, Val});
+ }
+
+ case NVPTX::BI__nvvm_ldg_c:
+ case NVPTX::BI__nvvm_ldg_c2:
+ case NVPTX::BI__nvvm_ldg_c4:
+ case NVPTX::BI__nvvm_ldg_s:
+ case NVPTX::BI__nvvm_ldg_s2:
+ case NVPTX::BI__nvvm_ldg_s4:
+ case NVPTX::BI__nvvm_ldg_i:
+ case NVPTX::BI__nvvm_ldg_i2:
+ case NVPTX::BI__nvvm_ldg_i4:
+ case NVPTX::BI__nvvm_ldg_l:
+ case NVPTX::BI__nvvm_ldg_ll:
+ case NVPTX::BI__nvvm_ldg_ll2:
+ case NVPTX::BI__nvvm_ldg_uc:
+ case NVPTX::BI__nvvm_ldg_uc2:
+ case NVPTX::BI__nvvm_ldg_uc4:
+ case NVPTX::BI__nvvm_ldg_us:
+ case NVPTX::BI__nvvm_ldg_us2:
+ case NVPTX::BI__nvvm_ldg_us4:
+ case NVPTX::BI__nvvm_ldg_ui:
+ case NVPTX::BI__nvvm_ldg_ui2:
+ case NVPTX::BI__nvvm_ldg_ui4:
+ case NVPTX::BI__nvvm_ldg_ul:
+ case NVPTX::BI__nvvm_ldg_ull:
+ case NVPTX::BI__nvvm_ldg_ull2:
+ // PTX Interoperability section 2.2: "For a vector with an even number of
+ // elements, its alignment is set to number of elements times the alignment
+ // of its member: n*alignof(t)."
+ return MakeLdg(Intrinsic::nvvm_ldg_global_i);
+ case NVPTX::BI__nvvm_ldg_f:
+ case NVPTX::BI__nvvm_ldg_f2:
+ case NVPTX::BI__nvvm_ldg_f4:
+ case NVPTX::BI__nvvm_ldg_d:
+ case NVPTX::BI__nvvm_ldg_d2:
+ return MakeLdg(Intrinsic::nvvm_ldg_global_f);
default:
return nullptr;
}
@@ -7272,9 +8078,9 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
switch (BuiltinID) {
- case WebAssembly::BI__builtin_wasm_memory_size: {
+ case WebAssembly::BI__builtin_wasm_current_memory: {
llvm::Type *ResultType = ConvertType(E->getType());
- Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
return Builder.CreateCall(Callee);
}
case WebAssembly::BI__builtin_wasm_grow_memory: {