path: root/contrib/llvm/lib/IR/AutoUpgrade.cpp
Diffstat (limited to 'contrib/llvm/lib/IR/AutoUpgrade.cpp')
-rw-r--r--  contrib/llvm/lib/IR/AutoUpgrade.cpp | 1138
1 file changed, 835 insertions(+), 303 deletions(-)
diff --git a/contrib/llvm/lib/IR/AutoUpgrade.cpp b/contrib/llvm/lib/IR/AutoUpgrade.cpp
index 2e4a2f8..e3a7bae 100644
--- a/contrib/llvm/lib/IR/AutoUpgrade.cpp
+++ b/contrib/llvm/lib/IR/AutoUpgrade.cpp
@@ -31,6 +31,8 @@
#include <cstring>
using namespace llvm;
+static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
+
// Upgrade the declarations of the SSE4.1 functions whose arguments have
// changed their type from v4f32 to v2i64.
static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
@@ -42,7 +44,7 @@ static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
return false;
// Yes, it's old, replace it with new version.
- F->setName(F->getName() + ".old");
+ rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
return true;
}
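For context: the ".old" rename matters because Intrinsic::getDeclaration goes
through Module::getOrInsertFunction, which would otherwise hand back the stale
declaration already registered under the canonical name. A minimal sketch of
how these predicate helpers are consumed (usage here is illustrative; the
actual call rewriting is done by UpgradeCallsToIntrinsic later in this file):

  Function *NewFn = nullptr;
  if (UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn)) {
    // F now carries the ".old" name; UpgradeIntrinsicCall rewrites its
    // callers to target NewFn, after which the old declaration is erased.
  }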
@@ -58,7 +60,7 @@ static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
return false;
// Move this function aside and map down.
- F->setName(F->getName() + ".old");
+ rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
return true;
}
@@ -75,6 +77,11 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
switch (Name[0]) {
default: break;
case 'a': {
+ if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
+ F->arg_begin()->getType());
+ return true;
+ }
if (Name.startswith("arm.neon.vclz")) {
Type* args[2] = {
F->arg_begin()->getType(),
@@ -135,25 +142,49 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
case 'c': {
if (Name.startswith("ctlz.") && F->arg_size() == 1) {
- F->setName(Name + ".old");
+ rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
F->arg_begin()->getType());
return true;
}
if (Name.startswith("cttz.") && F->arg_size() == 1) {
- F->setName(Name + ".old");
+ rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
F->arg_begin()->getType());
return true;
}
break;
}
-
+ case 'i': {
+ if (Name.startswith("invariant.start")) {
+ auto Args = F->getFunctionType()->params();
+ Type* ObjectPtr[1] = {Args[1]};
+ if (F->getName() !=
+ Intrinsic::getName(Intrinsic::invariant_start, ObjectPtr)) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(
+ F->getParent(), Intrinsic::invariant_start, ObjectPtr);
+ return true;
+ }
+ }
+ if (Name.startswith("invariant.end")) {
+ auto Args = F->getFunctionType()->params();
+ Type* ObjectPtr[1] = {Args[2]};
+ if (F->getName() !=
+ Intrinsic::getName(Intrinsic::invariant_end, ObjectPtr)) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::invariant_end, ObjectPtr);
+ return true;
+ }
+ }
+ break;
+ }
case 'm': {
if (Name.startswith("masked.load.")) {
Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
- F->setName(Name + ".old");
+ rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::masked_load,
Tys);
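The name comparison above is what keeps already-upgraded declarations stable:
overloaded intrinsics encode their overload types in the symbol name, so only
a stale mangling fails the equality test. Illustrative sketch (the exact
mangled spellings depend on the LLVM version):

  // old declaration:    llvm.masked.load.v4f32            (data type only)
  // current mangling:   llvm.masked.load.v4f32.p0v4f32    (pointer type too)
  // names differ => rename(F) and re-create the declaration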
@@ -164,7 +195,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
auto Args = F->getFunctionType()->params();
Type *Tys[] = { Args[0], Args[1] };
if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
- F->setName(Name + ".old");
+ rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::masked_store,
Tys);
@@ -180,7 +211,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
if (F->arg_size() == 2 && Name.startswith("objectsize.")) {
Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
- F->setName(Name + ".old");
+ rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::objectsize, Tys);
return true;
@@ -193,117 +224,174 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
NewFn = nullptr;
return true;
}
+ break;
case 'x': {
bool IsX86 = Name.startswith("x86.");
if (IsX86)
Name = Name.substr(4);
+  // All of the intrinsic matches below should be marked with which llvm
+ // version started autoupgrading them. At some point in the future we would
+ // like to use this information to remove upgrade code for some older
+ // intrinsics. It is currently undecided how we will determine that future
+ // point.
if (IsX86 &&
- (Name.startswith("sse2.pcmpeq.") ||
- Name.startswith("sse2.pcmpgt.") ||
- Name.startswith("avx2.pcmpeq.") ||
- Name.startswith("avx2.pcmpgt.") ||
- Name.startswith("avx512.mask.pcmpeq.") ||
- Name.startswith("avx512.mask.pcmpgt.") ||
- Name == "sse41.pmaxsb" ||
- Name == "sse2.pmaxs.w" ||
- Name == "sse41.pmaxsd" ||
- Name == "sse2.pmaxu.b" ||
- Name == "sse41.pmaxuw" ||
- Name == "sse41.pmaxud" ||
- Name == "sse41.pminsb" ||
- Name == "sse2.pmins.w" ||
- Name == "sse41.pminsd" ||
- Name == "sse2.pminu.b" ||
- Name == "sse41.pminuw" ||
- Name == "sse41.pminud" ||
- Name.startswith("avx2.pmax") ||
- Name.startswith("avx2.pmin") ||
- Name.startswith("avx2.vbroadcast") ||
- Name.startswith("avx2.pbroadcast") ||
- Name.startswith("avx.vpermil.") ||
- Name.startswith("sse2.pshuf") ||
- Name.startswith("avx512.pbroadcast") ||
- Name.startswith("avx512.mask.broadcast.s") ||
- Name.startswith("avx512.mask.movddup") ||
- Name.startswith("avx512.mask.movshdup") ||
- Name.startswith("avx512.mask.movsldup") ||
- Name.startswith("avx512.mask.pshuf.d.") ||
- Name.startswith("avx512.mask.pshufl.w.") ||
- Name.startswith("avx512.mask.pshufh.w.") ||
- Name.startswith("avx512.mask.vpermil.p") ||
- Name.startswith("avx512.mask.perm.df.") ||
- Name.startswith("avx512.mask.perm.di.") ||
- Name.startswith("avx512.mask.punpckl") ||
- Name.startswith("avx512.mask.punpckh") ||
- Name.startswith("avx512.mask.unpckl.") ||
- Name.startswith("avx512.mask.unpckh.") ||
- Name.startswith("avx512.mask.pand.") ||
- Name.startswith("avx512.mask.pandn.") ||
- Name.startswith("avx512.mask.por.") ||
- Name.startswith("avx512.mask.pxor.") ||
- Name.startswith("sse41.pmovsx") ||
- Name.startswith("sse41.pmovzx") ||
- Name.startswith("avx2.pmovsx") ||
- Name.startswith("avx2.pmovzx") ||
- Name == "sse2.cvtdq2pd" ||
- Name == "sse2.cvtps2pd" ||
- Name == "avx.cvtdq2.pd.256" ||
- Name == "avx.cvt.ps2.pd.256" ||
- Name.startswith("avx.vinsertf128.") ||
- Name == "avx2.vinserti128" ||
- Name.startswith("avx.vextractf128.") ||
- Name == "avx2.vextracti128" ||
- Name.startswith("sse4a.movnt.") ||
- Name.startswith("avx.movnt.") ||
- Name.startswith("avx512.storent.") ||
- Name == "sse2.storel.dq" ||
- Name.startswith("sse.storeu.") ||
- Name.startswith("sse2.storeu.") ||
- Name.startswith("avx.storeu.") ||
- Name.startswith("avx512.mask.storeu.p") ||
- Name.startswith("avx512.mask.storeu.b.") ||
- Name.startswith("avx512.mask.storeu.w.") ||
- Name.startswith("avx512.mask.storeu.d.") ||
- Name.startswith("avx512.mask.storeu.q.") ||
- Name.startswith("avx512.mask.store.p") ||
- Name.startswith("avx512.mask.store.b.") ||
- Name.startswith("avx512.mask.store.w.") ||
- Name.startswith("avx512.mask.store.d.") ||
- Name.startswith("avx512.mask.store.q.") ||
- Name.startswith("avx512.mask.loadu.p") ||
- Name.startswith("avx512.mask.loadu.b.") ||
- Name.startswith("avx512.mask.loadu.w.") ||
- Name.startswith("avx512.mask.loadu.d.") ||
- Name.startswith("avx512.mask.loadu.q.") ||
- Name.startswith("avx512.mask.load.p") ||
- Name.startswith("avx512.mask.load.b.") ||
- Name.startswith("avx512.mask.load.w.") ||
- Name.startswith("avx512.mask.load.d.") ||
- Name.startswith("avx512.mask.load.q.") ||
- Name == "sse42.crc32.64.8" ||
- Name.startswith("avx.vbroadcast.s") ||
- Name.startswith("avx512.mask.palignr.") ||
- Name.startswith("sse2.psll.dq") ||
- Name.startswith("sse2.psrl.dq") ||
- Name.startswith("avx2.psll.dq") ||
- Name.startswith("avx2.psrl.dq") ||
- Name.startswith("avx512.psll.dq") ||
- Name.startswith("avx512.psrl.dq") ||
- Name == "sse41.pblendw" ||
- Name.startswith("sse41.blendp") ||
- Name.startswith("avx.blend.p") ||
- Name == "avx2.pblendw" ||
- Name.startswith("avx2.pblendd.") ||
- Name == "avx2.vbroadcasti128" ||
- Name == "xop.vpcmov" ||
- (Name.startswith("xop.vpcom") && F->arg_size() == 2))) {
+ (Name.startswith("sse2.pcmpeq.") || // Added in 3.1
+ Name.startswith("sse2.pcmpgt.") || // Added in 3.1
+ Name.startswith("avx2.pcmpeq.") || // Added in 3.1
+ Name.startswith("avx2.pcmpgt.") || // Added in 3.1
+ Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
+ Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
+ Name == "sse.add.ss" || // Added in 4.0
+ Name == "sse2.add.sd" || // Added in 4.0
+ Name == "sse.sub.ss" || // Added in 4.0
+ Name == "sse2.sub.sd" || // Added in 4.0
+ Name == "sse.mul.ss" || // Added in 4.0
+ Name == "sse2.mul.sd" || // Added in 4.0
+ Name == "sse.div.ss" || // Added in 4.0
+ Name == "sse2.div.sd" || // Added in 4.0
+ Name == "sse41.pmaxsb" || // Added in 3.9
+ Name == "sse2.pmaxs.w" || // Added in 3.9
+ Name == "sse41.pmaxsd" || // Added in 3.9
+ Name == "sse2.pmaxu.b" || // Added in 3.9
+ Name == "sse41.pmaxuw" || // Added in 3.9
+ Name == "sse41.pmaxud" || // Added in 3.9
+ Name == "sse41.pminsb" || // Added in 3.9
+ Name == "sse2.pmins.w" || // Added in 3.9
+ Name == "sse41.pminsd" || // Added in 3.9
+ Name == "sse2.pminu.b" || // Added in 3.9
+ Name == "sse41.pminuw" || // Added in 3.9
+ Name == "sse41.pminud" || // Added in 3.9
+ Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
+ Name.startswith("avx2.pmax") || // Added in 3.9
+ Name.startswith("avx2.pmin") || // Added in 3.9
+ Name.startswith("avx512.mask.pmax") || // Added in 4.0
+ Name.startswith("avx512.mask.pmin") || // Added in 4.0
+ Name.startswith("avx2.vbroadcast") || // Added in 3.8
+ Name.startswith("avx2.pbroadcast") || // Added in 3.8
+ Name.startswith("avx.vpermil.") || // Added in 3.1
+ Name.startswith("sse2.pshuf") || // Added in 3.9
+ Name.startswith("avx512.pbroadcast") || // Added in 3.9
+ Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
+ Name.startswith("avx512.mask.movddup") || // Added in 3.9
+ Name.startswith("avx512.mask.movshdup") || // Added in 3.9
+ Name.startswith("avx512.mask.movsldup") || // Added in 3.9
+ Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
+ Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
+ Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
+ Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
+ Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
+ Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
+ Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
+ Name.startswith("avx512.mask.punpckl") || // Added in 3.9
+ Name.startswith("avx512.mask.punpckh") || // Added in 3.9
+ Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
+ Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
+ Name.startswith("avx512.mask.pand.") || // Added in 3.9
+ Name.startswith("avx512.mask.pandn.") || // Added in 3.9
+ Name.startswith("avx512.mask.por.") || // Added in 3.9
+ Name.startswith("avx512.mask.pxor.") || // Added in 3.9
+ Name.startswith("avx512.mask.and.") || // Added in 3.9
+ Name.startswith("avx512.mask.andn.") || // Added in 3.9
+ Name.startswith("avx512.mask.or.") || // Added in 3.9
+ Name.startswith("avx512.mask.xor.") || // Added in 3.9
+ Name.startswith("avx512.mask.padd.") || // Added in 4.0
+ Name.startswith("avx512.mask.psub.") || // Added in 4.0
+ Name.startswith("avx512.mask.pmull.") || // Added in 4.0
+ Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
+ Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
+ Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
+ Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
+ Name == "avx512.mask.add.pd.128" || // Added in 4.0
+ Name == "avx512.mask.add.pd.256" || // Added in 4.0
+ Name == "avx512.mask.add.ps.128" || // Added in 4.0
+ Name == "avx512.mask.add.ps.256" || // Added in 4.0
+ Name == "avx512.mask.div.pd.128" || // Added in 4.0
+ Name == "avx512.mask.div.pd.256" || // Added in 4.0
+ Name == "avx512.mask.div.ps.128" || // Added in 4.0
+ Name == "avx512.mask.div.ps.256" || // Added in 4.0
+ Name == "avx512.mask.mul.pd.128" || // Added in 4.0
+ Name == "avx512.mask.mul.pd.256" || // Added in 4.0
+ Name == "avx512.mask.mul.ps.128" || // Added in 4.0
+ Name == "avx512.mask.mul.ps.256" || // Added in 4.0
+ Name == "avx512.mask.sub.pd.128" || // Added in 4.0
+ Name == "avx512.mask.sub.pd.256" || // Added in 4.0
+ Name == "avx512.mask.sub.ps.128" || // Added in 4.0
+ Name == "avx512.mask.sub.ps.256" || // Added in 4.0
+ Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
+ Name.startswith("avx512.mask.psll.d") || // Added in 4.0
+ Name.startswith("avx512.mask.psll.q") || // Added in 4.0
+ Name.startswith("avx512.mask.psll.w") || // Added in 4.0
+ Name.startswith("avx512.mask.psra.d") || // Added in 4.0
+ Name.startswith("avx512.mask.psra.q") || // Added in 4.0
+ Name.startswith("avx512.mask.psra.w") || // Added in 4.0
+ Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
+ Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
+ Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
+ Name.startswith("avx512.mask.pslli") || // Added in 4.0
+ Name.startswith("avx512.mask.psrai") || // Added in 4.0
+ Name.startswith("avx512.mask.psrli") || // Added in 4.0
+ Name.startswith("avx512.mask.psllv") || // Added in 4.0
+ Name.startswith("avx512.mask.psrav") || // Added in 4.0
+ Name.startswith("avx512.mask.psrlv") || // Added in 4.0
+ Name.startswith("sse41.pmovsx") || // Added in 3.8
+ Name.startswith("sse41.pmovzx") || // Added in 3.9
+ Name.startswith("avx2.pmovsx") || // Added in 3.9
+ Name.startswith("avx2.pmovzx") || // Added in 3.9
+ Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
+ Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
+ Name == "sse2.cvtdq2pd" || // Added in 3.9
+ Name == "sse2.cvtps2pd" || // Added in 3.9
+ Name == "avx.cvtdq2.pd.256" || // Added in 3.9
+ Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
+ Name.startswith("avx.vinsertf128.") || // Added in 3.7
+ Name == "avx2.vinserti128" || // Added in 3.7
+ Name.startswith("avx512.mask.insert") || // Added in 4.0
+ Name.startswith("avx.vextractf128.") || // Added in 3.7
+ Name == "avx2.vextracti128" || // Added in 3.7
+ Name.startswith("avx512.mask.vextract") || // Added in 4.0
+ Name.startswith("sse4a.movnt.") || // Added in 3.9
+ Name.startswith("avx.movnt.") || // Added in 3.2
+ Name.startswith("avx512.storent.") || // Added in 3.9
+ Name == "sse2.storel.dq" || // Added in 3.9
+ Name.startswith("sse.storeu.") || // Added in 3.9
+ Name.startswith("sse2.storeu.") || // Added in 3.9
+ Name.startswith("avx.storeu.") || // Added in 3.9
+ Name.startswith("avx512.mask.storeu.") || // Added in 3.9
+ Name.startswith("avx512.mask.store.p") || // Added in 3.9
+ Name.startswith("avx512.mask.store.b.") || // Added in 3.9
+ Name.startswith("avx512.mask.store.w.") || // Added in 3.9
+ Name.startswith("avx512.mask.store.d.") || // Added in 3.9
+ Name.startswith("avx512.mask.store.q.") || // Added in 3.9
+ Name.startswith("avx512.mask.loadu.") || // Added in 3.9
+ Name.startswith("avx512.mask.load.") || // Added in 3.9
+ Name == "sse42.crc32.64.8" || // Added in 3.4
+ Name.startswith("avx.vbroadcast.s") || // Added in 3.5
+ Name.startswith("avx512.mask.palignr.") || // Added in 3.9
+ Name.startswith("avx512.mask.valign.") || // Added in 4.0
+ Name.startswith("sse2.psll.dq") || // Added in 3.7
+ Name.startswith("sse2.psrl.dq") || // Added in 3.7
+ Name.startswith("avx2.psll.dq") || // Added in 3.7
+ Name.startswith("avx2.psrl.dq") || // Added in 3.7
+ Name.startswith("avx512.psll.dq") || // Added in 3.9
+ Name.startswith("avx512.psrl.dq") || // Added in 3.9
+ Name == "sse41.pblendw" || // Added in 3.7
+ Name.startswith("sse41.blendp") || // Added in 3.7
+ Name.startswith("avx.blend.p") || // Added in 3.7
+ Name == "avx2.pblendw" || // Added in 3.7
+ Name.startswith("avx2.pblendd.") || // Added in 3.7
+ Name.startswith("avx.vbroadcastf128") || // Added in 4.0
+ Name == "avx2.vbroadcasti128" || // Added in 3.7
+ Name == "xop.vpcmov" || // Added in 3.8
+ Name.startswith("avx512.mask.move.s") || // Added in 4.0
+ (Name.startswith("xop.vpcom") && // Added in 3.2
+ F->arg_size() == 2))) {
NewFn = nullptr;
return true;
}
// SSE4.1 ptest functions may have an old signature.
- if (IsX86 && Name.startswith("sse41.ptest")) {
+ if (IsX86 && Name.startswith("sse41.ptest")) { // Added in 3.2
if (Name.substr(11) == "c")
return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
if (Name.substr(11) == "z")
@@ -313,67 +401,44 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
}
// Several blend and other instructions with masks used the wrong number of
// bits.
- if (IsX86 && Name == "sse41.insertps")
+ if (IsX86 && Name == "sse41.insertps") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
NewFn);
- if (IsX86 && Name == "sse41.dppd")
+ if (IsX86 && Name == "sse41.dppd") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
NewFn);
- if (IsX86 && Name == "sse41.dpps")
+ if (IsX86 && Name == "sse41.dpps") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
NewFn);
- if (IsX86 && Name == "sse41.mpsadbw")
+ if (IsX86 && Name == "sse41.mpsadbw") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
NewFn);
- if (IsX86 && Name == "avx.dp.ps.256")
+ if (IsX86 && Name == "avx.dp.ps.256") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
NewFn);
- if (IsX86 && Name == "avx2.mpsadbw")
+ if (IsX86 && Name == "avx2.mpsadbw") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
NewFn);
- // frcz.ss/sd may need to have an argument dropped
+ // frcz.ss/sd may need to have an argument dropped. Added in 3.2
if (IsX86 && Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
- F->setName(Name + ".old");
+ rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::x86_xop_vfrcz_ss);
return true;
}
if (IsX86 && Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
- F->setName(Name + ".old");
+ rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::x86_xop_vfrcz_sd);
return true;
}
- if (IsX86 && (Name.startswith("avx512.mask.pslli.") ||
- Name.startswith("avx512.mask.psrai.") ||
- Name.startswith("avx512.mask.psrli."))) {
- Intrinsic::ID ShiftID;
- if (Name.slice(12, 16) == "psll")
- ShiftID = Name[18] == 'd' ? Intrinsic::x86_avx512_mask_psll_di_512
- : Intrinsic::x86_avx512_mask_psll_qi_512;
- else if (Name.slice(12, 16) == "psra")
- ShiftID = Name[18] == 'd' ? Intrinsic::x86_avx512_mask_psra_di_512
- : Intrinsic::x86_avx512_mask_psra_qi_512;
- else
- ShiftID = Name[18] == 'd' ? Intrinsic::x86_avx512_mask_psrl_di_512
- : Intrinsic::x86_avx512_mask_psrl_qi_512;
- F->setName("llvm.x86." + Name + ".old");
- NewFn = Intrinsic::getDeclaration(F->getParent(), ShiftID);
- return true;
- }
- // Fix the FMA4 intrinsics to remove the 4
- if (IsX86 && Name.startswith("fma4.")) {
- F->setName("llvm.x86.fma" + Name.substr(5));
- NewFn = F;
- return true;
- }
// Upgrade any XOP PERMIL2 index operand still using a float/double vector.
- if (IsX86 && Name.startswith("xop.vpermil2")) {
+ if (IsX86 && Name.startswith("xop.vpermil2")) { // Added in 3.9
auto Params = F->getFunctionType()->params();
auto Idx = Params[2];
if (Idx->getScalarType()->isFloatingPointTy()) {
- F->setName("llvm.x86." + Name + ".old");
+ rename(F);
unsigned IdxSize = Idx->getPrimitiveSizeInBits();
unsigned EltSize = Idx->getScalarSizeInBits();
Intrinsic::ID Permil2ID;
@@ -517,13 +582,23 @@ static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
return Builder.CreateSelect(Mask, Op0, Op1);
}
-static Value *UpgradeX86PALIGNRIntrinsics(IRBuilder<> &Builder,
- Value *Op0, Value *Op1, Value *Shift,
- Value *Passthru, Value *Mask) {
+// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
+// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
+// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
+static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
+ Value *Op1, Value *Shift,
+ Value *Passthru, Value *Mask,
+ bool IsVALIGN) {
unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
unsigned NumElts = Op0->getType()->getVectorNumElements();
- assert(NumElts % 16 == 0);
+ assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
+ assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
+ assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
+
+ // Mask the immediate for VALIGN.
+ if (IsVALIGN)
+ ShiftVal &= (NumElts - 1);
// If palignr is shifting the pair of vectors more than the size of two
// lanes, emit zero.
@@ -540,10 +615,10 @@ static Value *UpgradeX86PALIGNRIntrinsics(IRBuilder<> &Builder,
uint32_t Indices[64];
// 256-bit palignr operates on 128-bit lanes so we need to handle that
- for (unsigned l = 0; l != NumElts; l += 16) {
+ for (unsigned l = 0; l < NumElts; l += 16) {
for (unsigned i = 0; i != 16; ++i) {
unsigned Idx = ShiftVal + i;
- if (Idx >= 16)
+ if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
Idx += NumElts - 16; // End of lane, switch operand.
Indices[l + i] = Idx + l;
}
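A standalone sketch, not part of the patch, that mirrors the index loop above
so the per-lane wrap can be checked in isolation (assumes nothing beyond a
plain C++ toolchain):

  #include <cstdio>

  int main() {
    const unsigned NumElts = 32;  // 256-bit byte palignr
    const unsigned ShiftVal = 4;  // example immediate
    const bool IsVALIGN = false;
    unsigned Indices[64];
    for (unsigned l = 0; l < NumElts; l += 16) {
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = ShiftVal + i;
        if (!IsVALIGN && Idx >= 16)
          Idx += NumElts - 16;    // end of lane, switch operand
        Indices[l + i] = Idx + l;
      }
    }
    for (unsigned i = 0; i != NumElts; ++i)
      printf("%u ", Indices[i]); // indices >= NumElts select the second
    return 0;                    // vector of the shuffle concatenation
  }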
@@ -601,7 +676,12 @@ static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
Value *Op0 = CI.getArgOperand(0);
Value *Op1 = CI.getArgOperand(1);
Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
- return Builder.CreateSelect(Cmp, Op0, Op1);
+ Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
+
+ if (CI.getNumArgOperands() == 4)
+ Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
+
+ return Res;
}
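A scalar model, illustrative only, of what the new mask handling adds per
element when the four-operand masked form is seen:

  int maskedSMaxModel(int Op0, int Op1, int Passthru, bool MaskBit) {
    int Res = (Op0 > Op1) ? Op0 : Op1; // the ICMP_SGT + select above
    return MaskBit ? Res : Passthru;   // per-element effect of EmitX86Select
  }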
static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
@@ -629,6 +709,30 @@ static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
std::max(NumElts, 8U)));
}
+// Replace a masked intrinsic with an older unmasked intrinsic.
+static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
+ Intrinsic::ID IID) {
+ Function *F = CI.getCalledFunction();
+ Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
+ Value *Rep = Builder.CreateCall(Intrin,
+ { CI.getArgOperand(0), CI.getArgOperand(1) });
+ return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
+}
+
+static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
+ Value* A = CI.getArgOperand(0);
+ Value* B = CI.getArgOperand(1);
+ Value* Src = CI.getArgOperand(2);
+ Value* Mask = CI.getArgOperand(3);
+
+ Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
+ Value* Cmp = Builder.CreateIsNotNull(AndNode);
+ Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
+ Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
+ Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
+ return Builder.CreateInsertElement(A, Select, (uint64_t)0);
+}
+
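A scalar model, illustrative only, of upgradeMaskedMove for a hypothetical
4-element vector: only lane 0 is controlled by the mask, and the upper lanes
always come from A.

  void maskedMoveModel(const float A[4], const float B[4],
                       const float Src[4], unsigned char Mask,
                       float Out[4]) {
    Out[0] = (Mask & 1) ? B[0] : Src[0]; // the CreateAnd/IsNotNull/select
    for (int i = 1; i < 4; ++i)
      Out[i] = A[i];                     // insert into A at index 0
  }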
/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
@@ -650,67 +754,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
if (IsX86)
Name = Name.substr(4);
- Value *Rep;
- // Upgrade packed integer vector compare intrinsics to compare instructions.
- if (IsX86 && (Name.startswith("sse2.pcmpeq.") ||
- Name.startswith("avx2.pcmpeq."))) {
- Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
- "pcmpeq");
- Rep = Builder.CreateSExt(Rep, CI->getType(), "");
- } else if (IsX86 && (Name.startswith("sse2.pcmpgt.") ||
- Name.startswith("avx2.pcmpgt."))) {
- Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
- "pcmpgt");
- Rep = Builder.CreateSExt(Rep, CI->getType(), "");
- } else if (IsX86 && Name.startswith("avx512.mask.pcmpeq.")) {
- Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_EQ);
- } else if (IsX86 && Name.startswith("avx512.mask.pcmpgt.")) {
- Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_SGT);
- } else if (IsX86 && (Name == "sse41.pmaxsb" ||
- Name == "sse2.pmaxs.w" ||
- Name == "sse41.pmaxsd" ||
- Name.startswith("avx2.pmaxs"))) {
- Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
- } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
- Name == "sse41.pmaxuw" ||
- Name == "sse41.pmaxud" ||
- Name.startswith("avx2.pmaxu"))) {
- Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
- } else if (IsX86 && (Name == "sse41.pminsb" ||
- Name == "sse2.pmins.w" ||
- Name == "sse41.pminsd" ||
- Name.startswith("avx2.pmins"))) {
- Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
- } else if (IsX86 && (Name == "sse2.pminu.b" ||
- Name == "sse41.pminuw" ||
- Name == "sse41.pminud" ||
- Name.startswith("avx2.pminu"))) {
- Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
- } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
- Name == "sse2.cvtps2pd" ||
- Name == "avx.cvtdq2.pd.256" ||
- Name == "avx.cvt.ps2.pd.256")) {
- // Lossless i32/float to double conversion.
- // Extract the bottom elements if necessary and convert to double vector.
- Value *Src = CI->getArgOperand(0);
- VectorType *SrcTy = cast<VectorType>(Src->getType());
- VectorType *DstTy = cast<VectorType>(CI->getType());
- Rep = CI->getArgOperand(0);
-
- unsigned NumDstElts = DstTy->getNumElements();
- if (NumDstElts < SrcTy->getNumElements()) {
- assert(NumDstElts == 2 && "Unexpected vector size");
- uint32_t ShuffleMask[2] = { 0, 1 };
- Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy),
- ShuffleMask);
- }
-
- bool Int2Double = (StringRef::npos != Name.find("cvtdq2"));
- if (Int2Double)
- Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
- else
- Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
- } else if (IsX86 && Name.startswith("sse4a.movnt.")) {
+ if (IsX86 && Name.startswith("sse4a.movnt.")) {
Module *M = F->getParent();
SmallVector<Metadata *, 1> Elts;
Elts.push_back(
@@ -734,8 +778,10 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// Remove intrinsic.
CI->eraseFromParent();
return;
- } else if (IsX86 && (Name.startswith("avx.movnt.") ||
- Name.startswith("avx512.storent."))) {
+ }
+
+ if (IsX86 && (Name.startswith("avx.movnt.") ||
+ Name.startswith("avx512.storent."))) {
Module *M = F->getParent();
SmallVector<Metadata *, 1> Elts;
Elts.push_back(
@@ -757,7 +803,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// Remove intrinsic.
CI->eraseFromParent();
return;
- } else if (IsX86 && Name == "sse2.storel.dq") {
+ }
+
+ if (IsX86 && Name == "sse2.storel.dq") {
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
@@ -772,9 +820,11 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// Remove intrinsic.
CI->eraseFromParent();
return;
- } else if (IsX86 && (Name.startswith("sse.storeu.") ||
- Name.startswith("sse2.storeu.") ||
- Name.startswith("avx.storeu."))) {
+ }
+
+ if (IsX86 && (Name.startswith("sse.storeu.") ||
+ Name.startswith("sse2.storeu.") ||
+ Name.startswith("avx.storeu."))) {
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
@@ -786,41 +836,140 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// Remove intrinsic.
CI->eraseFromParent();
return;
- } else if (IsX86 && (Name.startswith("avx512.mask.storeu.p") ||
- Name.startswith("avx512.mask.storeu.b.") ||
- Name.startswith("avx512.mask.storeu.w.") ||
- Name.startswith("avx512.mask.storeu.d.") ||
- Name.startswith("avx512.mask.storeu.q."))) {
+ }
+
+ if (IsX86 && (Name.startswith("avx512.mask.storeu."))) {
UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), /*Aligned*/false);
// Remove intrinsic.
CI->eraseFromParent();
return;
- } else if (IsX86 && (Name.startswith("avx512.mask.store.p") ||
- Name.startswith("avx512.mask.store.b.") ||
- Name.startswith("avx512.mask.store.w.") ||
- Name.startswith("avx512.mask.store.d.") ||
- Name.startswith("avx512.mask.store.q."))) {
+ }
+
+ if (IsX86 && (Name.startswith("avx512.mask.store."))) {
UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), /*Aligned*/true);
// Remove intrinsic.
CI->eraseFromParent();
return;
- } else if (IsX86 && (Name.startswith("avx512.mask.loadu.p") ||
- Name.startswith("avx512.mask.loadu.b.") ||
- Name.startswith("avx512.mask.loadu.w.") ||
- Name.startswith("avx512.mask.loadu.d.") ||
- Name.startswith("avx512.mask.loadu.q."))) {
+ }
+
+ Value *Rep;
+ // Upgrade packed integer vector compare intrinsics to compare instructions.
+ if (IsX86 && (Name.startswith("sse2.pcmpeq.") ||
+ Name.startswith("avx2.pcmpeq."))) {
+ Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
+ "pcmpeq");
+ Rep = Builder.CreateSExt(Rep, CI->getType(), "");
+ } else if (IsX86 && (Name.startswith("sse2.pcmpgt.") ||
+ Name.startswith("avx2.pcmpgt."))) {
+ Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
+ "pcmpgt");
+ Rep = Builder.CreateSExt(Rep, CI->getType(), "");
+ } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) {
+ Type *I32Ty = Type::getInt32Ty(C);
+ Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
+ ConstantInt::get(I32Ty, 0));
+ Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
+ ConstantInt::get(I32Ty, 0));
+ Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
+ Builder.CreateFAdd(Elt0, Elt1),
+ ConstantInt::get(I32Ty, 0));
+ } else if (IsX86 && (Name == "sse.sub.ss" || Name == "sse2.sub.sd")) {
+ Type *I32Ty = Type::getInt32Ty(C);
+ Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
+ ConstantInt::get(I32Ty, 0));
+ Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
+ ConstantInt::get(I32Ty, 0));
+ Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
+ Builder.CreateFSub(Elt0, Elt1),
+ ConstantInt::get(I32Ty, 0));
+ } else if (IsX86 && (Name == "sse.mul.ss" || Name == "sse2.mul.sd")) {
+ Type *I32Ty = Type::getInt32Ty(C);
+ Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
+ ConstantInt::get(I32Ty, 0));
+ Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
+ ConstantInt::get(I32Ty, 0));
+ Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
+ Builder.CreateFMul(Elt0, Elt1),
+ ConstantInt::get(I32Ty, 0));
+ } else if (IsX86 && (Name == "sse.div.ss" || Name == "sse2.div.sd")) {
+ Type *I32Ty = Type::getInt32Ty(C);
+ Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
+ ConstantInt::get(I32Ty, 0));
+ Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
+ ConstantInt::get(I32Ty, 0));
+ Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
+ Builder.CreateFDiv(Elt0, Elt1),
+ ConstantInt::get(I32Ty, 0));
+ } else if (IsX86 && Name.startswith("avx512.mask.pcmpeq.")) {
+ Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_EQ);
+ } else if (IsX86 && Name.startswith("avx512.mask.pcmpgt.")) {
+ Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_SGT);
+ } else if (IsX86 && (Name == "sse41.pmaxsb" ||
+ Name == "sse2.pmaxs.w" ||
+ Name == "sse41.pmaxsd" ||
+ Name.startswith("avx2.pmaxs") ||
+ Name.startswith("avx512.mask.pmaxs"))) {
+ Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
+ } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
+ Name == "sse41.pmaxuw" ||
+ Name == "sse41.pmaxud" ||
+ Name.startswith("avx2.pmaxu") ||
+ Name.startswith("avx512.mask.pmaxu"))) {
+ Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
+ } else if (IsX86 && (Name == "sse41.pminsb" ||
+ Name == "sse2.pmins.w" ||
+ Name == "sse41.pminsd" ||
+ Name.startswith("avx2.pmins") ||
+ Name.startswith("avx512.mask.pmins"))) {
+ Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
+ } else if (IsX86 && (Name == "sse2.pminu.b" ||
+ Name == "sse41.pminuw" ||
+ Name == "sse41.pminud" ||
+ Name.startswith("avx2.pminu") ||
+ Name.startswith("avx512.mask.pminu"))) {
+ Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
+ } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
+ Name == "sse2.cvtps2pd" ||
+ Name == "avx.cvtdq2.pd.256" ||
+ Name == "avx.cvt.ps2.pd.256" ||
+ Name.startswith("avx512.mask.cvtdq2pd.") ||
+ Name.startswith("avx512.mask.cvtudq2pd."))) {
+ // Lossless i32/float to double conversion.
+ // Extract the bottom elements if necessary and convert to double vector.
+ Value *Src = CI->getArgOperand(0);
+ VectorType *SrcTy = cast<VectorType>(Src->getType());
+ VectorType *DstTy = cast<VectorType>(CI->getType());
+ Rep = CI->getArgOperand(0);
+
+ unsigned NumDstElts = DstTy->getNumElements();
+ if (NumDstElts < SrcTy->getNumElements()) {
+ assert(NumDstElts == 2 && "Unexpected vector size");
+ uint32_t ShuffleMask[2] = { 0, 1 };
+ Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy),
+ ShuffleMask);
+ }
+
+ bool SInt2Double = (StringRef::npos != Name.find("cvtdq2"));
+ bool UInt2Double = (StringRef::npos != Name.find("cvtudq2"));
+ if (SInt2Double)
+ Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
+ else if (UInt2Double)
+ Rep = Builder.CreateUIToFP(Rep, DstTy, "cvtudq2pd");
+ else
+ Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
+
+ if (CI->getNumArgOperands() == 3)
+ Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
+ CI->getArgOperand(1));
+ } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
CI->getArgOperand(1), CI->getArgOperand(2),
/*Aligned*/false);
- } else if (IsX86 && (Name.startswith("avx512.mask.load.p") ||
- Name.startswith("avx512.mask.load.b.") ||
- Name.startswith("avx512.mask.load.w.") ||
- Name.startswith("avx512.mask.load.d.") ||
- Name.startswith("avx512.mask.load.q."))) {
+ } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
CI->getArgOperand(1),CI->getArgOperand(2),
/*Aligned*/true);
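For the scalar FP arithmetic upgrades in this hunk (sse.add.ss and friends),
a plain-C++ model of the extract/op/insert sequence, illustrative only:

  void addSSModel(const float A[4], const float B[4], float Out[4]) {
    for (int i = 0; i < 4; ++i)
      Out[i] = A[i];      // result starts as operand 0
    Out[0] = A[0] + B[0]; // extract element 0 of each, fadd, reinsert
  }

The masked cvtdq2pd/cvtudq2pd path in the same hunk follows the usual pattern:
compute the unmasked conversion, then blend with the passthru operand through
EmitX86Select when a mask operand is present.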
@@ -886,7 +1035,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
Rep = Builder.CreateZExt(Rep, CI->getType(), "");
- } else if (IsX86 && Name.startswith("avx.vbroadcast")) {
+ } else if (IsX86 && Name.startswith("avx.vbroadcast.s")) {
// Replace broadcasts with a series of insertelements.
Type *VecTy = CI->getType();
Type *EltTy = VecTy->getVectorElementType();
@@ -902,7 +1051,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
} else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
Name.startswith("sse41.pmovzx") ||
Name.startswith("avx2.pmovsx") ||
- Name.startswith("avx2.pmovzx"))) {
+ Name.startswith("avx2.pmovzx") ||
+ Name.startswith("avx512.mask.pmovsx") ||
+ Name.startswith("avx512.mask.pmovzx"))) {
VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
VectorType *DstTy = cast<VectorType>(CI->getType());
unsigned NumDstElts = DstTy->getNumElements();
@@ -918,15 +1069,25 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
bool DoSext = (StringRef::npos != Name.find("pmovsx"));
Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
: Builder.CreateZExt(SV, DstTy);
- } else if (IsX86 && Name == "avx2.vbroadcasti128") {
- // Replace vbroadcasts with a vector shuffle.
- Type *VT = VectorType::get(Type::getInt64Ty(C), 2);
+ // If there are 3 arguments, it's a masked intrinsic so we need a select.
+ if (CI->getNumArgOperands() == 3)
+ Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
+ CI->getArgOperand(1));
+ } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
+ Name == "avx2.vbroadcasti128")) {
+ // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
+ Type *EltTy = CI->getType()->getVectorElementType();
+ unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
+ Type *VT = VectorType::get(EltTy, NumSrcElts);
Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
PointerType::getUnqual(VT));
- Value *Load = Builder.CreateLoad(VT, Op);
- uint32_t Idxs[4] = { 0, 1, 0, 1 };
- Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
- Idxs);
+ Value *Load = Builder.CreateAlignedLoad(Op, 1);
+ if (NumSrcElts == 2)
+ Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
+ { 0, 1, 0, 1 });
+ else
+ Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
+ { 0, 1, 2, 3, 0, 1, 2, 3 });
} else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
Name.startswith("avx2.vbroadcast") ||
Name.startswith("avx512.pbroadcast") ||
@@ -942,11 +1103,19 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
- Rep = UpgradeX86PALIGNRIntrinsics(Builder, CI->getArgOperand(0),
- CI->getArgOperand(1),
- CI->getArgOperand(2),
- CI->getArgOperand(3),
- CI->getArgOperand(4));
+ Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
+ CI->getArgOperand(1),
+ CI->getArgOperand(2),
+ CI->getArgOperand(3),
+ CI->getArgOperand(4),
+ false);
+ } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
+ Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
+ CI->getArgOperand(1),
+ CI->getArgOperand(2),
+ CI->getArgOperand(3),
+ CI->getArgOperand(4),
+ true);
} else if (IsX86 && (Name == "sse2.psll.dq" ||
Name == "avx2.psll.dq")) {
// 128/256-bit shift left specified in bits.
@@ -988,21 +1157,25 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
} else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
- Name == "avx2.vinserti128")) {
+ Name == "avx2.vinserti128" ||
+ Name.startswith("avx512.mask.insert"))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
- VectorType *VecTy = cast<VectorType>(CI->getType());
- unsigned NumElts = VecTy->getNumElements();
+ unsigned DstNumElts = CI->getType()->getVectorNumElements();
+ unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
+ unsigned Scale = DstNumElts / SrcNumElts;
// Mask off the high bits of the immediate value; hardware ignores those.
- Imm = Imm & 1;
+ Imm = Imm % Scale;
- // Extend the second operand into a vector that is twice as big.
+ // Extend the second operand into a vector the size of the destination.
Value *UndefV = UndefValue::get(Op1->getType());
- SmallVector<uint32_t, 8> Idxs(NumElts);
- for (unsigned i = 0; i != NumElts; ++i)
+ SmallVector<uint32_t, 8> Idxs(DstNumElts);
+ for (unsigned i = 0; i != SrcNumElts; ++i)
Idxs[i] = i;
+ for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
+ Idxs[i] = SrcNumElts;
Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
// Insert the second operand into the first operand.
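Worked example, illustrative only, for the generalized insert: placing a
128-bit <4 x float> into a 512-bit <16 x float> gives Scale = 16/4 = 4, so the
immediate wraps modulo 4, and the widening shuffle mask is
{0,1,2,3, 4,4,4,4, 4,4,4,4, 4,4,4,4}, where index 4 (== SrcNumElts) selects a
lane of the undef operand.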
@@ -1016,33 +1189,41 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
// Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
- // The low half of the result is either the low half of the 1st operand
- // or the low half of the 2nd operand (the inserted vector).
- for (unsigned i = 0; i != NumElts / 2; ++i)
- Idxs[i] = Imm ? i : (i + NumElts);
- // The high half of the result is either the low half of the 2nd operand
- // (the inserted vector) or the high half of the 1st operand.
- for (unsigned i = NumElts / 2; i != NumElts; ++i)
- Idxs[i] = Imm ? (i + NumElts / 2) : i;
+    // First fill with identity mask.
+ for (unsigned i = 0; i != DstNumElts; ++i)
+ Idxs[i] = i;
+ // Then replace the elements where we need to insert.
+ for (unsigned i = 0; i != SrcNumElts; ++i)
+ Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
+
+ // If the intrinsic has a mask operand, handle that.
+ if (CI->getNumArgOperands() == 5)
+ Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
+ CI->getArgOperand(3));
} else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
- Name == "avx2.vextracti128")) {
+ Name == "avx2.vextracti128" ||
+ Name.startswith("avx512.mask.vextract"))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
- VectorType *VecTy = cast<VectorType>(CI->getType());
- unsigned NumElts = VecTy->getNumElements();
+ unsigned DstNumElts = CI->getType()->getVectorNumElements();
+ unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
+ unsigned Scale = SrcNumElts / DstNumElts;
// Mask off the high bits of the immediate value; hardware ignores those.
- Imm = Imm & 1;
+ Imm = Imm % Scale;
- // Get indexes for either the high half or low half of the input vector.
- SmallVector<uint32_t, 4> Idxs(NumElts);
- for (unsigned i = 0; i != NumElts; ++i) {
- Idxs[i] = Imm ? (i + NumElts) : i;
+ // Get indexes for the subvector of the input vector.
+ SmallVector<uint32_t, 8> Idxs(DstNumElts);
+ for (unsigned i = 0; i != DstNumElts; ++i) {
+ Idxs[i] = i + (Imm * DstNumElts);
}
+ Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
- Value *UndefV = UndefValue::get(Op0->getType());
- Rep = Builder.CreateShuffleVector(Op0, UndefV, Idxs);
+ // If the intrinsic has a mask operand, handle that.
+ if (CI->getNumArgOperands() == 4)
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
} else if (!IsX86 && Name == "stackprotectorcheck") {
Rep = nullptr;
} else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
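Continuing that example with Imm = 2: the identity mask {0..15} becomes
{0..7, 16,17,18,19, 12..15}, pulling the widened Op1 (whose elements sit at
indices 16 and up in the shuffle concatenation) into the third 128-bit slot.
The extract path above is the inverse: for a <4 x float> result taken from a
<16 x float> source, Scale = 4 and the mask is just
{4*Imm, 4*Imm+1, 4*Imm+2, 4*Imm+3}.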
@@ -1123,6 +1304,31 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
if (CI->getNumArgOperands() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
+ Value *Op0 = CI->getArgOperand(0);
+ Value *Op1 = CI->getArgOperand(1);
+ unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+ unsigned NumElts = CI->getType()->getVectorNumElements();
+
+ unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
+ unsigned HalfLaneElts = NumLaneElts / 2;
+
+ SmallVector<uint32_t, 16> Idxs(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ // Base index is the starting element of the lane.
+ Idxs[i] = i - (i % NumLaneElts);
+ // If we are half way through the lane switch to the other source.
+ if ((i % NumLaneElts) >= HalfLaneElts)
+ Idxs[i] += NumElts;
+      // Now select the specific element by adding HalfLaneElts bits from
+      // the immediate, wrapping around the immediate every 8 bits.
+ Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
+ }
+
+ Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
+
+ Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
+ CI->getArgOperand(3));
} else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
Name.startswith("avx512.mask.movshdup") ||
Name.startswith("avx512.mask.movsldup"))) {
@@ -1194,6 +1400,333 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.and.")) {
+ VectorType *FTy = cast<VectorType>(CI->getType());
+ VectorType *ITy = VectorType::getInteger(FTy);
+ Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
+ Builder.CreateBitCast(CI->getArgOperand(1), ITy));
+ Rep = Builder.CreateBitCast(Rep, FTy);
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.andn.")) {
+ VectorType *FTy = cast<VectorType>(CI->getType());
+ VectorType *ITy = VectorType::getInteger(FTy);
+ Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
+ Rep = Builder.CreateAnd(Rep,
+ Builder.CreateBitCast(CI->getArgOperand(1), ITy));
+ Rep = Builder.CreateBitCast(Rep, FTy);
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.or.")) {
+ VectorType *FTy = cast<VectorType>(CI->getType());
+ VectorType *ITy = VectorType::getInteger(FTy);
+ Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
+ Builder.CreateBitCast(CI->getArgOperand(1), ITy));
+ Rep = Builder.CreateBitCast(Rep, FTy);
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.xor.")) {
+ VectorType *FTy = cast<VectorType>(CI->getType());
+ VectorType *ITy = VectorType::getInteger(FTy);
+ Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
+ Builder.CreateBitCast(CI->getArgOperand(1), ITy));
+ Rep = Builder.CreateBitCast(Rep, FTy);
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
+ Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
+ Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
+ Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
+ } else if (IsX86 && (Name.startswith("avx512.mask.add.p"))) {
+ Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
+ Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
+ Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
+ Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.pshuf.b.")) {
+ VectorType *VecTy = cast<VectorType>(CI->getType());
+ Intrinsic::ID IID;
+ if (VecTy->getPrimitiveSizeInBits() == 128)
+ IID = Intrinsic::x86_ssse3_pshuf_b_128;
+ else if (VecTy->getPrimitiveSizeInBits() == 256)
+ IID = Intrinsic::x86_avx2_pshuf_b;
+ else if (VecTy->getPrimitiveSizeInBits() == 512)
+ IID = Intrinsic::x86_avx512_pshuf_b_512;
+ else
+ llvm_unreachable("Unexpected intrinsic");
+
+ Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
+ { CI->getArgOperand(0), CI->getArgOperand(1) });
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
+ } else if (IsX86 && (Name.startswith("avx512.mask.pmul.dq.") ||
+ Name.startswith("avx512.mask.pmulu.dq."))) {
+ bool IsUnsigned = Name[16] == 'u';
+ VectorType *VecTy = cast<VectorType>(CI->getType());
+ Intrinsic::ID IID;
+ if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
+ IID = Intrinsic::x86_sse41_pmuldq;
+ else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
+ IID = Intrinsic::x86_avx2_pmul_dq;
+ else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
+ IID = Intrinsic::x86_avx512_pmul_dq_512;
+ else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
+ IID = Intrinsic::x86_sse2_pmulu_dq;
+ else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
+ IID = Intrinsic::x86_avx2_pmulu_dq;
+ else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
+ IID = Intrinsic::x86_avx512_pmulu_dq_512;
+ else
+ llvm_unreachable("Unexpected intrinsic");
+
+ Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
+ { CI->getArgOperand(0), CI->getArgOperand(1) });
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
+ bool IsImmediate = Name[16] == 'i' ||
+ (Name.size() > 18 && Name[18] == 'i');
+ bool IsVariable = Name[16] == 'v';
+ char Size = Name[16] == '.' ? Name[17] :
+ Name[17] == '.' ? Name[18] :
+ Name[18] == '.' ? Name[19] :
+ Name[20];
+
+ Intrinsic::ID IID;
+ if (IsVariable && Name[17] != '.') {
+ if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
+ IID = Intrinsic::x86_avx2_psllv_q;
+ else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
+ IID = Intrinsic::x86_avx2_psllv_q_256;
+ else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
+ IID = Intrinsic::x86_avx2_psllv_d;
+ else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
+ IID = Intrinsic::x86_avx2_psllv_d_256;
+ else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
+ IID = Intrinsic::x86_avx512_psllv_w_128;
+ else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
+ IID = Intrinsic::x86_avx512_psllv_w_256;
+ else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
+ IID = Intrinsic::x86_avx512_psllv_w_512;
+ else
+ llvm_unreachable("Unexpected size");
+ } else if (Name.endswith(".128")) {
+ if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
+ IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
+ : Intrinsic::x86_sse2_psll_d;
+ else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
+ IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
+ : Intrinsic::x86_sse2_psll_q;
+ else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
+ IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
+ : Intrinsic::x86_sse2_psll_w;
+ else
+ llvm_unreachable("Unexpected size");
+ } else if (Name.endswith(".256")) {
+ if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
+ IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
+ : Intrinsic::x86_avx2_psll_d;
+ else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
+ IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
+ : Intrinsic::x86_avx2_psll_q;
+ else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
+ IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
+ : Intrinsic::x86_avx2_psll_w;
+ else
+ llvm_unreachable("Unexpected size");
+ } else {
+ if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
+ IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
+ IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
+ Intrinsic::x86_avx512_psll_d_512;
+ else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
+ IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
+ IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
+ Intrinsic::x86_avx512_psll_q_512;
+ else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
+ IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
+ : Intrinsic::x86_avx512_psll_w_512;
+ else
+ llvm_unreachable("Unexpected size");
+ }
+
+ Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
+ } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
+ bool IsImmediate = Name[16] == 'i' ||
+ (Name.size() > 18 && Name[18] == 'i');
+ bool IsVariable = Name[16] == 'v';
+ char Size = Name[16] == '.' ? Name[17] :
+ Name[17] == '.' ? Name[18] :
+ Name[18] == '.' ? Name[19] :
+ Name[20];
+
+ Intrinsic::ID IID;
+ if (IsVariable && Name[17] != '.') {
+ if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
+ IID = Intrinsic::x86_avx2_psrlv_q;
+ else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
+ IID = Intrinsic::x86_avx2_psrlv_q_256;
+ else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
+ IID = Intrinsic::x86_avx2_psrlv_d;
+ else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
+ IID = Intrinsic::x86_avx2_psrlv_d_256;
+ else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
+ IID = Intrinsic::x86_avx512_psrlv_w_128;
+ else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
+ IID = Intrinsic::x86_avx512_psrlv_w_256;
+ else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
+ IID = Intrinsic::x86_avx512_psrlv_w_512;
+ else
+ llvm_unreachable("Unexpected size");
+ } else if (Name.endswith(".128")) {
+ if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
+ IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
+ : Intrinsic::x86_sse2_psrl_d;
+ else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
+ IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
+ : Intrinsic::x86_sse2_psrl_q;
+ else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
+ IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
+ : Intrinsic::x86_sse2_psrl_w;
+ else
+ llvm_unreachable("Unexpected size");
+ } else if (Name.endswith(".256")) {
+ if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
+ IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
+ : Intrinsic::x86_avx2_psrl_d;
+ else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
+ IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
+ : Intrinsic::x86_avx2_psrl_q;
+ else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
+ IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
+ : Intrinsic::x86_avx2_psrl_w;
+ else
+ llvm_unreachable("Unexpected size");
+ } else {
+ if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
+ IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
+ IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
+ Intrinsic::x86_avx512_psrl_d_512;
+ else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
+ IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
+ IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
+ Intrinsic::x86_avx512_psrl_q_512;
+      else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
+ IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
+ : Intrinsic::x86_avx512_psrl_w_512;
+ else
+ llvm_unreachable("Unexpected size");
+ }
+
+ Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
+ } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
+ bool IsImmediate = Name[16] == 'i' ||
+ (Name.size() > 18 && Name[18] == 'i');
+ bool IsVariable = Name[16] == 'v';
+ char Size = Name[16] == '.' ? Name[17] :
+ Name[17] == '.' ? Name[18] :
+ Name[18] == '.' ? Name[19] :
+ Name[20];
+
+ Intrinsic::ID IID;
+ if (IsVariable && Name[17] != '.') {
+ if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
+ IID = Intrinsic::x86_avx2_psrav_d;
+ else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
+ IID = Intrinsic::x86_avx2_psrav_d_256;
+ else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
+ IID = Intrinsic::x86_avx512_psrav_w_128;
+ else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
+ IID = Intrinsic::x86_avx512_psrav_w_256;
+ else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
+ IID = Intrinsic::x86_avx512_psrav_w_512;
+ else
+ llvm_unreachable("Unexpected size");
+ } else if (Name.endswith(".128")) {
+ if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
+ IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
+ : Intrinsic::x86_sse2_psra_d;
+ else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
+ IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
+ IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
+ Intrinsic::x86_avx512_psra_q_128;
+ else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
+ IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
+ : Intrinsic::x86_sse2_psra_w;
+ else
+ llvm_unreachable("Unexpected size");
+ } else if (Name.endswith(".256")) {
+ if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
+ IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
+ : Intrinsic::x86_avx2_psra_d;
+ else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
+ IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
+ IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
+ Intrinsic::x86_avx512_psra_q_256;
+ else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
+ IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
+ : Intrinsic::x86_avx2_psra_w;
+ else
+ llvm_unreachable("Unexpected size");
+ } else {
+ if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
+ IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
+ IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
+ Intrinsic::x86_avx512_psra_d_512;
+ else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
+ IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
+ IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
+ Intrinsic::x86_avx512_psra_q_512;
+ else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
+ IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
+ : Intrinsic::x86_avx512_psra_w_512;
+ else
+ llvm_unreachable("Unexpected size");
+ }
+
+ Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
+ } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
+ Rep = upgradeMaskedMove(Builder, *CI);
+ } else if (IsX86 && Name.startswith("avx512.mask.vpermilvar.")) {
+ Intrinsic::ID IID;
+ if (Name.endswith("ps.128"))
+ IID = Intrinsic::x86_avx_vpermilvar_ps;
+ else if (Name.endswith("pd.128"))
+ IID = Intrinsic::x86_avx_vpermilvar_pd;
+ else if (Name.endswith("ps.256"))
+ IID = Intrinsic::x86_avx_vpermilvar_ps_256;
+ else if (Name.endswith("pd.256"))
+ IID = Intrinsic::x86_avx_vpermilvar_pd_256;
+ else if (Name.endswith("ps.512"))
+ IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
+ else if (Name.endswith("pd.512"))
+ IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
+ else
+ llvm_unreachable("Unexpected vpermilvar intrinsic");
+
+ Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
+ Rep = Builder.CreateCall(Intrin,
+ { CI->getArgOperand(0), CI->getArgOperand(1) });
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
} else {
llvm_unreachable("Unknown function for CallInst upgrade.");
}
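The psll/psrl/psra blocks above decode the operation from fixed character
positions in the already-stripped name. Illustrative decode, with indices
counted after the "x86." prefix has been removed:

  // "avx512.mask.psll.d.128"   Name[16] == '.' -> Size = Name[17] ('d')
  // "avx512.mask.psll.di.512"  Name[18] == 'i' -> immediate form
  // "avx512.mask.pslli.d"      Name[16] == 'i' -> immediate form
  // "avx512.mask.psllv2.di"    Name[16] == 'v' -> variable form, and
  //                            Name[17] ('2') picks the element count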
@@ -1212,12 +1745,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
default:
llvm_unreachable("Unknown function for CallInst upgrade.");
- case Intrinsic::x86_avx512_mask_psll_di_512:
- case Intrinsic::x86_avx512_mask_psra_di_512:
- case Intrinsic::x86_avx512_mask_psrl_di_512:
- case Intrinsic::x86_avx512_mask_psll_qi_512:
- case Intrinsic::x86_avx512_mask_psra_qi_512:
- case Intrinsic::x86_avx512_mask_psrl_qi_512:
case Intrinsic::arm_neon_vld1:
case Intrinsic::arm_neon_vld2:
case Intrinsic::arm_neon_vld3:
@@ -1239,6 +1766,11 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
return;
}
+ case Intrinsic::bitreverse:
+ CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)}));
+ CI->eraseFromParent();
+ return;
+
case Intrinsic::ctlz:
case Intrinsic::cttz:
assert(CI->getNumArgOperands() == 1 &&
@@ -1332,6 +1864,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
return;
}
+ case Intrinsic::invariant_start:
+ case Intrinsic::invariant_end:
case Intrinsic::masked_load:
case Intrinsic::masked_store: {
SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
@@ -1361,28 +1895,26 @@ void llvm::UpgradeCallsToIntrinsic(Function *F) {
}
}
-void llvm::UpgradeInstWithTBAATag(Instruction *I) {
- MDNode *MD = I->getMetadata(LLVMContext::MD_tbaa);
- assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
+MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
// Check if the tag uses struct-path aware TBAA format.
- if (isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3)
- return;
+ if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
+ return &MD;
- if (MD->getNumOperands() == 3) {
- Metadata *Elts[] = {MD->getOperand(0), MD->getOperand(1)};
- MDNode *ScalarType = MDNode::get(I->getContext(), Elts);
+ auto &Context = MD.getContext();
+ if (MD.getNumOperands() == 3) {
+ Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
+ MDNode *ScalarType = MDNode::get(Context, Elts);
// Create a MDNode <ScalarType, ScalarType, offset 0, const>
Metadata *Elts2[] = {ScalarType, ScalarType,
- ConstantAsMetadata::get(Constant::getNullValue(
- Type::getInt64Ty(I->getContext()))),
- MD->getOperand(2)};
- I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts2));
- } else {
- // Create a MDNode <MD, MD, offset 0>
- Metadata *Elts[] = {MD, MD, ConstantAsMetadata::get(Constant::getNullValue(
- Type::getInt64Ty(I->getContext())))};
- I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts));
+ ConstantAsMetadata::get(
+ Constant::getNullValue(Type::getInt64Ty(Context))),
+ MD.getOperand(2)};
+ return MDNode::get(Context, Elts2);
}
+ // Create a MDNode <MD, MD, offset 0>
+ Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
+ Type::getInt64Ty(Context)))};
+ return MDNode::get(Context, Elts);
}
Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
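For reference, the node shapes UpgradeTBAANode converts: an old scalar-format
tag !{!"name", !parent} (optionally carrying a third is-constant operand)
becomes the struct-path form !{!scalar, !scalar, i64 0}, with the constant
operand appended when present, where !scalar is the two-operand node rebuilt
from the original name and parent.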
@@ -1462,11 +1994,11 @@ bool llvm::UpgradeModuleFlags(Module &M) {
}
// "Objective-C Class Properties" is recently added for Objective-C. We
// upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
- // flag of value 0, so we can correctly report an error when trying to link
- // an ObjC bitcode without this module flag with an ObjC bitcode with this
- // module flag.
+ // flag of value 0, so we can correctly downgrade this flag when trying to
+ // link an ObjC bitcode without this module flag with an ObjC bitcode with
+ // this module flag.
if (HasObjCFlag && !HasClassProperties) {
- M.addModuleFlag(llvm::Module::Error, "Objective-C Class Properties",
+ M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
(uint32_t)0);
return true;
}
@@ -1524,7 +2056,7 @@ MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
if (!T)
return &N;
- if (!llvm::any_of(T->operands(), isOldLoopArgument))
+ if (none_of(T->operands(), isOldLoopArgument))
return &N;
SmallVector<Metadata *, 8> Ops;