summary refs log tree commit diff stats
path: root/contrib/llvm/lib/IR/AutoUpgrade.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/IR/AutoUpgrade.cpp')
-rw-r--r-- contrib/llvm/lib/IR/AutoUpgrade.cpp 954
1 files changed, 618 insertions, 336 deletions
diff --git a/contrib/llvm/lib/IR/AutoUpgrade.cpp b/contrib/llvm/lib/IR/AutoUpgrade.cpp
index e3a7bae..a501799 100644
--- a/contrib/llvm/lib/IR/AutoUpgrade.cpp
+++ b/contrib/llvm/lib/IR/AutoUpgrade.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/AutoUpgrade.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
@@ -33,10 +34,10 @@ using namespace llvm;
// Rename GV by appending ".old" to its current name.
static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
-// Upgrade the declarations of the SSE4.1 functions whose arguments have
+// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
-static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
- Function *&NewFn) {
+static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
+ Function *&NewFn) {
// Check whether this is an old version of the function, which received
// v4f32 arguments.
Type *Arg0Type = F->getFunctionType()->getParamType(0);
@@ -65,6 +66,270 @@ static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
return true;
}
+static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
+ // Returns true if Name matches one of the x86 intrinsic names listed below;
+ // the caller in this file passes Name with its "x86." prefix already removed.
+ // Each match should be marked with the llvm version that started
+ // autoupgrading it, so that upgrade code for older intrinsics can eventually
+ // be removed. It is currently undecided how that point will be determined.
+ if (Name.startswith("sse2.pcmpeq.") || // Added in 3.1
+ Name.startswith("sse2.pcmpgt.") || // Added in 3.1
+ Name.startswith("avx2.pcmpeq.") || // Added in 3.1
+ Name.startswith("avx2.pcmpgt.") || // Added in 3.1
+ Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
+ Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
+ Name == "sse.add.ss" || // Added in 4.0
+ Name == "sse2.add.sd" || // Added in 4.0
+ Name == "sse.sub.ss" || // Added in 4.0
+ Name == "sse2.sub.sd" || // Added in 4.0
+ Name == "sse.mul.ss" || // Added in 4.0
+ Name == "sse2.mul.sd" || // Added in 4.0
+ Name == "sse.div.ss" || // Added in 4.0
+ Name == "sse2.div.sd" || // Added in 4.0
+ Name == "sse41.pmaxsb" || // Added in 3.9
+ Name == "sse2.pmaxs.w" || // Added in 3.9
+ Name == "sse41.pmaxsd" || // Added in 3.9
+ Name == "sse2.pmaxu.b" || // Added in 3.9
+ Name == "sse41.pmaxuw" || // Added in 3.9
+ Name == "sse41.pmaxud" || // Added in 3.9
+ Name == "sse41.pminsb" || // Added in 3.9
+ Name == "sse2.pmins.w" || // Added in 3.9
+ Name == "sse41.pminsd" || // Added in 3.9
+ Name == "sse2.pminu.b" || // Added in 3.9
+ Name == "sse41.pminuw" || // Added in 3.9
+ Name == "sse41.pminud" || // Added in 3.9
+ Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
+ Name.startswith("avx2.pmax") || // Added in 3.9
+ Name.startswith("avx2.pmin") || // Added in 3.9
+ Name.startswith("avx512.mask.pmax") || // Added in 4.0
+ Name.startswith("avx512.mask.pmin") || // Added in 4.0
+ Name.startswith("avx2.vbroadcast") || // Added in 3.8
+ Name.startswith("avx2.pbroadcast") || // Added in 3.8
+ Name.startswith("avx.vpermil.") || // Added in 3.1
+ Name.startswith("sse2.pshuf") || // Added in 3.9
+ Name.startswith("avx512.pbroadcast") || // Added in 3.9
+ Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
+ Name.startswith("avx512.mask.movddup") || // Added in 3.9
+ Name.startswith("avx512.mask.movshdup") || // Added in 3.9
+ Name.startswith("avx512.mask.movsldup") || // Added in 3.9
+ Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
+ Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
+ Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
+ Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
+ Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
+ Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
+ Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
+ Name.startswith("avx512.mask.punpckl") || // Added in 3.9
+ Name.startswith("avx512.mask.punpckh") || // Added in 3.9
+ Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
+ Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
+ Name.startswith("avx512.mask.pand.") || // Added in 3.9
+ Name.startswith("avx512.mask.pandn.") || // Added in 3.9
+ Name.startswith("avx512.mask.por.") || // Added in 3.9
+ Name.startswith("avx512.mask.pxor.") || // Added in 3.9
+ Name.startswith("avx512.mask.and.") || // Added in 3.9
+ Name.startswith("avx512.mask.andn.") || // Added in 3.9
+ Name.startswith("avx512.mask.or.") || // Added in 3.9
+ Name.startswith("avx512.mask.xor.") || // Added in 3.9
+ Name.startswith("avx512.mask.padd.") || // Added in 4.0
+ Name.startswith("avx512.mask.psub.") || // Added in 4.0
+ Name.startswith("avx512.mask.pmull.") || // Added in 4.0
+ Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
+ Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
+ Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
+ Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
+ Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
+ Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
+ Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
+ Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
+ Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
+ Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
+ Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
+ Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
+ Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
+ Name == "avx512.mask.add.pd.128" || // Added in 4.0
+ Name == "avx512.mask.add.pd.256" || // Added in 4.0
+ Name == "avx512.mask.add.ps.128" || // Added in 4.0
+ Name == "avx512.mask.add.ps.256" || // Added in 4.0
+ Name == "avx512.mask.div.pd.128" || // Added in 4.0
+ Name == "avx512.mask.div.pd.256" || // Added in 4.0
+ Name == "avx512.mask.div.ps.128" || // Added in 4.0
+ Name == "avx512.mask.div.ps.256" || // Added in 4.0
+ Name == "avx512.mask.mul.pd.128" || // Added in 4.0
+ Name == "avx512.mask.mul.pd.256" || // Added in 4.0
+ Name == "avx512.mask.mul.ps.128" || // Added in 4.0
+ Name == "avx512.mask.mul.ps.256" || // Added in 4.0
+ Name == "avx512.mask.sub.pd.128" || // Added in 4.0
+ Name == "avx512.mask.sub.pd.256" || // Added in 4.0
+ Name == "avx512.mask.sub.ps.128" || // Added in 4.0
+ Name == "avx512.mask.sub.ps.256" || // Added in 4.0
+ Name == "avx512.mask.max.pd.128" || // Added in 5.0
+ Name == "avx512.mask.max.pd.256" || // Added in 5.0
+ Name == "avx512.mask.max.ps.128" || // Added in 5.0
+ Name == "avx512.mask.max.ps.256" || // Added in 5.0
+ Name == "avx512.mask.min.pd.128" || // Added in 5.0
+ Name == "avx512.mask.min.pd.256" || // Added in 5.0
+ Name == "avx512.mask.min.ps.128" || // Added in 5.0
+ Name == "avx512.mask.min.ps.256" || // Added in 5.0
+ Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
+ Name.startswith("avx512.mask.psll.d") || // Added in 4.0
+ Name.startswith("avx512.mask.psll.q") || // Added in 4.0
+ Name.startswith("avx512.mask.psll.w") || // Added in 4.0
+ Name.startswith("avx512.mask.psra.d") || // Added in 4.0
+ Name.startswith("avx512.mask.psra.q") || // Added in 4.0
+ Name.startswith("avx512.mask.psra.w") || // Added in 4.0
+ Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
+ Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
+ Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
+ Name.startswith("avx512.mask.pslli") || // Added in 4.0
+ Name.startswith("avx512.mask.psrai") || // Added in 4.0
+ Name.startswith("avx512.mask.psrli") || // Added in 4.0
+ Name.startswith("avx512.mask.psllv") || // Added in 4.0
+ Name.startswith("avx512.mask.psrav") || // Added in 4.0
+ Name.startswith("avx512.mask.psrlv") || // Added in 4.0
+ Name.startswith("sse41.pmovsx") || // Added in 3.8
+ Name.startswith("sse41.pmovzx") || // Added in 3.9
+ Name.startswith("avx2.pmovsx") || // Added in 3.9
+ Name.startswith("avx2.pmovzx") || // Added in 3.9
+ Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
+ Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
+ Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
+ Name == "sse2.cvtdq2pd" || // Added in 3.9
+ Name == "sse2.cvtps2pd" || // Added in 3.9
+ Name == "avx.cvtdq2.pd.256" || // Added in 3.9
+ Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
+ Name.startswith("avx.vinsertf128.") || // Added in 3.7
+ Name == "avx2.vinserti128" || // Added in 3.7
+ Name.startswith("avx512.mask.insert") || // Added in 4.0
+ Name.startswith("avx.vextractf128.") || // Added in 3.7
+ Name == "avx2.vextracti128" || // Added in 3.7
+ Name.startswith("avx512.mask.vextract") || // Added in 4.0
+ Name.startswith("sse4a.movnt.") || // Added in 3.9
+ Name.startswith("avx.movnt.") || // Added in 3.2
+ Name.startswith("avx512.storent.") || // Added in 3.9
+ Name == "sse41.movntdqa" || // Added in 5.0
+ Name == "avx2.movntdqa" || // Added in 5.0
+ Name == "avx512.movntdqa" || // Added in 5.0
+ Name == "sse2.storel.dq" || // Added in 3.9
+ Name.startswith("sse.storeu.") || // Added in 3.9
+ Name.startswith("sse2.storeu.") || // Added in 3.9
+ Name.startswith("avx.storeu.") || // Added in 3.9
+ Name.startswith("avx512.mask.storeu.") || // Added in 3.9
+ Name.startswith("avx512.mask.store.p") || // Added in 3.9
+ Name.startswith("avx512.mask.store.b.") || // Added in 3.9
+ Name.startswith("avx512.mask.store.w.") || // Added in 3.9
+ Name.startswith("avx512.mask.store.d.") || // Added in 3.9
+ Name.startswith("avx512.mask.store.q.") || // Added in 3.9
+ Name.startswith("avx512.mask.loadu.") || // Added in 3.9
+ Name.startswith("avx512.mask.load.") || // Added in 3.9
+ Name == "sse42.crc32.64.8" || // Added in 3.4
+ Name.startswith("avx.vbroadcast.s") || // Added in 3.5
+ Name.startswith("avx512.mask.palignr.") || // Added in 3.9
+ Name.startswith("avx512.mask.valign.") || // Added in 4.0
+ Name.startswith("sse2.psll.dq") || // Added in 3.7
+ Name.startswith("sse2.psrl.dq") || // Added in 3.7
+ Name.startswith("avx2.psll.dq") || // Added in 3.7
+ Name.startswith("avx2.psrl.dq") || // Added in 3.7
+ Name.startswith("avx512.psll.dq") || // Added in 3.9
+ Name.startswith("avx512.psrl.dq") || // Added in 3.9
+ Name == "sse41.pblendw" || // Added in 3.7
+ Name.startswith("sse41.blendp") || // Added in 3.7
+ Name.startswith("avx.blend.p") || // Added in 3.7
+ Name == "avx2.pblendw" || // Added in 3.7
+ Name.startswith("avx2.pblendd.") || // Added in 3.7
+ Name.startswith("avx.vbroadcastf128") || // Added in 4.0
+ Name == "avx2.vbroadcasti128" || // Added in 3.7
+ Name == "xop.vpcmov" || // Added in 3.8
+ Name == "xop.vpcmov.256" || // Added in 5.0
+ Name.startswith("avx512.mask.move.s") || // Added in 4.0
+ Name.startswith("avx512.cvtmask2") || // Added in 5.0
+ (Name.startswith("xop.vpcom") && // Added in 3.2
+ F->arg_size() == 2)) // F is consulted only here: the declaration must take two arguments.
+ return true;
+
+ return false;
+}
+
+static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
+ Function *&NewFn) {
+ // Only handle intrinsics that start with "x86."; any other name returns false.
+ if (!Name.startswith("x86."))
+ return false;
+ // Remove the 4-character "x86." prefix.
+ Name = Name.substr(4);
+
+ if (ShouldUpgradeX86Intrinsic(F, Name)) {
+ NewFn = nullptr; // Reported as upgradable, but no replacement declaration is created here.
+ return true;
+ }
+
+ // SSE4.1 ptest functions may have an old signature.
+ if (Name.startswith("sse41.ptest")) { // Added in 3.2
+ if (Name.substr(11) == "c") // 11 is the length of "sse41.ptest"; the rest is the suffix.
+ return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
+ if (Name.substr(11) == "z")
+ return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
+ if (Name.substr(11) == "nzc")
+ return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
+ }
+ // Several blend and other instructions with masks used the wrong number of
+ // bits.
+ if (Name == "sse41.insertps") // Added in 3.6
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
+ NewFn);
+ if (Name == "sse41.dppd") // Added in 3.6
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
+ NewFn);
+ if (Name == "sse41.dpps") // Added in 3.6
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
+ NewFn);
+ if (Name == "sse41.mpsadbw") // Added in 3.6
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
+ NewFn);
+ if (Name == "avx.dp.ps.256") // Added in 3.6
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
+ NewFn);
+ if (Name == "avx2.mpsadbw") // Added in 3.6
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
+ NewFn);
+
+ // frcz.ss/sd may need to have an argument dropped. Added in 3.2
+ if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::x86_xop_vfrcz_ss);
+ return true;
+ }
+ if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::x86_xop_vfrcz_sd);
+ return true;
+ }
+ // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
+ if (Name.startswith("xop.vpermil2")) { // Added in 3.9
+ auto Idx = F->getFunctionType()->getParamType(2); // Type of the third parameter.
+ if (Idx->isFPOrFPVectorTy()) {
+ rename(F);
+ unsigned IdxSize = Idx->getPrimitiveSizeInBits();
+ unsigned EltSize = Idx->getScalarSizeInBits();
+ Intrinsic::ID Permil2ID;
+ if (EltSize == 64 && IdxSize == 128)
+ Permil2ID = Intrinsic::x86_xop_vpermil2pd;
+ else if (EltSize == 32 && IdxSize == 128)
+ Permil2ID = Intrinsic::x86_xop_vpermil2ps;
+ else if (EltSize == 64 && IdxSize == 256)
+ Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
+ else // Every other size combination selects the 256-bit ps variant.
+ Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
+ return true;
+ }
+ }
+
+ return false; // No x86 upgrade rule matched this declaration.
+}
+
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
assert(F && "Illegal to upgrade a non-existent Function.");
@@ -155,26 +420,31 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
}
break;
}
- case 'i': {
- if (Name.startswith("invariant.start")) {
+ case 'i':
+ case 'l': {
+ bool IsLifetimeStart = Name.startswith("lifetime.start");
+ if (IsLifetimeStart || Name.startswith("invariant.start")) {
+ Intrinsic::ID ID = IsLifetimeStart ?
+ Intrinsic::lifetime_start : Intrinsic::invariant_start;
auto Args = F->getFunctionType()->params();
Type* ObjectPtr[1] = {Args[1]};
- if (F->getName() !=
- Intrinsic::getName(Intrinsic::invariant_start, ObjectPtr)) {
+ if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
rename(F);
- NewFn = Intrinsic::getDeclaration(
- F->getParent(), Intrinsic::invariant_start, ObjectPtr);
+ NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
return true;
}
}
- if (Name.startswith("invariant.end")) {
+
+ bool IsLifetimeEnd = Name.startswith("lifetime.end");
+ if (IsLifetimeEnd || Name.startswith("invariant.end")) {
+ Intrinsic::ID ID = IsLifetimeEnd ?
+ Intrinsic::lifetime_end : Intrinsic::invariant_end;
+
auto Args = F->getFunctionType()->params();
- Type* ObjectPtr[1] = {Args[2]};
- if (F->getName() !=
- Intrinsic::getName(Intrinsic::invariant_end, ObjectPtr)) {
+ Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
+ if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::invariant_end, ObjectPtr);
+ NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
return true;
}
}
@@ -202,18 +472,72 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
return true;
}
}
+ // Rename gather/scatter intrinsics that have no address space overloading
+ // to the new overload, which includes an address space.
+ if (Name.startswith("masked.gather.")) {
+ Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
+ if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::masked_gather, Tys);
+ return true;
+ }
+ }
+ if (Name.startswith("masked.scatter.")) {
+ auto Args = F->getFunctionType()->params();
+ Type *Tys[] = {Args[0], Args[1]};
+ if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::masked_scatter, Tys);
+ return true;
+ }
+ }
break;
}
+ case 'n': {
+ if (Name.startswith("nvvm.")) {
+ Name = Name.substr(5);
+
+ // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
+ Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
+ .Cases("brev32", "brev64", Intrinsic::bitreverse)
+ .Case("clz.i", Intrinsic::ctlz)
+ .Case("popc.i", Intrinsic::ctpop)
+ .Default(Intrinsic::not_intrinsic);
+ if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
+ NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
+ {F->getReturnType()});
+ return true;
+ }
+ // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
+ // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
+ //
+ // TODO: We could add lohi.i2d.
+ bool Expand = StringSwitch<bool>(Name)
+ .Cases("abs.i", "abs.ll", true)
+ .Cases("clz.ll", "popc.ll", "h2f", true)
+ .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
+ .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
+ .Default(false);
+ if (Expand) {
+ NewFn = nullptr;
+ return true;
+ }
+ }
+ break;
+ }
case 'o':
// We only need to change the name to match the mangling including the
// address space.
- if (F->arg_size() == 2 && Name.startswith("objectsize.")) {
+ if (Name.startswith("objectsize.")) {
Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
- if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
+ if (F->arg_size() == 2 ||
+ F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::objectsize, Tys);
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
+ Tys);
return true;
}
}
@@ -226,236 +550,15 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
}
break;
- case 'x': {
- bool IsX86 = Name.startswith("x86.");
- if (IsX86)
- Name = Name.substr(4);
-
- // All of the intrinsics matches below should be marked with which llvm
- // version started autoupgrading them. At some point in the future we would
- // like to use this information to remove upgrade code for some older
- // intrinsics. It is currently undecided how we will determine that future
- // point.
- if (IsX86 &&
- (Name.startswith("sse2.pcmpeq.") || // Added in 3.1
- Name.startswith("sse2.pcmpgt.") || // Added in 3.1
- Name.startswith("avx2.pcmpeq.") || // Added in 3.1
- Name.startswith("avx2.pcmpgt.") || // Added in 3.1
- Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
- Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
- Name == "sse.add.ss" || // Added in 4.0
- Name == "sse2.add.sd" || // Added in 4.0
- Name == "sse.sub.ss" || // Added in 4.0
- Name == "sse2.sub.sd" || // Added in 4.0
- Name == "sse.mul.ss" || // Added in 4.0
- Name == "sse2.mul.sd" || // Added in 4.0
- Name == "sse.div.ss" || // Added in 4.0
- Name == "sse2.div.sd" || // Added in 4.0
- Name == "sse41.pmaxsb" || // Added in 3.9
- Name == "sse2.pmaxs.w" || // Added in 3.9
- Name == "sse41.pmaxsd" || // Added in 3.9
- Name == "sse2.pmaxu.b" || // Added in 3.9
- Name == "sse41.pmaxuw" || // Added in 3.9
- Name == "sse41.pmaxud" || // Added in 3.9
- Name == "sse41.pminsb" || // Added in 3.9
- Name == "sse2.pmins.w" || // Added in 3.9
- Name == "sse41.pminsd" || // Added in 3.9
- Name == "sse2.pminu.b" || // Added in 3.9
- Name == "sse41.pminuw" || // Added in 3.9
- Name == "sse41.pminud" || // Added in 3.9
- Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
- Name.startswith("avx2.pmax") || // Added in 3.9
- Name.startswith("avx2.pmin") || // Added in 3.9
- Name.startswith("avx512.mask.pmax") || // Added in 4.0
- Name.startswith("avx512.mask.pmin") || // Added in 4.0
- Name.startswith("avx2.vbroadcast") || // Added in 3.8
- Name.startswith("avx2.pbroadcast") || // Added in 3.8
- Name.startswith("avx.vpermil.") || // Added in 3.1
- Name.startswith("sse2.pshuf") || // Added in 3.9
- Name.startswith("avx512.pbroadcast") || // Added in 3.9
- Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
- Name.startswith("avx512.mask.movddup") || // Added in 3.9
- Name.startswith("avx512.mask.movshdup") || // Added in 3.9
- Name.startswith("avx512.mask.movsldup") || // Added in 3.9
- Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
- Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
- Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
- Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
- Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
- Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
- Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
- Name.startswith("avx512.mask.punpckl") || // Added in 3.9
- Name.startswith("avx512.mask.punpckh") || // Added in 3.9
- Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
- Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
- Name.startswith("avx512.mask.pand.") || // Added in 3.9
- Name.startswith("avx512.mask.pandn.") || // Added in 3.9
- Name.startswith("avx512.mask.por.") || // Added in 3.9
- Name.startswith("avx512.mask.pxor.") || // Added in 3.9
- Name.startswith("avx512.mask.and.") || // Added in 3.9
- Name.startswith("avx512.mask.andn.") || // Added in 3.9
- Name.startswith("avx512.mask.or.") || // Added in 3.9
- Name.startswith("avx512.mask.xor.") || // Added in 3.9
- Name.startswith("avx512.mask.padd.") || // Added in 4.0
- Name.startswith("avx512.mask.psub.") || // Added in 4.0
- Name.startswith("avx512.mask.pmull.") || // Added in 4.0
- Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
- Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
- Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
- Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
- Name == "avx512.mask.add.pd.128" || // Added in 4.0
- Name == "avx512.mask.add.pd.256" || // Added in 4.0
- Name == "avx512.mask.add.ps.128" || // Added in 4.0
- Name == "avx512.mask.add.ps.256" || // Added in 4.0
- Name == "avx512.mask.div.pd.128" || // Added in 4.0
- Name == "avx512.mask.div.pd.256" || // Added in 4.0
- Name == "avx512.mask.div.ps.128" || // Added in 4.0
- Name == "avx512.mask.div.ps.256" || // Added in 4.0
- Name == "avx512.mask.mul.pd.128" || // Added in 4.0
- Name == "avx512.mask.mul.pd.256" || // Added in 4.0
- Name == "avx512.mask.mul.ps.128" || // Added in 4.0
- Name == "avx512.mask.mul.ps.256" || // Added in 4.0
- Name == "avx512.mask.sub.pd.128" || // Added in 4.0
- Name == "avx512.mask.sub.pd.256" || // Added in 4.0
- Name == "avx512.mask.sub.ps.128" || // Added in 4.0
- Name == "avx512.mask.sub.ps.256" || // Added in 4.0
- Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
- Name.startswith("avx512.mask.psll.d") || // Added in 4.0
- Name.startswith("avx512.mask.psll.q") || // Added in 4.0
- Name.startswith("avx512.mask.psll.w") || // Added in 4.0
- Name.startswith("avx512.mask.psra.d") || // Added in 4.0
- Name.startswith("avx512.mask.psra.q") || // Added in 4.0
- Name.startswith("avx512.mask.psra.w") || // Added in 4.0
- Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
- Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
- Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
- Name.startswith("avx512.mask.pslli") || // Added in 4.0
- Name.startswith("avx512.mask.psrai") || // Added in 4.0
- Name.startswith("avx512.mask.psrli") || // Added in 4.0
- Name.startswith("avx512.mask.psllv") || // Added in 4.0
- Name.startswith("avx512.mask.psrav") || // Added in 4.0
- Name.startswith("avx512.mask.psrlv") || // Added in 4.0
- Name.startswith("sse41.pmovsx") || // Added in 3.8
- Name.startswith("sse41.pmovzx") || // Added in 3.9
- Name.startswith("avx2.pmovsx") || // Added in 3.9
- Name.startswith("avx2.pmovzx") || // Added in 3.9
- Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
- Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
- Name == "sse2.cvtdq2pd" || // Added in 3.9
- Name == "sse2.cvtps2pd" || // Added in 3.9
- Name == "avx.cvtdq2.pd.256" || // Added in 3.9
- Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
- Name.startswith("avx.vinsertf128.") || // Added in 3.7
- Name == "avx2.vinserti128" || // Added in 3.7
- Name.startswith("avx512.mask.insert") || // Added in 4.0
- Name.startswith("avx.vextractf128.") || // Added in 3.7
- Name == "avx2.vextracti128" || // Added in 3.7
- Name.startswith("avx512.mask.vextract") || // Added in 4.0
- Name.startswith("sse4a.movnt.") || // Added in 3.9
- Name.startswith("avx.movnt.") || // Added in 3.2
- Name.startswith("avx512.storent.") || // Added in 3.9
- Name == "sse2.storel.dq" || // Added in 3.9
- Name.startswith("sse.storeu.") || // Added in 3.9
- Name.startswith("sse2.storeu.") || // Added in 3.9
- Name.startswith("avx.storeu.") || // Added in 3.9
- Name.startswith("avx512.mask.storeu.") || // Added in 3.9
- Name.startswith("avx512.mask.store.p") || // Added in 3.9
- Name.startswith("avx512.mask.store.b.") || // Added in 3.9
- Name.startswith("avx512.mask.store.w.") || // Added in 3.9
- Name.startswith("avx512.mask.store.d.") || // Added in 3.9
- Name.startswith("avx512.mask.store.q.") || // Added in 3.9
- Name.startswith("avx512.mask.loadu.") || // Added in 3.9
- Name.startswith("avx512.mask.load.") || // Added in 3.9
- Name == "sse42.crc32.64.8" || // Added in 3.4
- Name.startswith("avx.vbroadcast.s") || // Added in 3.5
- Name.startswith("avx512.mask.palignr.") || // Added in 3.9
- Name.startswith("avx512.mask.valign.") || // Added in 4.0
- Name.startswith("sse2.psll.dq") || // Added in 3.7
- Name.startswith("sse2.psrl.dq") || // Added in 3.7
- Name.startswith("avx2.psll.dq") || // Added in 3.7
- Name.startswith("avx2.psrl.dq") || // Added in 3.7
- Name.startswith("avx512.psll.dq") || // Added in 3.9
- Name.startswith("avx512.psrl.dq") || // Added in 3.9
- Name == "sse41.pblendw" || // Added in 3.7
- Name.startswith("sse41.blendp") || // Added in 3.7
- Name.startswith("avx.blend.p") || // Added in 3.7
- Name == "avx2.pblendw" || // Added in 3.7
- Name.startswith("avx2.pblendd.") || // Added in 3.7
- Name.startswith("avx.vbroadcastf128") || // Added in 4.0
- Name == "avx2.vbroadcasti128" || // Added in 3.7
- Name == "xop.vpcmov" || // Added in 3.8
- Name.startswith("avx512.mask.move.s") || // Added in 4.0
- (Name.startswith("xop.vpcom") && // Added in 3.2
- F->arg_size() == 2))) {
- NewFn = nullptr;
+ case 'x':
+ if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
return true;
- }
- // SSE4.1 ptest functions may have an old signature.
- if (IsX86 && Name.startswith("sse41.ptest")) { // Added in 3.2
- if (Name.substr(11) == "c")
- return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
- if (Name.substr(11) == "z")
- return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
- if (Name.substr(11) == "nzc")
- return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
- }
- // Several blend and other instructions with masks used the wrong number of
- // bits.
- if (IsX86 && Name == "sse41.insertps") // Added in 3.6
- return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
- NewFn);
- if (IsX86 && Name == "sse41.dppd") // Added in 3.6
- return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
- NewFn);
- if (IsX86 && Name == "sse41.dpps") // Added in 3.6
- return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
- NewFn);
- if (IsX86 && Name == "sse41.mpsadbw") // Added in 3.6
- return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
- NewFn);
- if (IsX86 && Name == "avx.dp.ps.256") // Added in 3.6
- return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
- NewFn);
- if (IsX86 && Name == "avx2.mpsadbw") // Added in 3.6
- return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
- NewFn);
-
- // frcz.ss/sd may need to have an argument dropped. Added in 3.2
- if (IsX86 && Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
- rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::x86_xop_vfrcz_ss);
- return true;
- }
- if (IsX86 && Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
- rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::x86_xop_vfrcz_sd);
- return true;
- }
- // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
- if (IsX86 && Name.startswith("xop.vpermil2")) { // Added in 3.9
- auto Params = F->getFunctionType()->params();
- auto Idx = Params[2];
- if (Idx->getScalarType()->isFloatingPointTy()) {
- rename(F);
- unsigned IdxSize = Idx->getPrimitiveSizeInBits();
- unsigned EltSize = Idx->getScalarSizeInBits();
- Intrinsic::ID Permil2ID;
- if (EltSize == 64 && IdxSize == 128)
- Permil2ID = Intrinsic::x86_xop_vpermil2pd;
- else if (EltSize == 32 && IdxSize == 128)
- Permil2ID = Intrinsic::x86_xop_vpermil2ps;
- else if (EltSize == 64 && IdxSize == 256)
- Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
- else
- Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
- NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
- return true;
- }
- }
- break;
}
+ // Remangle our intrinsic since we upgrade the mangling
+ auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
+ if (Result != None) {
+ NewFn = Result.getValue();
+ return true;
}
// This may not belong here. This function is effectively being overloaded
@@ -685,12 +788,30 @@ static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
}
static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
- ICmpInst::Predicate Pred) {
+ unsigned CC, bool Signed) {
Value *Op0 = CI.getArgOperand(0);
unsigned NumElts = Op0->getType()->getVectorNumElements();
- Value *Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
- Value *Mask = CI.getArgOperand(2);
+ Value *Cmp;
+ if (CC == 3) {
+ Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
+ } else if (CC == 7) {
+ Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
+ } else {
+ ICmpInst::Predicate Pred;
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case 0: Pred = ICmpInst::ICMP_EQ; break;
+ case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
+ case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
+ case 4: Pred = ICmpInst::ICMP_NE; break;
+ case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
+ case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
+ }
+ Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
+ }
+
+ Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
const auto *C = dyn_cast<Constant>(Mask);
if (!C || !C->isAllOnesValue())
Cmp = Builder.CreateAnd(Cmp, getX86MaskVec(Builder, Mask, NumElts));
@@ -733,6 +854,15 @@ static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
return Builder.CreateInsertElement(A, Select, (uint64_t)0);
}
+/// Pass the call's first argument through getX86MaskVec and sign-extend the result to the call's return type.
+static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
+ Value* Op = CI.getArgOperand(0);
+ Type* ReturnOp = CI.getType();
+ unsigned NumElts = CI.getType()->getVectorNumElements(); // Element count of the call's result vector type.
+ Value *Mask = getX86MaskVec(Builder, Op, NumElts);
+ return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2"); // "vpmovm2" is the name given to the new value.
+}
+
/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
@@ -753,6 +883,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
bool IsX86 = Name.startswith("x86.");
if (IsX86)
Name = Name.substr(4);
+ bool IsNVVM = Name.startswith("nvvm.");
+ if (IsNVVM)
+ Name = Name.substr(5);
if (IsX86 && Name.startswith("sse4a.movnt.")) {
Module *M = F->getParent();
@@ -838,18 +971,11 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
return;
}
- if (IsX86 && (Name.startswith("avx512.mask.storeu."))) {
+ if (IsX86 && (Name.startswith("avx512.mask.store"))) {
+ // "avx512.mask.storeu." or "avx512.mask.store."
+ bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), /*Aligned*/false);
-
- // Remove intrinsic.
- CI->eraseFromParent();
- return;
- }
-
- if (IsX86 && (Name.startswith("avx512.mask.store."))) {
- UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), /*Aligned*/true);
+ CI->getArgOperand(2), Aligned);
// Remove intrinsic.
CI->eraseFromParent();
@@ -858,15 +984,12 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Value *Rep;
// Upgrade packed integer vector compare intrinsics to compare instructions.
- if (IsX86 && (Name.startswith("sse2.pcmpeq.") ||
- Name.startswith("avx2.pcmpeq."))) {
- Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
- "pcmpeq");
- Rep = Builder.CreateSExt(Rep, CI->getType(), "");
- } else if (IsX86 && (Name.startswith("sse2.pcmpgt.") ||
- Name.startswith("avx2.pcmpgt."))) {
- Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
- "pcmpgt");
+ if (IsX86 && (Name.startswith("sse2.pcmp") ||
+ Name.startswith("avx2.pcmp"))) {
+ // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
+ bool CmpEq = Name[9] == 'e';
+ Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
+ CI->getArgOperand(0), CI->getArgOperand(1));
Rep = Builder.CreateSExt(Rep, CI->getType(), "");
} else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) {
Type *I32Ty = Type::getInt32Ty(C);
@@ -904,10 +1027,16 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
Builder.CreateFDiv(Elt0, Elt1),
ConstantInt::get(I32Ty, 0));
- } else if (IsX86 && Name.startswith("avx512.mask.pcmpeq.")) {
- Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_EQ);
- } else if (IsX86 && Name.startswith("avx512.mask.pcmpgt.")) {
- Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_SGT);
+ } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
+ // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
+ bool CmpEq = Name[16] == 'e';
+ Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
+ } else if (IsX86 && Name.startswith("avx512.mask.cmp")) {
+ unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+ Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
+ } else if (IsX86 && Name.startswith("avx512.mask.ucmp")) {
+ unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+ Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
} else if (IsX86 && (Name == "sse41.pmaxsb" ||
Name == "sse2.pmaxs.w" ||
Name == "sse41.pmaxsd" ||
@@ -1019,15 +1148,11 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep =
Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
Builder.getInt8(Imm)});
- } else if (IsX86 && Name == "xop.vpcmov") {
- Value *Arg0 = CI->getArgOperand(0);
- Value *Arg1 = CI->getArgOperand(1);
+ } else if (IsX86 && Name.startswith("xop.vpcmov")) {
Value *Sel = CI->getArgOperand(2);
- unsigned NumElts = CI->getType()->getVectorNumElements();
- Constant *MinusOne = ConstantVector::getSplat(NumElts, Builder.getInt64(-1));
- Value *NotSel = Builder.CreateXor(Sel, MinusOne);
- Value *Sel0 = Builder.CreateAnd(Arg0, Sel);
- Value *Sel1 = Builder.CreateAnd(Arg1, NotSel);
+ Value *NotSel = Builder.CreateNot(Sel);
+ Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
+ Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
Rep = Builder.CreateOr(Sel0, Sel1);
} else if (IsX86 && Name == "sse42.crc32.64.8") {
Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
@@ -1461,6 +1586,43 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
+ Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::ctlz,
+ CI->getType()),
+ { CI->getArgOperand(0), Builder.getInt1(false) });
+ Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
+ CI->getArgOperand(1));
+ } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
+ Name.startswith("avx512.mask.min.p"))) {
+ bool IsMin = Name[13] == 'i';
+ VectorType *VecTy = cast<VectorType>(CI->getType());
+ unsigned VecWidth = VecTy->getPrimitiveSizeInBits();
+ unsigned EltWidth = VecTy->getScalarSizeInBits();
+ Intrinsic::ID IID;
+ if (!IsMin && VecWidth == 128 && EltWidth == 32)
+ IID = Intrinsic::x86_sse_max_ps;
+ else if (!IsMin && VecWidth == 128 && EltWidth == 64)
+ IID = Intrinsic::x86_sse2_max_pd;
+ else if (!IsMin && VecWidth == 256 && EltWidth == 32)
+ IID = Intrinsic::x86_avx_max_ps_256;
+ else if (!IsMin && VecWidth == 256 && EltWidth == 64)
+ IID = Intrinsic::x86_avx_max_pd_256;
+ else if (IsMin && VecWidth == 128 && EltWidth == 32)
+ IID = Intrinsic::x86_sse_min_ps;
+ else if (IsMin && VecWidth == 128 && EltWidth == 64)
+ IID = Intrinsic::x86_sse2_min_pd;
+ else if (IsMin && VecWidth == 256 && EltWidth == 32)
+ IID = Intrinsic::x86_avx_min_ps_256;
+ else if (IsMin && VecWidth == 256 && EltWidth == 64)
+ IID = Intrinsic::x86_avx_min_pd_256;
+ else
+ llvm_unreachable("Unexpected intrinsic");
+
+ Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
+ { CI->getArgOperand(0), CI->getArgOperand(1) });
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.pshuf.b.")) {
VectorType *VecTy = cast<VectorType>(CI->getType());
Intrinsic::ID IID;
@@ -1501,6 +1663,42 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
{ CI->getArgOperand(0), CI->getArgOperand(1) });
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.pack")) {
+ bool IsUnsigned = Name[16] == 'u';
+ bool IsDW = Name[18] == 'd';
+ VectorType *VecTy = cast<VectorType>(CI->getType());
+ Intrinsic::ID IID;
+ if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128)
+ IID = Intrinsic::x86_sse2_packsswb_128;
+ else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256)
+ IID = Intrinsic::x86_avx2_packsswb;
+ else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512)
+ IID = Intrinsic::x86_avx512_packsswb_512;
+ else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128)
+ IID = Intrinsic::x86_sse2_packssdw_128;
+ else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256)
+ IID = Intrinsic::x86_avx2_packssdw;
+ else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512)
+ IID = Intrinsic::x86_avx512_packssdw_512;
+ else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128)
+ IID = Intrinsic::x86_sse2_packuswb_128;
+ else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256)
+ IID = Intrinsic::x86_avx2_packuswb;
+ else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512)
+ IID = Intrinsic::x86_avx512_packuswb_512;
+ else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128)
+ IID = Intrinsic::x86_sse41_packusdw;
+ else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256)
+ IID = Intrinsic::x86_avx2_packusdw;
+ else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512)
+ IID = Intrinsic::x86_avx512_packusdw_512;
+ else
+ llvm_unreachable("Unexpected intrinsic");
+
+ Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
+ { CI->getArgOperand(0), CI->getArgOperand(1) });
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.psll")) {
bool IsImmediate = Name[16] == 'i' ||
(Name.size() > 18 && Name[18] == 'i');
@@ -1705,6 +1903,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
} else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
Rep = upgradeMaskedMove(Builder, *CI);
+ } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
+ Rep = UpgradeMaskToInt(Builder, *CI);
} else if (IsX86 && Name.startswith("avx512.mask.vpermilvar.")) {
Intrinsic::ID IID;
if (Name.endswith("ps.128"))
@@ -1727,6 +1927,64 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
{ CI->getArgOperand(0), CI->getArgOperand(1) });
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
+ } else if (IsX86 && Name.endswith(".movntdqa")) {
+ Module *M = F->getParent();
+ MDNode *Node = MDNode::get(
+ C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
+
+ Value *Ptr = CI->getArgOperand(0);
+ VectorType *VTy = cast<VectorType>(CI->getType());
+
+ // Convert the type of the pointer to a pointer to the stored type.
+ Value *BC =
+ Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
+ LoadInst *LI = Builder.CreateAlignedLoad(BC, VTy->getBitWidth() / 8);
+ LI->setMetadata(M->getMDKindID("nontemporal"), Node);
+ Rep = LI;
+ } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
+ Value *Arg = CI->getArgOperand(0);
+ Value *Neg = Builder.CreateNeg(Arg, "neg");
+ Value *Cmp = Builder.CreateICmpSGE(
+ Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
+ Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
+ } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
+ Name == "max.ui" || Name == "max.ull")) {
+ Value *Arg0 = CI->getArgOperand(0);
+ Value *Arg1 = CI->getArgOperand(1);
+ Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
+ ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
+ : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
+ Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
+ } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
+ Name == "min.ui" || Name == "min.ull")) {
+ Value *Arg0 = CI->getArgOperand(0);
+ Value *Arg1 = CI->getArgOperand(1);
+ Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
+ ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
+ : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
+ Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
+ } else if (IsNVVM && Name == "clz.ll") {
+ // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
+ Value *Arg = CI->getArgOperand(0);
+ Value *Ctlz = Builder.CreateCall(
+ Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
+ {Arg->getType()}),
+ {Arg, Builder.getFalse()}, "ctlz");
+ Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
+ } else if (IsNVVM && Name == "popc.ll") {
+ // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
+ // i64.
+ Value *Arg = CI->getArgOperand(0);
+ Value *Popc = Builder.CreateCall(
+ Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
+ {Arg->getType()}),
+ Arg, "ctpop");
+ Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
+ } else if (IsNVVM && Name == "h2f") {
+ Rep = Builder.CreateCall(Intrinsic::getDeclaration(
+ F->getParent(), Intrinsic::convert_from_fp16,
+ {Builder.getFloatTy()}),
+ CI->getArgOperand(0), "h2f");
} else {
llvm_unreachable("Unknown function for CallInst upgrade.");
}
@@ -1737,13 +1995,16 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
return;
}
- std::string Name = CI->getName();
- if (!Name.empty())
- CI->setName(Name + ".old");
-
+ CallInst *NewCall = nullptr;
switch (NewFn->getIntrinsicID()) {
- default:
- llvm_unreachable("Unknown function for CallInst upgrade.");
+ default: {
+ // Handle generic mangling change, but nothing else
+ assert(
+ (CI->getCalledFunction()->getName() != NewFn->getName()) &&
+ "Unknown function for CallInst upgrade and isn't just a name change");
+ CI->setCalledFunction(NewFn);
+ return;
+ }
case Intrinsic::arm_neon_vld1:
case Intrinsic::arm_neon_vld2:
@@ -1761,43 +2022,43 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
case Intrinsic::arm_neon_vst4lane: {
SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
CI->arg_operands().end());
- CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args));
- CI->eraseFromParent();
- return;
+ NewCall = Builder.CreateCall(NewFn, Args);
+ break;
}
case Intrinsic::bitreverse:
- CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)}));
- CI->eraseFromParent();
- return;
+ NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
+ break;
case Intrinsic::ctlz:
case Intrinsic::cttz:
assert(CI->getNumArgOperands() == 1 &&
"Mismatch between function args and call args");
- CI->replaceAllUsesWith(Builder.CreateCall(
- NewFn, {CI->getArgOperand(0), Builder.getFalse()}, Name));
- CI->eraseFromParent();
- return;
-
- case Intrinsic::objectsize:
- CI->replaceAllUsesWith(Builder.CreateCall(
- NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)}, Name));
- CI->eraseFromParent();
- return;
+ NewCall =
+ Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
+ break;
- case Intrinsic::ctpop: {
- CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)}));
- CI->eraseFromParent();
- return;
+ case Intrinsic::objectsize: {
+ Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
+ ? Builder.getFalse()
+ : CI->getArgOperand(2);
+ NewCall = Builder.CreateCall(
+ NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize});
+ break;
}
+ case Intrinsic::ctpop:
+ NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
+ break;
+
+ case Intrinsic::convert_from_fp16:
+ NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
+ break;
+
case Intrinsic::x86_xop_vfrcz_ss:
case Intrinsic::x86_xop_vfrcz_sd:
- CI->replaceAllUsesWith(
- Builder.CreateCall(NewFn, {CI->getArgOperand(1)}, Name));
- CI->eraseFromParent();
- return;
+ NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
+ break;
case Intrinsic::x86_xop_vpermil2pd:
case Intrinsic::x86_xop_vpermil2ps:
@@ -1808,9 +2069,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
- CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args, Name));
- CI->eraseFromParent();
- return;
+ NewCall = Builder.CreateCall(NewFn, Args);
+ break;
}
case Intrinsic::x86_sse41_ptestc:
@@ -1832,10 +2092,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
- CallInst *NewCall = Builder.CreateCall(NewFn, {BC0, BC1}, Name);
- CI->replaceAllUsesWith(NewCall);
- CI->eraseFromParent();
- return;
+ NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
+ break;
}
case Intrinsic::x86_sse41_insertps:
@@ -1851,30 +2109,36 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// Replace the last argument with a trunc.
Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
-
- CallInst *NewCall = Builder.CreateCall(NewFn, Args);
- CI->replaceAllUsesWith(NewCall);
- CI->eraseFromParent();
- return;
+ NewCall = Builder.CreateCall(NewFn, Args);
+ break;
}
case Intrinsic::thread_pointer: {
- CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {}));
- CI->eraseFromParent();
- return;
+ NewCall = Builder.CreateCall(NewFn, {});
+ break;
}
case Intrinsic::invariant_start:
case Intrinsic::invariant_end:
case Intrinsic::masked_load:
- case Intrinsic::masked_store: {
+ case Intrinsic::masked_store:
+ case Intrinsic::masked_gather:
+ case Intrinsic::masked_scatter: {
SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
CI->arg_operands().end());
- CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args));
- CI->eraseFromParent();
- return;
+ NewCall = Builder.CreateCall(NewFn, Args);
+ break;
}
}
+ assert(NewCall && "Should have either set this variable or returned through "
+ "the default case");
+ std::string Name = CI->getName();
+ if (!Name.empty()) {
+ CI->setName(Name + ".old");
+ NewCall->setName(Name);
+ }
+ CI->replaceAllUsesWith(NewCall);
+ CI->eraseFromParent();
}
void llvm::UpgradeCallsToIntrinsic(Function *F) {
@@ -1975,14 +2239,14 @@ bool llvm::UpgradeDebugInfo(Module &M) {
}
bool llvm::UpgradeModuleFlags(Module &M) {
- const NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
+ NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
if (!ModFlags)
return false;
- bool HasObjCFlag = false, HasClassProperties = false;
+ bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
MDNode *Op = ModFlags->getOperand(I);
- if (Op->getNumOperands() < 2)
+ if (Op->getNumOperands() != 3)
continue;
MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
if (!ID)
@@ -1991,7 +2255,24 @@ bool llvm::UpgradeModuleFlags(Module &M) {
HasObjCFlag = true;
if (ID->getString() == "Objective-C Class Properties")
HasClassProperties = true;
+ // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
+ // fields was Error and is now Max.
+ if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
+ if (auto *Behavior =
+ mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
+ if (Behavior->getLimitedValue() == Module::Error) {
+ Type *Int32Ty = Type::getInt32Ty(M.getContext());
+ Metadata *Ops[3] = {
+ ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
+ MDString::get(M.getContext(), ID->getString()),
+ Op->getOperand(2)};
+ ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
+ Changed = true;
+ }
+ }
+ }
}
+
// "Objective-C Class Properties" is recently added for Objective-C. We
// upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
// flag of value 0, so we can correclty downgrade this flag when trying to
@@ -2000,9 +2281,10 @@ bool llvm::UpgradeModuleFlags(Module &M) {
if (HasObjCFlag && !HasClassProperties) {
M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
(uint32_t)0);
- return true;
+ Changed = true;
}
- return false;
+
+ return Changed;
}
static bool isOldLoopArgument(Metadata *MD) {
OpenPOWER on IntegriCloud