summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Target/X86/X86FastISel.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/X86/X86FastISel.cpp')
-rw-r--r--contrib/llvm/lib/Target/X86/X86FastISel.cpp331
1 files changed, 240 insertions, 91 deletions
diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
index dfe3c80..c890fdd 100644
--- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
@@ -170,6 +170,12 @@ private:
const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
X86AddressMode &AM);
+
+ unsigned fastEmitInst_rrrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ bool Op0IsKill, unsigned Op1, bool Op1IsKill,
+ unsigned Op2, bool Op2IsKill, unsigned Op3,
+ bool Op3IsKill);
};
} // end anonymous namespace.
@@ -182,18 +188,18 @@ getX86ConditionCode(CmpInst::Predicate Predicate) {
default: break;
// Floating-point Predicates
case CmpInst::FCMP_UEQ: CC = X86::COND_E; break;
- case CmpInst::FCMP_OLT: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_OLT: NeedSwap = true; LLVM_FALLTHROUGH;
case CmpInst::FCMP_OGT: CC = X86::COND_A; break;
- case CmpInst::FCMP_OLE: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_OLE: NeedSwap = true; LLVM_FALLTHROUGH;
case CmpInst::FCMP_OGE: CC = X86::COND_AE; break;
- case CmpInst::FCMP_UGT: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_UGT: NeedSwap = true; LLVM_FALLTHROUGH;
case CmpInst::FCMP_ULT: CC = X86::COND_B; break;
- case CmpInst::FCMP_UGE: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_UGE: NeedSwap = true; LLVM_FALLTHROUGH;
case CmpInst::FCMP_ULE: CC = X86::COND_BE; break;
case CmpInst::FCMP_ONE: CC = X86::COND_NE; break;
case CmpInst::FCMP_UNO: CC = X86::COND_P; break;
case CmpInst::FCMP_ORD: CC = X86::COND_NP; break;
- case CmpInst::FCMP_OEQ: // fall-through
+ case CmpInst::FCMP_OEQ: LLVM_FALLTHROUGH;
case CmpInst::FCMP_UNE: CC = X86::COND_INVALID; break;
// Integer Predicates
@@ -229,15 +235,15 @@ getX86SSEConditionCode(CmpInst::Predicate Predicate) {
switch (Predicate) {
default: llvm_unreachable("Unexpected predicate");
case CmpInst::FCMP_OEQ: CC = 0; break;
- case CmpInst::FCMP_OGT: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_OGT: NeedSwap = true; LLVM_FALLTHROUGH;
case CmpInst::FCMP_OLT: CC = 1; break;
- case CmpInst::FCMP_OGE: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_OGE: NeedSwap = true; LLVM_FALLTHROUGH;
case CmpInst::FCMP_OLE: CC = 2; break;
case CmpInst::FCMP_UNO: CC = 3; break;
case CmpInst::FCMP_UNE: CC = 4; break;
- case CmpInst::FCMP_ULE: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_ULE: NeedSwap = true; LLVM_FALLTHROUGH;
case CmpInst::FCMP_UGE: CC = 5; break;
- case CmpInst::FCMP_ULT: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_ULT: NeedSwap = true; LLVM_FALLTHROUGH;
case CmpInst::FCMP_UGT: CC = 6; break;
case CmpInst::FCMP_ORD: CC = 7; break;
case CmpInst::FCMP_UEQ:
@@ -351,6 +357,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
bool HasSSE41 = Subtarget->hasSSE41();
bool HasAVX = Subtarget->hasAVX();
bool HasAVX2 = Subtarget->hasAVX2();
+ bool HasAVX512 = Subtarget->hasAVX512();
+ bool HasVLX = Subtarget->hasVLX();
bool IsNonTemporal = MMO && MMO->isNonTemporal();
// Get opcode and regclass of the output for the given load instruction.
@@ -378,7 +386,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
break;
case MVT::f32:
if (X86ScalarSSEf32) {
- Opc = HasAVX ? X86::VMOVSSrm : X86::MOVSSrm;
+ Opc = HasAVX512 ? X86::VMOVSSZrm : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm;
RC = &X86::FR32RegClass;
} else {
Opc = X86::LD_Fp32m;
@@ -387,7 +395,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
break;
case MVT::f64:
if (X86ScalarSSEf64) {
- Opc = HasAVX ? X86::VMOVSDrm : X86::MOVSDrm;
+ Opc = HasAVX512 ? X86::VMOVSDZrm : HasAVX ? X86::VMOVSDrm : X86::MOVSDrm;
RC = &X86::FR64RegClass;
} else {
Opc = X86::LD_Fp64m;
@@ -399,20 +407,26 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
return false;
case MVT::v4f32:
if (IsNonTemporal && Alignment >= 16 && HasSSE41)
- Opc = HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
+ Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
+ HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
else if (Alignment >= 16)
- Opc = HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
+ Opc = HasVLX ? X86::VMOVAPSZ128rm :
+ HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
else
- Opc = HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
+ Opc = HasVLX ? X86::VMOVUPSZ128rm :
+ HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
RC = &X86::VR128RegClass;
break;
case MVT::v2f64:
if (IsNonTemporal && Alignment >= 16 && HasSSE41)
- Opc = HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
+ Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
+ HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
else if (Alignment >= 16)
- Opc = HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
+ Opc = HasVLX ? X86::VMOVAPDZ128rm :
+ HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
else
- Opc = HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
+ Opc = HasVLX ? X86::VMOVUPDZ128rm :
+ HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
RC = &X86::VR128RegClass;
break;
case MVT::v4i32:
@@ -420,27 +434,34 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
case MVT::v8i16:
case MVT::v16i8:
if (IsNonTemporal && Alignment >= 16)
- Opc = HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
+ Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
+ HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
else if (Alignment >= 16)
- Opc = HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
+ Opc = HasVLX ? X86::VMOVDQA64Z128rm :
+ HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
else
- Opc = HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
+ Opc = HasVLX ? X86::VMOVDQU64Z128rm :
+ HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
RC = &X86::VR128RegClass;
break;
case MVT::v8f32:
assert(HasAVX);
if (IsNonTemporal && Alignment >= 32 && HasAVX2)
- Opc = X86::VMOVNTDQAYrm;
+ Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
+ else if (Alignment >= 32)
+ Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
else
- Opc = (Alignment >= 32) ? X86::VMOVAPSYrm : X86::VMOVUPSYrm;
+ Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm;
RC = &X86::VR256RegClass;
break;
case MVT::v4f64:
assert(HasAVX);
if (IsNonTemporal && Alignment >= 32 && HasAVX2)
Opc = X86::VMOVNTDQAYrm;
+ else if (Alignment >= 32)
+ Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
else
- Opc = (Alignment >= 32) ? X86::VMOVAPDYrm : X86::VMOVUPDYrm;
+ Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm;
RC = &X86::VR256RegClass;
break;
case MVT::v8i32:
@@ -450,12 +471,14 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
assert(HasAVX);
if (IsNonTemporal && Alignment >= 32 && HasAVX2)
Opc = X86::VMOVNTDQAYrm;
+ else if (Alignment >= 32)
+ Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
else
- Opc = (Alignment >= 32) ? X86::VMOVDQAYrm : X86::VMOVDQUYrm;
+ Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm;
RC = &X86::VR256RegClass;
break;
case MVT::v16f32:
- assert(Subtarget->hasAVX512());
+ assert(HasAVX512);
if (IsNonTemporal && Alignment >= 64)
Opc = X86::VMOVNTDQAZrm;
else
@@ -463,7 +486,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
RC = &X86::VR512RegClass;
break;
case MVT::v8f64:
- assert(Subtarget->hasAVX512());
+ assert(HasAVX512);
if (IsNonTemporal && Alignment >= 64)
Opc = X86::VMOVNTDQAZrm;
else
@@ -474,7 +497,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
case MVT::v16i32:
case MVT::v32i16:
case MVT::v64i8:
- assert(Subtarget->hasAVX512());
+ assert(HasAVX512);
// Note: There are a lot more choices based on type with AVX-512, but
// there's really no advantage when the load isn't masked.
if (IsNonTemporal && Alignment >= 64)
@@ -504,6 +527,8 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
bool HasSSE2 = Subtarget->hasSSE2();
bool HasSSE4A = Subtarget->hasSSE4A();
bool HasAVX = Subtarget->hasAVX();
+ bool HasAVX512 = Subtarget->hasAVX512();
+ bool HasVLX = Subtarget->hasVLX();
bool IsNonTemporal = MMO && MMO->isNonTemporal();
// Get opcode and regclass of the output for the given store instruction.
@@ -518,8 +543,8 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
TII.get(X86::AND8ri), AndResult)
.addReg(ValReg, getKillRegState(ValIsKill)).addImm(1);
ValReg = AndResult;
+ LLVM_FALLTHROUGH; // handle i1 as i8.
}
- // FALLTHROUGH, handling i1 as i8.
case MVT::i8: Opc = X86::MOV8mr; break;
case MVT::i16: Opc = X86::MOV16mr; break;
case MVT::i32:
@@ -534,7 +559,8 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
if (IsNonTemporal && HasSSE4A)
Opc = X86::MOVNTSS;
else
- Opc = HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
+ Opc = HasAVX512 ? X86::VMOVSSZmr :
+ HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
} else
Opc = X86::ST_Fp32m;
break;
@@ -543,27 +569,34 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
if (IsNonTemporal && HasSSE4A)
Opc = X86::MOVNTSD;
else
- Opc = HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
+ Opc = HasAVX512 ? X86::VMOVSDZmr :
+ HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
} else
Opc = X86::ST_Fp64m;
break;
case MVT::v4f32:
if (Aligned) {
if (IsNonTemporal)
- Opc = HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
+ Opc = HasVLX ? X86::VMOVNTPSZ128mr :
+ HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
else
- Opc = HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
+ Opc = HasVLX ? X86::VMOVAPSZ128mr :
+ HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
} else
- Opc = HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
+ Opc = HasVLX ? X86::VMOVUPSZ128mr :
+ HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
break;
case MVT::v2f64:
if (Aligned) {
if (IsNonTemporal)
- Opc = HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
+ Opc = HasVLX ? X86::VMOVNTPDZ128mr :
+ HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
else
- Opc = HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
+ Opc = HasVLX ? X86::VMOVAPDZ128mr :
+ HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
} else
- Opc = HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
+ Opc = HasVLX ? X86::VMOVUPDZ128mr :
+ HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
break;
case MVT::v4i32:
case MVT::v2i64:
@@ -571,45 +604,57 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
case MVT::v16i8:
if (Aligned) {
if (IsNonTemporal)
- Opc = HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
+ Opc = HasVLX ? X86::VMOVNTDQZ128mr :
+ HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
else
- Opc = HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
+ Opc = HasVLX ? X86::VMOVDQA64Z128mr :
+ HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
} else
- Opc = HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
+ Opc = HasVLX ? X86::VMOVDQU64Z128mr :
+ HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
break;
case MVT::v8f32:
assert(HasAVX);
- if (Aligned)
- Opc = IsNonTemporal ? X86::VMOVNTPSYmr : X86::VMOVAPSYmr;
- else
- Opc = X86::VMOVUPSYmr;
+ if (Aligned) {
+ if (IsNonTemporal)
+ Opc = HasVLX ? X86::VMOVNTPSZ256mr : X86::VMOVNTPSYmr;
+ else
+ Opc = HasVLX ? X86::VMOVAPSZ256mr : X86::VMOVAPSYmr;
+ } else
+ Opc = HasVLX ? X86::VMOVUPSZ256mr : X86::VMOVUPSYmr;
break;
case MVT::v4f64:
assert(HasAVX);
if (Aligned) {
- Opc = IsNonTemporal ? X86::VMOVNTPDYmr : X86::VMOVAPDYmr;
+ if (IsNonTemporal)
+ Opc = HasVLX ? X86::VMOVNTPDZ256mr : X86::VMOVNTPDYmr;
+ else
+ Opc = HasVLX ? X86::VMOVAPDZ256mr : X86::VMOVAPDYmr;
} else
- Opc = X86::VMOVUPDYmr;
+ Opc = HasVLX ? X86::VMOVUPDZ256mr : X86::VMOVUPDYmr;
break;
case MVT::v8i32:
case MVT::v4i64:
case MVT::v16i16:
case MVT::v32i8:
assert(HasAVX);
- if (Aligned)
- Opc = IsNonTemporal ? X86::VMOVNTDQYmr : X86::VMOVDQAYmr;
- else
- Opc = X86::VMOVDQUYmr;
+ if (Aligned) {
+ if (IsNonTemporal)
+ Opc = HasVLX ? X86::VMOVNTDQZ256mr : X86::VMOVNTDQYmr;
+ else
+ Opc = HasVLX ? X86::VMOVDQA64Z256mr : X86::VMOVDQAYmr;
+ } else
+ Opc = HasVLX ? X86::VMOVDQU64Z256mr : X86::VMOVDQUYmr;
break;
case MVT::v16f32:
- assert(Subtarget->hasAVX512());
+ assert(HasAVX512);
if (Aligned)
Opc = IsNonTemporal ? X86::VMOVNTPSZmr : X86::VMOVAPSZmr;
else
Opc = X86::VMOVUPSZmr;
break;
case MVT::v8f64:
- assert(Subtarget->hasAVX512());
+ assert(HasAVX512);
if (Aligned) {
Opc = IsNonTemporal ? X86::VMOVNTPDZmr : X86::VMOVAPDZmr;
} else
@@ -619,7 +664,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
case MVT::v16i32:
case MVT::v32i16:
case MVT::v64i8:
- assert(Subtarget->hasAVX512());
+ assert(HasAVX512);
// Note: There are a lot more choices based on type with AVX-512, but
// there's really no advantage when the store isn't masked.
if (Aligned)
@@ -659,7 +704,9 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
bool Signed = true;
switch (VT.getSimpleVT().SimpleTy) {
default: break;
- case MVT::i1: Signed = false; // FALLTHROUGH to handle as i8.
+ case MVT::i1:
+ Signed = false;
+ LLVM_FALLTHROUGH; // Handle as i8.
case MVT::i8: Opc = X86::MOV8mi; break;
case MVT::i16: Opc = X86::MOV16mi; break;
case MVT::i32: Opc = X86::MOV32mi; break;
@@ -895,7 +942,7 @@ redo_gep:
for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
i != e; ++i, ++GTI) {
const Value *Op = *i;
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
const StructLayout *SL = DL.getStructLayout(STy);
Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
continue;
@@ -1454,11 +1501,11 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
}
// FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
- static unsigned SETFOpcTable[2][3] = {
+ static const uint16_t SETFOpcTable[2][3] = {
{ X86::SETEr, X86::SETNPr, X86::AND8rr },
{ X86::SETNEr, X86::SETPr, X86::OR8rr }
};
- unsigned *SETFOpc = nullptr;
+ const uint16_t *SETFOpc = nullptr;
switch (Predicate) {
default: break;
case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break;
@@ -1511,7 +1558,7 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) {
// Handle zero-extension from i1 to i8, which is common.
MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
- if (SrcVT.SimpleTy == MVT::i1) {
+ if (SrcVT == MVT::i1) {
// Set the high bits to zero.
ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
SrcVT = MVT::i8;
@@ -1601,7 +1648,8 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
switch (Predicate) {
default: break;
case CmpInst::FCMP_OEQ:
- std::swap(TrueMBB, FalseMBB); // fall-through
+ std::swap(TrueMBB, FalseMBB);
+ LLVM_FALLTHROUGH;
case CmpInst::FCMP_UNE:
NeedExtraBranch = true;
Predicate = CmpInst::FCMP_ONE;
@@ -1651,6 +1699,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
if (TestOpc) {
unsigned OpReg = getRegForValue(TI->getOperand(0));
if (OpReg == 0) return false;
+
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc))
.addReg(OpReg).addImm(1);
@@ -1688,8 +1737,17 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
unsigned OpReg = getRegForValue(BI->getCondition());
if (OpReg == 0) return false;
+ // In case OpReg is a K register, COPY to a GPR
+ if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
+ unsigned KOpReg = OpReg;
+ OpReg = createResultReg(&X86::GR8RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), OpReg)
+ .addReg(KOpReg);
+ }
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
- .addReg(OpReg).addImm(1);
+ .addReg(OpReg)
+ .addImm(1);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1))
.addMBB(TrueMBB);
finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
@@ -1875,15 +1933,15 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
// Copy the zero into the appropriate sub/super/identical physical
// register. Unfortunately the operations needed are not uniform enough
// to fit neatly into the table above.
- if (VT.SimpleTy == MVT::i16) {
+ if (VT == MVT::i16) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Copy), TypeEntry.HighInReg)
.addReg(Zero32, 0, X86::sub_16bit);
- } else if (VT.SimpleTy == MVT::i32) {
+ } else if (VT == MVT::i32) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Copy), TypeEntry.HighInReg)
.addReg(Zero32);
- } else if (VT.SimpleTy == MVT::i64) {
+ } else if (VT == MVT::i64) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
.addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
@@ -1953,11 +2011,11 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
// FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
- static unsigned SETFOpcTable[2][3] = {
+ static const uint16_t SETFOpcTable[2][3] = {
{ X86::SETNPr, X86::SETEr , X86::TEST8rr },
{ X86::SETPr, X86::SETNEr, X86::OR8rr }
};
- unsigned *SETFOpc = nullptr;
+ const uint16_t *SETFOpc = nullptr;
switch (Predicate) {
default: break;
case CmpInst::FCMP_OEQ:
@@ -2023,8 +2081,17 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
return false;
bool CondIsKill = hasTrivialKill(Cond);
+ // In case OpReg is a K register, COPY to a GPR
+ if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
+ unsigned KCondReg = CondReg;
+ CondReg = createResultReg(&X86::GR8RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), CondReg)
+ .addReg(KCondReg, getKillRegState(CondIsKill));
+ }
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
- .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1);
+ .addReg(CondReg, getKillRegState(CondIsKill))
+ .addImm(1);
}
const Value *LHS = I->getOperand(1);
@@ -2087,12 +2154,12 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
std::swap(CmpLHS, CmpRHS);
// Choose the SSE instruction sequence based on data type (float or double).
- static unsigned OpcTable[2][4] = {
- { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr },
- { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr }
+ static const uint16_t OpcTable[2][4] = {
+ { X86::CMPSSrr, X86::ANDPSrr, X86::ANDNPSrr, X86::ORPSrr },
+ { X86::CMPSDrr, X86::ANDPDrr, X86::ANDNPDrr, X86::ORPDrr }
};
- unsigned *Opc = nullptr;
+ const uint16_t *Opc = nullptr;
switch (RetVT.SimpleTy) {
default: return false;
case MVT::f32: Opc = &OpcTable[0][0]; break;
@@ -2119,9 +2186,36 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
unsigned ResultReg;
-
- if (Subtarget->hasAVX()) {
- const TargetRegisterClass *FR32 = &X86::FR32RegClass;
+
+ if (Subtarget->hasAVX512()) {
+ // If we have AVX512 we can use a mask compare and masked movss/sd.
+ const TargetRegisterClass *VR128X = &X86::VR128XRegClass;
+ const TargetRegisterClass *VK1 = &X86::VK1RegClass;
+
+ unsigned CmpOpcode =
+ (RetVT == MVT::f32) ? X86::VCMPSSZrr : X86::VCMPSDZrr;
+ unsigned CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpLHSIsKill,
+ CmpRHSReg, CmpRHSIsKill, CC);
+
+ // Need an IMPLICIT_DEF for the input that is used to generate the upper
+ // bits of the result register since its not based on any of the inputs.
+ unsigned ImplicitDefReg = createResultReg(VR128X);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
+
+ // Place RHSReg is the passthru of the masked movss/sd operation and put
+ // LHS in the input. The mask input comes from the compare.
+ unsigned MovOpcode =
+ (RetVT == MVT::f32) ? X86::VMOVSSZrrk : X86::VMOVSDZrrk;
+ unsigned MovReg = fastEmitInst_rrrr(MovOpcode, VR128X, RHSReg, RHSIsKill,
+ CmpReg, true, ImplicitDefReg, true,
+ LHSReg, LHSIsKill);
+
+ ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(MovReg);
+
+ } else if (Subtarget->hasAVX()) {
const TargetRegisterClass *VR128 = &X86::VR128RegClass;
// If we have AVX, create 1 blendv instead of 3 logic instructions.
@@ -2130,11 +2224,11 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
// instructions as the AND/ANDN/OR sequence due to register moves, so
// don't bother.
unsigned CmpOpcode =
- (RetVT.SimpleTy == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
+ (RetVT == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
unsigned BlendOpcode =
- (RetVT.SimpleTy == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
-
- unsigned CmpReg = fastEmitInst_rri(CmpOpcode, FR32, CmpLHSReg, CmpLHSIsKill,
+ (RetVT == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
+
+ unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill,
CmpRHSReg, CmpRHSIsKill, CC);
unsigned VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill,
LHSReg, LHSIsKill, CmpReg, true);
@@ -2142,14 +2236,18 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
} else {
+ const TargetRegisterClass *VR128 = &X86::VR128RegClass;
unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
CmpRHSReg, CmpRHSIsKill, CC);
- unsigned AndReg = fastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
+ unsigned AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, /*IsKill=*/false,
LHSReg, LHSIsKill);
- unsigned AndNReg = fastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
+ unsigned AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, /*IsKill=*/true,
RHSReg, RHSIsKill);
- ResultReg = fastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
- AndReg, /*IsKill=*/true);
+ unsigned OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, /*IsKill=*/true,
+ AndReg, /*IsKill=*/true);
+ ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(OrReg);
}
updateValueMap(I, ResultReg);
return true;
@@ -2195,8 +2293,18 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
if (CondReg == 0)
return false;
bool CondIsKill = hasTrivialKill(Cond);
+
+ // In case OpReg is a K register, COPY to a GPR
+ if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
+ unsigned KCondReg = CondReg;
+ CondReg = createResultReg(&X86::GR8RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), CondReg)
+ .addReg(KCondReg, getKillRegState(CondIsKill));
+ }
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
- .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1);
+ .addReg(CondReg, getKillRegState(CondIsKill))
+ .addImm(1);
}
const Value *LHS = I->getOperand(1);
@@ -2522,8 +2630,8 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
// This needs to be set before we call getPtrSizedFrameRegister, otherwise
// we get the wrong frame register.
- MachineFrameInfo *MFI = MF->getFrameInfo();
- MFI->setFrameAddressIsTaken(true);
+ MachineFrameInfo &MFI = MF->getFrameInfo();
+ MFI.setFrameAddressIsTaken(true);
const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*MF);
@@ -2698,7 +2806,9 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
const Function *Callee = II->getCalledFunction();
auto *Ty = cast<StructType>(Callee->getReturnType());
Type *RetTy = Ty->getTypeAtIndex(0U);
- Type *CondTy = Ty->getTypeAtIndex(1);
+ assert(Ty->getTypeAtIndex(1)->isIntegerTy() &&
+ Ty->getTypeAtIndex(1)->getScalarSizeInBits() == 1 &&
+ "Overflow value expected to be an i1");
MVT VT;
if (!isTypeLegal(RetTy, VT))
@@ -2808,7 +2918,8 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
if (!ResultReg)
return false;
- unsigned ResultReg2 = FuncInfo.CreateRegs(CondTy);
+ // Assign to a GPR since the overflow return value is lowered to a SETcc.
+ unsigned ResultReg2 = createResultReg(&X86::GR8RegClass);
assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CondOpc),
ResultReg2);
@@ -2966,7 +3077,7 @@ bool X86FastISel::fastLowerArguments() {
default: llvm_unreachable("Unexpected value type.");
case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break;
case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break;
- case MVT::f32: // fall-through
+ case MVT::f32: LLVM_FALLTHROUGH;
case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
}
unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
@@ -3140,7 +3251,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
"Unexpected extend");
- if (ArgVT.SimpleTy == MVT::i1)
+ if (ArgVT == MVT::i1)
return false;
bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
@@ -3154,7 +3265,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
"Unexpected extend");
// Handle zero-extension from i1 to i8, which is common.
- if (ArgVT.SimpleTy == MVT::i1) {
+ if (ArgVT == MVT::i1) {
// Set the high bits to zero.
ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg, /*TODO: Kill=*/false);
ArgVT = MVT::i8;
@@ -3456,8 +3567,14 @@ X86FastISel::fastSelectInstruction(const Instruction *I) {
if (!SrcVT.isSimple() || !DstVT.isSimple())
return false;
- if (!SrcVT.is128BitVector() &&
- !(Subtarget->hasAVX() && SrcVT.is256BitVector()))
+ MVT SVT = SrcVT.getSimpleVT();
+ MVT DVT = DstVT.getSimpleVT();
+
+ if (!SVT.is128BitVector() &&
+ !(Subtarget->hasAVX() && SVT.is256BitVector()) &&
+ !(Subtarget->hasAVX512() && SVT.is512BitVector() &&
+ (Subtarget->hasBWI() || (SVT.getScalarSizeInBits() >= 32 &&
+ DVT.getScalarSizeInBits() >= 32))))
return false;
unsigned Reg = getRegForValue(I->getOperand(0));
@@ -3505,7 +3622,7 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
unsigned Opc = 0;
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type");
- case MVT::i1: VT = MVT::i8; // fall-through
+ case MVT::i1: VT = MVT::i8; LLVM_FALLTHROUGH;
case MVT::i8: Opc = X86::MOV8ri; break;
case MVT::i16: Opc = X86::MOV16ri; break;
case MVT::i32: Opc = X86::MOV32ri; break;
@@ -3775,6 +3892,38 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
return true;
}
+unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ unsigned Op2, bool Op2IsKill,
+ unsigned Op3, bool Op3IsKill) {
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ unsigned ResultReg = createResultReg(RC);
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
+ Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
+ Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
+ Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 3);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill))
+ .addReg(Op2, getKillRegState(Op2IsKill))
+ .addReg(Op3, getKillRegState(Op3IsKill));
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill))
+ .addReg(Op2, getKillRegState(Op2IsKill))
+ .addReg(Op3, getKillRegState(Op3IsKill));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
namespace llvm {
FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
OpenPOWER on IntegriCloud