Diffstat (limited to 'contrib/llvm/lib/Target/X86')
-rw-r--r--  contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp | 42
-rw-r--r--  contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp | 34
-rw-r--r--  contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp | 3
-rw-r--r--  contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h | 1
-rw-r--r--  contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp | 12
-rw-r--r--  contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp | 10
-rw-r--r--  contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 6
-rw-r--r--  contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp | 1
-rw-r--r--  contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp | 24
-rw-r--r--  contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp | 11
-rw-r--r--  contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp | 30
-rw-r--r--  contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp | 53
-rw-r--r--  contrib/llvm/lib/Target/X86/X86.td | 12
-rw-r--r--  contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp | 25
-rw-r--r--  contrib/llvm/lib/Target/X86/X86FastISel.cpp | 21
-rw-r--r--  contrib/llvm/lib/Target/X86/X86ISelLowering.cpp | 1474
-rw-r--r--  contrib/llvm/lib/Target/X86/X86ISelLowering.h | 26
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrAVX512.td | 446
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrFormats.td | 8
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 19
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrInfo.cpp | 95
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrInfo.h | 3
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrInfo.td | 4
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrMPX.td | 70
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrSSE.td | 26
-rw-r--r--  contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h | 29
-rw-r--r--  contrib/llvm/lib/Target/X86/X86MCInstLower.cpp | 41
-rw-r--r--  contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/X86/X86RegisterInfo.td | 8
-rw-r--r--  contrib/llvm/lib/Target/X86/X86Subtarget.cpp | 3
-rw-r--r--  contrib/llvm/lib/Target/X86/X86Subtarget.h | 16
-rw-r--r--  contrib/llvm/lib/Target/X86/X86TargetMachine.cpp | 11
-rw-r--r--  contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp | 16
-rw-r--r--  contrib/llvm/lib/Target/X86/X86WinEHState.cpp | 311
36 files changed, 1808 insertions(+), 1095 deletions(-)
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
index a21f8c7..9eee4a0 100644
--- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
@@ -315,7 +315,7 @@ void X86AddressSanitizer::InstrumentMOVSBase(unsigned DstReg, unsigned SrcReg,
// Test (%SrcReg)
{
- const MCExpr *Disp = MCConstantExpr::Create(0, Ctx);
+ const MCExpr *Disp = MCConstantExpr::create(0, Ctx);
std::unique_ptr<X86Operand> Op(X86Operand::CreateMem(
getPointerWidth(), 0, Disp, SrcReg, 0, AccessSize, SMLoc(), SMLoc()));
InstrumentMemOperand(*Op, AccessSize, false /* IsWrite */, RegCtx, Ctx,
@@ -324,7 +324,7 @@ void X86AddressSanitizer::InstrumentMOVSBase(unsigned DstReg, unsigned SrcReg,
// Test -1(%SrcReg, %CntReg, AccessSize)
{
- const MCExpr *Disp = MCConstantExpr::Create(-1, Ctx);
+ const MCExpr *Disp = MCConstantExpr::create(-1, Ctx);
std::unique_ptr<X86Operand> Op(X86Operand::CreateMem(
getPointerWidth(), 0, Disp, SrcReg, CntReg, AccessSize, SMLoc(),
SMLoc()));
@@ -334,7 +334,7 @@ void X86AddressSanitizer::InstrumentMOVSBase(unsigned DstReg, unsigned SrcReg,
// Test (%DstReg)
{
- const MCExpr *Disp = MCConstantExpr::Create(0, Ctx);
+ const MCExpr *Disp = MCConstantExpr::create(0, Ctx);
std::unique_ptr<X86Operand> Op(X86Operand::CreateMem(
getPointerWidth(), 0, Disp, DstReg, 0, AccessSize, SMLoc(), SMLoc()));
InstrumentMemOperand(*Op, AccessSize, true /* IsWrite */, RegCtx, Ctx, Out);
@@ -342,7 +342,7 @@ void X86AddressSanitizer::InstrumentMOVSBase(unsigned DstReg, unsigned SrcReg,
// Test -1(%DstReg, %CntReg, AccessSize)
{
- const MCExpr *Disp = MCConstantExpr::Create(-1, Ctx);
+ const MCExpr *Disp = MCConstantExpr::create(-1, Ctx);
std::unique_ptr<X86Operand> Op(X86Operand::CreateMem(
getPointerWidth(), 0, Disp, DstReg, CntReg, AccessSize, SMLoc(),
SMLoc()));
@@ -461,7 +461,7 @@ void X86AddressSanitizer::ComputeMemOperandAddress(X86Operand &Op,
while (Residue != 0) {
const MCConstantExpr *Disp =
- MCConstantExpr::Create(ApplyDisplacementBounds(Residue), Ctx);
+ MCConstantExpr::create(ApplyDisplacementBounds(Residue), Ctx);
std::unique_ptr<X86Operand> DispOp =
X86Operand::CreateMem(getPointerWidth(), 0, Disp, Reg, 0, 1, SMLoc(),
SMLoc());
@@ -493,7 +493,7 @@ X86AddressSanitizer::AddDisplacement(X86Operand &Op, int64_t Displacement,
CheckDisplacementBounds(NewDisplacement);
*Residue = Displacement - NewDisplacement;
- const MCExpr *Disp = MCConstantExpr::Create(NewDisplacement, Ctx);
+ const MCExpr *Disp = MCConstantExpr::create(NewDisplacement, Ctx);
return X86Operand::CreateMem(Op.getMemModeSize(), Op.getMemSegReg(), Disp,
Op.getMemBaseReg(), Op.getMemIndexReg(),
Op.getMemScale(), SMLoc(), SMLoc());
@@ -615,7 +615,7 @@ private:
const std::string &Fn = FuncName(AccessSize, IsWrite);
MCSymbol *FnSym = Ctx.getOrCreateSymbol(StringRef(Fn));
const MCSymbolRefExpr *FnExpr =
- MCSymbolRefExpr::Create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
+ MCSymbolRefExpr::create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
EmitInstruction(Out, MCInstBuilder(X86::CALLpcrel32).addExpr(FnExpr));
}
};
@@ -643,7 +643,7 @@ void X86AddressSanitizer32::InstrumentMemOperandSmall(
MCInst Inst;
Inst.setOpcode(X86::MOV8rm);
Inst.addOperand(MCOperand::createReg(ShadowRegI8));
- const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx);
+ const MCExpr *Disp = MCConstantExpr::create(kShadowOffset, Ctx);
std::unique_ptr<X86Operand> Op(
X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI32, 0, 1,
SMLoc(), SMLoc()));
@@ -654,7 +654,7 @@ void X86AddressSanitizer32::InstrumentMemOperandSmall(
EmitInstruction(
Out, MCInstBuilder(X86::TEST8rr).addReg(ShadowRegI8).addReg(ShadowRegI8));
MCSymbol *DoneSym = Ctx.createTempSymbol();
- const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
+ const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx);
EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ScratchRegI32).addReg(
@@ -669,7 +669,7 @@ void X86AddressSanitizer32::InstrumentMemOperandSmall(
case 1:
break;
case 2: {
- const MCExpr *Disp = MCConstantExpr::Create(1, Ctx);
+ const MCExpr *Disp = MCConstantExpr::create(1, Ctx);
std::unique_ptr<X86Operand> Op(
X86Operand::CreateMem(getPointerWidth(), 0, Disp, ScratchRegI32, 0, 1,
SMLoc(), SMLoc()));
@@ -720,7 +720,7 @@ void X86AddressSanitizer32::InstrumentMemOperandLarge(
Inst.setOpcode(X86::CMP16mi);
break;
}
- const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx);
+ const MCExpr *Disp = MCConstantExpr::create(kShadowOffset, Ctx);
std::unique_ptr<X86Operand> Op(
X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI32, 0, 1,
SMLoc(), SMLoc()));
@@ -729,7 +729,7 @@ void X86AddressSanitizer32::InstrumentMemOperandLarge(
EmitInstruction(Out, Inst);
}
MCSymbol *DoneSym = Ctx.createTempSymbol();
- const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
+ const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx);
EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
EmitCallAsanReport(AccessSize, IsWrite, Ctx, Out, RegCtx);
@@ -743,7 +743,7 @@ void X86AddressSanitizer32::InstrumentMOVSImpl(unsigned AccessSize,
// No need to test when ECX equals zero.
MCSymbol *DoneSym = Ctx.createTempSymbol();
- const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
+ const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx);
EmitInstruction(
Out, MCInstBuilder(X86::TEST32rr).addReg(X86::ECX).addReg(X86::ECX));
EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
@@ -860,7 +860,7 @@ public:
private:
void EmitAdjustRSP(MCContext &Ctx, MCStreamer &Out, long Offset) {
- const MCExpr *Disp = MCConstantExpr::Create(Offset, Ctx);
+ const MCExpr *Disp = MCConstantExpr::create(Offset, Ctx);
std::unique_ptr<X86Operand> Op(
X86Operand::CreateMem(getPointerWidth(), 0, Disp, X86::RSP, 0, 1,
SMLoc(), SMLoc()));
@@ -885,7 +885,7 @@ private:
const std::string &Fn = FuncName(AccessSize, IsWrite);
MCSymbol *FnSym = Ctx.getOrCreateSymbol(StringRef(Fn));
const MCSymbolRefExpr *FnExpr =
- MCSymbolRefExpr::Create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
+ MCSymbolRefExpr::create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
EmitInstruction(Out, MCInstBuilder(X86::CALL64pcrel32).addExpr(FnExpr));
}
};
@@ -914,7 +914,7 @@ void X86AddressSanitizer64::InstrumentMemOperandSmall(
MCInst Inst;
Inst.setOpcode(X86::MOV8rm);
Inst.addOperand(MCOperand::createReg(ShadowRegI8));
- const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx);
+ const MCExpr *Disp = MCConstantExpr::create(kShadowOffset, Ctx);
std::unique_ptr<X86Operand> Op(
X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI64, 0, 1,
SMLoc(), SMLoc()));
@@ -925,7 +925,7 @@ void X86AddressSanitizer64::InstrumentMemOperandSmall(
EmitInstruction(
Out, MCInstBuilder(X86::TEST8rr).addReg(ShadowRegI8).addReg(ShadowRegI8));
MCSymbol *DoneSym = Ctx.createTempSymbol();
- const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
+ const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx);
EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ScratchRegI32).addReg(
@@ -940,7 +940,7 @@ void X86AddressSanitizer64::InstrumentMemOperandSmall(
case 1:
break;
case 2: {
- const MCExpr *Disp = MCConstantExpr::Create(1, Ctx);
+ const MCExpr *Disp = MCConstantExpr::create(1, Ctx);
std::unique_ptr<X86Operand> Op(
X86Operand::CreateMem(getPointerWidth(), 0, Disp, ScratchRegI32, 0, 1,
SMLoc(), SMLoc()));
@@ -991,7 +991,7 @@ void X86AddressSanitizer64::InstrumentMemOperandLarge(
Inst.setOpcode(X86::CMP16mi);
break;
}
- const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx);
+ const MCExpr *Disp = MCConstantExpr::create(kShadowOffset, Ctx);
std::unique_ptr<X86Operand> Op(
X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI64, 0, 1,
SMLoc(), SMLoc()));
@@ -1001,7 +1001,7 @@ void X86AddressSanitizer64::InstrumentMemOperandLarge(
}
MCSymbol *DoneSym = Ctx.createTempSymbol();
- const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
+ const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx);
EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
EmitCallAsanReport(AccessSize, IsWrite, Ctx, Out, RegCtx);
@@ -1015,7 +1015,7 @@ void X86AddressSanitizer64::InstrumentMOVSImpl(unsigned AccessSize,
// No need to test when RCX equals zero.
MCSymbol *DoneSym = Ctx.createTempSymbol();
- const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
+ const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx);
EmitInstruction(
Out, MCInstBuilder(X86::TEST64rr).addReg(X86::RCX).addReg(X86::RCX));
EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
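
Every hunk in this file is the same mechanical change: LLVM 3.7 renamed the MC-layer factory methods from Create to create. A minimal sketch of the renamed API, assuming LLVM 3.7-era headers (the helper names are hypothetical):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCExpr.h"

    using namespace llvm;

    // Build the constant displacement used by the ASan instrumentation above.
    const MCExpr *makeAsanDisp(int64_t Offset, MCContext &Ctx) {
      // Pre-3.7 spelling: MCConstantExpr::Create(Offset, Ctx)
      return MCConstantExpr::create(Offset, Ctx);
    }

    // Build the @PLT callee reference used by EmitCallAsanReport above.
    const MCExpr *makePLTCallee(StringRef Fn, MCContext &Ctx) {
      MCSymbol *Sym = Ctx.getOrCreateSymbol(Fn);
      // Pre-3.7 spelling: MCSymbolRefExpr::Create(Sym, VK_PLT, Ctx)
      return MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_PLT, Ctx);
    }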
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 3047fd1..e896571 100644
--- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -973,7 +973,7 @@ void X86AsmParser::SetFrameRegister(unsigned RegNo) {
std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
unsigned basereg =
is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
- const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
+ const MCExpr *Disp = MCConstantExpr::create(0, getContext());
return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
/*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
Loc, Loc, 0);
@@ -982,7 +982,7 @@ std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
unsigned basereg =
is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
- const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
+ const MCExpr *Disp = MCConstantExpr::create(0, getContext());
return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
/*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
Loc, Loc, 0);
@@ -1195,7 +1195,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
const MCExpr *Val =
- MCSymbolRefExpr::Create(Sym, Variant, getContext());
+ MCSymbolRefExpr::create(Sym, Variant, getContext());
if (IDVal == "b" && Sym->isUndefined())
return Error(Loc, "invalid reference to undefined symbol");
StringRef Identifier = Sym->getName();
@@ -1265,9 +1265,9 @@ X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
}
if (SM.getImm() || !Disp) {
- const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext());
+ const MCExpr *Imm = MCConstantExpr::create(SM.getImm(), getContext());
if (Disp)
- Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext());
+ Disp = MCBinaryExpr::createAdd(Disp, Imm, getContext());
else
Disp = Imm; // An immediate displacement only.
}
@@ -1354,7 +1354,7 @@ bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
// Create the symbol reference.
MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
- Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
+ Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
return false;
}
@@ -1382,7 +1382,7 @@ X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
// An immediate following a 'segment register', 'colon' token sequence can
// be followed by a bracketed expression. If it isn't we know we have our
// final segment override.
- const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
+ const MCExpr *Disp = MCConstantExpr::create(ImmDisp, getContext());
return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
/*BaseReg=*/0, /*IndexReg=*/0, /*Scale=*/1,
Start, ImmDispToken.getEndLoc(), Size);
@@ -1435,7 +1435,7 @@ X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) {
return ErrorOperand(Tok.getLoc(), "Expected } at this point");
Parser.Lex(); // Eat "}"
const MCExpr *RndModeOp =
- MCConstantExpr::Create(rndMode, Parser.getContext());
+ MCConstantExpr::create(rndMode, Parser.getContext());
return X86Operand::CreateImm(RndModeOp, Start, End);
}
if(Tok.getIdentifier().equals("sae")){
@@ -1499,7 +1499,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
return nullptr;
}
- const MCExpr *Disp = MCConstantExpr::Create(SM.getImm(), getContext());
+ const MCExpr *Disp = MCConstantExpr::create(SM.getImm(), getContext());
// BaseReg is non-zero to avoid assertions. In the context of inline asm,
// we're pointing to a local variable in memory, so the base register is
// really the frame or stack pointer.
@@ -1549,7 +1549,7 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
Val));
}
- NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
+ NewDisp = MCConstantExpr::create(OrigDispVal + DotDispVal, getContext());
return false;
}
@@ -1623,7 +1623,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
unsigned Len = End.getPointer() - TypeLoc.getPointer();
InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
- const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
+ const MCExpr *Imm = MCConstantExpr::create(CVal, getContext());
return X86Operand::CreateImm(Imm, Start, End);
}
@@ -1683,7 +1683,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End,
Size);
- const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
+ const MCExpr *ImmExpr = MCConstantExpr::create(Imm, getContext());
return X86Operand::CreateImm(ImmExpr, Start, End);
}
@@ -1841,7 +1841,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
// of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
// only way to do this without lookahead is to eat the '(' and see what is
// after it.
- const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
+ const MCExpr *Disp = MCConstantExpr::create(0, getParser().getContext());
if (getLexer().isNot(AsmToken::LParen)) {
SMLoc ExprEnd;
if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
@@ -2061,7 +2061,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx),
NameLoc));
- const MCExpr *ImmOp = MCConstantExpr::Create(ComparisonCode,
+ const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
getParser().getContext());
Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
@@ -2088,7 +2088,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) {
Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc));
- const MCExpr *ImmOp = MCConstantExpr::Create(ComparisonCode,
+ const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
getParser().getContext());
Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
@@ -2115,7 +2115,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
if (ComparisonCode != ~0U) {
Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc));
- const MCExpr *ImmOp = MCConstantExpr::Create(ComparisonCode,
+ const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
getParser().getContext());
Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
@@ -2375,7 +2375,7 @@ bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
X86Operand &Op = static_cast<X86Operand &>(*Ops[1]);
assert(Op.isImm() && "expected immediate");
int64_t Res;
- if (!Op.getImm()->EvaluateAsAbsolute(Res) || Res > 255) {
+ if (!Op.getImm()->evaluateAsAbsolute(Res) || Res > 255) {
Error(Op.getStartLoc(), "interrupt vector must be in range [0-255]");
return false;
}
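
Alongside the Create -> create rename, this file picks up the companion rename EvaluateAsAbsolute -> evaluateAsAbsolute. A sketch of both idioms under the same LLVM 3.7 assumption (helper names hypothetical):

    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCExpr.h"

    using namespace llvm;

    // Mirrors ParseIntelBracExpression above: fold a parsed immediate into
    // an existing displacement, or use the immediate alone.
    const MCExpr *foldImmIntoDisp(const MCExpr *Disp, int64_t Imm,
                                  MCContext &Ctx) {
      const MCExpr *ImmE = MCConstantExpr::create(Imm, Ctx);
      return Disp ? MCBinaryExpr::createAdd(Disp, ImmE, Ctx) : ImmE;
    }

    // Mirrors validateInstruction: an INT vector must be a constant in
    // [0, 255].
    bool isValidIntVector(const MCExpr *Imm) {
      int64_t Res;
      return Imm->evaluateAsAbsolute(Res) && Res >= 0 && Res <= 255;
    }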
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 3469d19..6e99c37 100644
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -546,6 +546,9 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
case TYPE_XMM512:
mcInst.addOperand(MCOperand::createReg(X86::ZMM0 + (immediate >> 4)));
return;
+  case TYPE_BNDR:
+    mcInst.addOperand(MCOperand::createReg(X86::BND0 + (immediate >> 4)));
+    return;
case TYPE_REL8:
isBranch = true;
pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
@@ -827,6 +829,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
case TYPE_VK16:
case TYPE_DEBUGREG:
case TYPE_CONTROLREG:
+ case TYPE_BNDR:
return translateRMRegister(mcInst, insn);
case TYPE_M:
case TYPE_M8:
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index 9e65050..301db72 100644
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -460,6 +460,7 @@ enum OperandEncoding {
ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \
ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \
ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand") \
+ ENUM_ENTRY(TYPE_BNDR, "MPX bounds register") \
\
ENUM_ENTRY(TYPE_Mv, "Memory operand of operand size") \
ENUM_ENTRY(TYPE_Rv, "Register operand of operand size") \
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index af4399a..ea727e6 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -150,11 +150,11 @@ void X86ATTInstPrinter::printPCRelImm(const MCInst *MI, unsigned OpNo,
// that address in hex.
const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr());
int64_t Address;
- if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
+ if (BranchTarget && BranchTarget->evaluateAsAbsolute(Address)) {
O << formatHex((uint64_t)Address);
} else {
// Otherwise, just print the expression.
- O << *Op.getExpr();
+ Op.getExpr()->print(O, &MAI);
}
}
}
@@ -178,7 +178,9 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
- O << markup("<imm:") << '$' << *Op.getExpr() << markup(">");
+ O << markup("<imm:") << '$';
+ Op.getExpr()->print(O, &MAI);
+ O << markup(">");
}
}
@@ -203,7 +205,7 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
O << formatImm(DispVal);
} else {
assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
- O << *DispSpec.getExpr();
+ DispSpec.getExpr()->print(O, &MAI);
}
if (IndexReg.getReg() || BaseReg.getReg()) {
@@ -273,7 +275,7 @@ void X86ATTInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
O << formatImm(DispSpec.getImm());
} else {
assert(DispSpec.isExpr() && "non-immediate displacement?");
- O << *DispSpec.getExpr();
+ DispSpec.getExpr()->print(O, &MAI);
}
O << markup(">");
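
These hunks drop the stream operator on MCExpr in favor of an explicit print(O, &MAI) call, which lets expression printing consult the target's MCAsmInfo. A sketch of the new idiom:

    #include "llvm/MC/MCAsmInfo.h"
    #include "llvm/MC/MCExpr.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    // Print a displacement expression the LLVM 3.7 way.
    void printDisp(raw_ostream &O, const MCExpr &Disp, const MCAsmInfo &MAI) {
      // Pre-3.7 spelling: O << Disp;
      Disp.print(O, &MAI);
    }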
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index 4d92daf..879378f 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -131,12 +131,12 @@ void X86IntelInstPrinter::printPCRelImm(const MCInst *MI, unsigned OpNo,
// that address in hex.
const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr());
int64_t Address;
- if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
+ if (BranchTarget && BranchTarget->evaluateAsAbsolute(Address)) {
O << formatHex((uint64_t)Address);
}
else {
// Otherwise, just print the expression.
- O << *Op.getExpr();
+ Op.getExpr()->print(O, &MAI);
}
}
}
@@ -150,7 +150,7 @@ void X86IntelInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
O << formatImm((int64_t)Op.getImm());
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
- O << *Op.getExpr();
+ Op.getExpr()->print(O, &MAI);
}
}
@@ -187,7 +187,7 @@ void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
if (!DispSpec.isImm()) {
if (NeedPlus) O << " + ";
assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
- O << *DispSpec.getExpr();
+ DispSpec.getExpr()->print(O, &MAI);
} else {
int64_t DispVal = DispSpec.getImm();
if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) {
@@ -245,7 +245,7 @@ void X86IntelInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
O << formatImm(DispSpec.getImm());
} else {
assert(DispSpec.isExpr() && "non-immediate displacement?");
- O << *DispSpec.getExpr();
+ DispSpec.getExpr()->print(O, &MAI);
}
O << ']';
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 6d4284d..1ac656d 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -326,7 +326,7 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
// FIXME: We could generate something better than plain 0x90.
if (!HasNopl) {
for (uint64_t i = 0; i < Count; ++i)
- OW->Write8(0x90);
+ OW->write8(0x90);
return true;
}
@@ -336,10 +336,10 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
for (uint8_t i = 0; i < Prefixes; i++)
- OW->Write8(0x66);
+ OW->write8(0x66);
const uint8_t Rest = ThisNopLength - Prefixes;
for (uint8_t i = 0; i < Rest; i++)
- OW->Write8(Nops[Rest - 1][i]);
+ OW->write8(Nops[Rest - 1][i]);
Count -= ThisNopLength;
} while (Count != 0);
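
The rename here is Write8 -> write8, inside the long-NOP emitter: NOPs up to 10 bytes come from a lookup table, and longer ones are formed by prepending 0x66 operand-size prefixes. A self-contained sketch of that padding loop (the real Nops table is elided; plain 0x90 fill stands in for it):

    #include <algorithm>
    #include <cstdint>
    #include "llvm/MC/MCObjectWriter.h"

    using namespace llvm;

    void emitNopPadding(uint64_t Count, MCObjectWriter &OW) {
      const uint64_t MaxNopLength = 15; // illustrative cap
      do {
        uint8_t Len = (uint8_t)std::min(Count, MaxNopLength);
        // Anything longer than 10 bytes gets 0x66 prefixes up front.
        uint8_t Prefixes = Len <= 10 ? 0 : Len - 10;
        for (uint8_t i = 0; i < Prefixes; ++i)
          OW.write8(0x66);
        // The backend indexes its Nops table as Nops[Rest - 1][i]; plain
        // single-byte NOPs keep this sketch self-contained.
        for (uint8_t i = 0; i < Len - Prefixes; ++i)
          OW.write8(0x90);
        Count -= Len;
      } while (Count != 0);
    }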
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index 4508883..a33468d 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -66,6 +66,7 @@ static X86_64RelType getType64(unsigned Kind,
case X86::reloc_riprel_4byte:
case X86::reloc_riprel_4byte_movq_load:
return RT64_32;
+ case FK_PCRel_2:
case FK_Data_2:
return RT64_16;
case FK_PCRel_1:
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
index a39def9..2943dd3 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
@@ -31,13 +31,13 @@ public:
StringRef SymName; SymI->getName(SymName);
uint64_t SymAddr; SymI->getAddress(SymAddr);
- uint64_t SymSize; SymI->getSize(SymSize);
+ uint64_t SymSize = SymI->getSize();
int64_t Addend; getELFRelocationAddend(Rel, Addend);
MCSymbol *Sym = Ctx.getOrCreateSymbol(SymName);
// FIXME: check that the value is actually the same.
if (!Sym->isVariable())
- Sym->setVariableValue(MCConstantExpr::Create(SymAddr, Ctx));
+ Sym->setVariableValue(MCConstantExpr::create(SymAddr, Ctx));
const MCExpr *Expr = nullptr;
// If hasAddend is true, then we need to add Addend (r_addend) to Expr.
@@ -76,7 +76,7 @@ public:
case R_X86_64_PC64:
// S + A - P (P/pcrel is implicit)
hasAddend = true;
- Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+ Expr = MCSymbolRefExpr::create(Sym, Ctx);
break;
case R_X86_64_GOT32:
case R_X86_64_GOT64:
@@ -85,27 +85,27 @@ public:
case R_X86_64_GOTPLT64:
// G + A
hasAddend = true;
- Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, Ctx);
+ Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOT, Ctx);
break;
case R_X86_64_PLT32:
// L + A - P -> S@PLT + A
hasAddend = true;
- Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_PLT, Ctx);
+ Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_PLT, Ctx);
break;
case R_X86_64_GLOB_DAT:
case R_X86_64_JUMP_SLOT:
// S
- Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+ Expr = MCSymbolRefExpr::create(Sym, Ctx);
break;
case R_X86_64_GOTPCREL:
case R_X86_64_GOTPCREL64:
// G + GOT + A - P -> S@GOTPCREL + A
hasAddend = true;
- Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
+ Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
break;
case R_X86_64_GOTOFF64:
// S + A - GOT
- Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTOFF, Ctx);
+ Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOTOFF, Ctx);
break;
case R_X86_64_PLTOFF64:
// L + A - GOT
@@ -113,15 +113,15 @@ public:
case R_X86_64_SIZE32:
case R_X86_64_SIZE64:
// Z + A
- Expr = MCConstantExpr::Create(SymSize, Ctx);
+ Expr = MCConstantExpr::create(SymSize, Ctx);
break;
default:
- Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+ Expr = MCSymbolRefExpr::create(Sym, Ctx);
break;
}
if (Expr && hasAddend && Addend != 0)
- Expr = MCBinaryExpr::CreateAdd(Expr,
- MCConstantExpr::Create(Addend, Ctx),
+ Expr = MCBinaryExpr::createAdd(Expr,
+ MCConstantExpr::create(Addend, Ctx),
Ctx);
return Expr;
}
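
The pattern throughout this file: each ELF relocation type is mapped to an MCExpr, with the addend folded in via createAdd only when the relocation uses one. A sketch of the R_X86_64_GOTPCREL case (S@GOTPCREL + A), same LLVM 3.7 assumption:

    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCExpr.h"

    using namespace llvm;

    const MCExpr *gotpcrelExpr(MCSymbol *Sym, int64_t Addend, MCContext &Ctx) {
      const MCExpr *E =
          MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
      if (Addend != 0)
        E = MCBinaryExpr::createAdd(E, MCConstantExpr::create(Addend, Ctx),
                                    Ctx);
      return E;
    }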
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index bda35f2..fc0b0f8 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -119,9 +119,9 @@ X86_64MCAsmInfoDarwin::getExprForPersonalitySymbol(const MCSymbol *Sym,
MCStreamer &Streamer) const {
MCContext &Context = Streamer.getContext();
const MCExpr *Res =
- MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context);
- const MCExpr *Four = MCConstantExpr::Create(4, Context);
- return MCBinaryExpr::CreateAdd(Res, Four, Context);
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context);
+ const MCExpr *Four = MCConstantExpr::create(4, Context);
+ return MCBinaryExpr::createAdd(Res, Four, Context);
}
void X86MCAsmInfoMicrosoft::anchor() { }
@@ -132,6 +132,11 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
PrivateLabelPrefix = ".L";
PointerSize = 8;
WinEHEncodingType = WinEH::EncodingType::Itanium;
+ } else {
+ // 32-bit X86 doesn't use CFI, so this isn't a real encoding type. It's just
+ // a placeholder that the Windows EHStreamer looks for to suppress CFI
+ // output. In particular, usesWindowsCFI() returns false.
+ WinEHEncodingType = WinEH::EncodingType::X86;
}
ExceptionsType = ExceptionHandling::WinEH;
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 8aed7a4..10c434c 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -304,7 +304,7 @@ EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size,
EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS);
return;
}
- Expr = MCConstantExpr::Create(DispOp.getImm(), Ctx);
+ Expr = MCConstantExpr::create(DispOp.getImm(), Ctx);
} else {
Expr = DispOp.getExpr();
}
@@ -351,7 +351,7 @@ EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size,
ImmOffset -= 1;
if (ImmOffset)
- Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(ImmOffset, Ctx),
+ Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(ImmOffset, Ctx),
Ctx);
// Emit a symbolic constant as a fixup and 4 zeros.
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 8e3c721..cc98e55 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -115,8 +115,8 @@ static MCRegisterInfo *createX86MCRegisterInfo(StringRef TT) {
return X;
}
-static MCAsmInfo *createX86MCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) {
- Triple TheTriple(TT);
+static MCAsmInfo *createX86MCAsmInfo(const MCRegisterInfo &MRI,
+ const Triple &TheTriple) {
bool is64Bit = TheTriple.getArch() == Triple::x86_64;
MCAsmInfo *MAI;
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
index 6cf5af7..a5aadd6 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
@@ -39,33 +39,33 @@ public:
MCSymbol *Sym = Ctx.getOrCreateSymbol(SymName);
// FIXME: check that the value is actually the same.
if (!Sym->isVariable())
- Sym->setVariableValue(MCConstantExpr::Create(SymAddr, Ctx));
+ Sym->setVariableValue(MCConstantExpr::create(SymAddr, Ctx));
const MCExpr *Expr = nullptr;
switch(RelType) {
case X86_64_RELOC_TLV:
- Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
+ Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
break;
case X86_64_RELOC_SIGNED_4:
- Expr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Sym, Ctx),
- MCConstantExpr::Create(4, Ctx),
+ Expr = MCBinaryExpr::createAdd(MCSymbolRefExpr::create(Sym, Ctx),
+ MCConstantExpr::create(4, Ctx),
Ctx);
break;
case X86_64_RELOC_SIGNED_2:
- Expr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Sym, Ctx),
- MCConstantExpr::Create(2, Ctx),
+ Expr = MCBinaryExpr::createAdd(MCSymbolRefExpr::create(Sym, Ctx),
+ MCConstantExpr::create(2, Ctx),
Ctx);
break;
case X86_64_RELOC_SIGNED_1:
- Expr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Sym, Ctx),
- MCConstantExpr::Create(1, Ctx),
+ Expr = MCBinaryExpr::createAdd(MCSymbolRefExpr::create(Sym, Ctx),
+ MCConstantExpr::create(1, Ctx),
Ctx);
break;
case X86_64_RELOC_GOT_LOAD:
- Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
+ Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
break;
case X86_64_RELOC_GOT:
- Expr = MCSymbolRefExpr::Create(Sym, isPCRel ?
+ Expr = MCSymbolRefExpr::create(Sym, isPCRel ?
MCSymbolRefExpr::VK_GOTPCREL :
MCSymbolRefExpr::VK_GOT,
Ctx);
@@ -84,7 +84,7 @@ public:
report_fatal_error("Expected X86_64_RELOC_UNSIGNED after "
"X86_64_RELOC_SUBTRACTOR.");
- const MCExpr *LHS = MCSymbolRefExpr::Create(Sym, Ctx);
+ const MCExpr *LHS = MCSymbolRefExpr::create(Sym, Ctx);
symbol_iterator RSymI = Rel.getSymbol();
uint64_t RSymAddr;
@@ -94,15 +94,15 @@ public:
MCSymbol *RSym = Ctx.getOrCreateSymbol(RSymName);
if (!RSym->isVariable())
- RSym->setVariableValue(MCConstantExpr::Create(RSymAddr, Ctx));
+ RSym->setVariableValue(MCConstantExpr::create(RSymAddr, Ctx));
- const MCExpr *RHS = MCSymbolRefExpr::Create(RSym, Ctx);
+ const MCExpr *RHS = MCSymbolRefExpr::create(RSym, Ctx);
- Expr = MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
+ Expr = MCBinaryExpr::createSub(LHS, RHS, Ctx);
break;
}
default:
- Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+ Expr = MCSymbolRefExpr::create(Sym, Ctx);
break;
}
return Expr;
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index 9da3e1f..95acc07 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -25,7 +25,7 @@ using namespace llvm;
namespace {
class X86MachObjectWriter : public MCMachObjectTargetWriter {
- bool RecordScatteredRelocation(MachObjectWriter *Writer,
+ bool recordScatteredRelocation(MachObjectWriter *Writer,
const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
@@ -33,7 +33,7 @@ class X86MachObjectWriter : public MCMachObjectTargetWriter {
MCValue Target,
unsigned Log2Size,
uint64_t &FixedValue);
- void RecordTLVPRelocation(MachObjectWriter *Writer,
+ void recordTLVPRelocation(MachObjectWriter *Writer,
const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
@@ -54,12 +54,10 @@ class X86MachObjectWriter : public MCMachObjectTargetWriter {
MCValue Target, uint64_t &FixedValue);
public:
- X86MachObjectWriter(bool Is64Bit, uint32_t CPUType,
- uint32_t CPUSubtype)
- : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
- /*UseAggressiveSymbolFolding=*/Is64Bit) {}
+ X86MachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype)
+ : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {}
- void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
+ void recordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
const MCAsmLayout &Layout, const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) override {
@@ -142,13 +140,11 @@ void X86MachObjectWriter::RecordX86_64Relocation(
const MCSymbol *A = &Target.getSymA()->getSymbol();
if (A->isTemporary())
A = &Writer->findAliasedSymbol(*A);
- const MCSymbolData &A_SD = Asm.getSymbolData(*A);
const MCSymbol *A_Base = Asm.getAtom(*A);
const MCSymbol *B = &Target.getSymB()->getSymbol();
if (B->isTemporary())
B = &Writer->findAliasedSymbol(*B);
- const MCSymbolData &B_SD = Asm.getSymbolData(*B);
const MCSymbol *B_Base = Asm.getAtom(*B);
// Neither symbol can be modified.
@@ -190,7 +186,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(
(!B_Base ? 0 : Writer->getSymbolAddress(*B_Base, Layout));
if (!A_Base)
- Index = A_SD.getFragment()->getParent()->getOrdinal() + 1;
+ Index = A->getFragment()->getParent()->getOrdinal() + 1;
Type = MachO::X86_64_RELOC_UNSIGNED;
MachO::any_relocation_info MRE;
@@ -202,7 +198,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(
if (B_Base)
RelSymbol = B_Base;
else
- Index = B_SD.getFragment()->getParent()->getOrdinal() + 1;
+ Index = B->getFragment()->getParent()->getOrdinal() + 1;
Type = MachO::X86_64_RELOC_SUBTRACTOR;
} else {
const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
@@ -211,7 +207,6 @@ void X86MachObjectWriter::RecordX86_64Relocation(
if (!Asm.getContext().getAsmInfo()->isSectionAtomizableBySymbols(Sec))
Asm.addLocalUsedInReloc(*Symbol);
}
- const MCSymbolData &SD = Asm.getSymbolData(*Symbol);
RelSymbol = Asm.getAtom(*Symbol);
// Relocations inside debug sections always use local relocations when
@@ -235,7 +230,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(
Layout.getSymbolOffset(*RelSymbol);
} else if (Symbol->isInSection() && !Symbol->isVariable()) {
// The index is the section ordinal (1-based).
- Index = SD.getFragment()->getParent()->getOrdinal() + 1;
+ Index = Symbol->getFragment()->getParent()->getOrdinal() + 1;
Value += Writer->getSymbolAddress(*Symbol, Layout);
if (IsPCRel)
@@ -243,7 +238,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(
} else if (Symbol->isVariable()) {
const MCExpr *Value = Symbol->getVariableValue();
int64_t Res;
- bool isAbs = Value->EvaluateAsAbsolute(Res, Layout,
+ bool isAbs = Value->evaluateAsAbsolute(Res, Layout,
Writer->getSectionAddressMap());
if (isAbs) {
FixedValue = Res;
@@ -339,7 +334,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(
Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
}
-bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
+bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter *Writer,
const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
@@ -354,23 +349,21 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
// See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
- const MCSymbolData *A_SD = &Asm.getSymbolData(*A);
- if (!A_SD->getFragment())
+ if (!A->getFragment())
report_fatal_error("symbol '" + A->getName() +
"' can not be undefined in a subtraction expression",
false);
uint32_t Value = Writer->getSymbolAddress(*A, Layout);
- uint64_t SecAddr =
- Writer->getSectionAddress(A_SD->getFragment()->getParent());
+ uint64_t SecAddr = Writer->getSectionAddress(A->getFragment()->getParent());
FixedValue += SecAddr;
uint32_t Value2 = 0;
if (const MCSymbolRefExpr *B = Target.getSymB()) {
- const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+ const MCSymbol *SB = &B->getSymbol();
- if (!B_SD->getFragment())
+ if (!SB->getFragment())
report_fatal_error("symbol '" + B->getSymbol().getName() +
"' can not be undefined in a subtraction expression",
false);
@@ -380,10 +373,10 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
// Note that there is no longer any semantic difference between these two
// relocation types from the linkers point of view, this is done solely for
// pedantic compatibility with 'as'.
- Type = A_SD->isExternal() ? (unsigned)MachO::GENERIC_RELOC_SECTDIFF :
- (unsigned)MachO::GENERIC_RELOC_LOCAL_SECTDIFF;
+ Type = A->isExternal() ? (unsigned)MachO::GENERIC_RELOC_SECTDIFF
+ : (unsigned)MachO::GENERIC_RELOC_LOCAL_SECTDIFF;
Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout);
- FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
+ FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent());
}
// Relocations are written out in reverse order, so the PAIR comes first.
@@ -435,7 +428,7 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
return true;
}
-void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer,
+void X86MachObjectWriter::recordTLVPRelocation(MachObjectWriter *Writer,
const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
@@ -490,7 +483,7 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
// If this is a 32-bit TLVP reloc it's handled a bit differently.
if (Target.getSymA() &&
Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) {
- RecordTLVPRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ recordTLVPRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
FixedValue);
return;
}
@@ -499,7 +492,7 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
// scattered relocation entry. Differences always require scattered
// relocations.
if (Target.getSymB()) {
- RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ recordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
Target, Log2Size, FixedValue);
return;
}
@@ -515,10 +508,10 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
if (IsPCRel)
Offset += 1 << Log2Size;
// Try to record the scattered relocation if needed. Fall back to non
- // scattered if necessary (see comments in RecordScatteredRelocation()
+ // scattered if necessary (see comments in recordScatteredRelocation()
// for details).
if (Offset && A && !Writer->doesSymbolRequireExternRelocation(*A) &&
- RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ recordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
Log2Size, FixedValue))
return;
@@ -538,7 +531,7 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
// Resolve constant variables.
if (A->isVariable()) {
int64_t Res;
- if (A->getVariableValue()->EvaluateAsAbsolute(
+ if (A->getVariableValue()->evaluateAsAbsolute(
Res, Layout, Writer->getSectionAddressMap())) {
FixedValue = Res;
return;
diff --git a/contrib/llvm/lib/Target/X86/X86.td b/contrib/llvm/lib/Target/X86/X86.td
index c70e2e9..8522674 100644
--- a/contrib/llvm/lib/Target/X86/X86.td
+++ b/contrib/llvm/lib/Target/X86/X86.td
@@ -168,6 +168,8 @@ def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
"Support PRFCHW instructions">;
def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
"Support RDSEED instruction">;
+def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true",
+ "Support MPX instructions">;
def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
"Use LEA for adjusting the stack pointer">;
def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
@@ -188,10 +190,6 @@ def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
"LEA instruction with certain arguments is slow">;
def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
"INC and DEC instructions are slower than ADD and SUB">;
-def FeatureUseSqrtEst : SubtargetFeature<"use-sqrt-est", "UseSqrtEst", "true",
- "Use RSQRT* to optimize square root calculations">;
-def FeatureUseRecipEst : SubtargetFeature<"use-recip-est", "UseReciprocalEst",
- "true", "Use RCP* to optimize division calculations">;
def FeatureSoftFloat
: SubtargetFeature<"soft-float", "UseSoftFloat", "true",
"Use software floating point features.">;
@@ -380,7 +378,7 @@ class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel,
FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
- FeatureSlowIncDec]>;
+ FeatureSlowIncDec, FeatureMPX]>;
def : KnightsLandingProc<"knl">;
// FIXME: define SKX model
@@ -391,7 +389,7 @@ class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel,
FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
- FeatureSlowIncDec]>;
+ FeatureSlowIncDec, FeatureMPX]>;
def : SkylakeProc<"skylake">;
def : SkylakeProc<"skx">; // Legacy alias.
@@ -444,7 +442,7 @@ def : ProcessorModel<"btver2", BtVer2Model,
FeaturePRFCHW, FeatureAES, FeaturePCLMUL,
FeatureBMI, FeatureF16C, FeatureMOVBE,
FeatureLZCNT, FeaturePOPCNT, FeatureFastUAMem,
- FeatureSlowSHLD, FeatureUseSqrtEst, FeatureUseRecipEst]>;
+ FeatureSlowSHLD]>;
// TODO: We should probably add 'FeatureFastUAMem' to all of the AMD chips.
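
FeatureMPX follows the usual SubtargetFeature pattern: the record binds the "mpx" attribute string to a HasMPX subtarget field that TableGen-generated code sets from the feature string. A hedged sketch of the C++ side; the class below is a stand-in, though the accessor matches this commit's X86Subtarget.h change:

    // Stand-in for the subtarget plumbing behind a SubtargetFeature record.
    class X86SubtargetSketch {
      bool HasMPX = false; // set when "+mpx" appears in the feature string
    public:
      bool hasMPX() const { return HasMPX; }
    };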
diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
index f97557e..64fc6d0 100644
--- a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -78,7 +78,7 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO,
switch (MO.getType()) {
default: llvm_unreachable("unknown symbol type!");
case MachineOperand::MO_ConstantPoolIndex:
- O << *P.GetCPISymbol(MO.getIndex());
+ P.GetCPISymbol(MO.getIndex())->print(O, P.MAI);
P.printOffset(MO.getOffset(), O);
break;
case MachineOperand::MO_GlobalAddress: {
@@ -127,9 +127,12 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO,
// If the name begins with a dollar-sign, enclose it in parens. We do this
// to avoid having it look like an integer immediate to the assembler.
if (GVSym->getName()[0] != '$')
- O << *GVSym;
- else
- O << '(' << *GVSym << ')';
+ GVSym->print(O, P.MAI);
+ else {
+ O << '(';
+ GVSym->print(O, P.MAI);
+ O << ')';
+ }
P.printOffset(MO.getOffset(), O);
break;
}
@@ -146,12 +149,15 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO,
// These affect the name of the symbol, not any suffix.
break;
case X86II::MO_GOT_ABSOLUTE_ADDRESS:
- O << " + [.-" << *P.MF->getPICBaseSymbol() << ']';
+ O << " + [.-";
+ P.MF->getPICBaseSymbol()->print(O, P.MAI);
+ O << ']';
break;
case X86II::MO_PIC_BASE_OFFSET:
case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
- O << '-' << *P.MF->getPICBaseSymbol();
+ O << '-';
+ P.MF->getPICBaseSymbol()->print(O, P.MAI);
break;
case X86II::MO_TLSGD: O << "@TLSGD"; break;
case X86II::MO_TLSLD: O << "@TLSLD"; break;
@@ -168,7 +174,8 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO,
case X86II::MO_PLT: O << "@PLT"; break;
case X86II::MO_TLVP: O << "@TLVP"; break;
case X86II::MO_TLVP_PIC_BASE:
- O << "@TLVP" << '-' << *P.MF->getPICBaseSymbol();
+ O << "@TLVP" << '-';
+ P.MF->getPICBaseSymbol()->print(O, P.MAI);
break;
case X86II::MO_SECREL: O << "@SECREL32"; break;
}
@@ -525,7 +532,7 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
// register any SEH handlers, so its object files should be safe.
OutStreamer->EmitSymbolAttribute(S, MCSA_Global);
OutStreamer->EmitAssignment(
- S, MCConstantExpr::Create(int64_t(1), MMI->getContext()));
+ S, MCConstantExpr::create(int64_t(1), MMI->getContext()));
}
}
}
@@ -549,7 +556,7 @@ emitNonLazySymbolPointer(MCStreamer &OutStreamer, MCSymbol *StubLabel,
// using NLPs; however, sometimes the types are local to the file.
// We need to fill in the value for the NLP in those cases.
OutStreamer.EmitValue(
- MCSymbolRefExpr::Create(MCSym.getPointer(), OutStreamer.getContext()),
+ MCSymbolRefExpr::create(MCSym.getPointer(), OutStreamer.getContext()),
4 /*size*/);
}
diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
index 9af0aeb..3dc75d7 100644
--- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
@@ -3530,9 +3530,9 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
SmallVector<MachineOperand, 8> AddrOps;
AM.getFullAddress(AddrOps);
- MachineInstr *Result =
- XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps,
- Size, Alignment, /*AllowCommute=*/true);
+ MachineInstr *Result = XII.foldMemoryOperandImpl(
+ *FuncInfo.MF, MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, Alignment,
+ /*AllowCommute=*/true);
if (!Result)
return false;
@@ -3541,20 +3541,21 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
// to just look at OpNo + the offset to the index reg. We actually need to
// scan the instruction to find the index reg and see if its the correct reg
// class.
- for (MIOperands MO(Result); MO.isValid(); ++MO) {
- if (!MO->isReg() || MO->isDef() || MO->getReg() != AM.IndexReg)
+ unsigned OperandNo = 0;
+ for (MachineInstr::mop_iterator I = Result->operands_begin(),
+ E = Result->operands_end(); I != E; ++I, ++OperandNo) {
+ MachineOperand &MO = *I;
+ if (!MO.isReg() || MO.isDef() || MO.getReg() != AM.IndexReg)
continue;
// Found the index reg, now try to rewrite it.
- unsigned OpNo = MO.getOperandNo();
unsigned IndexReg = constrainOperandRegClass(Result->getDesc(),
- MO->getReg(), OpNo);
- if (IndexReg == MO->getReg())
+ MO.getReg(), OperandNo);
+ if (IndexReg == MO.getReg())
continue;
- MO->setReg(IndexReg);
+ MO.setReg(IndexReg);
}
Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
- FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
MI->eraseFromParent();
return true;
}
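
This hunk replaces the removed MIOperands helper with a plain mop_iterator walk, tracking the operand index manually now that the MIOperands class is gone. A sketch of the new iteration shape (helper name hypothetical):

    #include "llvm/CodeGen/MachineInstr.h"

    using namespace llvm;

    // Visit every use of Reg in MI, with its operand index available.
    void rewriteUsesOf(MachineInstr *MI, unsigned Reg, unsigned NewReg) {
      unsigned OperandNo = 0;
      for (MachineInstr::mop_iterator I = MI->operands_begin(),
                                      E = MI->operands_end();
           I != E; ++I, ++OperandNo) {
        MachineOperand &MO = *I;
        if (!MO.isReg() || MO.isDef() || MO.getReg() != Reg)
          continue;
        MO.setReg(NewReg); // the code above also re-constrains the reg class
      }
    }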
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3ba8115..e3ec288 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -67,12 +67,6 @@ static cl::opt<bool> ExperimentalVectorWideningLegalization(
"rather than promotion."),
cl::Hidden);
-static cl::opt<int> ReciprocalEstimateRefinementSteps(
- "x86-recip-refinement-steps", cl::init(1),
- cl::desc("Specify the number of Newton-Raphson iterations applied to the "
- "result of the hardware reciprocal estimate instruction."),
- cl::NotHidden);
-
// Forward declarations.
static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
SDValue V2);
@@ -842,13 +836,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
- // Only provide customized ctpop vector bit twiddling for vector types we
- // know to perform better than using the popcnt instructions on each vector
- // element. If popcnt isn't supported, always provide the custom version.
- if (!Subtarget->hasPOPCNT()) {
- setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
- setOperationAction(ISD::CTPOP, MVT::v2i64, Custom);
- }
+ setOperationAction(ISD::CTPOP, MVT::v16i8, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v2i64, Custom);
// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
@@ -1113,6 +1104,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v32i8, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v16i16, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v8i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v4i64, Custom);
+
if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
setOperationAction(ISD::FMA, MVT::v8f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f64, Legal);
@@ -1147,16 +1143,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// when we have a 256bit-wide blend with immediate.
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
- // Only provide customized ctpop vector bit twiddling for vector types we
- // know to perform better than using the popcnt instructions on each
- // vector element. If popcnt isn't supported, always provide the custom
- // version.
- if (!Subtarget->hasPOPCNT())
- setOperationAction(ISD::CTPOP, MVT::v4i64, Custom);
-
- // Custom CTPOP always performs better on natively supported v8i32
- setOperationAction(ISD::CTPOP, MVT::v8i32, Custom);
-
// AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
@@ -1273,7 +1259,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i16, Legal);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i32, Legal);
setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i32, Legal);
-
+
setOperationAction(ISD::BR_CC, MVT::i1, Expand);
setOperationAction(ISD::SETCC, MVT::i1, Custom);
setOperationAction(ISD::XOR, MVT::i1, Legal);
@@ -1842,7 +1828,7 @@ X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
Subtarget->isPICStyleGOT());
// In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
// entries.
- return MCSymbolRefExpr::Create(MBB->getSymbol(),
+ return MCSymbolRefExpr::create(MBB->getSymbol(),
MCSymbolRefExpr::VK_GOTOFF, Ctx);
}
@@ -1866,7 +1852,7 @@ getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
// Otherwise, the reference is relative to the PIC base.
- return MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
+ return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
}
std::pair<const TargetRegisterClass *, uint8_t>
@@ -1981,7 +1967,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
}
else if (VA.getLocInfo() == CCValAssign::BCvt)
- ValToCopy = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), ValToCopy);
+ ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
assert(VA.getLocInfo() != CCValAssign::FPExt &&
"Unexpected FP-extend for return value.");
@@ -2018,13 +2004,13 @@ X86TargetLowering::LowerReturn(SDValue Chain,
if (Subtarget->is64Bit()) {
if (ValVT == MVT::x86mmx) {
if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
- ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ValToCopy);
+ ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
ValToCopy);
// If we don't have SSE2 available, convert to v4f32 so the generated
// register is legal.
if (!Subtarget->hasSSE2())
- ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,ValToCopy);
+ ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
}
}
}
@@ -2451,7 +2437,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
DAG.getValueType(VA.getValVT()));
else if (VA.getLocInfo() == CCValAssign::BCvt)
- ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
+ ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
if (VA.isExtInLoc()) {
// Handle MMX values passed in XMM regs.
@@ -2780,6 +2766,19 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (MF.getTarget().Options.DisableTailCalls)
isTailCall = false;
+ if (Subtarget->isPICStyleGOT() &&
+ !MF.getTarget().Options.GuaranteedTailCallOpt) {
+ // If we are using a GOT, disable tail calls to external symbols with
+ // default visibility. Tail calling such a symbol requires using a GOT
+ // relocation, which forces early binding of the symbol. This breaks code
+ // that requires lazy function symbol resolution. Using musttail or
+ // GuaranteedTailCallOpt will override this.
+ GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+ if (!G || (!G->getGlobal()->hasLocalLinkage() &&
+ G->getGlobal()->hasDefaultVisibility()))
+ isTailCall = false;
+ }
+
bool IsMustTail = CLI.CS && CLI.CS->isMustTailCall();
if (IsMustTail) {
// Force this to be a tail call. The verifier rules are enough to ensure
@@ -2898,14 +2897,14 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
else if (RegVT.is128BitVector()) {
// Special case: passing MMX values in XMM registers.
- Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
+ Arg = DAG.getBitcast(MVT::i64, Arg);
Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
} else
Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
break;
case CCValAssign::BCvt:
- Arg = DAG.getNode(ISD::BITCAST, dl, RegVT, Arg);
+ Arg = DAG.getBitcast(RegVT, Arg);
break;
case CCValAssign::Indirect: {
// Store the argument.
@@ -2964,8 +2963,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Note: The actual moving to ECX is done further down.
GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
- if (G && !G->getGlobal()->hasHiddenVisibility() &&
- !G->getGlobal()->hasProtectedVisibility())
+ if (G && !G->getGlobal()->hasLocalLinkage() &&
+ G->getGlobal()->hasDefaultVisibility())
Callee = LowerGlobalAddress(Callee, DAG);
else if (isa<ExternalSymbolSDNode>(Callee))
Callee = LowerExternalSymbol(Callee, DAG);
@@ -4073,7 +4072,7 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
} else
llvm_unreachable("Unexpected vector type");
- return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
+ return DAG.getBitcast(VT, Vec);
}
static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
@@ -4200,9 +4199,9 @@ static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
MVT CastVT = Subtarget.hasAVX2() ? MVT::v8i32 : MVT::v8f32;
SDValue Mask = DAG.getConstant(0x0f, dl, MVT::i8);
- Vec256 = DAG.getNode(ISD::BITCAST, dl, CastVT, Vec256);
+ Vec256 = DAG.getBitcast(CastVT, Vec256);
Vec256 = DAG.getNode(X86ISD::BLENDI, dl, CastVT, Result, Vec256, Mask);
- return DAG.getNode(ISD::BITCAST, dl, ResultVT, Vec256);
+ return DAG.getBitcast(ResultVT, Vec256);
}
return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
@@ -4255,7 +4254,7 @@ static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG,
} else
llvm_unreachable("Unexpected vector type");
- return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
+ return DAG.getBitcast(VT, Vec);
}
/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd
@@ -4611,7 +4610,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
}
}
- return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V);
+ return DAG.getBitcast(MVT::v16i8, V);
}
/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
@@ -4749,7 +4748,7 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
SDLoc DL(Op);
SDValue Result = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
DAG.getIntPtrConstant(InsertPSMask, DL));
- return DAG.getNode(ISD::BITCAST, DL, VT, Result);
+ return DAG.getBitcast(VT, Result);
}
/// Return a vector logical shift node.
@@ -4759,12 +4758,11 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
assert(VT.is128BitVector() && "Unknown type for VShift");
MVT ShVT = MVT::v2i64;
unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
- SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
+ SrcOp = DAG.getBitcast(ShVT, SrcOp);
MVT ScalarShiftTy = TLI.getScalarShiftAmountTy(SrcOp.getValueType());
assert(NumBits % 8 == 0 && "Only support byte sized shifts");
SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, ScalarShiftTy);
- return DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal));
+ return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal));
}
static SDValue
@@ -4949,7 +4947,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
SDValue(ResNode.getNode(), 1));
}
- return DAG.getNode(ISD::BITCAST, DL, VT, ResNode);
+ return DAG.getBitcast(VT, ResNode);
}
return SDValue();
}
@@ -5261,8 +5259,8 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
SDValue Imm = ConvertI1VectorToInterger(Op, DAG);
if (Imm.getValueSizeInBits() == VT.getSizeInBits())
- return DAG.getNode(ISD::BITCAST, dl, VT, Imm);
- SDValue ExtVec = DAG.getNode(ISD::BITCAST, dl, MVT::v8i1, Imm);
+ return DAG.getBitcast(VT, Imm);
+ SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
DAG.getIntPtrConstant(0, dl));
}
@@ -5277,7 +5275,7 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
SDValue In = Op.getOperand(idx);
if (In.getOpcode() == ISD::UNDEF)
continue;
- if (!isa<ConstantSDNode>(In))
+ if (!isa<ConstantSDNode>(In))
NonConstIdx.push_back(idx);
else {
Immediate |= cast<ConstantSDNode>(In)->getZExtValue() << idx;
@@ -5304,12 +5302,12 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
}
else if (HasConstElts)
Imm = DAG.getConstant(0, dl, VT);
- else
+ else
Imm = DAG.getUNDEF(VT);
if (Imm.getValueSizeInBits() == VT.getSizeInBits())
- DstVec = DAG.getNode(ISD::BITCAST, dl, VT, Imm);
+ DstVec = DAG.getBitcast(VT, Imm);
else {
- SDValue ExtVec = DAG.getNode(ISD::BITCAST, dl, MVT::v8i1, Imm);
+ SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm);
DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
DAG.getIntPtrConstant(0, dl));
}
@@ -5818,9 +5816,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// convert it to a vector with movd (S2V+shuffle to zero extend).
Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item);
- return DAG.getNode(
- ISD::BITCAST, dl, VT,
- getShuffleVectorZeroOrUndef(Item, Idx * 2, true, Subtarget, DAG));
+ return DAG.getBitcast(VT, getShuffleVectorZeroOrUndef(
+ Item, Idx * 2, true, Subtarget, DAG));
}
}
@@ -5866,7 +5863,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
}
- return DAG.getNode(ISD::BITCAST, dl, VT, Item);
+ return DAG.getBitcast(VT, Item);
}
}
@@ -6257,6 +6254,42 @@ is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
return true;
}
+/// \brief Test whether a shuffle mask is equivalent within each 256-bit lane.
+///
+/// This checks a shuffle mask to see if it is performing the same
+/// 256-bit lane-relative shuffle in each 256-bit lane. This trivially implies
+/// that it is also not lane-crossing. It may however involve a blend from the
+/// same lane of a second vector.
+///
+/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
+/// non-trivial to compute in the face of undef lanes. The representation is
+/// *not* suitable for use with existing 256-bit shuffles as it will contain
+/// entries from both V1 and V2 inputs to the wider mask.
+static bool
+is256BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
+ SmallVectorImpl<int> &RepeatedMask) {
+ int LaneSize = 256 / VT.getScalarSizeInBits();
+ RepeatedMask.resize(LaneSize, -1);
+ int Size = Mask.size();
+ for (int i = 0; i < Size; ++i) {
+ if (Mask[i] < 0)
+ continue;
+ if ((Mask[i] % Size) / LaneSize != i / LaneSize)
+ // This entry crosses lanes, so there is no way to model this shuffle.
+ return false;
+
+ // Ok, handle the in-lane shuffles by detecting if and when they repeat.
+ if (RepeatedMask[i % LaneSize] == -1)
+ // This is the first non-undef entry in this slot of a 256-bit lane.
+ RepeatedMask[i % LaneSize] =
+ Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + Size;
+ else if (RepeatedMask[i % LaneSize] + (i / LaneSize) * LaneSize != Mask[i])
+ // Found a mismatch with the repeated mask.
+ return false;
+ }
+ return true;
+}
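A self-contained sketch (hypothetical mask values, mirroring the checks above) of how a v8f64 two-input mask collapses to a single 256-bit-lane repeated mask:

  #include <array>
  #include <cassert>

  int main() {
    // v8f64: Size = 8 elements, LaneSize = 256 / 64 = 4.
    std::array<int, 8> Mask = {0, 9, 2, 11, 4, 13, 6, 15};
    std::array<int, 4> Repeated = {-1, -1, -1, -1};
    for (int i = 0; i < 8; ++i) {
      assert((Mask[i] % 8) / 4 == i / 4 && "entry crosses a 256-bit lane");
      if (Repeated[i % 4] == -1)
        Repeated[i % 4] = Mask[i] < 8 ? Mask[i] % 4 : Mask[i] % 4 + 8;
      else
        assert(Repeated[i % 4] + (i / 4) * 4 == Mask[i] && "lanes disagree");
    }
    // Repeated == {0, 9, 2, 11}: both lanes take elements 1 and 3 from V2.
  }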
+
/// \brief Checks whether a shuffle mask is equivalent to an explicit list of
/// arguments.
///
@@ -6316,6 +6349,22 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL,
return DAG.getConstant(Imm, DL, MVT::i8);
}
+/// \brief Get an 8-bit shuffle immediate, 1 bit per lane, for a mask.
+///
+/// This helper function produces an 8-bit shuffle immediate corresponding to
+/// the ubiquitous shuffle encoding scheme used in x86 instructions for
+/// shuffling 8 lanes.
+static SDValue get1bitLaneShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL,
+ SelectionDAG &DAG) {
+ assert(Mask.size() <= 8 &&
+ "Up to 8 elts may be in Imm8 1-bit lane shuffle mask");
+ unsigned Imm = 0;
+ for (unsigned i = 0; i < Mask.size(); ++i)
+ if (Mask[i] >= 0)
+ Imm |= (Mask[i] % 2) << i;
+ return DAG.getConstant(Imm, DL, MVT::i8);
+}
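A worked example (hypothetical mask): the identity v8f64 mask {0, 1, 2, 3, 4, 5, 6, 7} has alternating low bits, so the packing above yields 0xAA, the no-op VPERMILPD-style immediate:

  static_assert(((0 % 2) << 0 | (1 % 2) << 1 | (2 % 2) << 2 | (3 % 2) << 3 |
                 (4 % 2) << 4 | (5 % 2) << 5 | (6 % 2) << 6 | (7 % 2) << 7) ==
                    0xAA,
                "identity mask packs to imm8 0xAA");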
+
/// \brief Try to emit a blend instruction for a shuffle using bit math.
///
/// This is used as a fallback approach when first class blend instructions are
@@ -6341,10 +6390,9 @@ static SDValue lowerVectorShuffleAsBitBlend(SDLoc DL, MVT VT, SDValue V1,
V1 = DAG.getNode(ISD::AND, DL, VT, V1, V1Mask);
// We have to cast V2 around.
MVT MaskVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
- V2 = DAG.getNode(ISD::BITCAST, DL, VT,
- DAG.getNode(X86ISD::ANDNP, DL, MaskVT,
- DAG.getNode(ISD::BITCAST, DL, MaskVT, V1Mask),
- DAG.getNode(ISD::BITCAST, DL, MaskVT, V2)));
+ V2 = DAG.getBitcast(VT, DAG.getNode(X86ISD::ANDNP, DL, MaskVT,
+ DAG.getBitcast(MaskVT, V1Mask),
+ DAG.getBitcast(MaskVT, V2)));
return DAG.getNode(ISD::OR, DL, VT, V1, V2);
}
@@ -6395,11 +6443,11 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
BlendMask |= 1u << (i * Scale + j);
MVT BlendVT = VT.getSizeInBits() > 128 ? MVT::v8i32 : MVT::v4i32;
- V1 = DAG.getNode(ISD::BITCAST, DL, BlendVT, V1);
- V2 = DAG.getNode(ISD::BITCAST, DL, BlendVT, V2);
- return DAG.getNode(ISD::BITCAST, DL, VT,
- DAG.getNode(X86ISD::BLENDI, DL, BlendVT, V1, V2,
- DAG.getConstant(BlendMask, DL, MVT::i8)));
+ V1 = DAG.getBitcast(BlendVT, V1);
+ V2 = DAG.getBitcast(BlendVT, V2);
+ return DAG.getBitcast(
+ VT, DAG.getNode(X86ISD::BLENDI, DL, BlendVT, V1, V2,
+ DAG.getConstant(BlendMask, DL, MVT::i8)));
}
// FALLTHROUGH
case MVT::v8i16: {
@@ -6412,11 +6460,11 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
for (int j = 0; j < Scale; ++j)
BlendMask |= 1u << (i * Scale + j);
- V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1);
- V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V2);
- return DAG.getNode(ISD::BITCAST, DL, VT,
- DAG.getNode(X86ISD::BLENDI, DL, MVT::v8i16, V1, V2,
- DAG.getConstant(BlendMask, DL, MVT::i8)));
+ V1 = DAG.getBitcast(MVT::v8i16, V1);
+ V2 = DAG.getBitcast(MVT::v8i16, V2);
+ return DAG.getBitcast(VT,
+ DAG.getNode(X86ISD::BLENDI, DL, MVT::v8i16, V1, V2,
+ DAG.getConstant(BlendMask, DL, MVT::i8)));
}
case MVT::v16i16: {
@@ -6465,13 +6513,12 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
: DAG.getConstant(Mask[i] < Size ? -1 : 0, DL,
MVT::i8));
- V1 = DAG.getNode(ISD::BITCAST, DL, BlendVT, V1);
- V2 = DAG.getNode(ISD::BITCAST, DL, BlendVT, V2);
- return DAG.getNode(
- ISD::BITCAST, DL, VT,
- DAG.getNode(ISD::VSELECT, DL, BlendVT,
- DAG.getNode(ISD::BUILD_VECTOR, DL, BlendVT, VSELECTMask),
- V1, V2));
+ V1 = DAG.getBitcast(BlendVT, V1);
+ V2 = DAG.getBitcast(BlendVT, V2);
+ return DAG.getBitcast(VT, DAG.getNode(ISD::VSELECT, DL, BlendVT,
+ DAG.getNode(ISD::BUILD_VECTOR, DL,
+ BlendVT, VSELECTMask),
+ V1, V2));
}
default:
@@ -6652,13 +6699,12 @@ static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1,
if (Subtarget->hasSSSE3()) {
// Cast the inputs to i8 vector of correct length to match PALIGNR.
MVT AlignVT = MVT::getVectorVT(MVT::i8, 16 * NumLanes);
- Lo = DAG.getNode(ISD::BITCAST, DL, AlignVT, Lo);
- Hi = DAG.getNode(ISD::BITCAST, DL, AlignVT, Hi);
+ Lo = DAG.getBitcast(AlignVT, Lo);
+ Hi = DAG.getBitcast(AlignVT, Hi);
- return DAG.getNode(ISD::BITCAST, DL, VT,
- DAG.getNode(X86ISD::PALIGNR, DL, AlignVT, Hi, Lo,
- DAG.getConstant(Rotation * Scale, DL,
- MVT::i8)));
+ return DAG.getBitcast(
+ VT, DAG.getNode(X86ISD::PALIGNR, DL, AlignVT, Hi, Lo,
+ DAG.getConstant(Rotation * Scale, DL, MVT::i8)));
}
assert(VT.getSizeInBits() == 128 &&
@@ -6671,15 +6717,15 @@ static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1,
int HiByteShift = Rotation * Scale;
// Cast the inputs to v2i64 to match PSLLDQ/PSRLDQ.
- Lo = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Lo);
- Hi = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Hi);
+ Lo = DAG.getBitcast(MVT::v2i64, Lo);
+ Hi = DAG.getBitcast(MVT::v2i64, Hi);
SDValue LoShift = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v2i64, Lo,
DAG.getConstant(LoByteShift, DL, MVT::i8));
SDValue HiShift = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v2i64, Hi,
DAG.getConstant(HiByteShift, DL, MVT::i8));
- return DAG.getNode(ISD::BITCAST, DL, VT,
- DAG.getNode(ISD::OR, DL, MVT::v2i64, LoShift, HiShift));
+ return DAG.getBitcast(VT,
+ DAG.getNode(ISD::OR, DL, MVT::v2i64, LoShift, HiShift));
}
/// \brief Compute whether each element of a shuffle is zeroable.
@@ -6740,8 +6786,8 @@ static SDValue lowerVectorShuffleAsBitMask(SDLoc DL, MVT VT, SDValue V1,
SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), DL,
IntEltVT);
if (EltVT.isFloatingPoint()) {
- Zero = DAG.getNode(ISD::BITCAST, DL, EltVT, Zero);
- AllOnes = DAG.getNode(ISD::BITCAST, DL, EltVT, AllOnes);
+ Zero = DAG.getBitcast(EltVT, Zero);
+ AllOnes = DAG.getBitcast(EltVT, AllOnes);
}
SmallVector<SDValue, 16> VMaskOps(Mask.size(), Zero);
SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
@@ -6833,11 +6879,11 @@ static SDValue lowerVectorShuffleAsShift(SDLoc DL, MVT VT, SDValue V1,
MVT ShiftVT = MVT::getVectorVT(ShiftSVT, Size / Scale);
assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
"Illegal integer vector type");
- V = DAG.getNode(ISD::BITCAST, DL, ShiftVT, V);
+ V = DAG.getBitcast(ShiftVT, V);
V = DAG.getNode(OpCode, DL, ShiftVT, V,
DAG.getConstant(ShiftAmt, DL, MVT::i8));
- return DAG.getNode(ISD::BITCAST, DL, VT, V);
+ return DAG.getBitcast(VT, V);
};
// SSE/AVX supports logical shifts up to 64-bit integers - so we can just
@@ -6878,31 +6924,28 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
if (Subtarget->hasSSE41()) {
MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale),
NumElements / Scale);
- return DAG.getNode(ISD::BITCAST, DL, VT,
- DAG.getNode(X86ISD::VZEXT, DL, ExtVT, InputV));
+ return DAG.getBitcast(VT, DAG.getNode(X86ISD::VZEXT, DL, ExtVT, InputV));
}
// For any extends we can cheat for larger element sizes and use shuffle
// instructions that can fold with a load and/or copy.
if (AnyExt && EltBits == 32) {
int PSHUFDMask[4] = {0, -1, 1, -1};
- return DAG.getNode(
- ISD::BITCAST, DL, VT,
- DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
- DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, InputV),
- getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
+ return DAG.getBitcast(
+ VT, DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
+ DAG.getBitcast(MVT::v4i32, InputV),
+ getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
}
if (AnyExt && EltBits == 16 && Scale > 2) {
int PSHUFDMask[4] = {0, -1, 0, -1};
InputV = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
- DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, InputV),
+ DAG.getBitcast(MVT::v4i32, InputV),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG));
int PSHUFHWMask[4] = {1, -1, -1, -1};
- return DAG.getNode(
- ISD::BITCAST, DL, VT,
- DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16,
- DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, InputV),
- getV4X86ShuffleImm8ForMask(PSHUFHWMask, DL, DAG)));
+ return DAG.getBitcast(
+ VT, DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16,
+ DAG.getBitcast(MVT::v8i16, InputV),
+ getV4X86ShuffleImm8ForMask(PSHUFHWMask, DL, DAG)));
}
// If this would require more than 2 unpack instructions to expand, use
@@ -6914,11 +6957,11 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
for (int i = 0; i < 16; ++i)
PSHUFBMask[i] =
DAG.getConstant((i % Scale == 0) ? i / Scale : 0x80, DL, MVT::i8);
- InputV = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, InputV);
- return DAG.getNode(ISD::BITCAST, DL, VT,
- DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, InputV,
- DAG.getNode(ISD::BUILD_VECTOR, DL,
- MVT::v16i8, PSHUFBMask)));
+ InputV = DAG.getBitcast(MVT::v16i8, InputV);
+ return DAG.getBitcast(VT,
+ DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, InputV,
+ DAG.getNode(ISD::BUILD_VECTOR, DL,
+ MVT::v16i8, PSHUFBMask)));
}
// Otherwise emit a sequence of unpacks.
@@ -6926,13 +6969,13 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
SDValue Ext = AnyExt ? DAG.getUNDEF(InputVT)
: getZeroVector(InputVT, Subtarget, DAG, DL);
- InputV = DAG.getNode(ISD::BITCAST, DL, InputVT, InputV);
+ InputV = DAG.getBitcast(InputVT, InputV);
InputV = DAG.getNode(X86ISD::UNPCKL, DL, InputVT, InputV, Ext);
Scale /= 2;
EltBits *= 2;
NumElements /= 2;
} while (Scale > 1);
- return DAG.getNode(ISD::BITCAST, DL, VT, InputV);
+ return DAG.getBitcast(VT, InputV);
}
/// \brief Try to lower a vector shuffle as a zero extension on any microarch.
@@ -7030,9 +7073,9 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
};
if (SDValue V = CanZExtLowHalf()) {
- V = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, V);
+ V = DAG.getBitcast(MVT::v2i64, V);
V = DAG.getNode(X86ISD::VZEXT_MOVL, DL, MVT::v2i64, V);
- return DAG.getNode(ISD::BITCAST, DL, VT, V);
+ return DAG.getBitcast(VT, V);
}
// No viable ext lowering found.
@@ -7106,7 +7149,7 @@ static SDValue lowerVectorShuffleAsElementInsertion(
if (SDValue V2S = getScalarValueForVectorElement(
V2, Mask[V2Index] - Mask.size(), DAG)) {
// We need to zext the scalar if it is smaller than an i32.
- V2S = DAG.getNode(ISD::BITCAST, DL, EltVT, V2S);
+ V2S = DAG.getBitcast(EltVT, V2S);
if (EltVT == MVT::i8 || EltVT == MVT::i16) {
// Using zext to expand a narrow element won't work for non-zero
// insertions.
@@ -7155,7 +7198,7 @@ static SDValue lowerVectorShuffleAsElementInsertion(
V2 = DAG.getNode(X86ISD::VZEXT_MOVL, DL, ExtVT, V2);
if (ExtVT != VT)
- V2 = DAG.getNode(ISD::BITCAST, DL, VT, V2);
+ V2 = DAG.getBitcast(VT, V2);
if (V2Index != 0) {
// If we have 4 or fewer lanes we can cheaply shuffle the element into
@@ -7167,13 +7210,13 @@ static SDValue lowerVectorShuffleAsElementInsertion(
V2Shuffle[V2Index] = 0;
V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Shuffle);
} else {
- V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, V2);
+ V2 = DAG.getBitcast(MVT::v2i64, V2);
V2 = DAG.getNode(
X86ISD::VSHLDQ, DL, MVT::v2i64, V2,
DAG.getConstant(
V2Index * EltVT.getSizeInBits()/8, DL,
DAG.getTargetLoweringInfo().getScalarShiftAmountTy(MVT::v2i64)));
- V2 = DAG.getNode(ISD::BITCAST, DL, VT, V2);
+ V2 = DAG.getBitcast(VT, V2);
}
}
return V2;
@@ -7396,13 +7439,13 @@ static SDValue lowerVectorShuffleAsUnpack(SDLoc DL, MVT VT, SDValue V1,
V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask);
// Cast the inputs to the type we will use to unpack them.
- V1 = DAG.getNode(ISD::BITCAST, DL, UnpackVT, V1);
- V2 = DAG.getNode(ISD::BITCAST, DL, UnpackVT, V2);
+ V1 = DAG.getBitcast(UnpackVT, V1);
+ V2 = DAG.getBitcast(UnpackVT, V2);
// Unpack the inputs and cast the result back to the desired type.
- return DAG.getNode(ISD::BITCAST, DL, VT,
- DAG.getNode(UnpackLo ? X86ISD::UNPCKL : X86ISD::UNPCKH,
- DL, UnpackVT, V1, V2));
+ return DAG.getBitcast(
+ VT, DAG.getNode(UnpackLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL,
+ UnpackVT, V1, V2));
};
// We try each unpack from the largest to the smallest to try and find one
@@ -7558,12 +7601,12 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// Straight shuffle of a single input vector. For everything from SSE2
// onward this has a single fast instruction with no scary immediates.
// We have to map the mask as it is actually a v4i32 shuffle instruction.
- V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V1);
+ V1 = DAG.getBitcast(MVT::v4i32, V1);
int WidenedMask[4] = {
std::max(Mask[0], 0) * 2, std::max(Mask[0], 0) * 2 + 1,
std::max(Mask[1], 0) * 2, std::max(Mask[1], 0) * 2 + 1};
- return DAG.getNode(
- ISD::BITCAST, DL, MVT::v2i64,
+ return DAG.getBitcast(
+ MVT::v2i64,
DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
getV4X86ShuffleImm8ForMask(WidenedMask, DL, DAG)));
}
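For instance (hypothetical values), a v2i64 Mask = {1, 0} widens to the v4i32 mask {2, 3, 0, 1}, and the 2-bits-per-lane packing of getV4X86ShuffleImm8ForMask produces the classic swap immediate:

  static_assert((2 | (3 << 2) | (0 << 4) | (1 << 6)) == 0x4E,
                "v2i64 {1,0} -> PSHUFD immediate 0x4E");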
@@ -7584,12 +7627,12 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
};
if (SDValue V1Pack = GetPackNode(V1))
if (SDValue V2Pack = GetPackNode(V2))
- return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
- DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8,
- Mask[0] == 0 ? V1Pack.getOperand(0)
- : V1Pack.getOperand(1),
- Mask[1] == 2 ? V2Pack.getOperand(0)
- : V2Pack.getOperand(1)));
+ return DAG.getBitcast(MVT::v2i64,
+ DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8,
+ Mask[0] == 0 ? V1Pack.getOperand(0)
+ : V1Pack.getOperand(1),
+ Mask[1] == 2 ? V2Pack.getOperand(0)
+ : V2Pack.getOperand(1)));
// Try to use shift instructions.
if (SDValue Shift =
@@ -7639,10 +7682,10 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// incur 2 cycles of stall for integer vectors on Nehalem and older chips.
// However, all the alternatives are still more cycles and newer chips don't
// have this problem. It would be really nice if x86 had better shuffles here.
- V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, V1);
- V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, V2);
- return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
- DAG.getVectorShuffle(MVT::v2f64, DL, V1, V2, Mask));
+ V1 = DAG.getBitcast(MVT::v2f64, V1);
+ V2 = DAG.getBitcast(MVT::v2f64, V2);
+ return DAG.getBitcast(MVT::v2i64,
+ DAG.getVectorShuffle(MVT::v2f64, DL, V1, V2, Mask));
}
/// \brief Test whether this can be lowered with a single SHUFPS instruction.
@@ -7941,11 +7984,10 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// up the inputs, bypassing domain shift penalties that we would incur if we
// directly used PSHUFD on Nehalem and older. For newer chips, this isn't
// relevant.
- return DAG.getNode(ISD::BITCAST, DL, MVT::v4i32,
- DAG.getVectorShuffle(
- MVT::v4f32, DL,
- DAG.getNode(ISD::BITCAST, DL, MVT::v4f32, V1),
- DAG.getNode(ISD::BITCAST, DL, MVT::v4f32, V2), Mask));
+ return DAG.getBitcast(
+ MVT::v4i32,
+ DAG.getVectorShuffle(MVT::v4f32, DL, DAG.getBitcast(MVT::v4f32, V1),
+ DAG.getBitcast(MVT::v4f32, V2), Mask));
}
/// \brief Lowering of single-input v8i16 shuffles is the cornerstone of SSE2
@@ -8123,11 +8165,10 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
int PSHUFDMask[] = {0, 1, 2, 3};
PSHUFDMask[ADWord] = BDWord;
PSHUFDMask[BDWord] = ADWord;
- V = DAG.getNode(ISD::BITCAST, DL, VT,
- DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT,
- DAG.getNode(ISD::BITCAST, DL, PSHUFDVT, V),
- getV4X86ShuffleImm8ForMask(PSHUFDMask, DL,
- DAG)));
+ V = DAG.getBitcast(
+ VT,
+ DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, DAG.getBitcast(PSHUFDVT, V),
+ getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
// Adjust the mask to match the new locations of A and B.
for (int &M : Mask)
@@ -8368,11 +8409,10 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V,
getV4X86ShuffleImm8ForMask(PSHUFHMask, DL, DAG));
if (!isNoopShuffleMask(PSHUFDMask))
- V = DAG.getNode(ISD::BITCAST, DL, VT,
- DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT,
- DAG.getNode(ISD::BITCAST, DL, PSHUFDVT, V),
- getV4X86ShuffleImm8ForMask(PSHUFDMask, DL,
- DAG)));
+ V = DAG.getBitcast(
+ VT,
+ DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, DAG.getBitcast(PSHUFDVT, V),
+ getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
// At this point, each half should contain all its inputs, and we can then
// just shuffle them into their final position.
@@ -8433,11 +8473,11 @@ static SDValue lowerVectorShuffleAsPSHUFB(SDLoc DL, MVT VT, SDValue V1,
if (V1InUse)
V1 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8,
- DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, V1),
+ DAG.getBitcast(MVT::v16i8, V1),
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V1Mask));
if (V2InUse)
V2 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8,
- DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, V2),
+ DAG.getBitcast(MVT::v16i8, V2),
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V2Mask));
// If we need shuffled inputs from both, blend the two.
@@ -8448,7 +8488,7 @@ static SDValue lowerVectorShuffleAsPSHUFB(SDLoc DL, MVT VT, SDValue V1,
V = V1InUse ? V1 : V2;
// Cast the result back to the correct type.
- return DAG.getNode(ISD::BITCAST, DL, VT, V);
+ return DAG.getBitcast(VT, V);
}
/// \brief Generic lowering of 8-lane i16 shuffles.
@@ -8749,10 +8789,9 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// Update the lane map based on the mapping we ended up with.
LaneMap[MovingInputs[i]] = 2 * j + MovingInputs[i] % 2;
}
- V1 = DAG.getNode(
- ISD::BITCAST, DL, MVT::v16i8,
- DAG.getVectorShuffle(MVT::v8i16, DL,
- DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1),
+ V1 = DAG.getBitcast(
+ MVT::v16i8,
+ DAG.getVectorShuffle(MVT::v8i16, DL, DAG.getBitcast(MVT::v8i16, V1),
DAG.getUNDEF(MVT::v8i16), PreDupI16Shuffle));
// Unpack the bytes to form the i16s that will be shuffled into place.
@@ -8770,10 +8809,9 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
assert(PostDupI16Shuffle[i / 2] == MappedMask &&
"Conflicting entrties in the original shuffle!");
}
- return DAG.getNode(
- ISD::BITCAST, DL, MVT::v16i8,
- DAG.getVectorShuffle(MVT::v8i16, DL,
- DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1),
+ return DAG.getBitcast(
+ MVT::v16i8,
+ DAG.getVectorShuffle(MVT::v8i16, DL, DAG.getBitcast(MVT::v8i16, V1),
DAG.getUNDEF(MVT::v8i16), PostDupI16Shuffle));
};
if (SDValue V = tryToWidenViaDuplication())
@@ -8866,19 +8904,18 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// We use the mask type to pick which bytes are preserved based on how many
// elements are dropped.
MVT MaskVTs[] = { MVT::v8i16, MVT::v4i32, MVT::v2i64 };
- SDValue ByteClearMask =
- DAG.getNode(ISD::BITCAST, DL, MVT::v16i8,
- DAG.getConstant(0xFF, DL, MaskVTs[NumEvenDrops - 1]));
+ SDValue ByteClearMask = DAG.getBitcast(
+ MVT::v16i8, DAG.getConstant(0xFF, DL, MaskVTs[NumEvenDrops - 1]));
V1 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V1, ByteClearMask);
if (!IsSingleInput)
V2 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V2, ByteClearMask);
// Now pack things back together.
- V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1);
- V2 = IsSingleInput ? V1 : DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V2);
+ V1 = DAG.getBitcast(MVT::v8i16, V1);
+ V2 = IsSingleInput ? V1 : DAG.getBitcast(MVT::v8i16, V2);
SDValue Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, V1, V2);
for (int i = 1; i < NumEvenDrops; ++i) {
- Result = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, Result);
+ Result = DAG.getBitcast(MVT::v8i16, Result);
Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, Result, Result);
}
@@ -8912,7 +8949,7 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
std::none_of(std::begin(HiBlendMask), std::end(HiBlendMask),
[](int M) { return M >= 0 && M % 2 == 1; })) {
// Use a mask to drop the high bytes.
- VLoHalf = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V);
+ VLoHalf = DAG.getBitcast(MVT::v8i16, V);
VLoHalf = DAG.getNode(ISD::AND, DL, MVT::v8i16, VLoHalf,
DAG.getConstant(0x00FF, DL, MVT::v8i16));
@@ -8929,10 +8966,10 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
} else {
// Otherwise just unpack the low half of V into VLoHalf and the high half into
// VHiHalf so that we can blend them as i16s.
- VLoHalf = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
- DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V, Zero));
- VHiHalf = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
- DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero));
+ VLoHalf = DAG.getBitcast(
+ MVT::v8i16, DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V, Zero));
+ VHiHalf = DAG.getBitcast(
+ MVT::v8i16, DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero));
}
SDValue LoV = DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, LoBlendMask);
@@ -9073,8 +9110,8 @@ static SDValue splitAndLowerVectorShuffle(SDLoc DL, MVT VT, SDValue V1,
LoV = DAG.getNode(ISD::BUILD_VECTOR, DL, OrigSplitVT, LoOps);
HiV = DAG.getNode(ISD::BUILD_VECTOR, DL, OrigSplitVT, HiOps);
}
- return std::make_pair(DAG.getNode(ISD::BITCAST, DL, SplitVT, LoV),
- DAG.getNode(ISD::BITCAST, DL, SplitVT, HiV));
+ return std::make_pair(DAG.getBitcast(SplitVT, LoV),
+ DAG.getBitcast(SplitVT, HiV));
};
SDValue LoV1, HiV1, LoV2, HiV2;
@@ -9407,12 +9444,12 @@ static SDValue lowerVectorShuffleByMerging128BitLanes(
LaneMask[2 * i + 1] = 2*Lanes[i] + 1;
}
- V1 = DAG.getNode(ISD::BITCAST, DL, LaneVT, V1);
- V2 = DAG.getNode(ISD::BITCAST, DL, LaneVT, V2);
+ V1 = DAG.getBitcast(LaneVT, V1);
+ V2 = DAG.getBitcast(LaneVT, V2);
SDValue LaneShuffle = DAG.getVectorShuffle(LaneVT, DL, V1, V2, LaneMask);
// Cast it back to the type we actually want.
- LaneShuffle = DAG.getNode(ISD::BITCAST, DL, VT, LaneShuffle);
+ LaneShuffle = DAG.getBitcast(VT, LaneShuffle);
// Now do a simple shuffle that isn't lane crossing.
SmallVector<int, 8> NewMask;
@@ -9441,6 +9478,37 @@ static bool isShuffleMaskInputInPlace(int Input, ArrayRef<int> Mask) {
return true;
}
+static SDValue lowerVectorShuffleWithSHUFPD(SDLoc DL, MVT VT,
+ ArrayRef<int> Mask, SDValue V1,
+ SDValue V2, SelectionDAG &DAG) {
+
+ // Mask for V8F64: 0/1, 8/9, 2/3, 10/11, 4/5, ..
+ // Mask for V4F64: 0/1, 4/5, 2/3, 6/7..
+ assert(VT.getScalarSizeInBits() == 64 && "Unexpected data type for VSHUFPD");
+ int NumElts = VT.getVectorNumElements();
+ bool ShufpdMask = true;
+ bool CommutableMask = true;
+ unsigned Immediate = 0;
+ for (int i = 0; i < NumElts; ++i) {
+ if (Mask[i] < 0)
+ continue;
+ int Val = (i & 6) + NumElts * (i & 1);
+ int CommutVal = (i & 0xe) + NumElts * ((i & 1)^1);
+ if (Mask[i] < Val || Mask[i] > Val + 1)
+ ShufpdMask = false;
+ if (Mask[i] < CommutVal || Mask[i] > CommutVal + 1)
+ CommutableMask = false;
+ Immediate |= (Mask[i] % 2) << i;
+ }
+ if (ShufpdMask)
+ return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
+ DAG.getConstant(Immediate, DL, MVT::i8));
+ if (CommutableMask)
+ return DAG.getNode(X86ISD::SHUFP, DL, VT, V2, V1,
+ DAG.getConstant(Immediate, DL, MVT::i8));
+ return SDValue();
+}
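A worked instance (hypothetical mask): for v4f64 with Mask = {0, 5, 2, 7}, every slot stays inside its permitted pair (Val = (i & 6) + NumElts * (i & 1) gives 0, 4, 2, 6), so ShufpdMask holds and the low bits pack as:

  static_assert(((0 % 2) << 0 | (5 % 2) << 1 | (2 % 2) << 2 |
                 (7 % 2) << 3) == 0xA,
                "v4f64 {0,5,2,7} -> SHUFPD V1, V2, 0xA");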
+
/// \brief Handle lowering of 4-lane 64-bit floating point shuffles.
///
/// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2
@@ -9505,24 +9573,9 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return Blend;
// Check if the blend happens to exactly fit the SHUFPD pattern.
- if ((Mask[0] == -1 || Mask[0] < 2) &&
- (Mask[1] == -1 || (Mask[1] >= 4 && Mask[1] < 6)) &&
- (Mask[2] == -1 || (Mask[2] >= 2 && Mask[2] < 4)) &&
- (Mask[3] == -1 || Mask[3] >= 6)) {
- unsigned SHUFPDMask = (Mask[0] == 1) | ((Mask[1] == 5) << 1) |
- ((Mask[2] == 3) << 2) | ((Mask[3] == 7) << 3);
- return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f64, V1, V2,
- DAG.getConstant(SHUFPDMask, DL, MVT::i8));
- }
- if ((Mask[0] == -1 || (Mask[0] >= 4 && Mask[0] < 6)) &&
- (Mask[1] == -1 || Mask[1] < 2) &&
- (Mask[2] == -1 || Mask[2] >= 6) &&
- (Mask[3] == -1 || (Mask[3] >= 2 && Mask[3] < 4))) {
- unsigned SHUFPDMask = (Mask[0] == 5) | ((Mask[1] == 1) << 1) |
- ((Mask[2] == 7) << 2) | ((Mask[3] == 3) << 3);
- return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f64, V2, V1,
- DAG.getConstant(SHUFPDMask, DL, MVT::i8));
- }
+ if (SDValue Op =
+ lowerVectorShuffleWithSHUFPD(DL, MVT::v4f64, Mask, V1, V2, DAG))
+ return Op;
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle. However, if we have AVX2 and either input is already in place,
@@ -9584,10 +9637,10 @@ static SDValue lowerV4I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
PSHUFDMask[2 * i] = 2 * RepeatedMask[i];
PSHUFDMask[2 * i + 1] = 2 * RepeatedMask[i] + 1;
}
- return DAG.getNode(
- ISD::BITCAST, DL, MVT::v4i64,
+ return DAG.getBitcast(
+ MVT::v4i64,
DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32,
- DAG.getNode(ISD::BITCAST, DL, MVT::v8i32, V1),
+ DAG.getBitcast(MVT::v8i32, V1),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
}
}
@@ -9700,11 +9753,11 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask));
if (Subtarget->hasAVX2())
- return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8f32,
- DAG.getNode(ISD::BITCAST, DL, MVT::v8f32,
- DAG.getNode(ISD::BUILD_VECTOR, DL,
+ return DAG.getNode(
+ X86ISD::VPERMV, DL, MVT::v8f32,
+ DAG.getBitcast(MVT::v8f32, DAG.getNode(ISD::BUILD_VECTOR, DL,
MVT::v8i32, VPermMask)),
- V1);
+ V1);
// Otherwise, fall back.
return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask,
@@ -9894,12 +9947,11 @@ static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
PSHUFBMask[2 * i] = DAG.getConstant(2 * M, DL, MVT::i8);
PSHUFBMask[2 * i + 1] = DAG.getConstant(2 * M + 1, DL, MVT::i8);
}
- return DAG.getNode(
- ISD::BITCAST, DL, MVT::v16i16,
- DAG.getNode(
- X86ISD::PSHUFB, DL, MVT::v32i8,
- DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, V1),
- DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, PSHUFBMask)));
+ return DAG.getBitcast(MVT::v16i16,
+ DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8,
+ DAG.getBitcast(MVT::v32i8, V1),
+ DAG.getNode(ISD::BUILD_VECTOR, DL,
+ MVT::v32i8, PSHUFBMask)));
}
// Try to simplify this by merging 128-bit lanes to enable a lane-based
@@ -10039,10 +10091,9 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
MVT FpVT = MVT::getVectorVT(MVT::getFloatingPointVT(ElementBits),
VT.getVectorNumElements());
- V1 = DAG.getNode(ISD::BITCAST, DL, FpVT, V1);
- V2 = DAG.getNode(ISD::BITCAST, DL, FpVT, V2);
- return DAG.getNode(ISD::BITCAST, DL, VT,
- DAG.getVectorShuffle(FpVT, DL, V1, V2, Mask));
+ V1 = DAG.getBitcast(FpVT, V1);
+ V2 = DAG.getBitcast(FpVT, V2);
+ return DAG.getBitcast(VT, DAG.getVectorShuffle(FpVT, DL, V1, V2, Mask));
}
switch (VT.SimpleTy) {
@@ -10064,64 +10115,60 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
}
}
-/// \brief Handle lowering of 8-lane 64-bit floating point shuffles.
-static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
- SDLoc DL(Op);
- assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
- assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
- ArrayRef<int> Mask = SVOp->getMask();
- assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
-
- // X86 has dedicated unpack instructions that can handle specific blend
- // operations: UNPCKH and UNPCKL.
- if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f64, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f64, V1, V2);
+static SDValue lowerVectorShuffleWithVALIGN(SDLoc DL, MVT VT,
+ ArrayRef<int> Mask, SDValue V1,
+ SDValue V2, SelectionDAG &DAG) {
- // FIXME: Implement direct support for this type!
- return splitAndLowerVectorShuffle(DL, MVT::v8f64, V1, V2, Mask, DAG);
+ assert(VT.getScalarSizeInBits() >= 32 && "Unexpected data type for VALIGN");
+ // VALIGN pattern: 2, 3, 4, 5, ... (sequential, shifted right)
+ int AlignVal = -1;
+ for (int i = 0; i < (signed)VT.getVectorNumElements(); ++i) {
+ if (Mask[i] < 0)
+ continue;
+ if (Mask[i] < i)
+ return SDValue();
+ if (AlignVal == -1)
+ AlignVal = Mask[i] - i;
+ else if (Mask[i] - i != AlignVal)
+ return SDValue();
+ }
+ // Vector source operands should be swapped
+ return DAG.getNode(X86ISD::VALIGN, DL, VT, V2, V1,
+ DAG.getConstant(AlignVal, DL, MVT::i8));
}
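A sketch of the accepted pattern (hypothetical v8i64 mask, mirroring the loop above): every defined entry must sit a constant distance to the right of its slot.

  #include <cassert>

  int main() {
    int Mask[8] = {3, 4, 5, 6, 7, 8, 9, 10}; // rotate right by 3 across V1:V2
    int AlignVal = -1;
    for (int i = 0; i < 8; ++i) {
      assert(Mask[i] >= i && "not shifted right");
      if (AlignVal == -1)
        AlignVal = Mask[i] - i;
      assert(Mask[i] - i == AlignVal && "shift amount not constant");
    }
    // AlignVal == 3: lowered as X86ISD::VALIGN(V2, V1, 3), sources swapped.
  }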
-/// \brief Handle lowering of 16-lane 32-bit floating point shuffles.
-static SDValue lowerV16F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
- SDLoc DL(Op);
- assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
- assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
- ArrayRef<int> Mask = SVOp->getMask();
- assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
+static SDValue lowerVectorShuffleWithPERMV(SDLoc DL, MVT VT,
+ ArrayRef<int> Mask, SDValue V1,
+ SDValue V2, SelectionDAG &DAG) {
- // Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V2, Mask,
- {// First 128-bit lane.
- 0, 16, 1, 17, 4, 20, 5, 21,
- // Second 128-bit lane.
- 8, 24, 9, 25, 12, 28, 13, 29}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16f32, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask,
- {// First 128-bit lane.
- 2, 18, 3, 19, 6, 22, 7, 23,
- // Second 128-bit lane.
- 10, 26, 11, 27, 14, 30, 15, 31}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16f32, V1, V2);
+ assert(VT.getScalarSizeInBits() >= 16 && "Unexpected data type for PERMV");
- // FIXME: Implement direct support for this type!
- return splitAndLowerVectorShuffle(DL, MVT::v16f32, V1, V2, Mask, DAG);
+ MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
+ MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements());
+
+ SmallVector<SDValue, 32> VPermMask;
+ for (unsigned i = 0; i < VT.getVectorNumElements(); ++i)
+ VPermMask.push_back(Mask[i] < 0 ? DAG.getUNDEF(MaskEltVT) :
+ DAG.getConstant(Mask[i], DL,MaskEltVT));
+ SDValue MaskNode = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVecVT,
+ VPermMask);
+ if (isSingleInputShuffleMask(Mask))
+ return DAG.getNode(X86ISD::VPERMV, DL, VT, MaskNode, V1);
+
+ return DAG.getNode(X86ISD::VPERMV3, DL, VT, MaskNode, V1, V2);
}
-/// \brief Handle lowering of 8-lane 64-bit integer shuffles.
-static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+
+/// \brief Handle lowering of 8-lane 64-bit floating point shuffles.
+static SDValue lowerV8X64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
- assert(V1.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
- assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
+ MVT VT = Op.getSimpleValueType();
+ assert((V1.getSimpleValueType() == MVT::v8f64 ||
+ V1.getSimpleValueType() == MVT::v8i64) && "Bad operand type!");
+ assert((V2.getSimpleValueType() == MVT::v8f64 ||
+ V2.getSimpleValueType() == MVT::v8i64) && "Bad operand type!");
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
@@ -10129,21 +10176,40 @@ static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// X86 has dedicated unpack instructions that can handle specific blend
// operations: UNPCKH and UNPCKL.
if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i64, V1, V2);
+ return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2);
if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i64, V1, V2);
+ return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2);
- // FIXME: Implement direct support for this type!
- return splitAndLowerVectorShuffle(DL, MVT::v8i64, V1, V2, Mask, DAG);
+ if (SDValue Op = lowerVectorShuffleWithVALIGN(DL, VT, Mask, V1, V2, DAG))
+ return Op;
+
+ if (SDValue Op = lowerVectorShuffleWithSHUFPD(DL, VT, Mask, V1, V2, DAG))
+ return Op;
+
+ // PERMILPD instruction - mask 0/1, 0/1, 2/3, 2/3, 4/5, 4/5, 6/7, 6/7
+ if (isSingleInputShuffleMask(Mask)) {
+ if (!is128BitLaneCrossingShuffleMask(VT, Mask))
+ return DAG.getNode(X86ISD::VPERMILPI, DL, VT, V1,
+ get1bitLaneShuffleImm8ForMask(Mask, DL, DAG));
+
+ SmallVector<int, 4> RepeatedMask;
+ if (is256BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask))
+ return DAG.getNode(X86ISD::VPERMI, DL, VT, V1,
+ getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
+ }
+ return lowerVectorShuffleWithPERMV(DL, VT, Mask, V1, V2, DAG);
}
/// \brief Handle lowering of 16-lane 32-bit integer shuffles.
-static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+static SDValue lowerV16X32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
- assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
- assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
+ assert((V1.getSimpleValueType() == MVT::v16i32 ||
+ V1.getSimpleValueType() == MVT::v16f32) && "Bad operand type!");
+ assert((V2.getSimpleValueType() == MVT::v16i32 ||
+ V2.getSimpleValueType() == MVT::v16f32) && "Bad operand type!");
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
@@ -10154,16 +10220,39 @@ static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
0, 16, 1, 17, 4, 20, 5, 21,
// Second 128-bit lane.
8, 24, 9, 25, 12, 28, 13, 29}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i32, V1, V2);
+ return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2);
if (isShuffleEquivalent(V1, V2, Mask,
{// First 128-bit lane.
2, 18, 3, 19, 6, 22, 7, 23,
// Second 128-bit lane.
10, 26, 11, 27, 14, 30, 15, 31}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i32, V1, V2);
+ return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2);
- // FIXME: Implement direct support for this type!
- return splitAndLowerVectorShuffle(DL, MVT::v16i32, V1, V2, Mask, DAG);
+ if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10,
+ 12, 12, 14, 14}))
+ return DAG.getNode(X86ISD::MOVSLDUP, DL, VT, V1);
+ if (isShuffleEquivalent(V1, V2, Mask, {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11,
+ 13, 13, 15, 15}))
+ return DAG.getNode(X86ISD::MOVSHDUP, DL, VT, V1);
+
+ SmallVector<int, 4> RepeatedMask;
+ if (is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask)) {
+ if (isSingleInputShuffleMask(Mask)) {
+ unsigned Opc = VT.isInteger() ? X86ISD::PSHUFD : X86ISD::VPERMILPI;
+ return DAG.getNode(Opc, DL, VT, V1,
+ getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
+ }
+
+ for (int i = 0; i < 4; ++i)
+ if (RepeatedMask[i] >= 16)
+ RepeatedMask[i] -= 12;
+ return lowerVectorShuffleWithSHUFPS(DL, VT, RepeatedMask, V1, V2, DAG);
+ }
+
+ if (SDValue Op = lowerVectorShuffleWithVALIGN(DL, VT, Mask, V1, V2, DAG))
+ return Op;
+
+ return lowerVectorShuffleWithPERMV(DL, VT, Mask, V1, V2, DAG);
}
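A note on the RepeatedMask fixup above (worked values, not from the patch): after 128-bit lane reduction of a 16-element two-input mask, V2 entries land in [16, 20), while the 4-wide SHUFPS form encodes V2 as [4, 8); subtracting 12 remaps one range onto the other:

  static_assert(16 - 12 == 4 && 19 - 12 == 7,
                "V2 entries [16,20) remap to the SHUFPS range [4,8)");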
/// \brief Handle lowering of 32-lane 16-bit integer shuffles.
@@ -10223,13 +10312,11 @@ static SDValue lower512BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// the requisite ISA extensions for that element type are available.
switch (VT.SimpleTy) {
case MVT::v8f64:
- return lowerV8F64VectorShuffle(Op, V1, V2, Subtarget, DAG);
- case MVT::v16f32:
- return lowerV16F32VectorShuffle(Op, V1, V2, Subtarget, DAG);
case MVT::v8i64:
- return lowerV8I64VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ return lowerV8X64VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v16f32:
case MVT::v16i32:
- return lowerV16I32VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ return lowerV16X32VectorShuffle(Op, V1, V2, Subtarget, DAG);
case MVT::v32i16:
if (Subtarget->hasBWI())
return lowerV32I16VectorShuffle(Op, V1, V2, Subtarget, DAG);
@@ -10311,10 +10398,10 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
// Make sure that the new vector type is legal. For example, v2f64 isn't
// legal on SSE1.
if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
- V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, V1);
- V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, V2);
- return DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getVectorShuffle(NewVT, dl, V1, V2, WidenedMask));
+ V1 = DAG.getBitcast(NewVT, V1);
+ V2 = DAG.getBitcast(NewVT, V2);
+ return DAG.getBitcast(
+ VT, DAG.getVectorShuffle(NewVT, dl, V1, V2, WidenedMask));
}
}
@@ -10509,12 +10596,11 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
// If Idx is 0, it's cheaper to do a move instead of a pextrw.
if (Idx == 0)
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
- DAG.getNode(ISD::BITCAST, dl,
- MVT::v4i32,
- Op.getOperand(0)),
- Op.getOperand(1)));
+ return DAG.getNode(
+ ISD::TRUNCATE, dl, MVT::i16,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ DAG.getBitcast(MVT::v4i32, Op.getOperand(0)),
+ Op.getOperand(1)));
SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32,
Op.getOperand(0), Op.getOperand(1));
SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
@@ -10538,10 +10624,9 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
User->getValueType(0) != MVT::i32))
return SDValue();
SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
- DAG.getNode(ISD::BITCAST, dl, MVT::v4i32,
- Op.getOperand(0)),
- Op.getOperand(1));
- return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Extract);
+ DAG.getBitcast(MVT::v4i32, Op.getOperand(0)),
+ Op.getOperand(1));
+ return DAG.getBitcast(MVT::f32, Extract);
}
if (VT == MVT::i32 || VT == MVT::i64) {
@@ -10655,8 +10740,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
if (Idx == 0)
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
- DAG.getNode(ISD::BITCAST, dl,
- MVT::v4i32, Vec),
+ DAG.getBitcast(MVT::v4i32, Vec),
Op.getOperand(1)));
// Transform it so it matches pextrw, which produces a 32-bit result.
MVT EltVT = MVT::i32;
@@ -10877,8 +10961,8 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
assert(OpVT.is128BitVector() && "Expected an SSE type!");
- return DAG.getNode(ISD::BITCAST, dl, OpVT,
- DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,AnyExt));
+ return DAG.getBitcast(
+ OpVT, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, AnyExt));
}
// Lower a node with an EXTRACT_SUBVECTOR opcode. This may result in
@@ -11670,14 +11754,13 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
MachinePointerInfo::getConstantPool(),
false, false, false, 16);
- SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32,
- DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, XR1),
- CLod0);
+ SDValue Unpck1 =
+ getUnpackl(DAG, dl, MVT::v4i32, DAG.getBitcast(MVT::v4i32, XR1), CLod0);
SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
MachinePointerInfo::getConstantPool(),
false, false, false, 16);
- SDValue XR2F = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Unpck1);
+ SDValue XR2F = DAG.getBitcast(MVT::v2f64, Unpck1);
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
SDValue Result;
@@ -11685,12 +11768,11 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
// FIXME: The 'haddpd' instruction may be slower than 'movhlps + addsd'.
Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
} else {
- SDValue S2F = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Sub);
+ SDValue S2F = DAG.getBitcast(MVT::v4i32, Sub);
SDValue Shuffle = getTargetShuffleNode(X86ISD::PSHUFD, dl, MVT::v4i32,
S2F, 0x4E, DAG);
Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64,
- DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Shuffle),
- Sub);
+ DAG.getBitcast(MVT::v2f64, Shuffle), Sub);
}
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
@@ -11713,20 +11795,19 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget, DAG);
Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
- DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Load),
+ DAG.getBitcast(MVT::v2f64, Load),
DAG.getIntPtrConstant(0, dl));
// Or the load with the bias.
- SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64,
- DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
- DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
- MVT::v2f64, Load)),
- DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
- DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
- MVT::v2f64, Bias)));
- Or = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
- DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Or),
- DAG.getIntPtrConstant(0, dl));
+ SDValue Or = DAG.getNode(
+ ISD::OR, dl, MVT::v2i64,
+ DAG.getBitcast(MVT::v2i64,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Load)),
+ DAG.getBitcast(MVT::v2i64,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Bias)));
+ Or =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
+ DAG.getBitcast(MVT::v2f64, Or), DAG.getIntPtrConstant(0, dl));
// Subtract the bias.
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);
@@ -11805,19 +11886,16 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
if (Subtarget.hasSSE41()) {
EVT VecI16VT = Is128 ? MVT::v8i16 : MVT::v16i16;
// uint4 lo = _mm_blend_epi16( v, (uint4) 0x4b000000, 0xaa);
- SDValue VecCstLowBitcast =
- DAG.getNode(ISD::BITCAST, DL, VecI16VT, VecCstLow);
- SDValue VecBitcast = DAG.getNode(ISD::BITCAST, DL, VecI16VT, V);
+ SDValue VecCstLowBitcast = DAG.getBitcast(VecI16VT, VecCstLow);
+ SDValue VecBitcast = DAG.getBitcast(VecI16VT, V);
// Low will be bitcasted right away, so do not bother bitcasting back to its
// original type.
Low = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecBitcast,
VecCstLowBitcast, DAG.getConstant(0xaa, DL, MVT::i32));
// uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16),
// (uint4) 0x53000000, 0xaa);
- SDValue VecCstHighBitcast =
- DAG.getNode(ISD::BITCAST, DL, VecI16VT, VecCstHigh);
- SDValue VecShiftBitcast =
- DAG.getNode(ISD::BITCAST, DL, VecI16VT, HighShift);
+ SDValue VecCstHighBitcast = DAG.getBitcast(VecI16VT, VecCstHigh);
+ SDValue VecShiftBitcast = DAG.getBitcast(VecI16VT, HighShift);
// High will be bitcasted right away, so do not bother bitcasting back to
// its original type.
High = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecShiftBitcast,
@@ -11843,11 +11921,11 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
makeArrayRef(&CstFAddArray[0], NumElts));
// float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f);
- SDValue HighBitcast = DAG.getNode(ISD::BITCAST, DL, VecFloatVT, High);
+ SDValue HighBitcast = DAG.getBitcast(VecFloatVT, High);
SDValue FHigh =
DAG.getNode(ISD::FADD, DL, VecFloatVT, HighBitcast, VecCstFAdd);
// return (float4) lo + fhi;
- SDValue LowBitcast = DAG.getNode(ISD::BITCAST, DL, VecFloatVT, Low);
+ SDValue LowBitcast = DAG.getBitcast(VecFloatVT, Low);
return DAG.getNode(ISD::FADD, DL, VecFloatVT, LowBitcast, FHigh);
}
@@ -12103,8 +12181,8 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
MVT HVT = MVT::getVectorVT(VT.getVectorElementType(),
VT.getVectorNumElements()/2);
- OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo);
- OpHi = DAG.getNode(ISD::BITCAST, dl, HVT, OpHi);
+ OpLo = DAG.getBitcast(HVT, OpLo);
+ OpHi = DAG.getBitcast(HVT, OpHi);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
@@ -12189,14 +12267,14 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
if (InVT.is512BitVector() && InVT.getScalarSizeInBits() <= 16 &&
Subtarget->hasBWI())
return Op; // legal, will go to VPMOVB2M, VPMOVW2M
- if ((InVT.is256BitVector() || InVT.is128BitVector())
+ if ((InVT.is256BitVector() || InVT.is128BitVector())
&& InVT.getScalarSizeInBits() <= 16 &&
Subtarget->hasBWI() && Subtarget->hasVLX())
return Op; // legal, will go to VPMOVB2M, VPMOVW2M
if (InVT.is512BitVector() && InVT.getScalarSizeInBits() >= 32 &&
Subtarget->hasDQI())
return Op; // legal, will go to VPMOVD2M, VPMOVQ2M
- if ((InVT.is256BitVector() || InVT.is128BitVector())
+ if ((InVT.is256BitVector() || InVT.is128BitVector())
&& InVT.getScalarSizeInBits() >= 32 &&
Subtarget->hasDQI() && Subtarget->hasVLX())
return Op; // legal, will go to VPMOVD2M, VPMOVQ2M
@@ -12224,7 +12302,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
// On AVX2, v4i64 -> v4i32 becomes VPERMD.
if (Subtarget->hasInt256()) {
static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
- In = DAG.getNode(ISD::BITCAST, DL, MVT::v8i32, In);
+ In = DAG.getBitcast(MVT::v8i32, In);
In = DAG.getVectorShuffle(MVT::v8i32, DL, In, DAG.getUNDEF(MVT::v8i32),
ShufMask);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, In,
@@ -12235,8 +12313,8 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
DAG.getIntPtrConstant(0, DL));
SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
DAG.getIntPtrConstant(2, DL));
- OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpLo);
- OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpHi);
+ OpLo = DAG.getBitcast(MVT::v4i32, OpLo);
+ OpHi = DAG.getBitcast(MVT::v4i32, OpHi);
static const int ShufMask[] = {0, 2, 4, 6};
return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask);
}
@@ -12244,7 +12322,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) {
// On AVX2, v8i32 -> v8i16 becomes PSHUFB.
if (Subtarget->hasInt256()) {
- In = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, In);
+ In = DAG.getBitcast(MVT::v32i8, In);
SmallVector<SDValue,32> pshufbMask;
for (unsigned i = 0; i < 2; ++i) {
@@ -12261,14 +12339,14 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
}
SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, pshufbMask);
In = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8, In, BV);
- In = DAG.getNode(ISD::BITCAST, DL, MVT::v4i64, In);
+ In = DAG.getBitcast(MVT::v4i64, In);
static const int ShufMask[] = {0, 2, -1, -1};
In = DAG.getVectorShuffle(MVT::v4i64, DL, In, DAG.getUNDEF(MVT::v4i64),
&ShufMask[0]);
In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
DAG.getIntPtrConstant(0, DL));
- return DAG.getNode(ISD::BITCAST, DL, VT, In);
+ return DAG.getBitcast(VT, In);
}
SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
@@ -12277,8 +12355,8 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
DAG.getIntPtrConstant(4, DL));
- OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, OpLo);
- OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, OpHi);
+ OpLo = DAG.getBitcast(MVT::v16i8, OpLo);
+ OpHi = DAG.getBitcast(MVT::v16i8, OpHi);
// The PSHUFB mask:
static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13,
@@ -12288,13 +12366,13 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
OpLo = DAG.getVectorShuffle(MVT::v16i8, DL, OpLo, Undef, ShufMask1);
OpHi = DAG.getVectorShuffle(MVT::v16i8, DL, OpHi, Undef, ShufMask1);
- OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpLo);
- OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpHi);
+ OpLo = DAG.getBitcast(MVT::v4i32, OpLo);
+ OpHi = DAG.getBitcast(MVT::v4i32, OpHi);
// The MOVLHPS Mask:
static const int ShufMask2[] = {0, 1, 4, 5};
SDValue res = DAG.getVectorShuffle(MVT::v4i32, DL, OpLo, OpHi, ShufMask2);
- return DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, res);
+ return DAG.getBitcast(MVT::v8i16, res);
}
// Handle truncation of V256 to V128 using shuffles.
@@ -12310,8 +12388,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
// Prepare truncation shuffle mask
for (unsigned i = 0; i != NumElems; ++i)
MaskVec[i] = i * 2;
- SDValue V = DAG.getVectorShuffle(NVT, DL,
- DAG.getNode(ISD::BITCAST, DL, NVT, In),
+ SDValue V = DAG.getVectorShuffle(NVT, DL, DAG.getBitcast(NVT, In),
DAG.getUNDEF(NVT), &MaskVec[0]);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V,
DAG.getIntPtrConstant(0, DL));
@@ -12420,13 +12497,12 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
// For a vector, cast operands to a vector type, perform the logic op,
// and cast the result back to the original value type.
MVT VecVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
- SDValue MaskCasted = DAG.getNode(ISD::BITCAST, dl, VecVT, Mask);
- SDValue Operand = IsFNABS ?
- DAG.getNode(ISD::BITCAST, dl, VecVT, Op0.getOperand(0)) :
- DAG.getNode(ISD::BITCAST, dl, VecVT, Op0);
+ SDValue MaskCasted = DAG.getBitcast(VecVT, Mask);
+ SDValue Operand = IsFNABS ? DAG.getBitcast(VecVT, Op0.getOperand(0))
+ : DAG.getBitcast(VecVT, Op0);
unsigned BitOp = IsFABS ? ISD::AND : IsFNABS ? ISD::OR : ISD::XOR;
- return DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getNode(BitOp, dl, VecVT, Operand, MaskCasted));
+ return DAG.getBitcast(VT,
+ DAG.getNode(BitOp, dl, VecVT, Operand, MaskCasted));
}
// If not vector, then scalar.
@@ -12591,7 +12667,7 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget *Subtarget,
// Cast all vectors into TestVT for PTEST.
for (unsigned i = 0, e = VecIns.size(); i < e; ++i)
- VecIns[i] = DAG.getNode(ISD::BITCAST, DL, TestVT, VecIns[i]);
+ VecIns[i] = DAG.getBitcast(TestVT, VecIns[i]);
// If more than one full vector is evaluated, OR them first before PTEST.
for (unsigned Slot = 0, e = VecIns.size(); e - Slot > 1; Slot += 2, e += 1) {
@@ -12925,29 +13001,31 @@ SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op,
DAGCombinerInfo &DCI,
unsigned &RefinementSteps,
bool &UseOneConstNR) const {
- // FIXME: We should use instruction latency models to calculate the cost of
- // each potential sequence, but this is very hard to do reliably because
- // at least Intel's Core* chips have variable timing based on the number of
- // significant digits in the divisor and/or sqrt operand.
- if (!Subtarget->useSqrtEst())
- return SDValue();
-
EVT VT = Op.getValueType();
+ const char *RecipOp;
- // SSE1 has rsqrtss and rsqrtps.
+ // SSE1 has rsqrtss and rsqrtps. AVX adds a 256-bit variant for rsqrtps.
// TODO: Add support for AVX512 (v16f32).
// It is likely not profitable to do this for f64 because a double-precision
// rsqrt estimate with refinement on x86 prior to FMA requires at least 16
// instructions: convert to single, rsqrtss, convert back to double, refine
// (3 steps = at least 13 insts). If an 'rsqrtsd' variant was added to the ISA
// along with FMA, this could be a throughput win.
- if ((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) ||
- (Subtarget->hasAVX() && VT == MVT::v8f32)) {
- RefinementSteps = 1;
- UseOneConstNR = false;
- return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op);
- }
- return SDValue();
+ if (VT == MVT::f32 && Subtarget->hasSSE1())
+ RecipOp = "sqrtf";
+ else if ((VT == MVT::v4f32 && Subtarget->hasSSE1()) ||
+ (VT == MVT::v8f32 && Subtarget->hasAVX()))
+ RecipOp = "vec-sqrtf";
+ else
+ return SDValue();
+
+ TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
+ if (!Recips.isEnabled(RecipOp))
+ return SDValue();
+
+ RefinementSteps = Recips.getRefinementSteps(RecipOp);
+ UseOneConstNR = false;
+ return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op);
}
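The RefinementSteps value returned here tells the DAG combiner how many Newton-Raphson iterations to wrap around the FRSQRT estimate. As a scalar sketch of what a single step computes (a model, not the exact node sequence the combiner emits):

```cpp
// One Newton-Raphson step for y ~ 1/sqrt(a): x1 = x0 * (1.5 - 0.5*a*x0*x0).
// The hardware estimate is accurate to roughly 2^-12; each refinement step
// approximately doubles the number of correct bits.
float refineRsqrt(float A, float X0) {
  return X0 * (1.5f - 0.5f * A * X0 * X0);
}
```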
/// The minimum architected relative accuracy is 2^-12. We need one
@@ -12955,15 +13033,9 @@ SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op,
SDValue X86TargetLowering::getRecipEstimate(SDValue Op,
DAGCombinerInfo &DCI,
unsigned &RefinementSteps) const {
- // FIXME: We should use instruction latency models to calculate the cost of
- // each potential sequence, but this is very hard to do reliably because
- // at least Intel's Core* chips have variable timing based on the number of
- // significant digits in the divisor.
- if (!Subtarget->useReciprocalEst())
- return SDValue();
-
EVT VT = Op.getValueType();
-
+ const char *RecipOp;
+
// SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps.
// TODO: Add support for AVX512 (v16f32).
// It is likely not profitable to do this for f64 because a double-precision
@@ -12971,12 +13043,20 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op,
// 15 instructions: convert to single, rcpss, convert back to double, refine
// (3 steps = 12 insts). If an 'rcpsd' variant was added to the ISA
// along with FMA, this could be a throughput win.
- if ((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) ||
- (Subtarget->hasAVX() && VT == MVT::v8f32)) {
- RefinementSteps = ReciprocalEstimateRefinementSteps;
- return DCI.DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op);
- }
- return SDValue();
+ if (VT == MVT::f32 && Subtarget->hasSSE1())
+ RecipOp = "divf";
+ else if ((VT == MVT::v4f32 && Subtarget->hasSSE1()) ||
+ (VT == MVT::v8f32 && Subtarget->hasAVX()))
+ RecipOp = "vec-divf";
+ else
+ return SDValue();
+
+ TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
+ if (!Recips.isEnabled(RecipOp))
+ return SDValue();
+
+ RefinementSteps = Recips.getRefinementSteps(RecipOp);
+ return DCI.DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op);
}
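The same structure applies to the FRCP estimate, refined with the standard reciprocal iteration. A scalar sketch of one step:

```cpp
// One Newton-Raphson step for y ~ 1/a: x1 = x0 * (2 - a * x0).
float refineRecip(float A, float X0) {
  return X0 * (2.0f - A * X0);
}
```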
/// If we have at least two divisions that use the same divisor, convert to
@@ -13407,8 +13487,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
assert(Subtarget->hasSSE2() && "Don't know how to lower!");
// First cast everything to the right type.
- Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0);
- Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1);
+ Op0 = DAG.getBitcast(MVT::v4i32, Op0);
+ Op1 = DAG.getBitcast(MVT::v4i32, Op1);
// Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations. The lower
@@ -13442,7 +13522,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
if (Invert)
Result = DAG.getNOT(dl, Result, MVT::v4i32);
- return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+ return DAG.getBitcast(VT, Result);
}
if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) {
@@ -13451,8 +13531,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!");
// First cast everything to the right type.
- Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0);
- Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1);
+ Op0 = DAG.getBitcast(MVT::v4i32, Op0);
+ Op1 = DAG.getBitcast(MVT::v4i32, Op1);
// Do the compare.
SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1);
@@ -13465,7 +13545,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
if (Invert)
Result = DAG.getNOT(dl, Result, MVT::v4i32);
- return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+ return DAG.getBitcast(VT, Result);
}
}
@@ -13662,7 +13742,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue VCmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Cmp);
EVT VCmpVT = VT == MVT::f32 ? MVT::v4i32 : MVT::v2i64;
- VCmp = DAG.getNode(ISD::BITCAST, DL, VCmpVT, VCmp);
+ VCmp = DAG.getBitcast(VCmpVT, VCmp);
SDValue VSel = DAG.getNode(ISD::VSELECT, DL, VecVT, VCmp, VOp1, VOp2);
@@ -13687,12 +13767,12 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
else if (Op2.getOpcode() == ISD::BITCAST && Op2.getOperand(0))
Op2Scalar = Op2.getOperand(0);
if (Op1Scalar.getNode() && Op2Scalar.getNode()) {
- SDValue newSelect = DAG.getNode(ISD::SELECT, DL,
+ SDValue newSelect = DAG.getNode(ISD::SELECT, DL,
Op1Scalar.getValueType(),
Cond, Op1Scalar, Op2Scalar);
if (newSelect.getValueSizeInBits() == VT.getSizeInBits())
- return DAG.getNode(ISD::BITCAST, DL, VT, newSelect);
- SDValue ExtVec = DAG.getNode(ISD::BITCAST, DL, MVT::v8i1, newSelect);
+ return DAG.getBitcast(VT, newSelect);
+ SDValue ExtVec = DAG.getBitcast(MVT::v8i1, newSelect);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtVec,
DAG.getIntPtrConstant(0, DL));
}
@@ -13975,7 +14055,7 @@ static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op,
Curr = DAG.getNode(X86ISD::UNPCKL, dl, CurrVT, DAG.getUNDEF(CurrVT), Curr);
MVT CurrSVT = MVT::getIntegerVT(CurrVT.getScalarSizeInBits() * 2);
CurrVT = MVT::getVectorVT(CurrSVT, CurrVT.getVectorNumElements() / 2);
- Curr = DAG.getNode(ISD::BITCAST, dl, CurrVT, Curr);
+ Curr = DAG.getBitcast(CurrVT, Curr);
}
SDValue SignExt = Curr;
@@ -13993,7 +14073,7 @@ static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op,
SDValue Sign = DAG.getNode(X86ISD::VSRAI, dl, CurrVT, Curr,
DAG.getConstant(31, dl, MVT::i8));
SDValue Ext = DAG.getVectorShuffle(CurrVT, dl, SignExt, Sign, {0, 4, 1, 5});
- return DAG.getNode(ISD::BITCAST, dl, VT, Ext);
+ return DAG.getBitcast(VT, Ext);
}
return SDValue();
@@ -14202,7 +14282,7 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget,
// Bitcast the loaded value to a vector of the original element type, in
// the size of the target vector type.
- SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Res);
+ SDValue SlicedVec = DAG.getBitcast(WideVecVT, Res);
unsigned SizeRatio = RegSz / MemSz;
if (Ext == ISD::SEXTLOAD) {
@@ -14227,7 +14307,7 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget,
SDValue Shuff = DAG.getVectorShuffle(
WideVecVT, dl, SlicedVec, DAG.getUNDEF(WideVecVT), &ShuffleVec[0]);
- Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff);
+ Shuff = DAG.getBitcast(RegVT, Shuff);
// Build the arithmetic shift.
unsigned Amt = RegVT.getVectorElementType().getSizeInBits() -
@@ -14249,7 +14329,7 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget,
DAG.getUNDEF(WideVecVT), &ShuffleVec[0]);
// Bitcast to the requested type.
- Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff);
+ Shuff = DAG.getBitcast(RegVT, Shuff);
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
return Shuff;
}
@@ -14933,7 +15013,7 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT,
MVT EltVT = VT.getVectorElementType();
EVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits());
- ShAmt = DAG.getNode(ISD::BITCAST, dl, ShVT, ShAmt);
+ ShAmt = DAG.getBitcast(ShVT, ShAmt);
return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
}
@@ -14959,8 +15039,8 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
// In the case when MaskVT equals v2i1 or v4i1, the low 2 or 4 elements
// are extracted by EXTRACT_SUBVECTOR.

SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
- DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask),
- DAG.getIntPtrConstant(0, dl));
+ DAG.getBitcast(BitcastVT, Mask),
+ DAG.getIntPtrConstant(0, dl));
switch (Op.getOpcode()) {
default: break;
@@ -15017,12 +15097,31 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
Op.getOperand(2), Op.getOperand(3));
case INTR_TYPE_1OP_MASK_RM: {
SDValue Src = Op.getOperand(1);
- SDValue Src0 = Op.getOperand(2);
+ SDValue PassThru = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
- SDValue RoundingMode = Op.getOperand(4);
+ SDValue RoundingMode;
+ if (Op.getNumOperands() == 4)
+ RoundingMode = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
+ else
+ RoundingMode = Op.getOperand(4);
+ unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
+ if (IntrWithRoundingModeOpcode != 0) {
+ unsigned Round = cast<ConstantSDNode>(RoundingMode)->getZExtValue();
+ if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION)
+ return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
+ dl, Op.getValueType(), Src, RoundingMode),
+ Mask, PassThru, Subtarget, DAG);
+ }
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src,
RoundingMode),
- Mask, Src0, Subtarget, DAG);
+ Mask, PassThru, Subtarget, DAG);
+ }
+ case INTR_TYPE_1OP_MASK: {
+ SDValue Src = Op.getOperand(1);
+ SDValue Passthru = Op.getOperand(2);
+ SDValue Mask = Op.getOperand(3);
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src),
+ Mask, Passthru, Subtarget, DAG);
}
case INTR_TYPE_SCALAR_MASK_RM: {
SDValue Src1 = Op.getOperand(1);
@@ -15069,6 +15168,30 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
Src1,Src2),
Mask, PassThru, Subtarget, DAG);
}
+ case INTR_TYPE_3OP_MASK: {
+ SDValue Src1 = Op.getOperand(1);
+ SDValue Src2 = Op.getOperand(2);
+ SDValue Src3 = Op.getOperand(3);
+ SDValue PassThru = Op.getOperand(4);
+ SDValue Mask = Op.getOperand(5);
+ // We specify 2 possible opcodes for intrinsics with rounding modes.
+ // First, we check if the intrinsic may have a non-default rounding mode
+ // (IntrData->Opc1 != 0), and then we check the rounding mode operand.
+ unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
+ if (IntrWithRoundingModeOpcode != 0) {
+ SDValue Rnd = Op.getOperand(6);
+ unsigned Round = cast<ConstantSDNode>(Rnd)->getZExtValue();
+ if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) {
+ return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
+ dl, Op.getValueType(),
+ Src1, Src2, Src3, Rnd),
+ Mask, PassThru, Subtarget, DAG);
+ }
+ }
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
+ Src1, Src2, Src3),
+ Mask, PassThru, Subtarget, DAG);
+ }
case FMA_OP_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
@@ -15140,7 +15263,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT,
DAG.getUNDEF(BitcastVT), CmpMask,
DAG.getIntPtrConstant(0, dl));
- return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ return DAG.getBitcast(Op.getValueType(), Res);
}
case COMI: { // Comparison intrinsics
ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1;
@@ -15176,7 +15299,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
Mask.getValueType().getSizeInBits());
SDLoc dl(Op);
SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
- DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask),
+ DAG.getBitcast(BitcastVT, Mask),
DAG.getIntPtrConstant(0, dl));
return DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToCompress,
@@ -15191,7 +15314,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
Mask.getValueType().getSizeInBits());
SDLoc dl(Op);
SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
- DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask),
+ DAG.getBitcast(BitcastVT, Mask),
DAG.getIntPtrConstant(0, dl));
return DAG.getNode(IntrData->Opc0, dl, VT, VMask, Op.getOperand(1),
Op.getOperand(2));
@@ -15211,16 +15334,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(1));
- case Intrinsic::x86_avx512_mask_valign_q_512:
- case Intrinsic::x86_avx512_mask_valign_d_512:
- // Vector source operands are swapped.
- return getVectorMaskingNode(DAG.getNode(X86ISD::VALIGN, dl,
- Op.getValueType(), Op.getOperand(2),
- Op.getOperand(1),
- Op.getOperand(3)),
- Op.getOperand(5), Op.getOperand(4),
- Subtarget, DAG);
-
// ptest and testp intrinsics. The intrinsics these come from are designed to
// return an integer value, not just an instruction, so lower it to the ptest
// or testp pattern and a setcc for the result.
@@ -15289,8 +15402,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
case Intrinsic::x86_avx512_kortestz_w:
case Intrinsic::x86_avx512_kortestc_w: {
unsigned X86CC = (IntNo == Intrinsic::x86_avx512_kortestz_w)? X86::COND_E: X86::COND_B;
- SDValue LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(1));
- SDValue RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(2));
+ SDValue LHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(1));
+ SDValue RHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(2));
SDValue CC = DAG.getConstant(X86CC, dl, MVT::i8);
SDValue Test = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS);
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i1, CC, Test);
@@ -15378,7 +15491,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
// Compute the symbol for the LSDA. We know it'll get emitted later.
MachineFunction &MF = DAG.getMachineFunction();
SDValue Op1 = Op.getOperand(1);
- Op1->dump();
auto *Fn = cast<Function>(cast<GlobalAddressSDNode>(Op1)->getGlobal());
MCSymbol *LSDASym = MF.getMMI().getContext().getOrCreateLSDASymbol(
GlobalValue::getRealLinkageName(Fn->getName()));
@@ -15409,7 +15521,7 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
if (MaskC)
MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT);
else
- MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);
+ MaskInReg = DAG.getBitcast(MaskVT, Mask);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
@@ -15437,7 +15549,7 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
if (MaskC)
MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT);
else
- MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);
+ MaskInReg = DAG.getBitcast(MaskVT, Mask);
SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
@@ -15460,7 +15572,7 @@ static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
if (MaskC)
MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT);
else
- MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);
+ MaskInReg = DAG.getBitcast(MaskVT, Mask);
//SDVTList VTs = DAG.getVTList(MVT::Other);
SDValue Ops[] = {MaskInReg, Base, Scale, Index, Disp, Segment, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, MVT::Other, Ops);
@@ -15693,23 +15805,25 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
SDValue Addr = Op.getOperand(2);
SDValue Chain = Op.getOperand(0);
+ EVT VT = DataToCompress.getValueType();
if (isAllOnes(Mask)) // return just a store
return DAG.getStore(Chain, dl, DataToCompress, Addr,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false,
+ VT.getScalarSizeInBits()/8);
- EVT VT = DataToCompress.getValueType();
EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
VT.getVectorNumElements());
EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
Mask.getValueType().getSizeInBits());
SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
- DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask),
+ DAG.getBitcast(BitcastVT, Mask),
DAG.getIntPtrConstant(0, dl));
SDValue Compressed = DAG.getNode(IntrData->Opc0, dl, VT, VMask,
DataToCompress, DAG.getUNDEF(VT));
return DAG.getStore(Chain, dl, Compressed, Addr,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false,
+ VT.getScalarSizeInBits()/8);
}
case EXPAND_FROM_MEM: {
SDLoc dl(Op);
@@ -15721,17 +15835,18 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
if (isAllOnes(Mask)) // return just a load
return DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(), false, false,
- false, 0);
+ false, VT.getScalarSizeInBits()/8);
EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
VT.getVectorNumElements());
EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
Mask.getValueType().getSizeInBits());
SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
- DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask),
+ DAG.getBitcast(BitcastVT, Mask),
DAG.getIntPtrConstant(0, dl));
SDValue DataToExpand = DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(),
- false, false, false, 0);
+ false, false, false,
+ VT.getScalarSizeInBits()/8);
SDValue Results[] = {
DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToExpand, PathThru),
@@ -16274,8 +16389,8 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
-1, 4, -1, 5, -1, 6, -1, 7};
ALo = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
BLo = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
- ALo = DAG.getNode(ISD::BITCAST, dl, ExVT, ALo);
- BLo = DAG.getNode(ISD::BITCAST, dl, ExVT, BLo);
+ ALo = DAG.getBitcast(ExVT, ALo);
+ BLo = DAG.getBitcast(ExVT, BLo);
ALo = DAG.getNode(ISD::SRA, dl, ExVT, ALo, DAG.getConstant(8, dl, ExVT));
BLo = DAG.getNode(ISD::SRA, dl, ExVT, BLo, DAG.getConstant(8, dl, ExVT));
}
@@ -16294,8 +16409,8 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
-1, 12, -1, 13, -1, 14, -1, 15};
AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
- AHi = DAG.getNode(ISD::BITCAST, dl, ExVT, AHi);
- BHi = DAG.getNode(ISD::BITCAST, dl, ExVT, BHi);
+ AHi = DAG.getBitcast(ExVT, AHi);
+ BHi = DAG.getBitcast(ExVT, BHi);
AHi = DAG.getNode(ISD::SRA, dl, ExVT, AHi, DAG.getConstant(8, dl, ExVT));
BHi = DAG.getNode(ISD::SRA, dl, ExVT, BHi, DAG.getConstant(8, dl, ExVT));
}
@@ -16323,8 +16438,8 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
// Now multiply odd parts.
SDValue Odds = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, Aodds, Bodds);
- Evens = DAG.getNode(ISD::BITCAST, dl, VT, Evens);
- Odds = DAG.getNode(ISD::BITCAST, dl, VT, Odds);
+ Evens = DAG.getBitcast(VT, Evens);
+ Odds = DAG.getBitcast(VT, Odds);
// Merge the two vectors back together with a shuffle. This expands into 2
// shuffles.
@@ -16352,10 +16467,10 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
// Bit cast to 32-bit vectors for MULUDQ
EVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 :
(VT == MVT::v4i64) ? MVT::v8i32 : MVT::v16i32;
- A = DAG.getNode(ISD::BITCAST, dl, MulVT, A);
- B = DAG.getNode(ISD::BITCAST, dl, MulVT, B);
- Ahi = DAG.getNode(ISD::BITCAST, dl, MulVT, Ahi);
- Bhi = DAG.getNode(ISD::BITCAST, dl, MulVT, Bhi);
+ A = DAG.getBitcast(MulVT, A);
+ B = DAG.getBitcast(MulVT, B);
+ Ahi = DAG.getBitcast(MulVT, Ahi);
+ Bhi = DAG.getBitcast(MulVT, Bhi);
SDValue AloBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, B);
SDValue AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi);
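The PMULUDQ decomposition relies on the usual split of a 64-bit product into 32-bit partial products (the matching AhiBlo product and the 32-bit shifts that combine the pieces appear later in the function, outside this hunk). A scalar model:

```cpp
#include <cstdint>

// 64x64->64 multiply from 32-bit halves: the hi*hi partial product
// overflows 64 bits entirely, so only three PMULUDQ-style products matter.
uint64_t mul64Via32(uint64_t A, uint64_t B) {
  uint64_t ALo = A & 0xffffffffu, AHi = A >> 32;
  uint64_t BLo = B & 0xffffffffu, BHi = B >> 32;
  return ALo * BLo + ((ALo * BHi + AHi * BLo) << 32); // result mod 2^64
}
```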
@@ -16417,7 +16532,7 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons
.setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
- return DAG.getNode(ISD::BITCAST, dl, VT, CallInfo.first);
+ return DAG.getBitcast(VT, CallInfo.first);
}
static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
@@ -16455,12 +16570,10 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
(!IsSigned || !Subtarget->hasSSE41()) ? X86ISD::PMULUDQ : X86ISD::PMULDQ;
// PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
// => <2 x i64> <ae|cg>
- SDValue Mul1 = DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getNode(Opcode, dl, MulVT, Op0, Op1));
+ SDValue Mul1 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT, Op0, Op1));
// PMULUDQ <4 x i32> <b|undef|d|undef>, <4 x i32> <f|undef|h|undef>
// => <2 x i64> <bf|dh>
- SDValue Mul2 = DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getNode(Opcode, dl, MulVT, Odd0, Odd1));
+ SDValue Mul2 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT, Odd0, Odd1));
// Shuffle it back into the right order.
SDValue Highs, Lows;
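What the even/odd PMULUDQ pair plus the re-shuffle ultimately produce is, per lane, the two halves of a full 32x32->64 product. A per-lane scalar model of the Highs/Lows result (illustrative helper, not the shuffle mechanics):

```cpp
#include <cstdint>

// Lows receives the low 32 bits of each product, Highs the high 32 bits.
// Lanes 0/2 come from Mul1 (even elements) and lanes 1/3 from Mul2 (odd
// elements) before the final re-shuffle.
void mulLoHi(const uint32_t A[4], const uint32_t B[4],
             uint32_t Lows[4], uint32_t Highs[4]) {
  for (int i = 0; i != 4; ++i) {
    uint64_t P = (uint64_t)A[i] * B[i];
    Lows[i] = (uint32_t)P;
    Highs[i] = (uint32_t)(P >> 32);
  }
}
```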
@@ -16499,16 +16612,16 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
// Return true if the required (according to Opcode) shift-imm form is natively
// supported by the Subtarget
-static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget *Subtarget,
+static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget *Subtarget,
unsigned Opcode) {
if (VT.getScalarSizeInBits() < 16)
return false;
-
+
if (VT.is512BitVector() &&
(VT.getScalarSizeInBits() > 16 || Subtarget->hasBWI()))
return true;
- bool LShift = VT.is128BitVector() ||
+ bool LShift = VT.is128BitVector() ||
(VT.is256BitVector() && Subtarget->hasInt256());
bool AShift = LShift && (Subtarget->hasVLX() ||
@@ -16518,15 +16631,15 @@ static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget *Subtarget,
// The shift amount is a variable, but it is the same for all vector lanes.
// These instructions are defined together with shift-immediate.
-static
-bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget *Subtarget,
+static
+bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget *Subtarget,
unsigned Opcode) {
return SupportedVectorShiftWithImm(VT, Subtarget, Opcode);
}
// Return true if the required (according to Opcode) variable-shift form is
// natively supported by the Subtarget
-static bool SupportedVectorVarShift(MVT VT, const X86Subtarget *Subtarget,
+static bool SupportedVectorVarShift(MVT VT, const X86Subtarget *Subtarget,
unsigned Opcode) {
if (!Subtarget->hasInt256() || VT.getScalarSizeInBits() < 16)
@@ -16574,7 +16687,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
// Make a large shift.
SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ShiftVT,
R, ShiftAmt, DAG);
- SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL);
+ SHL = DAG.getBitcast(VT, SHL);
// Zero out the rightmost bits.
SmallVector<SDValue, 32> V(
NumElts, DAG.getConstant(uint8_t(-1U << ShiftAmt), dl, MVT::i8));
@@ -16585,7 +16698,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
// Make a large shift.
SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ShiftVT,
R, ShiftAmt, DAG);
- SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL);
+ SRL = DAG.getBitcast(VT, SRL);
// Zero out the leftmost bits.
SmallVector<SDValue, 32> V(
NumElts, DAG.getConstant(uint8_t(-1U) >> ShiftAmt, dl, MVT::i8));
@@ -16801,7 +16914,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
Op = DAG.getNode(ISD::ADD, dl, VT, Op,
DAG.getConstant(0x3f800000U, dl, VT));
- Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op);
+ Op = DAG.getBitcast(MVT::v4f32, Op);
Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
return DAG.getNode(ISD::MUL, dl, VT, Op, R);
}
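This exploits the IEEE-754 layout: with the shift amount already moved into the exponent field by an earlier shift of the amount left by 23 (not shown in this hunk), adding the bit pattern of 1.0f (0x3f800000) yields the float 2^amt. A scalar sketch under that assumption:

```cpp
#include <cstdint>
#include <cstring>

// Build 2^Amt through the float exponent field and multiply; for vXi32
// this turns a variable shift-left into ADD + BITCAST + FP_TO_SINT + MUL.
uint32_t shlViaFloat(uint32_t R, uint32_t Amt) { // assumes Amt <= 31
  uint32_t Bits = (Amt << 23) + 0x3f800000u;     // exponent = 127 + Amt
  float Pow2;
  std::memcpy(&Pow2, &Bits, sizeof(Bits));       // the BITCAST to f32
  return R * (uint32_t)Pow2; // the FP_TO_SINT step, modeled as a cast
}
```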
@@ -16871,11 +16984,11 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
SDValue Shift2 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat2);
if (TargetOpcode == X86ISD::MOVSD)
CastVT = MVT::v2i64;
- SDValue BitCast1 = DAG.getNode(ISD::BITCAST, dl, CastVT, Shift1);
- SDValue BitCast2 = DAG.getNode(ISD::BITCAST, dl, CastVT, Shift2);
+ SDValue BitCast1 = DAG.getBitcast(CastVT, Shift1);
+ SDValue BitCast2 = DAG.getBitcast(CastVT, Shift2);
SDValue Result = getTargetShuffleNode(TargetOpcode, dl, CastVT, BitCast2,
BitCast1, DAG);
- return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+ return DAG.getBitcast(VT, Result);
}
}
@@ -16931,10 +17044,10 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
SDValue AHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, Amt, Z);
SDValue RLo = DAG.getNode(X86ISD::UNPCKL, dl, VT, R, R);
SDValue RHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, R, R);
- ALo = DAG.getNode(ISD::BITCAST, dl, ExtVT, ALo);
- AHi = DAG.getNode(ISD::BITCAST, dl, ExtVT, AHi);
- RLo = DAG.getNode(ISD::BITCAST, dl, ExtVT, RLo);
- RHi = DAG.getNode(ISD::BITCAST, dl, ExtVT, RHi);
+ ALo = DAG.getBitcast(ExtVT, ALo);
+ AHi = DAG.getBitcast(ExtVT, AHi);
+ RLo = DAG.getBitcast(ExtVT, RLo);
+ RHi = DAG.getBitcast(ExtVT, RHi);
SDValue Lo = DAG.getNode(Op.getOpcode(), dl, ExtVT, RLo, ALo);
SDValue Hi = DAG.getNode(Op.getOpcode(), dl, ExtVT, RHi, AHi);
Lo = DAG.getNode(ISD::SRL, dl, ExtVT, Lo, DAG.getConstant(16, dl, ExtVT));
@@ -17293,7 +17406,7 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget,
EVT NewVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2);
SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, Elts);
- SDValue ToV2F64 = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, BV);
+ SDValue ToV2F64 = DAG.getBitcast(MVT::v2f64, BV);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, ToV2F64,
DAG.getIntPtrConstant(0, dl));
}
@@ -17315,141 +17428,241 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget,
return SDValue();
}
-static SDValue LowerCTPOP(SDValue Op, const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
- SDNode *Node = Op.getNode();
- SDLoc dl(Node);
+/// Compute the horizontal sum of bytes in V for the elements of VT.
+///
+/// Requires V to be a byte vector and VT to be an integer vector type with
+/// wider elements than V's type. The width of the elements of VT determines
+/// how many bytes of V are summed horizontally to produce each element of the
+/// result.
+static SDValue LowerHorizontalByteSum(SDValue V, MVT VT,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(V);
+ MVT ByteVecVT = V.getSimpleValueType();
+ MVT EltVT = VT.getVectorElementType();
+ int NumElts = VT.getVectorNumElements();
+ assert(ByteVecVT.getVectorElementType() == MVT::i8 &&
+ "Expected value to have byte element type.");
+ assert(EltVT != MVT::i8 &&
+ "Horizontal byte sum only makes sense for wider elements!");
+ unsigned VecSize = VT.getSizeInBits();
+ assert(ByteVecVT.getSizeInBits() == VecSize && "Cannot change vector size!");
+
+ // The PSADBW instruction horizontally adds all bytes and leaves the result
+ // in i64 chunks, thus directly computing the pop count for v2i64 and v4i64.
+ if (EltVT == MVT::i64) {
+ SDValue Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL);
+ V = DAG.getNode(X86ISD::PSADBW, DL, ByteVecVT, V, Zeros);
+ return DAG.getBitcast(VT, V);
+ }
+
+ if (EltVT == MVT::i32) {
+ // We unpack the low half and high half into i32s interleaved with zeros so
+ // that we can use PSADBW to horizontally sum them. The most useful part of
+ // this is that it lines up the results of the two PSADBW instructions as
+ // two v2i64 vectors which, when concatenated, hold the 4 population counts.
+ // We can then use PACKUSWB to shrink and concatenate them into a v4i32 again.
+ SDValue Zeros = getZeroVector(VT, Subtarget, DAG, DL);
+ SDValue Low = DAG.getNode(X86ISD::UNPCKL, DL, VT, V, Zeros);
+ SDValue High = DAG.getNode(X86ISD::UNPCKH, DL, VT, V, Zeros);
+
+ // Do the horizontal sums into two v2i64s.
+ Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL);
+ Low = DAG.getNode(X86ISD::PSADBW, DL, ByteVecVT,
+ DAG.getBitcast(ByteVecVT, Low), Zeros);
+ High = DAG.getNode(X86ISD::PSADBW, DL, ByteVecVT,
+ DAG.getBitcast(ByteVecVT, High), Zeros);
+
+ // Merge them together.
+ MVT ShortVecVT = MVT::getVectorVT(MVT::i16, VecSize / 16);
+ V = DAG.getNode(X86ISD::PACKUS, DL, ByteVecVT,
+ DAG.getBitcast(ShortVecVT, Low),
+ DAG.getBitcast(ShortVecVT, High));
+
+ return DAG.getBitcast(VT, V);
+ }
+
+ // The only element type left is i16.
+ assert(EltVT == MVT::i16 && "Unknown how to handle type");
+
+ // To obtain the pop count for each i16 element starting from the per-i8
+ // pop counts, shift the i16s left by 8, sum as i8s, and then shift the i16s
+ // right by 8. It is important to shift as i16s because an i8 vector shift
+ // isn't directly supported.
+ SmallVector<SDValue, 16> Shifters(NumElts, DAG.getConstant(8, DL, EltVT));
+ SDValue Shifter = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Shifters);
+ SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, DAG.getBitcast(VT, V), Shifter);
+ V = DAG.getNode(ISD::ADD, DL, ByteVecVT, DAG.getBitcast(ByteVecVT, Shl),
+ DAG.getBitcast(ByteVecVT, V));
+ return DAG.getNode(ISD::SRL, DL, VT, DAG.getBitcast(VT, V), Shifter);
+}
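The i16 path's shift/add/shift dance is easiest to verify on a single 16-bit lane. A scalar sketch (per-byte counts never exceed 8 here, so no carries cross byte boundaries; the DAG version adds as i8s to guarantee that in general):

```cpp
#include <cstdint>

// One 16-bit lane holding two per-byte pop counts (hi:lo). Shift the low
// count up, add, and shift back down: the high byte then holds lo + hi.
uint16_t byteSumToWord(uint16_t PerByteCounts) {
  uint16_t Shl = PerByteCounts << 8;  // the SHL done as i16
  uint16_t Sum = Shl + PerByteCounts; // the ADD done as i8s in the DAG
  return Sum >> 8;                    // the SRL done as i16
}
```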
+
+static SDValue LowerVectorCTPOPInRegLUT(SDValue Op, SDLoc DL,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ MVT EltVT = VT.getVectorElementType();
+ unsigned VecSize = VT.getSizeInBits();
- Op = Op.getOperand(0);
- EVT VT = Op.getValueType();
- assert((VT.is128BitVector() || VT.is256BitVector()) &&
- "CTPOP lowering only implemented for 128/256-bit wide vector types");
+ // Implement a lookup table in register by using an algorithm based on:
+ // http://wm.ite.pl/articles/sse-popcount.html
+ //
+ // The general idea is that every nibble of every byte in the input vector
+ // is an index into an in-register, pre-computed pop count table. We then
+ // split up the input vector into two new ones: (1) a vector with only the
+ // shifted-right higher nibbles for each byte and (2) a vector with the
+ // lower nibbles (and the higher ones masked out) for each byte. PSHUFB is
+ // used separately with both to index the in-register table. Next, both are
+ // added and the result is an i8 vector where each element contains the pop
+ // count for its input byte.
+ //
+ // To obtain the pop count for elements != i8, we follow up with the same
+ // approach and use additional tricks as described below.
+ //
+ const int LUT[16] = {/* 0 */ 0, /* 1 */ 1, /* 2 */ 1, /* 3 */ 2,
+ /* 4 */ 1, /* 5 */ 2, /* 6 */ 2, /* 7 */ 3,
+ /* 8 */ 1, /* 9 */ 2, /* a */ 2, /* b */ 3,
+ /* c */ 2, /* d */ 3, /* e */ 3, /* f */ 4};
+
+ int NumByteElts = VecSize / 8;
+ MVT ByteVecVT = MVT::getVectorVT(MVT::i8, NumByteElts);
+ SDValue In = DAG.getBitcast(ByteVecVT, Op);
+ SmallVector<SDValue, 16> LUTVec;
+ for (int i = 0; i < NumByteElts; ++i)
+ LUTVec.push_back(DAG.getConstant(LUT[i % 16], DL, MVT::i8));
+ SDValue InRegLUT = DAG.getNode(ISD::BUILD_VECTOR, DL, ByteVecVT, LUTVec);
+ SmallVector<SDValue, 16> Mask0F(NumByteElts,
+ DAG.getConstant(0x0F, DL, MVT::i8));
+ SDValue M0F = DAG.getNode(ISD::BUILD_VECTOR, DL, ByteVecVT, Mask0F);
+
+ // High nibbles
+ SmallVector<SDValue, 16> Four(NumByteElts, DAG.getConstant(4, DL, MVT::i8));
+ SDValue FourV = DAG.getNode(ISD::BUILD_VECTOR, DL, ByteVecVT, Four);
+ SDValue HighNibbles = DAG.getNode(ISD::SRL, DL, ByteVecVT, In, FourV);
+
+ // Low nibbles
+ SDValue LowNibbles = DAG.getNode(ISD::AND, DL, ByteVecVT, In, M0F);
+
+ // The nibble vectors are used as the shuffle masks that index into the
+ // LUT. After counting low and high nibbles, add the two vectors to obtain
+ // the final pop count per i8 element.
+ SDValue HighPopCnt =
+ DAG.getNode(X86ISD::PSHUFB, DL, ByteVecVT, InRegLUT, HighNibbles);
+ SDValue LowPopCnt =
+ DAG.getNode(X86ISD::PSHUFB, DL, ByteVecVT, InRegLUT, LowNibbles);
+ SDValue PopCnt = DAG.getNode(ISD::ADD, DL, ByteVecVT, HighPopCnt, LowPopCnt);
- unsigned NumElts = VT.getVectorNumElements();
- EVT EltVT = VT.getVectorElementType();
- unsigned Len = EltVT.getSizeInBits();
+ if (EltVT == MVT::i8)
+ return PopCnt;
+
+ return LowerHorizontalByteSum(PopCnt, VT, Subtarget, DAG);
+}
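The same algorithm is often written directly with SSSE3 intrinsics. A minimal sketch of the per-byte case (function name is illustrative; this mirrors the LUT/nibble split above):

```cpp
#include <tmmintrin.h> // SSSE3 intrinsics (_mm_shuffle_epi8)

// Per-byte population count via an in-register 16-entry LUT.
__m128i popcntEpi8(__m128i V) {
  const __m128i LUT = _mm_setr_epi8(0, 1, 1, 2, 1, 2, 2, 3,
                                    1, 2, 2, 3, 2, 3, 3, 4);
  const __m128i M0F = _mm_set1_epi8(0x0f);
  __m128i Lo = _mm_and_si128(V, M0F);                    // low nibbles
  __m128i Hi = _mm_and_si128(_mm_srli_epi16(V, 4), M0F); // high nibbles
  return _mm_add_epi8(_mm_shuffle_epi8(LUT, Lo),         // LUT lookups
                      _mm_shuffle_epi8(LUT, Hi));
}
```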
+
+static SDValue LowerVectorCTPOPBitmath(SDValue Op, SDLoc DL,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ assert(VT.is128BitVector() &&
+ "Only 128-bit vector bitmath lowering supported.");
+
+ int VecSize = VT.getSizeInBits();
+ MVT EltVT = VT.getVectorElementType();
+ int Len = EltVT.getSizeInBits();
// This is the vectorized version of the "best" algorithm from
// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
// with a minor tweak to use a series of adds + shifts instead of vector
- // multiplications. Implemented for the v2i64, v4i64, v4i32, v8i32 types:
- //
- // v2i64, v4i64, v4i32 => Only profitable w/ popcnt disabled
- // v8i32 => Always profitable
- //
- // FIXME: There a couple of possible improvements:
- //
- // 1) Support for i8 and i16 vectors (needs measurements if popcnt enabled).
- // 2) Use strategies from http://wm.ite.pl/articles/sse-popcount.html
- //
- assert(EltVT.isInteger() && (Len == 32 || Len == 64) && Len % 8 == 0 &&
- "CTPOP not implemented for this vector element type.");
+ // multiplications. Implemented for all integer vector types. We only use
+ // this when we don't have SSSE3, which allows a LUT-based lowering that is
+ // much faster, even faster than using native popcnt instructions.
+
+ auto GetShift = [&](unsigned OpCode, SDValue V, int Shifter) {
+ MVT VT = V.getSimpleValueType();
+ SmallVector<SDValue, 32> Shifters(
+ VT.getVectorNumElements(),
+ DAG.getConstant(Shifter, DL, VT.getVectorElementType()));
+ return DAG.getNode(OpCode, DL, VT, V,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Shifters));
+ };
+ auto GetMask = [&](SDValue V, APInt Mask) {
+ MVT VT = V.getSimpleValueType();
+ SmallVector<SDValue, 32> Masks(
+ VT.getVectorNumElements(),
+ DAG.getConstant(Mask, DL, VT.getVectorElementType()));
+ return DAG.getNode(ISD::AND, DL, VT, V,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Masks));
+ };
- // X86 canonicalize ANDs to vXi64, generate the appropriate bitcasts to avoid
- // extra legalization.
- bool NeedsBitcast = EltVT == MVT::i32;
- MVT BitcastVT = VT.is256BitVector() ? MVT::v4i64 : MVT::v2i64;
+ // We don't want to incur the implicit masks required to SRL vNi8 vectors on
+ // x86, so set the SRL type to have elements at least i16 wide. This is
+ // correct because all of our SRLs are followed immediately by a mask anyway
+ // that handles any bits that sneak into the high bits of the byte elements.
+ MVT SrlVT = Len > 8 ? VT : MVT::getVectorVT(MVT::i16, VecSize / 16);
- SDValue Cst55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl,
- EltVT);
- SDValue Cst33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl,
- EltVT);
- SDValue Cst0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl,
- EltVT);
+ SDValue V = Op;
// v = v - ((v >> 1) & 0x55555555...)
- SmallVector<SDValue, 8> Ones(NumElts, DAG.getConstant(1, dl, EltVT));
- SDValue OnesV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ones);
- SDValue Srl = DAG.getNode(ISD::SRL, dl, VT, Op, OnesV);
- if (NeedsBitcast)
- Srl = DAG.getNode(ISD::BITCAST, dl, BitcastVT, Srl);
-
- SmallVector<SDValue, 8> Mask55(NumElts, Cst55);
- SDValue M55 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Mask55);
- if (NeedsBitcast)
- M55 = DAG.getNode(ISD::BITCAST, dl, BitcastVT, M55);
-
- SDValue And = DAG.getNode(ISD::AND, dl, Srl.getValueType(), Srl, M55);
- if (VT != And.getValueType())
- And = DAG.getNode(ISD::BITCAST, dl, VT, And);
- SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Op, And);
+ SDValue Srl =
+ DAG.getBitcast(VT, GetShift(ISD::SRL, DAG.getBitcast(SrlVT, V), 1));
+ SDValue And = GetMask(Srl, APInt::getSplat(Len, APInt(8, 0x55)));
+ V = DAG.getNode(ISD::SUB, DL, VT, V, And);
// v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
- SmallVector<SDValue, 8> Mask33(NumElts, Cst33);
- SDValue M33 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Mask33);
- SmallVector<SDValue, 8> Twos(NumElts, DAG.getConstant(2, dl, EltVT));
- SDValue TwosV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Twos);
+ SDValue AndLHS = GetMask(V, APInt::getSplat(Len, APInt(8, 0x33)));
+ Srl = DAG.getBitcast(VT, GetShift(ISD::SRL, DAG.getBitcast(SrlVT, V), 2));
+ SDValue AndRHS = GetMask(Srl, APInt::getSplat(Len, APInt(8, 0x33)));
+ V = DAG.getNode(ISD::ADD, DL, VT, AndLHS, AndRHS);
- Srl = DAG.getNode(ISD::SRL, dl, VT, Sub, TwosV);
- if (NeedsBitcast) {
- Srl = DAG.getNode(ISD::BITCAST, dl, BitcastVT, Srl);
- M33 = DAG.getNode(ISD::BITCAST, dl, BitcastVT, M33);
- Sub = DAG.getNode(ISD::BITCAST, dl, BitcastVT, Sub);
- }
+ // v = (v + (v >> 4)) & 0x0F0F0F0F...
+ Srl = DAG.getBitcast(VT, GetShift(ISD::SRL, DAG.getBitcast(SrlVT, V), 4));
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, V, Srl);
+ V = GetMask(Add, APInt::getSplat(Len, APInt(8, 0x0F)));
- SDValue AndRHS = DAG.getNode(ISD::AND, dl, M33.getValueType(), Srl, M33);
- SDValue AndLHS = DAG.getNode(ISD::AND, dl, M33.getValueType(), Sub, M33);
- if (VT != AndRHS.getValueType()) {
- AndRHS = DAG.getNode(ISD::BITCAST, dl, VT, AndRHS);
- AndLHS = DAG.getNode(ISD::BITCAST, dl, VT, AndLHS);
- }
- SDValue Add = DAG.getNode(ISD::ADD, dl, VT, AndLHS, AndRHS);
+ // At this point, V contains the byte-wise population count, and we are
+ // merely doing a horizontal sum if necessary to get the wider element
+ // counts.
+ if (EltVT == MVT::i8)
+ return V;
- // v = (v + (v >> 4)) & 0x0F0F0F0F...
- SmallVector<SDValue, 8> Fours(NumElts, DAG.getConstant(4, dl, EltVT));
- SDValue FoursV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Fours);
- Srl = DAG.getNode(ISD::SRL, dl, VT, Add, FoursV);
- Add = DAG.getNode(ISD::ADD, dl, VT, Add, Srl);
-
- SmallVector<SDValue, 8> Mask0F(NumElts, Cst0F);
- SDValue M0F = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Mask0F);
- if (NeedsBitcast) {
- Add = DAG.getNode(ISD::BITCAST, dl, BitcastVT, Add);
- M0F = DAG.getNode(ISD::BITCAST, dl, BitcastVT, M0F);
- }
- And = DAG.getNode(ISD::AND, dl, M0F.getValueType(), Add, M0F);
- if (VT != And.getValueType())
- And = DAG.getNode(ISD::BITCAST, dl, VT, And);
-
- // The algorithm mentioned above uses:
- // v = (v * 0x01010101...) >> (Len - 8)
- //
- // Change it to use vector adds + vector shifts which yield faster results on
- // Haswell than using vector integer multiplication.
- //
- // For i32 elements:
- // v = v + (v >> 8)
- // v = v + (v >> 16)
- //
- // For i64 elements:
- // v = v + (v >> 8)
- // v = v + (v >> 16)
- // v = v + (v >> 32)
- //
- Add = And;
- SmallVector<SDValue, 8> Csts;
- for (unsigned i = 8; i <= Len/2; i *= 2) {
- Csts.assign(NumElts, DAG.getConstant(i, dl, EltVT));
- SDValue CstsV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Csts);
- Srl = DAG.getNode(ISD::SRL, dl, VT, Add, CstsV);
- Add = DAG.getNode(ISD::ADD, dl, VT, Add, Srl);
- Csts.clear();
+ return LowerHorizontalByteSum(
+ DAG.getBitcast(MVT::getVectorVT(MVT::i8, VecSize / 8), V), VT, Subtarget,
+ DAG);
+}
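Per element, this path is the classic parallel popcount; only the final horizontal step differs, since the vector lowering replaces the multiply with the byte-sum helpers above. A scalar model for one i32 element:

```cpp
#include <cstdint>

// The "best" bithacks popcount: fold pairs, then nibbles, then bytes,
// then sum the per-byte counts into the top byte.
uint32_t popcount32(uint32_t V) {
  V = V - ((V >> 1) & 0x55555555u);                 // 2-bit field sums
  V = (V & 0x33333333u) + ((V >> 2) & 0x33333333u); // 4-bit field sums
  V = (V + (V >> 4)) & 0x0f0f0f0fu;                 // per-byte counts
  return (V * 0x01010101u) >> 24;                   // horizontal byte sum
}
```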
+
+static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ // FIXME: Need to add AVX-512 support here!
+ assert((VT.is256BitVector() || VT.is128BitVector()) &&
+ "Unknown CTPOP type to handle");
+ SDLoc DL(Op.getNode());
+ SDValue Op0 = Op.getOperand(0);
+
+ if (!Subtarget->hasSSSE3()) {
+ // We can't use the fast LUT approach, so fall back on vectorized bitmath.
+ assert(VT.is128BitVector() && "Only 128-bit vectors supported in SSE!");
+ return LowerVectorCTPOPBitmath(Op0, DL, Subtarget, DAG);
}
- // The result is on the least significant 6-bits on i32 and 7-bits on i64.
- SDValue Cst3F = DAG.getConstant(APInt(Len, Len == 32 ? 0x3F : 0x7F), dl,
- EltVT);
- SmallVector<SDValue, 8> Cst3FV(NumElts, Cst3F);
- SDValue M3F = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Cst3FV);
- if (NeedsBitcast) {
- Add = DAG.getNode(ISD::BITCAST, dl, BitcastVT, Add);
- M3F = DAG.getNode(ISD::BITCAST, dl, BitcastVT, M3F);
+ if (VT.is256BitVector() && !Subtarget->hasInt256()) {
+ unsigned NumElems = VT.getVectorNumElements();
+
+ // Extract each 128-bit vector, compute pop count and concat the result.
+ SDValue LHS = Extract128BitVector(Op0, 0, DAG, DL);
+ SDValue RHS = Extract128BitVector(Op0, NumElems/2, DAG, DL);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT,
+ LowerVectorCTPOPInRegLUT(LHS, DL, Subtarget, DAG),
+ LowerVectorCTPOPInRegLUT(RHS, DL, Subtarget, DAG));
}
- And = DAG.getNode(ISD::AND, dl, M3F.getValueType(), Add, M3F);
- if (VT != And.getValueType())
- And = DAG.getNode(ISD::BITCAST, dl, VT, And);
- return And;
+ return LowerVectorCTPOPInRegLUT(Op0, DL, Subtarget, DAG);
+}
+
+static SDValue LowerCTPOP(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ assert(Op.getValueType().isVector() &&
+ "We only do custom lowering for vector population count.");
+ return LowerVectorCTPOP(Op, Subtarget, DAG);
}
static SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) {
@@ -17840,8 +18053,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
MVT::f64);
SDValue VBias = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2f64, Bias, Bias);
SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn,
- DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, VBias));
- Or = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Or);
+ DAG.getBitcast(MVT::v2i64, VBias));
+ Or = DAG.getBitcast(MVT::v2f64, Or);
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias);
Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub));
return;
@@ -17964,7 +18177,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
EVT WiderVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2);
SDValue Expanded = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
MVT::v2f64, N->getOperand(0));
- SDValue ToVecInt = DAG.getNode(ISD::BITCAST, dl, WiderVT, Expanded);
+ SDValue ToVecInt = DAG.getBitcast(WiderVT, Expanded);
if (ExperimentalVectorWideningLegalization) {
// If we are legalizing vectors by widening, we already have the desired
@@ -17994,7 +18207,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FANDN: return "X86ISD::FANDN";
case X86ISD::FOR: return "X86ISD::FOR";
case X86ISD::FXOR: return "X86ISD::FXOR";
- case X86ISD::FSRL: return "X86ISD::FSRL";
case X86ISD::FILD: return "X86ISD::FILD";
case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG";
case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
@@ -18121,6 +18333,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW";
case X86ISD::SHUFP: return "X86ISD::SHUFP";
+ case X86ISD::SHUF128: return "X86ISD::SHUF128";
case X86ISD::MOVLHPS: return "X86ISD::MOVLHPS";
case X86ISD::MOVLHPD: return "X86ISD::MOVLHPD";
case X86ISD::MOVHLPS: return "X86ISD::MOVHLPS";
@@ -18143,8 +18356,11 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VPERMV3: return "X86ISD::VPERMV3";
case X86ISD::VPERMIV3: return "X86ISD::VPERMIV3";
case X86ISD::VPERMI: return "X86ISD::VPERMI";
+ case X86ISD::VFIXUPIMM: return "X86ISD::VFIXUPIMM";
+ case X86ISD::VRANGE: return "X86ISD::VRANGE";
case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ";
case X86ISD::PMULDQ: return "X86ISD::PMULDQ";
+ case X86ISD::PSADBW: return "X86ISD::PSADBW";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
case X86ISD::VAARG_64: return "X86ISD::VAARG_64";
case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
@@ -18184,6 +18400,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FSUB_RND: return "X86ISD::FSUB_RND";
case X86ISD::FMUL_RND: return "X86ISD::FMUL_RND";
case X86ISD::FDIV_RND: return "X86ISD::FDIV_RND";
+ case X86ISD::FSQRT_RND: return "X86ISD::FSQRT_RND";
+ case X86ISD::FGETEXP_RND: return "X86ISD::FGETEXP_RND";
case X86ISD::ADDS: return "X86ISD::ADDS";
case X86ISD::SUBS: return "X86ISD::SUBS";
}
@@ -18193,7 +18411,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const {
+ Type *Ty,
+ unsigned AS) const {
// X86 supports extremely general addressing modes.
CodeModel::Model M = getTargetMachine().getCodeModel();
Reloc::Model R = getTargetMachine().getRelocationModel();
@@ -20028,7 +20247,7 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
SDValue(ResNode.getNode(), 1));
}
- return DAG.getNode(ISD::BITCAST, dl, VT, ResNode);
+ return DAG.getBitcast(VT, ResNode);
}
}
@@ -20087,7 +20306,7 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
// Just remove no-op shuffle masks.
if (Mask.size() == 1) {
- DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Input),
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Input),
/*AddTo*/ true);
return true;
}
@@ -20123,14 +20342,14 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
}
if (Depth == 1 && Root->getOpcode() == Shuffle)
return false; // Nothing to do!
- Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input);
+ Op = DAG.getBitcast(ShuffleVT, Input);
DCI.AddToWorklist(Op.getNode());
if (Shuffle == X86ISD::MOVDDUP)
Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op);
else
Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op);
DCI.AddToWorklist(Op.getNode());
- DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Op),
/*AddTo*/ true);
return true;
}
@@ -20141,11 +20360,11 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
MVT ShuffleVT = MVT::v4f32;
if (Depth == 1 && Root->getOpcode() == Shuffle)
return false; // Nothing to do!
- Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input);
+ Op = DAG.getBitcast(ShuffleVT, Input);
DCI.AddToWorklist(Op.getNode());
Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op);
DCI.AddToWorklist(Op.getNode());
- DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Op),
/*AddTo*/ true);
return true;
}
@@ -20155,11 +20374,11 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
MVT ShuffleVT = MVT::v4f32;
if (Depth == 1 && Root->getOpcode() == Shuffle)
return false; // Nothing to do!
- Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input);
+ Op = DAG.getBitcast(ShuffleVT, Input);
DCI.AddToWorklist(Op.getNode());
Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op);
DCI.AddToWorklist(Op.getNode());
- DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Op),
/*AddTo*/ true);
return true;
}
@@ -20189,11 +20408,11 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
default:
llvm_unreachable("Impossible mask size!");
};
- Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input);
+ Op = DAG.getBitcast(ShuffleVT, Input);
DCI.AddToWorklist(Op.getNode());
Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op);
DCI.AddToWorklist(Op.getNode());
- DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Op),
/*AddTo*/ true);
return true;
}
@@ -20222,14 +20441,14 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
PSHUFBMask.push_back(DAG.getConstant(M, DL, MVT::i8));
}
MVT ByteVT = MVT::getVectorVT(MVT::i8, NumBytes);
- Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Input);
+ Op = DAG.getBitcast(ByteVT, Input);
DCI.AddToWorklist(Op.getNode());
SDValue PSHUFBMaskOp =
DAG.getNode(ISD::BUILD_VECTOR, DL, ByteVT, PSHUFBMask);
DCI.AddToWorklist(PSHUFBMaskOp.getNode());
Op = DAG.getNode(X86ISD::PSHUFB, DL, ByteVT, Op, PSHUFBMaskOp);
DCI.AddToWorklist(Op.getNode());
- DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Op),
/*AddTo*/ true);
return true;
}
@@ -20401,7 +20620,7 @@ static SmallVector<int, 4> getPSHUFShuffleMask(SDValue N) {
#ifndef NDEBUG
for (int i = 1, NumLanes = VT.getSizeInBits() / 128; i < NumLanes; ++i)
for (int j = 0; j < LaneElts; ++j)
- assert(Mask[j] == Mask[i * LaneElts + j] - LaneElts &&
+ assert(Mask[j] == Mask[i * LaneElts + j] - (LaneElts * i) &&
"Mask doesn't repeat in high 128-bit lanes!");
#endif
Mask.resize(LaneElts);
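The corrected assert checks that lane i's entries equal lane 0's entries plus lane i's base element index, not a fixed LaneElts offset. A standalone check with the same logic (illustrative helper):

```cpp
// A mask "repeats" across 128-bit lanes when every lane is lane 0 shifted
// by that lane's base index, e.g. {1,0,3,2, 5,4,7,6} for LaneElts == 4.
bool maskRepeatsAcrossLanes(const int *Mask, int NumLanes, int LaneElts) {
  for (int i = 1; i < NumLanes; ++i)
    for (int j = 0; j < LaneElts; ++j)
      if (Mask[j] != Mask[i * LaneElts + j] - LaneElts * i)
        return false;
  return true;
}
```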
@@ -20532,7 +20751,7 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
SDValue W = Chain.pop_back_val();
if (V.getValueType() != W.getOperand(0).getValueType())
- V = DAG.getNode(ISD::BITCAST, DL, W.getOperand(0).getValueType(), V);
+ V = DAG.getBitcast(W.getOperand(0).getValueType(), V);
switch (W.getOpcode()) {
default:
@@ -20551,7 +20770,7 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
}
}
if (V.getValueType() != N.getValueType())
- V = DAG.getNode(ISD::BITCAST, DL, N.getValueType(), V);
+ V = DAG.getBitcast(N.getValueType(), V);
// Return the new chain to replace N.
return V;
@@ -20668,12 +20887,12 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
DMask[DOffset + 0] = DOffset + 1;
DMask[DOffset + 1] = DOffset + 0;
MVT DVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2);
- V = DAG.getNode(ISD::BITCAST, DL, DVT, V);
+ V = DAG.getBitcast(DVT, V);
DCI.AddToWorklist(V.getNode());
V = DAG.getNode(X86ISD::PSHUFD, DL, DVT, V,
getV4X86ShuffleImm8ForMask(DMask, DL, DAG));
DCI.AddToWorklist(V.getNode());
- return DAG.getNode(ISD::BITCAST, DL, VT, V);
+ return DAG.getBitcast(VT, V);
}
// Look for shuffle patterns which can be implemented as a single unpack.
@@ -20704,7 +20923,7 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
if (makeArrayRef(MappedMask).equals({0, 0, 1, 1, 2, 2, 3, 3}) ||
makeArrayRef(MappedMask).equals({4, 4, 5, 5, 6, 6, 7, 7})) {
// We can replace all three shuffles with an unpack.
- V = DAG.getNode(ISD::BITCAST, DL, VT, D.getOperand(0));
+ V = DAG.getBitcast(VT, D.getOperand(0));
DCI.AddToWorklist(V.getNode());
return DAG.getNode(MappedMask[0] == 0 ? X86ISD::UNPCKL
: X86ISD::UNPCKH,
@@ -20848,8 +21067,8 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
CanFold = SVOp->getMaskElt(i) < 0;
if (CanFold) {
- SDValue BC00 = DAG.getNode(ISD::BITCAST, dl, VT, BC0.getOperand(0));
- SDValue BC01 = DAG.getNode(ISD::BITCAST, dl, VT, BC0.getOperand(1));
+ SDValue BC00 = DAG.getBitcast(VT, BC0.getOperand(0));
+ SDValue BC01 = DAG.getBitcast(VT, BC0.getOperand(1));
SDValue NewBinOp = DAG.getNode(BC0.getOpcode(), dl, VT, BC00, BC01);
return DAG.getVectorShuffle(VT, dl, NewBinOp, N1, &SVOp->getMask()[0]);
}
@@ -20981,7 +21200,7 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
Shuffle = DAG.getVectorShuffle(CurrentVT, dl,
InVec.getOperand(0), Shuffle,
&ShuffleMask[0]);
- Shuffle = DAG.getNode(ISD::BITCAST, dl, OriginalVT, Shuffle);
+ Shuffle = DAG.getBitcast(OriginalVT, Shuffle);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle,
EltNo);
}
@@ -21101,7 +21320,7 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
SDValue Vals[4];
if (TLI.isOperationLegal(ISD::SRA, MVT::i64)) {
- SDValue Cst = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, InputVector);
+ SDValue Cst = DAG.getBitcast(MVT::v2i64, InputVector);
EVT VecIdxTy = DAG.getTargetLoweringInfo().getVectorIdxTy();
SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Cst,
DAG.getConstant(0, dl, VecIdxTy));
@@ -21717,13 +21936,13 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
if (TValIsAllOnes && FValIsAllZeros)
Ret = Cond;
else if (TValIsAllOnes)
- Ret = DAG.getNode(ISD::OR, DL, CondVT, Cond,
- DAG.getNode(ISD::BITCAST, DL, CondVT, RHS));
+ Ret =
+ DAG.getNode(ISD::OR, DL, CondVT, Cond, DAG.getBitcast(CondVT, RHS));
else if (FValIsAllZeros)
Ret = DAG.getNode(ISD::AND, DL, CondVT, Cond,
- DAG.getNode(ISD::BITCAST, DL, CondVT, LHS));
+ DAG.getBitcast(CondVT, LHS));
- return DAG.getNode(ISD::BITCAST, DL, VT, Ret);
+ return DAG.getBitcast(VT, Ret);
}
}
@@ -22554,15 +22773,13 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
// and work with those going forward.
SDValue Vector64 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64,
OnesOrZeroesF);
- SDValue Vector32 = DAG.getNode(ISD::BITCAST, DL, MVT::v4f32,
- Vector64);
+ SDValue Vector32 = DAG.getBitcast(MVT::v4f32, Vector64);
OnesOrZeroesF = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32,
Vector32, DAG.getIntPtrConstant(0, DL));
IntVT = MVT::i32;
}
- SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, IntVT,
- OnesOrZeroesF);
+ SDValue OnesOrZeroesI = DAG.getBitcast(IntVT, OnesOrZeroesF);
SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI,
DAG.getConstant(1, DL, IntVT));
SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
@@ -22775,7 +22992,7 @@ static SDValue VectorZextCombine(SDNode *N, SelectionDAG &DAG,
SDValue NewShuffle = DAG.getVectorShuffle(Shuffle->getValueType(0), DL,
Shuffle->getOperand(0), DAG.getConstant(0, DL, SrcType), Mask);
- return DAG.getNode(ISD::BITCAST, DL, N0.getValueType(), NewShuffle);
+ return DAG.getBitcast(N0.getValueType(), NewShuffle);
}
static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
@@ -22916,7 +23133,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
"Unsupported VT for PSIGN");
Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask.getOperand(0));
- return DAG.getNode(ISD::BITCAST, DL, VT, Mask);
+ return DAG.getBitcast(VT, Mask);
}
// PBLENDVB only available on SSE 4.1
if (!Subtarget->hasSSE41())
@@ -22924,11 +23141,11 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
EVT BlendVT = (VT == MVT::v4i64) ? MVT::v32i8 : MVT::v16i8;
- X = DAG.getNode(ISD::BITCAST, DL, BlendVT, X);
- Y = DAG.getNode(ISD::BITCAST, DL, BlendVT, Y);
- Mask = DAG.getNode(ISD::BITCAST, DL, BlendVT, Mask);
+ X = DAG.getBitcast(BlendVT, X);
+ Y = DAG.getBitcast(BlendVT, Y);
+ Mask = DAG.getBitcast(BlendVT, Mask);
Mask = DAG.getNode(ISD::VSELECT, DL, BlendVT, Mask, Y, X);
- return DAG.getNode(ISD::BITCAST, DL, VT, Mask);
+ return DAG.getBitcast(VT, Mask);
}
}
@@ -23129,7 +23346,7 @@ static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG,
assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
// Convert Src0 value
- SDValue WideSrc0 = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mld->getSrc0());
+ SDValue WideSrc0 = DAG.getBitcast(WideVecVT, Mld->getSrc0());
if (Mld->getSrc0().getOpcode() != ISD::UNDEF) {
SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
@@ -23146,7 +23363,7 @@ static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG,
SDValue Mask = Mld->getMask();
if (Mask.getValueType() == VT) {
// Mask and original value have the same type
- NewMask = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mask);
+ NewMask = DAG.getBitcast(WideVecVT, Mask);
SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i] = i * SizeRatio;
@@ -23214,7 +23431,7 @@ static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG,
assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
- SDValue WideVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mst->getValue());
+ SDValue WideVec = DAG.getBitcast(WideVecVT, Mst->getValue());
SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i] = i * SizeRatio;
@@ -23231,7 +23448,7 @@ static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG,
SDValue Mask = Mst->getMask();
if (Mask.getValueType() == VT) {
// Mask and original value have the same type
- NewMask = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mask);
+ NewMask = DAG.getBitcast(WideVecVT, Mask);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i] = i * SizeRatio;
for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i)
@@ -23323,7 +23540,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
- SDValue WideVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, St->getValue());
+ SDValue WideVec = DAG.getBitcast(WideVecVT, St->getValue());
SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i] = i * SizeRatio;
@@ -23354,7 +23571,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
StoreType, VT.getSizeInBits()/StoreType.getSizeInBits());
assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
- SDValue ShuffWide = DAG.getNode(ISD::BITCAST, dl, StoreVecVT, Shuff);
+ SDValue ShuffWide = DAG.getBitcast(StoreVecVT, Shuff);
SmallVector<SDValue, 8> Chains;
SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, dl,
TLI.getPointerTy());
@@ -23495,7 +23712,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
SDValue ExtOp0 = OldExtract.getOperand(0);
unsigned VecSize = ExtOp0.getValueSizeInBits();
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, VecSize / 64);
- SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtOp0);
+ SDValue BitCast = DAG.getBitcast(VecVT, ExtOp0);
SDValue NewExtract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
BitCast, OldExtract.getOperand(1));
return DAG.getStore(St->getChain(), dl, NewExtract, St->getBasePtr(),
@@ -24239,10 +24456,10 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
// DAG.
SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
// The AND node needs bitcasts to/from an integer vector type around it.
- SDValue MaskConst = DAG.getNode(ISD::BITCAST, DL, IntVT, SourceConst);
+ SDValue MaskConst = DAG.getBitcast(IntVT, SourceConst);
SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT,
N->getOperand(0)->getOperand(0), MaskConst);
- SDValue Res = DAG.getNode(ISD::BITCAST, DL, VT, NewAnd);
+ SDValue Res = DAG.getBitcast(VT, NewAnd);
return Res;
}
@@ -24442,8 +24659,7 @@ static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG,
// In this case, the inner vzext is completely dead because we're going to
// only look at bits inside of the low element. Just do the outer vzext on
// a bitcast of the input to the inner.
- return DAG.getNode(X86ISD::VZEXT, DL, VT,
- DAG.getNode(ISD::BITCAST, DL, OpVT, V));
+ return DAG.getNode(X86ISD::VZEXT, DL, VT, DAG.getBitcast(OpVT, V));
}
// Check if we can bypass extracting and re-inserting an element of an input
@@ -24465,7 +24681,7 @@ static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG,
OrigV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigVT, OrigV,
DAG.getIntPtrConstant(0, DL));
}
- Op = DAG.getNode(ISD::BITCAST, DL, OpVT, OrigV);
+ Op = DAG.getBitcast(OpVT, OrigV);
return DAG.getNode(X86ISD::VZEXT, DL, VT, Op);
}
}
@@ -25301,6 +25517,10 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
Res.first = DestReg;
Res.second = &X86::GR64RegClass;
}
+ } else if (VT != MVT::Other) {
+      // Type mismatch and not a clobber: return an error.
+ Res.first = 0;
+ Res.second = nullptr;
}
} else if (Res.second == &X86::FR32RegClass ||
Res.second == &X86::FR64RegClass ||
@@ -25326,13 +25546,23 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
Res.second = &X86::VR256RegClass;
else if (X86::VR512RegClass.hasType(VT))
Res.second = &X86::VR512RegClass;
+ else if (VT != MVT::Other) {
+    // Type mismatch and not a clobber: return an error.
+ Res.first = 0;
+ Res.second = nullptr;
+ }
+ } else if (VT != MVT::Other) {
+    // Type mismatch and not a clobber: return an error.
+ Res.first = 0;
+ Res.second = nullptr;
}
return Res;
}
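
With the added error paths, a constraint whose register class cannot hold
the requested type now comes back as the sentinel pair (0, nullptr) instead
of a wrong register class. A hedged sketch of the caller-side check; TLI,
TRI, Constraint and VT stand in for whatever the call site has in scope:

    // Hypothetical call site: treat a null register class as a hard error.
    std::pair<unsigned, const TargetRegisterClass *> Res =
        TLI.getRegForInlineAsmConstraint(TRI, Constraint, VT);
    if (!Res.second)
      report_fatal_error("inline asm constraint does not match operand type");
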
int X86TargetLowering::getScalingFactorCost(const AddrMode &AM,
- Type *Ty) const {
+ Type *Ty,
+ unsigned AS) const {
// Scaling factors are not free at all.
// An indexed folded instruction, i.e., inst (reg1, reg2, scale),
// will take 2 allocations in the out of order engine instead of 1
@@ -25351,7 +25581,7 @@ int X86TargetLowering::getScalingFactorCost(const AddrMode &AM,
// E.g., on Haswell:
// vmovaps %ymm1, (%r8, %rdi) can use port 2 or 3.
// vmovaps %ymm1, (%r8) can use port 2, 3, or 7.
- if (isLegalAddressingMode(AM, Ty))
+ if (isLegalAddressingMode(AM, Ty, AS))
// Scale represents reg2 * scale, thus account for 1
// as soon as we use a second register.
return AM.Scale != 0;
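
The only input that matters here is AM.Scale: once the mode is legal, an
address that needs a second (index) register costs one extra allocation and
everything else is free. An illustrative snippet, with field names from
TargetLowering::AddrMode and a hypothetical TLI and Ty in scope:

    TargetLowering::AddrMode AM;
    AM.BaseGV = nullptr;
    AM.BaseOffs = 16;
    AM.HasBaseReg = true;
    AM.Scale = 0;               // 16(%r8)        -> cost 0
    int C0 = TLI.getScalingFactorCost(AM, Ty, /*AddrSpace=*/0);
    AM.Scale = 4;               // 16(%r8,%rdi,4) -> cost 1 (index register)
    int C1 = TLI.getScalingFactorCost(AM, Ty, /*AddrSpace=*/0);
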
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
index b589ca4..b5d062f 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
@@ -56,10 +56,6 @@ namespace llvm {
/// corresponds to X86::ANDNPS or X86::ANDNPD.
FANDN,
- /// Bitwise logical right shift of floating point values. This
- /// corresponds to X86::PSRLDQ.
- FSRL,
-
/// These operations represent an abstract X86 call
/// instruction, which includes a bunch of information. In particular the
/// operands of these node are:
@@ -184,6 +180,9 @@ namespace llvm {
/// Shuffle 16 8-bit values within a vector.
PSHUFB,
+ /// Compute Sum of Absolute Differences.
+ PSADBW,
+
/// Bitwise Logical AND NOT of Packed FP values.
ANDNP,
@@ -200,6 +199,7 @@ namespace llvm {
/// Combined add and sub on an FP vector.
ADDSUB,
+
// FP vector ops with rounding mode.
FADD_RND,
FSUB_RND,
@@ -207,7 +207,11 @@ namespace llvm {
FDIV_RND,
FMAX_RND,
FMIN_RND,
-
+ FSQRT_RND,
+
+ // FP vector get exponent
+ FGETEXP_RND,
+
// Integer add/sub with unsigned saturation.
ADDUS,
SUBUS,
@@ -355,6 +359,8 @@ namespace llvm {
PSHUFHW,
PSHUFLW,
SHUFP,
+      // Shuffle Packed Values at 128-bit granularity
+ SHUF128,
MOVDDUP,
MOVSHDUP,
MOVSLDUP,
@@ -374,6 +380,10 @@ namespace llvm {
VPERMIV3,
VPERMI,
VPERM2X128,
+      // Fix Up Special Packed Float32/64 values
+      VFIXUPIMM,
+      // Range Restriction Calculation For Packed Pairs of Float32/64 values
+ VRANGE,
// Broadcast scalar to vector
VBROADCAST,
// Broadcast subvector to vector
@@ -729,7 +739,8 @@ namespace llvm {
/// Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
- bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
+ bool isLegalAddressingMode(const AddrMode &AM, Type *Ty,
+ unsigned AS) const override;
/// Return true if the specified immediate is legal
/// icmp immediate, that is the target has icmp instructions which can
@@ -748,7 +759,8 @@ namespace llvm {
/// of the specified type.
/// If the AM is supported, the return value must be >= 0.
/// If the AM is not supported, it returns a negative value.
- int getScalingFactorCost(const AddrMode &AM, Type *Ty) const override;
+ int getScalingFactorCost(const AddrMode &AM, Type *Ty,
+ unsigned AS) const override;
bool isVectorShiftByScalarCheap(Type *Ty) const override;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
index 9d11d3c..c1d0aef 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -1047,12 +1047,6 @@ multiclass avx512_permil<bits<8> OpcImm, bits<8> OpcVar, X86VectorVTInfo _,
EVEX_4V;
}
}
-
-defm VPERMQZ : avx512_perm_imm<0x00, "vpermq", X86VPermi, v8i64_info>,
- EVEX_V512, VEX_W;
-defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", X86VPermi, v8f64_info>,
- EVEX_V512, VEX_W;
-
defm VPERMILPSZ : avx512_permil<0x04, 0x0C, v16f32_info, v16i32_info>,
EVEX_V512;
defm VPERMILPDZ : avx512_permil<0x05, 0x0D, v8f64_info, v8i64_info>,
@@ -1063,37 +1057,6 @@ def : Pat<(v16i32 (X86VPermilpi VR512:$src1, (i8 imm:$imm))),
def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))),
(VPERMILPDZri VR512:$src1, imm:$imm)>;
-// -- VPERM - register form --
-multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC,
- PatFrag mem_frag, X86MemOperand x86memop, ValueType OpVT> {
-
- def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst,
- (OpVT (X86VPermv RC:$src1, RC:$src2)))]>, EVEX_4V;
-
- def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst,
- (OpVT (X86VPermv RC:$src1, (mem_frag addr:$src2))))]>,
- EVEX_4V;
-}
-
-defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512, loadv16i32, i512mem,
- v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, loadv8i64, i512mem,
- v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-let ExeDomain = SSEPackedSingle in
-defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, loadv16f32, f512mem,
- v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-let ExeDomain = SSEPackedDouble in
-defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, loadv8f64, f512mem,
- v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
// -- VPERM2I - 3 source operands form --
multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, RegisterClass RC,
PatFrag mem_frag, X86MemOperand x86memop,
@@ -3401,32 +3364,6 @@ defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64,
VR512, loadv8i64, i512mem>, EVEX_V512,
VEX_W, EVEX_CD8<64, CD8VF>;
//===----------------------------------------------------------------------===//
-// AVX-512 - PSHUFD
-//
-
-multiclass avx512_pshuf_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
- SDNode OpNode, PatFrag mem_frag,
- X86MemOperand x86memop, ValueType OpVT> {
- def ri : AVX512Ii8<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, u8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst,
- (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
- EVEX;
- def mi : AVX512Ii8<opc, MRMSrcMem, (outs RC:$dst),
- (ins x86memop:$src1, u8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst,
- (OpVT (OpNode (mem_frag addr:$src1),
- (i8 imm:$src2))))]>, EVEX;
-}
-
-defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, loadv16i32,
- i512mem, v16i32>, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-//===----------------------------------------------------------------------===//
// AVX-512 Logical Instructions
//===----------------------------------------------------------------------===//
@@ -3729,14 +3666,14 @@ multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
(ins _.RC:$src1, u8imm:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))),
- SSE_INTSHIFT_ITINS_P.rr>, AVX512BIi8Base, EVEX_4V;
+ SSE_INTSHIFT_ITINS_P.rr>;
let mayLoad = 1 in
defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
(ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
(i8 imm:$src2))),
- SSE_INTSHIFT_ITINS_P.rm>, AVX512BIi8Base, EVEX_4V;
+ SSE_INTSHIFT_ITINS_P.rm>;
}
multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
@@ -3746,7 +3683,7 @@ multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
(ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
"$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2))),
- SSE_INTSHIFT_ITINS_P.rm>, AVX512BIi8Base, EVEX_4V, EVEX_B;
+ SSE_INTSHIFT_ITINS_P.rm>, EVEX_B;
}
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -3836,16 +3773,16 @@ multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
}
defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli>,
- avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli>;
+ avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli>, AVX512BIi8Base, EVEX_4V;
defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli>,
- avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli>;
+ avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli>, AVX512BIi8Base, EVEX_4V;
defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai>,
- avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai>;
+ avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai>, AVX512BIi8Base, EVEX_4V;
-defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", rotr>;
-defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", rotl>;
+defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", rotr>, AVX512BIi8Base, EVEX_4V;
+defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", rotl>, AVX512BIi8Base, EVEX_4V;
defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra>;
@@ -3865,7 +3802,8 @@ multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2))),
+ (_.VT (OpNode _.RC:$src1,
+ (_.VT (bitconvert (_.LdFrag addr:$src2))))),
SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_4V,
EVEX_CD8<_.EltSize, CD8VF>;
}
@@ -3927,6 +3865,65 @@ defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>,
defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl>;
+//===-------------------------------------------------------------------===//
+// 1-src variable permutation VPERMW/D/Q
+//===-------------------------------------------------------------------===//
+multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ AVX512VLVectorVTInfo _> {
+ let Predicates = [HasAVX512] in
+ defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, _.info512>,
+ avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
+
+ let Predicates = [HasAVX512, HasVLX] in
+ defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info256>,
+ avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
+}
+
+multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
+ string OpcodeStr, SDNode OpNode,
+ AVX512VLVectorVTInfo VTInfo> {
+ let Predicates = [HasAVX512] in
+ defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
+ VTInfo.info512>,
+ avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
+ VTInfo.info512>, EVEX_V512;
+ let Predicates = [HasAVX512, HasVLX] in
+ defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
+ VTInfo.info256>,
+ avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
+ VTInfo.info256>, EVEX_V256;
+}
+
+
+defm VPERM : avx512_var_shift_w<0x8D, "vpermw", X86VPermv>;
+
+defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
+ avx512vl_i32_info>;
+defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
+ avx512vl_i64_info>, VEX_W;
+defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
+ avx512vl_f32_info>;
+defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
+ avx512vl_f64_info>, VEX_W;
+
+defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
+ X86VPermi, avx512vl_i64_info>,
+ EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
+defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
+ X86VPermi, avx512vl_f64_info>,
+ EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
+//===----------------------------------------------------------------------===//
+
+defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
+ X86PShufd, avx512vl_i32_info>,
+ EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
+defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
+ X86PShufhw>, EVEX, AVX512XSIi8Base, VEX_W;
+defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
+ X86PShuflw>, EVEX, AVX512XDIi8Base, VEX_W;
//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//
@@ -4869,11 +4866,6 @@ multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
(ins _.RC:$src), OpcodeStr, "$src", "$src",
(OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT))>;
- defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src), OpcodeStr,
- "{sae}, $src", "$src, {sae}",
- (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>, EVEX_B;
-
defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
(OpNode (_.FloatVT
@@ -4881,24 +4873,58 @@ multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
(i32 FROUND_CURRENT))>;
defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
+ (ins _.MemOp:$src), OpcodeStr,
+ "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
(OpNode (_.FloatVT
(X86VBroadcast (_.ScalarLdFrag addr:$src))),
(i32 FROUND_CURRENT))>, EVEX_B;
}
+multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ SDNode OpNode> {
+ defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src), OpcodeStr,
+ "{sae}, $src", "$src, {sae}",
+ (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>, EVEX_B;
+}
multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode> {
defm PS : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode>,
- EVEX_CD8<32, CD8VF>;
+ avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode>,
+ T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm PD : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode>,
- VEX_W, EVEX_CD8<32, CD8VF>;
+ avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode>,
+ T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
+multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ // Define only if AVX512VL feature is present.
+ let Predicates = [HasVLX] in {
+ defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode>,
+ EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
+ defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode>,
+ EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
+ defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode>,
+ EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
+ defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode>,
+ EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
+ }
+}
let Predicates = [HasERI], hasSideEffects = 0 in {
- defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX, EVEX_V512, T8PD;
- defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX, EVEX_V512, T8PD;
- defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX, EVEX_V512, T8PD;
+ defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX;
+ defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX;
+ defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX;
+}
+defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd>,
+                avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd>, EVEX;
+
+multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
+ SDNode OpNodeRnd, X86VectorVTInfo _>{
+ defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
+ (_.VT (OpNodeRnd _.RC:$src, (i32 imm:$rc)))>,
+ EVEX, EVEX_B, EVEX_RC;
}
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
@@ -5007,20 +5033,22 @@ multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
}
}
-defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>;
+multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
+ SDNode OpNodeRnd> {
+ defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"), OpNodeRnd,
+ v16f32_info>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
+ defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"), OpNodeRnd,
+ v8f64_info>, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
+}
+
+defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>,
+ avx512_sqrt_packed_all_round<0x51, "vsqrt", X86fsqrtRnd>;
defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt",
int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd,
SSE_SQRTSS, SSE_SQRTSD>;
let Predicates = [HasAVX512] in {
- def : Pat<(v16f32 (int_x86_avx512_sqrt_ps_512 (v16f32 VR512:$src1),
- (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_CURRENT)),
- (VSQRTPSZr VR512:$src1)>;
- def : Pat<(v8f64 (int_x86_avx512_sqrt_pd_512 (v8f64 VR512:$src1),
- (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_CURRENT)),
- (VSQRTPDZr VR512:$src1)>;
-
def : Pat<(f32 (fsqrt FR32X:$src)),
(VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
def : Pat<(f32 (fsqrt (load addr:$src))),
@@ -5583,30 +5611,6 @@ def : Pat<(v8i64 (X86Shufp VR512:$src1,
(loadv8i64 addr:$src2), (i8 imm:$imm))),
(VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>;
-multiclass avx512_valign<X86VectorVTInfo _> {
- defm rri : AVX512_maskable<0x03, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
- "valign"##_.Suffix,
- "$src3, $src2, $src1", "$src1, $src2, $src3",
- (_.VT (X86VAlign _.RC:$src2, _.RC:$src1,
- (i8 imm:$src3)))>,
- AVX512AIi8Base, EVEX_4V;
-
- // Also match valign of packed floats.
- def : Pat<(_.FloatVT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$imm))),
- (!cast<Instruction>(NAME##rri) _.RC:$src2, _.RC:$src1, imm:$imm)>;
-
- let mayLoad = 1 in
- def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs _.RC:$dst),
- (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
- !strconcat("valign"##_.Suffix,
- "\t{$src3, $src2, $src1, $dst|"
- "$dst, $src1, $src2, $src3}"),
- []>, EVEX_4V;
-}
-defm VALIGND : avx512_valign<v16i32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VALIGNQ : avx512_valign<v8i64_info>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-
// Helper fragments to match sext vXi1 to vXiY.
def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
@@ -5949,7 +5953,7 @@ multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
(_.LdFrag addr:$src))),
_.RC:$src0)))]>,
EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
-
+
let mayLoad = 1 in
def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.MemOp:$src),
@@ -5958,7 +5962,6 @@ multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
(_.VT (bitconvert (_.LdFrag addr:$src))),
_.ImmAllZerosV)))]>,
EVEX_KZ, EVEX_CD8<_.EltSize, CD8VT1>;
-
}
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
@@ -5979,3 +5982,212 @@ defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", avx512vl_f32_info>,
EVEX;
defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>,
EVEX, VEX_W;
+
+// handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
+//                               op(reg_vec2,mem_vec,imm)
+//                               op(reg_vec2,broadcast(eltVt),imm)
+// all instructions created with FROUND_CURRENT
+multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _>{
+ defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
+ OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (i8 imm:$src3),
+ (i32 FROUND_CURRENT))>;
+ let mayLoad = 1 in {
+ defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
+ OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT (bitconvert (_.LdFrag addr:$src2))),
+ (i8 imm:$src3),
+ (i32 FROUND_CURRENT))>;
+ defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
+ OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_.BroadcastStr##", $src3",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
+ (i8 imm:$src3),
+ (i32 FROUND_CURRENT))>, EVEX_B;
+ }
+}
+
+// handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
+// op(reg_vec2,mem_vec,imm)
+// op(reg_vec2,broadcast(eltVt),imm)
+multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _>{
+ defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
+ OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (i8 imm:$src3))>;
+ let mayLoad = 1 in {
+ defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
+ OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT (bitconvert (_.LdFrag addr:$src2))),
+ (i8 imm:$src3))>;
+ defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
+ OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_.BroadcastStr##", $src3",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
+ (i8 imm:$src3))>, EVEX_B;
+ }
+}
+
+// handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
+//                                      op(reg_vec2,mem_scalar,imm)
+// all instructions created with FROUND_CURRENT
+multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+
+ defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
+ OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (i8 imm:$src3),
+ (i32 FROUND_CURRENT))>;
+ let mayLoad = 1 in {
+ defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
+ OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT (scalar_to_vector
+ (_.ScalarLdFrag addr:$src2))),
+ (i8 imm:$src3),
+ (i32 FROUND_CURRENT))>;
+
+ let isAsmParserOnly = 1 in {
+ defm rmi_alt :AVX512_maskable_in_asm<opc, MRMSrcMem, _, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
+ OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+ []>;
+ }
+ }
+}
+
+// handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
+multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, X86VectorVTInfo _>{
+ defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
+ OpcodeStr, "$src3,{sae}, $src2, $src1",
+ "$src1, $src2,{sae}, $src3",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (i8 imm:$src3),
+ (i32 FROUND_NO_EXC))>, EVEX_B;
+}
+// handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
+multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, X86VectorVTInfo _> {
+ defm NAME: avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNode, _>;
+}
+
+multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
+ AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, Predicate prd>{
+ let Predicates = [prd] in {
+ defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, _.info512>,
+ avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNode, _.info512>,
+ EVEX_V512;
+
+ }
+ let Predicates = [prd, HasVLX] in {
+ defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, _.info128>,
+ EVEX_V128;
+ defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, _.info256>,
+ EVEX_V256;
+ }
+}
+
+multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
+ bits<8> opc, SDNode OpNode>{
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
+ }
+ let Predicates = [HasAVX512, HasVLX] in {
+ defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info128>, EVEX_V128;
+ defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
+ }
+}
+
+multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
+ X86VectorVTInfo _, bits<8> opc, SDNode OpNode, Predicate prd>{
+ let Predicates = [prd] in {
+ defm Z128 : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, _>,
+ avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNode, _>;
+ }
+}
+
+defm VFIXUPIMMPD : avx512_common_fp_sae_packed_imm<"vfixupimmpd",
+ avx512vl_f64_info, 0x54, X86VFixupimm, HasAVX512>,
+ AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
+defm VFIXUPIMMPS : avx512_common_fp_sae_packed_imm<"vfixupimmps",
+ avx512vl_f32_info, 0x54, X86VFixupimm, HasAVX512>,
+ AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
+
+defm VFIXUPIMMSD: avx512_common_fp_sae_scalar_imm<"vfixupimmsd", f64x_info,
+ 0x55, X86VFixupimm, HasAVX512>,
+ AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
+defm VFIXUPIMMSS: avx512_common_fp_sae_scalar_imm<"vfixupimmss", f32x_info,
+ 0x55, X86VFixupimm, HasAVX512>,
+ AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+
+defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
+ 0x50, X86VRange, HasDQI>,
+ AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
+defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
+ 0x50, X86VRange, HasDQI>,
+ AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
+
+defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd", f64x_info,
+ 0x51, X86VRange, HasDQI>,
+ AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
+defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
+ 0x51, X86VRange, HasDQI>,
+ AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+
+
+multiclass avx512_shuff_packed_128<string OpcodeStr, AVX512VLVectorVTInfo _,
+ bits<8> opc, SDNode OpNode = X86Shuf128>{
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
+
+ }
+ let Predicates = [HasAVX512, HasVLX] in {
+ defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
+ }
+}
+
+defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4",avx512vl_f32_info, 0x23>,
+ AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
+defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2",avx512vl_f64_info, 0x23>,
+ AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
+defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4",avx512vl_i32_info, 0x43>,
+ AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
+defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2",avx512vl_i64_info, 0x43>,
+ AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
+
+multiclass avx512_valign<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
+ AVX512VLVectorVTInfo VTInfo_FP>{
+ defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_I, 0x03, X86VAlign>,
+ AVX512AIi8Base, EVEX_4V;
+ let isCodeGenOnly = 1 in {
+ defm NAME#_FP: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0x03, X86VAlign>,
+ AVX512AIi8Base, EVEX_4V;
+ }
+}
+
+defm VALIGND: avx512_valign<"valignd", avx512vl_i32_info, avx512vl_f32_info>,
+ EVEX_CD8<32, CD8VF>;
+defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info, avx512vl_f64_info>,
+ EVEX_CD8<64, CD8VF>, VEX_W;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFormats.td b/contrib/llvm/lib/Target/X86/X86InstrFormats.td
index 331faf2..e2fa295 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFormats.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFormats.td
@@ -764,6 +764,14 @@ class AVX512BIi8Base : PD {
Domain ExeDomain = SSEPackedInt;
ImmType ImmT = Imm8;
}
+class AVX512XSIi8Base : XS {
+ Domain ExeDomain = SSEPackedInt;
+ ImmType ImmT = Imm8;
+}
+class AVX512XDIi8Base : XD {
+ Domain ExeDomain = SSEPackedInt;
+ ImmType ImmT = Imm8;
+}
class AVX512PSIi8Base : PS {
Domain ExeDomain = SSEPackedSingle;
ImmType ImmT = Imm8;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 79d213c..dfe58ef 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -35,8 +35,6 @@ def bc_mmx : PatFrag<(ops node:$in), (x86mmx (bitconvert node:$in))>;
// SSE specific DAG Nodes.
//===----------------------------------------------------------------------===//
-def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>,
- SDTCisFP<0>, SDTCisInt<2> ]>;
def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
SDTCisFP<1>, SDTCisVT<3, i8>,
SDTCisVec<1>]>;
@@ -65,7 +63,6 @@ def X86fandn : SDNode<"X86ISD::FANDN", SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]>;
def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
-def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
def X86fgetsign: SDNode<"X86ISD::FGETSIGNx86",SDTFPToIntOp>;
def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>;
def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>;
@@ -78,6 +75,9 @@ def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>;
def X86pshufb : SDNode<"X86ISD::PSHUFB",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
+def X86psadbw : SDNode<"X86ISD::PSADBW",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
def X86andnp : SDNode<"X86ISD::ANDNP",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
@@ -219,6 +219,8 @@ def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0,1>, SDTCisInt<2>]>;
def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisInt<3>]>;
+def SDTFPBinOpImmRound: SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>, SDTCisInt<3>, SDTCisInt<4>]>;
def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDTVBroadcastm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>]>;
@@ -229,6 +231,9 @@ def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
def SDTFPBinOpRound : SDTypeProfile<1, 3, [ // fadd_round, fmul_round, etc.
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>, SDTCisInt<3>]>;
+def SDTFPUnaryOpRound : SDTypeProfile<1, 2, [ // fsqrt_round, fgetexp_round, etc.
+ SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]>;
+
def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
@@ -247,7 +252,8 @@ def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
def X86PShuflw : SDNode<"X86ISD::PSHUFLW", SDTShuff2OpI>;
-def X86Shufp : SDNode<"X86ISD::SHUFP", SDTShuff3OpI>;
+def X86Shufp : SDNode<"X86ISD::SHUFP", SDTShuff3OpI>;
+def X86Shuf128 : SDNode<"X86ISD::SHUF128", SDTShuff3OpI>;
def X86Movddup : SDNode<"X86ISD::MOVDDUP", SDTShuff1Op>;
def X86Movshdup : SDNode<"X86ISD::MOVSHDUP", SDTShuff1Op>;
@@ -279,6 +285,9 @@ def X86VPermiv3 : SDNode<"X86ISD::VPERMIV3", SDTShuff3Op>;
def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
+def X86VFixupimm : SDNode<"X86ISD::VFIXUPIMM", SDTFPBinOpImmRound>;
+def X86VRange : SDNode<"X86ISD::VRANGE", SDTFPBinOpImmRound>;
+
def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST",
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisSubVecOfVec<1, 0>]>, []>;
@@ -298,6 +307,8 @@ def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>;
def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>;
def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>;
def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>;
+def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>;
+def X86fgetexpRnd : SDNode<"X86ISD::FGETEXP_RND", SDTFPUnaryOpRound>;
def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>;
def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
index 43decf7..6b7a929 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -433,6 +433,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
}
static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
+ { X86::BSF16rr, X86::BSF16rm, 0 },
+ { X86::BSF32rr, X86::BSF32rm, 0 },
+ { X86::BSF64rr, X86::BSF64rm, 0 },
+ { X86::BSR16rr, X86::BSR16rm, 0 },
+ { X86::BSR32rr, X86::BSR32rm, 0 },
+ { X86::BSR64rr, X86::BSR64rm, 0 },
{ X86::CMP16rr, X86::CMP16rm, 0 },
{ X86::CMP32rr, X86::CMP32rm, 0 },
{ X86::CMP64rr, X86::CMP64rm, 0 },
@@ -1690,8 +1696,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPSUBQZrr, X86::VPSUBQZrm, 0 },
{ X86::VSHUFPDZrri, X86::VSHUFPDZrmi, 0 },
{ X86::VSHUFPSZrri, X86::VSHUFPSZrmi, 0 },
- { X86::VALIGNQrri, X86::VALIGNQrmi, 0 },
- { X86::VALIGNDrri, X86::VALIGNDrmi, 0 },
+ { X86::VALIGNQZrri, X86::VALIGNQZrmi, 0 },
+ { X86::VALIGNDZrri, X86::VALIGNDZrmi, 0 },
{ X86::VPMULUDQZrr, X86::VPMULUDQZrm, 0 },
{ X86::VBROADCASTSSZrkz, X86::VBROADCASTSSZmkz, TB_NO_REVERSE },
{ X86::VBROADCASTSDZrkz, X86::VBROADCASTSDZmkz, TB_NO_REVERSE },
@@ -4697,8 +4703,17 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
return false;
}
+static void addOperands(MachineInstrBuilder &MIB, ArrayRef<MachineOperand> MOs) {
+ unsigned NumAddrOps = MOs.size();
+ for (unsigned i = 0; i != NumAddrOps; ++i)
+ MIB.addOperand(MOs[i]);
+ if (NumAddrOps < 4) // FrameIndex only
+ addOffset(MIB, 0);
+}
+
static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
ArrayRef<MachineOperand> MOs,
+ MachineBasicBlock::iterator InsertPt,
MachineInstr *MI,
const TargetInstrInfo &TII) {
// Create the base instruction with the memory operand as the first part.
@@ -4706,11 +4721,7 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
MI->getDebugLoc(), true);
MachineInstrBuilder MIB(MF, NewMI);
- unsigned NumAddrOps = MOs.size();
- for (unsigned i = 0; i != NumAddrOps; ++i)
- MIB.addOperand(MOs[i]);
- if (NumAddrOps < 4) // FrameIndex only
- addOffset(MIB, 0);
+ addOperands(MIB, MOs);
// Loop over the rest of the ri operands, converting them over.
unsigned NumOps = MI->getDesc().getNumOperands()-2;
@@ -4722,11 +4733,16 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
MachineOperand &MO = MI->getOperand(i);
MIB.addOperand(MO);
}
+
+ MachineBasicBlock *MBB = InsertPt->getParent();
+ MBB->insert(InsertPt, NewMI);
+
return MIB;
}
static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode,
unsigned OpNo, ArrayRef<MachineOperand> MOs,
+ MachineBasicBlock::iterator InsertPt,
MachineInstr *MI, const TargetInstrInfo &TII) {
// Omit the implicit operands, something BuildMI can't do.
MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
@@ -4737,38 +4753,32 @@ static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode,
MachineOperand &MO = MI->getOperand(i);
if (i == OpNo) {
assert(MO.isReg() && "Expected to fold into reg operand!");
- unsigned NumAddrOps = MOs.size();
- for (unsigned i = 0; i != NumAddrOps; ++i)
- MIB.addOperand(MOs[i]);
- if (NumAddrOps < 4) // FrameIndex only
- addOffset(MIB, 0);
+ addOperands(MIB, MOs);
} else {
MIB.addOperand(MO);
}
}
+
+ MachineBasicBlock *MBB = InsertPt->getParent();
+ MBB->insert(InsertPt, NewMI);
+
return MIB;
}
static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
ArrayRef<MachineOperand> MOs,
+ MachineBasicBlock::iterator InsertPt,
MachineInstr *MI) {
- MachineFunction &MF = *MI->getParent()->getParent();
- MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), TII.get(Opcode));
-
- unsigned NumAddrOps = MOs.size();
- for (unsigned i = 0; i != NumAddrOps; ++i)
- MIB.addOperand(MOs[i]);
- if (NumAddrOps < 4) // FrameIndex only
- addOffset(MIB, 0);
+ MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
+ MI->getDebugLoc(), TII.get(Opcode));
+ addOperands(MIB, MOs);
return MIB.addImm(0);
}
-MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- unsigned OpNum,
- ArrayRef<MachineOperand> MOs,
- unsigned Size, unsigned Align,
- bool AllowCommute) const {
+MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
+ MachineFunction &MF, MachineInstr *MI, unsigned OpNum,
+ ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt,
+ unsigned Size, unsigned Align, bool AllowCommute) const {
const DenseMap<unsigned,
std::pair<unsigned,unsigned> > *OpcodeTablePtr = nullptr;
bool isCallRegIndirect = Subtarget.callRegIndirect();
@@ -4802,7 +4812,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
isTwoAddrFold = true;
} else if (OpNum == 0) {
if (MI->getOpcode() == X86::MOV32r0) {
- NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI);
+ NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, InsertPt, MI);
if (NewMI)
return NewMI;
}
@@ -4847,9 +4857,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
}
if (isTwoAddrFold)
- NewMI = FuseTwoAddrInst(MF, Opcode, MOs, MI, *this);
+ NewMI = FuseTwoAddrInst(MF, Opcode, MOs, InsertPt, MI, *this);
else
- NewMI = FuseInst(MF, Opcode, OpNum, MOs, MI, *this);
+ NewMI = FuseInst(MF, Opcode, OpNum, MOs, InsertPt, MI, *this);
if (NarrowToMOV32rm) {
// If this is the special case where we use a MOV32rm to load a 32-bit
@@ -4901,8 +4911,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// Attempt to fold with the commuted version of the instruction.
unsigned CommuteOp =
(CommuteOpIdx1 == OriginalOpIdx ? CommuteOpIdx2 : CommuteOpIdx1);
- NewMI = foldMemoryOperandImpl(MF, MI, CommuteOp, MOs, Size, Align,
- /*AllowCommute=*/false);
+ NewMI =
+ foldMemoryOperandImpl(MF, MI, CommuteOp, MOs, InsertPt, Size, Align,
+ /*AllowCommute=*/false);
if (NewMI)
return NewMI;
@@ -5131,10 +5142,9 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
MI->addRegisterKilled(Reg, TRI, true);
}
-MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- ArrayRef<unsigned> Ops,
- int FrameIndex) const {
+MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
+ MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, int FrameIndex) const {
// Check switch flag
if (NoFusing) return nullptr;
@@ -5173,8 +5183,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
return nullptr;
return foldMemoryOperandImpl(MF, MI, Ops[0],
- MachineOperand::CreateFI(FrameIndex), Size,
- Alignment, /*AllowCommute=*/true);
+ MachineOperand::CreateFI(FrameIndex), InsertPt,
+ Size, Alignment, /*AllowCommute=*/true);
}
static bool isPartialRegisterLoad(const MachineInstr &LoadMI,
@@ -5196,17 +5206,16 @@ static bool isPartialRegisterLoad(const MachineInstr &LoadMI,
return false;
}
-MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- ArrayRef<unsigned> Ops,
- MachineInstr *LoadMI) const {
+MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
+ MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, MachineInstr *LoadMI) const {
// If loading from a FrameIndex, fold directly from the FrameIndex.
unsigned NumOps = LoadMI->getDesc().getNumOperands();
int FrameIndex;
if (isLoadFromStackSlot(LoadMI, FrameIndex)) {
if (isPartialRegisterLoad(*LoadMI, MF))
return nullptr;
- return foldMemoryOperandImpl(MF, MI, Ops, FrameIndex);
+ return foldMemoryOperandImpl(MF, MI, Ops, InsertPt, FrameIndex);
}
// Check switch flag
@@ -5326,7 +5335,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
break;
}
}
- return foldMemoryOperandImpl(MF, MI, Ops[0], MOs,
+ return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, InsertPt,
/*Size=*/0, Alignment, /*AllowCommute=*/true);
}
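
The thread through all of these overloads is the new
MachineBasicBlock::iterator InsertPt parameter: the fold helpers now splice
the fused instruction into the block themselves instead of returning a
detached MachineInstr for the caller to place. A hypothetical call sequence
through the public TargetInstrInfo::foldMemoryOperand wrapper, which passes
the iterator along and leaves disposal of the register form to the caller:

    MachineBasicBlock::iterator MII(MI);  // MI: the register-form instruction
    if (MachineInstr *Fused = TII->foldMemoryOperand(MII, Ops, FrameIndex)) {
      // The impl has already inserted Fused in front of MII, so the
      // original instruction is now redundant.
      MI->eraseFromParent();
    }
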
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.h b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
index 0dd8101..ac1b2d4 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
@@ -307,6 +307,7 @@ public:
/// references has been changed.
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt,
int FrameIndex) const override;
/// foldMemoryOperand - Same as the previous version except it allows folding
@@ -314,6 +315,7 @@ public:
/// stack slot.
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt,
MachineInstr *LoadMI) const override;
/// canFoldMemoryOperand - Returns true if the specified load / store is
@@ -407,6 +409,7 @@ public:
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
unsigned OpNum,
ArrayRef<MachineOperand> MOs,
+ MachineBasicBlock::iterator InsertPt,
unsigned Size, unsigned Alignment,
bool AllowCommute) const;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
index 70c2027520..e936b4b 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
@@ -788,6 +788,7 @@ def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">;
def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">;
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
+def HasMPX : Predicate<"Subtarget->hasMPX()">;
def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
def Not64BitMode : Predicate<"!Subtarget->is64Bit()">,
AssemblerPredicate<"!Mode64Bit", "Not 64-bit mode">;
@@ -2456,6 +2457,9 @@ include "X86InstrAVX512.td"
include "X86InstrMMX.td"
include "X86Instr3DNow.td"
+// MPX instructions
+include "X86InstrMPX.td"
+
include "X86InstrVMX.td"
include "X86InstrSVM.td"
diff --git a/contrib/llvm/lib/Target/X86/X86InstrMPX.td b/contrib/llvm/lib/Target/X86/X86InstrMPX.td
new file mode 100644
index 0000000..cf5e2e3
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrMPX.td
@@ -0,0 +1,70 @@
+//===-- X86InstrMPX.td - MPX Instruction Set ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 MPX instruction set, defining the
+// instructions, and properties of the instructions which are needed for code
+// generation, machine code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+multiclass mpx_bound_make<bits<8> opc, string OpcodeStr> {
+ def 32rm: I<opc, MRMSrcMem, (outs BNDR:$dst), (ins i32mem:$src),
+ OpcodeStr#" \t{$src, $dst|$dst, $src}", []>,
+ Requires<[HasMPX, Not64BitMode]>;
+ def 64rm: RI<opc, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src),
+ OpcodeStr#" \t{$src, $dst|$dst, $src}", []>,
+ Requires<[HasMPX, In64BitMode]>;
+}
+
+defm BNDMK : mpx_bound_make<0x1B, "bndmk">, XS;
+
+multiclass mpx_bound_check<bits<8> opc, string OpcodeStr> {
+ def 32rm: I<opc, MRMSrcMem, (outs), (ins BNDR:$src1, i32mem:$src2),
+ OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>,
+ Requires<[HasMPX, Not64BitMode]>;
+ def 64rm: RI<opc, MRMSrcMem, (outs), (ins BNDR:$src1, i64mem:$src2),
+ OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>,
+ Requires<[HasMPX, In64BitMode]>;
+ def 32rr: I<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR32:$src2),
+ OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>,
+ Requires<[HasMPX, Not64BitMode]>;
+ def 64rr: RI<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR64:$src2),
+ OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>,
+ Requires<[HasMPX, In64BitMode]>;
+}
+defm BNDCL : mpx_bound_check<0x1A, "bndcl">, XS;
+defm BNDCU : mpx_bound_check<0x1A, "bndcu">, XD;
+defm BNDCN : mpx_bound_check<0x1B, "bndcn">, XD;
+
+def BNDMOVRMrr : I<0x1A, MRMSrcReg, (outs BNDR:$dst), (ins BNDR:$src),
+ "bndmov \t{$src, $dst|$dst, $src}", []>, PD,
+ Requires<[HasMPX]>;
+def BNDMOVRM32rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src),
+ "bndmov \t{$src, $dst|$dst, $src}", []>, PD,
+ Requires<[HasMPX, Not64BitMode]>;
+def BNDMOVRM64rm : RI<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i128mem:$src),
+ "bndmov \t{$src, $dst|$dst, $src}", []>, PD,
+ Requires<[HasMPX, In64BitMode]>;
+
+def BNDMOVMRrr : I<0x1B, MRMDestReg, (outs BNDR:$dst), (ins BNDR:$src),
+ "bndmov \t{$src, $dst|$dst, $src}", []>, PD,
+ Requires<[HasMPX]>;
+def BNDMOVMR32mr : I<0x1B, MRMDestMem, (outs i64mem:$dst), (ins BNDR:$src),
+ "bndmov \t{$src, $dst|$dst, $src}", []>, PD,
+ Requires<[HasMPX, Not64BitMode]>;
+def BNDMOVMR64mr : RI<0x1B, MRMDestMem, (outs i128mem:$dst), (ins BNDR:$src),
+ "bndmov \t{$src, $dst|$dst, $src}", []>, PD,
+ Requires<[HasMPX, In64BitMode]>;
+
+def BNDSTXmr: I<0x1B, MRMDestMem, (outs), (ins i64mem:$dst, BNDR:$src),
+ "bndstx \t{$src, $dst|$dst, $src}", []>, TB,
+ Requires<[HasMPX]>;
+def BNDLDXrm: I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src),
+ "bndldx \t{$src, $dst|$dst, $src}", []>, TB,
+                  Requires<[HasMPX]>;
\ No newline at end of file
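
Everything in this new file is fenced by the HasMPX predicate added to
X86InstrInfo.td above, which bottoms out in a subtarget feature query. A
sketch of the C++ side, assuming the conventional feature-bit accessor (the
real members live in X86Subtarget.h, also touched by this commit):

    // Assumed shape of the subtarget hook behind the HasMPX predicate.
    class X86Subtarget /* ... */ {
      bool HasMPX = false;          // set when the "mpx" feature is enabled
    public:
      bool hasMPX() const { return HasMPX; }
    };
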
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
index d3b401e..8294e38 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
@@ -3560,7 +3560,7 @@ multiclass scalar_unary_math_patterns<Intrinsic Intr, string OpcPrefix,
let Predicates = [HasAVX] in {
def : Pat<(VT (Move VT:$dst, (Intr VT:$src))),
(!cast<I>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>;
-
+
def : Pat<(VT (X86Blendi VT:$dst, (Intr VT:$src), (i8 1))),
(!cast<I>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>;
}
@@ -4053,6 +4053,20 @@ defm PAVGW : PDI_binop_all_int<0xE3, "pavgw", int_x86_sse2_pavg_w,
defm PSADBW : PDI_binop_all_int<0xF6, "psadbw", int_x86_sse2_psad_bw,
int_x86_avx2_psad_bw, SSE_PMADD, 1>;
+let Predicates = [HasAVX2] in
+ def : Pat<(v32i8 (X86psadbw (v32i8 VR256:$src1),
+ (v32i8 VR256:$src2))),
+ (VPSADBWYrr VR256:$src2, VR256:$src1)>;
+
+let Predicates = [HasAVX] in
+ def : Pat<(v16i8 (X86psadbw (v16i8 VR128:$src1),
+ (v16i8 VR128:$src2))),
+ (VPSADBWrr VR128:$src2, VR128:$src1)>;
+
+def : Pat<(v16i8 (X86psadbw (v16i8 VR128:$src1),
+ (v16i8 VR128:$src2))),
+ (PSADBWrr VR128:$src2, VR128:$src1)>;
+
let Predicates = [HasAVX] in
defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128,
loadv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>,
@@ -4207,16 +4221,6 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in {
}
} // Constraints = "$src1 = $dst"
-let Predicates = [HasAVX] in {
- def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
- (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
-}
-
-let Predicates = [UseSSE2] in {
- def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
- (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
-}
-
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Comparison Instructions
//===---------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 4af514a..0268066 100644
--- a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -21,7 +21,8 @@ enum IntrinsicType {
GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX,
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP,
CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
- INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, FMA_OP_MASK,
+ INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK,
+ INTR_TYPE_3OP_MASK, FMA_OP_MASK,
INTR_TYPE_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
EXPAND_FROM_MEM, BLEND
};
@@ -339,9 +340,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_div_ps_512, INTR_TYPE_2OP_MASK, ISD::FDIV,
X86ISD::FDIV_RND),
X86_INTRINSIC_DATA(avx512_mask_div_sd_round, INTR_TYPE_SCALAR_MASK_RM, ISD::FDIV,
- X86ISD::FDIV_RND),
+ X86ISD::FDIV_RND),
X86_INTRINSIC_DATA(avx512_mask_div_ss_round, INTR_TYPE_SCALAR_MASK_RM, ISD::FDIV,
- X86ISD::FDIV_RND),
+ X86ISD::FDIV_RND),
X86_INTRINSIC_DATA(avx512_mask_expand_d_128, COMPRESS_EXPAND_IN_REG,
X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_d_256, COMPRESS_EXPAND_IN_REG,
@@ -366,6 +367,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_q_512, COMPRESS_EXPAND_IN_REG,
X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getexp_pd_128, INTR_TYPE_1OP_MASK_RM,
+ X86ISD::FGETEXP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getexp_pd_256, INTR_TYPE_1OP_MASK_RM,
+ X86ISD::FGETEXP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getexp_pd_512, INTR_TYPE_1OP_MASK_RM,
+ X86ISD::FGETEXP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getexp_ps_128, INTR_TYPE_1OP_MASK_RM,
+ X86ISD::FGETEXP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getexp_ps_256, INTR_TYPE_1OP_MASK_RM,
+ X86ISD::FGETEXP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getexp_ps_512, INTR_TYPE_1OP_MASK_RM,
+ X86ISD::FGETEXP_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_max_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(avx512_mask_max_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(avx512_mask_max_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX,
@@ -559,6 +572,14 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::RNDSCALE, 0),
X86_INTRINSIC_DATA(avx512_mask_rndscale_ss, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::RNDSCALE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_512, INTR_TYPE_1OP_MASK_RM, ISD::FSQRT,
+ X86ISD::FSQRT_RND),
+ X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_512, INTR_TYPE_1OP_MASK_RM, ISD::FSQRT,
+ X86ISD::FSQRT_RND),
X86_INTRINSIC_DATA(avx512_mask_sub_pd_128, INTR_TYPE_2OP_MASK, ISD::FSUB, 0),
X86_INTRINSIC_DATA(avx512_mask_sub_pd_256, INTR_TYPE_2OP_MASK, ISD::FSUB, 0),
X86_INTRINSIC_DATA(avx512_mask_sub_pd_512, INTR_TYPE_2OP_MASK, ISD::FSUB,
@@ -583,6 +604,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_128, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_256, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
+ X86_INTRINSIC_DATA(avx512_mask_valign_d_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_valign_q_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
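
The new INTR_TYPE_1OP_MASK and INTR_TYPE_3OP_MASK rows work only because
these tables stay sorted by intrinsic number and are consulted with a
binary search. A self-contained sketch of that lookup; the trimmed struct
and the function name are assumptions, not the header's exact code:

    #include <algorithm>
    #include <cstdint>

    struct IntrinsicData { uint16_t Id; /* type and opcodes omitted */ };

    // Find an entry by intrinsic number in a table sorted by Id.
    const IntrinsicData *lookup(const IntrinsicData *B, const IntrinsicData *E,
                                uint16_t IntNo) {
      const IntrinsicData *I = std::lower_bound(
          B, E, IntNo,
          [](const IntrinsicData &D, uint16_t No) { return D.Id < No; });
      return (I != E && I->Id == IntNo) ? I : nullptr;
    }
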
diff --git a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
index 556b518..ff1436a 100644
--- a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -128,6 +128,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
const DataLayout *DL = TM.getDataLayout();
assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) && "Isn't a symbol reference");
+ MCSymbol *Sym = nullptr;
SmallString<128> Name;
StringRef Suffix;
@@ -160,12 +161,14 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
else
getMang()->getNameWithPrefix(Name, MO.getSymbolName());
} else if (MO.isMBB()) {
- Name += MO.getMBB()->getSymbol()->getName();
+ assert(Suffix.empty());
+ Sym = MO.getMBB()->getSymbol();
}
unsigned OrigLen = Name.size() - PrefixLen;
Name += Suffix;
- MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
+ if (!Sym)
+ Sym = Ctx.getOrCreateSymbol(Name);
StringRef OrigName = StringRef(Name).substr(PrefixLen, OrigLen);
@@ -240,10 +243,10 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break;
case X86II::MO_TLVP_PIC_BASE:
- Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
+ Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
// Subtract the pic base.
- Expr = MCBinaryExpr::CreateSub(Expr,
- MCSymbolRefExpr::Create(MF.getPICBaseSymbol(),
+ Expr = MCBinaryExpr::createSub(Expr,
+ MCSymbolRefExpr::create(MF.getPICBaseSymbol(),
Ctx),
Ctx);
break;
@@ -264,10 +267,10 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case X86II::MO_PIC_BASE_OFFSET:
case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
- Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+ Expr = MCSymbolRefExpr::create(Sym, Ctx);
// Subtract the pic base.
- Expr = MCBinaryExpr::CreateSub(Expr,
- MCSymbolRefExpr::Create(MF.getPICBaseSymbol(), Ctx),
+ Expr = MCBinaryExpr::createSub(Expr,
+ MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx),
Ctx);
if (MO.isJTI()) {
assert(MAI.doesSetDirectiveSuppressesReloc());
@@ -277,17 +280,17 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
// section so we are restricting it to jumptable references.
MCSymbol *Label = Ctx.createTempSymbol();
AsmPrinter.OutStreamer->EmitAssignment(Label, Expr);
- Expr = MCSymbolRefExpr::Create(Label, Ctx);
+ Expr = MCSymbolRefExpr::create(Label, Ctx);
}
break;
}
if (!Expr)
- Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx);
+ Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx);
if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
- Expr = MCBinaryExpr::CreateAdd(Expr,
- MCConstantExpr::Create(MO.getOffset(), Ctx),
+ Expr = MCBinaryExpr::createAdd(Expr,
+ MCConstantExpr::create(MO.getOffset(), Ctx),
Ctx);
return MCOperand::createExpr(Expr);
}
@@ -710,7 +713,7 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
}
MCSymbol *sym = MCInstLowering.GetSymbolFromOperand(MI.getOperand(3));
- const MCSymbolRefExpr *symRef = MCSymbolRefExpr::Create(sym, SRVK, context);
+ const MCSymbolRefExpr *symRef = MCSymbolRefExpr::create(sym, SRVK, context);
MCInst LEA;
if (is64Bits) {
@@ -749,7 +752,7 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
StringRef name = is64Bits ? "__tls_get_addr" : "___tls_get_addr";
MCSymbol *tlsGetAddr = context.getOrCreateSymbol(name);
const MCSymbolRefExpr *tlsRef =
- MCSymbolRefExpr::Create(tlsGetAddr,
+ MCSymbolRefExpr::create(tlsGetAddr,
MCSymbolRefExpr::VK_PLT,
context);
@@ -1071,7 +1074,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
// FIXME: We would like an efficient form for this, so we don't have to do a
// lot of extra uniquing.
EmitAndCountInstruction(MCInstBuilder(X86::CALLpcrel32)
- .addExpr(MCSymbolRefExpr::Create(PICBase, OutContext)));
+ .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
// Emit the label.
OutStreamer->EmitLabel(PICBase);
@@ -1100,12 +1103,12 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Now that we have emitted the label, lower the complex operand expression.
MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));
- const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext);
+ const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
const MCExpr *PICBase =
- MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), OutContext);
- DotExpr = MCBinaryExpr::CreateSub(DotExpr, PICBase, OutContext);
+ MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
+ DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext);
- DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext),
+ DotExpr = MCBinaryExpr::createAdd(MCSymbolRefExpr::create(OpSym,OutContext),
DotExpr, OutContext);
EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri)
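For reference, every expression this hunk touches has the same shape: take the symbol, subtract the pic base, and optionally fold in a constant addend. A standalone sketch using the renamed lower-case factories (buildPICRelative is a hypothetical helper, not code from this patch; assumes the usual LLVM MC headers and using namespace llvm):

static const MCExpr *buildPICRelative(MCSymbol *Sym, MCSymbol *PICBase,
                                      int64_t Offset, MCContext &Ctx) {
  // Sym
  const MCExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
  // Sym - picbase, as the MO_PIC_BASE_OFFSET case above does.
  Expr = MCBinaryExpr::createSub(Expr, MCSymbolRefExpr::create(PICBase, Ctx),
                                 Ctx);
  // Sym - picbase + Offset, mirroring the MO.getOffset() case.
  if (Offset)
    Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Offset, Ctx),
                                   Ctx);
  return Expr;
}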
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 1f36163..e9b6bfc 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -175,12 +175,12 @@ X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
return &X86::GR64_NOSPRegClass;
return &X86::GR32_NOSPRegClass;
case 2: // Available for tailcall (not callee-saved GPRs).
- if (IsWin64)
+ const Function *F = MF.getFunction();
+ if (IsWin64 || (F && F->getCallingConv() == CallingConv::X86_64_Win64))
return &X86::GR64_TCW64RegClass;
else if (Is64Bit)
return &X86::GR64_TCRegClass;
- const Function *F = MF.getFunction();
bool hasHipeCC = (F ? F->getCallingConv() == CallingConv::HiPE : false);
if (hasHipeCC)
return &X86::GR32RegClass;
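The fix above hoists the declaration of F so the new Win64 calling-convention check can use it before the early returns. Condensed, the selection logic now reads as follows (a sketch, not the patch itself; the final GR32_TCRegClass fall-through is an assumption about context this hunk truncates):

const TargetRegisterClass *pickTailCallRC(bool IsWin64, bool Is64Bit,
                                          const Function *F) {
  if (IsWin64 || (F && F->getCallingConv() == CallingConv::X86_64_Win64))
    return &X86::GR64_TCW64RegClass;   // Win64 ABI tail calls
  if (Is64Bit)
    return &X86::GR64_TCRegClass;      // generic 64-bit tail calls
  if (F && F->getCallingConv() == CallingConv::HiPE)
    return &X86::GR32RegClass;         // HiPE convention
  return &X86::GR32_TCRegClass;        // assumed 32-bit fall-through
}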
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
index 2e735fa..cdb151c 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -302,6 +302,11 @@ def CR15 : X86Reg<"cr15", 15>;
def EIZ : X86Reg<"eiz", 4>;
def RIZ : X86Reg<"riz", 4>;
+// Bound registers, used in MPX instructions
+def BND0 : X86Reg<"bnd0", 0>;
+def BND1 : X86Reg<"bnd1", 1>;
+def BND2 : X86Reg<"bnd2", 2>;
+def BND3 : X86Reg<"bnd3", 3>;
//===----------------------------------------------------------------------===//
// Register Class Definitions... now that we have all of the pieces, define the
@@ -484,3 +489,6 @@ def VK8WM : RegisterClass<"X86", [v8i1], 8, (sub VK8, K0)> {let Size = 8;}
def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)> {let Size = 16;}
def VK32WM : RegisterClass<"X86", [v32i1], 32, (add VK16WM)> {let Size = 32;}
def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;}
+
+// Bound registers
+def BNDR : RegisterClass<"X86", [v2i64], 128, (sequence "BND%u", 0, 3)>;
\ No newline at end of file
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
index 1cdab14..74af29f 100644
--- a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -259,6 +259,7 @@ void X86Subtarget::initializeEnvironment() {
HasSHA = false;
HasPRFCHW = false;
HasRDSEED = false;
+ HasMPX = false;
IsBTMemSlow = false;
IsSHLDSlow = false;
IsUAMemFast = false;
@@ -273,8 +274,6 @@ void X86Subtarget::initializeEnvironment() {
LEAUsesAG = false;
SlowLEA = false;
SlowIncDec = false;
- UseSqrtEst = false;
- UseReciprocalEst = false;
stackAlignment = 4;
// FIXME: this is a known good value for Yonah. How about others?
MaxInlineSizeThreshold = 128;
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm/lib/Target/X86/X86Subtarget.h
index 455dd77..a476f7a 100644
--- a/contrib/llvm/lib/Target/X86/X86Subtarget.h
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.h
@@ -190,16 +190,6 @@ protected:
/// True if INC and DEC instructions are slow when writing to flags
bool SlowIncDec;
- /// Use the RSQRT* instructions to optimize square root calculations.
- /// For this to be profitable, the cost of FSQRT and FDIV must be
- /// substantially higher than normal FP ops like FADD and FMUL.
- bool UseSqrtEst;
-
- /// Use the RCP* instructions to optimize FP division calculations.
- /// For this to be profitable, the cost of FDIV must be
- /// substantially higher than normal FP ops like FADD and FMUL.
- bool UseReciprocalEst;
-
/// Processor has AVX-512 PreFetch Instructions
bool HasPFI;
@@ -218,6 +208,9 @@ protected:
/// Processor has AVX-512 Vector Length eXtensions
bool HasVLX;
+ /// Processor supports MPX - Memory Protection Extensions
+ bool HasMPX;
+
/// Use software floating point for code generation.
bool UseSoftFloat;
@@ -377,14 +370,13 @@ public:
bool LEAusesAG() const { return LEAUsesAG; }
bool slowLEA() const { return SlowLEA; }
bool slowIncDec() const { return SlowIncDec; }
- bool useSqrtEst() const { return UseSqrtEst; }
- bool useReciprocalEst() const { return UseReciprocalEst; }
bool hasCDI() const { return HasCDI; }
bool hasPFI() const { return HasPFI; }
bool hasERI() const { return HasERI; }
bool hasDQI() const { return HasDQI; }
bool hasBWI() const { return HasBWI; }
bool hasVLX() const { return HasVLX; }
+ bool hasMPX() const { return HasMPX; }
bool isAtom() const { return X86ProcFamily == IntelAtom; }
bool isSLM() const { return X86ProcFamily == IntelSLM; }
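A feature bit like HasMPX is typically consumed by predicating lowering decisions on its accessor; a minimal hypothetical use site (lowerBoundsCheck and its body are illustrative, not part of this patch):

void lowerBoundsCheck(const X86Subtarget &Subtarget) {
  if (Subtarget.hasMPX()) {
    // Emit MPX bound-register instructions (BNDMK/BNDCL/BNDCU).
  } else {
    // Fall back to ordinary compare-and-branch bounds checks.
  }
}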
diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
index 3e5f1d8..646cff7 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -105,6 +105,13 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, StringRef CPU,
if (Subtarget.isTargetWin64())
this->Options.TrapUnreachable = true;
+ // TODO: By default, all reciprocal estimate operations are off because
+ // that matches the behavior before TargetRecip was added (except for btver2
+ // which used subtarget features to enable this type of codegen).
+ // We should change this to match GCC behavior where everything but
+ // scalar division estimates are turned on by default with -ffast-math.
+ this->Options.Reciprocals.setDefaults("all", false, 1);
+
initAsmInfo();
}
@@ -221,9 +228,9 @@ bool X86PassConfig::addILPOpts() {
}
bool X86PassConfig::addPreISel() {
- // Only add this pass for 32-bit x86.
+ // Only add this pass for 32-bit x86 Windows.
Triple TT(TM->getTargetTriple());
- if (TT.getArch() == Triple::x86)
+ if (TT.isOSWindows() && TT.getArch() == Triple::x86)
addPass(createX86WinEHStatePass());
return true;
}
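The setDefaults("all", false, 1) call above disables every reciprocal-estimate kind while still recording one refinement step for anyone who re-enables them. The GCC-like default the TODO describes would look roughly like this (the "divf"/"divd" key names are assumptions about the TargetRecip key set, not verified against this tree):

  // Sketch only: turn everything on, then opt scalar division back out.
  this->Options.Reciprocals.setDefaults("all", true, 1);
  this->Options.Reciprocals.setDefaults("divf", false, 1);  // scalar float
  this->Options.Reciprocals.setDefaults("divd", false, 1);  // scalar double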
diff --git a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp
index 6bf45c3..f9f6290 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp
@@ -32,9 +32,9 @@ const MCExpr *X86_64MachoTargetObjectFile::getTTypeGlobalReference(
if ((Encoding & DW_EH_PE_indirect) && (Encoding & DW_EH_PE_pcrel)) {
const MCSymbol *Sym = TM.getSymbol(GV, Mang);
const MCExpr *Res =
- MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext());
- const MCExpr *Four = MCConstantExpr::Create(4, getContext());
- return MCBinaryExpr::CreateAdd(Res, Four, getContext());
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext());
+ const MCExpr *Four = MCConstantExpr::create(4, getContext());
+ return MCBinaryExpr::createAdd(Res, Four, getContext());
}
return TargetLoweringObjectFileMachO::getTTypeGlobalReference(
@@ -55,14 +55,14 @@ const MCExpr *X86_64MachoTargetObjectFile::getIndirectSymViaGOTPCRel(
// foo@GOTPCREL+4+<offset>.
unsigned FinalOff = Offset+MV.getConstant()+4;
const MCExpr *Res =
- MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext());
- const MCExpr *Off = MCConstantExpr::Create(FinalOff, getContext());
- return MCBinaryExpr::CreateAdd(Res, Off, getContext());
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext());
+ const MCExpr *Off = MCConstantExpr::create(FinalOff, getContext());
+ return MCBinaryExpr::createAdd(Res, Off, getContext());
}
const MCExpr *X86ELFTargetObjectFile::getDebugThreadLocalSymbol(
const MCSymbol *Sym) const {
- return MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_DTPOFF, getContext());
+ return MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_DTPOFF, getContext());
}
void
@@ -116,7 +116,7 @@ const MCExpr *X86WindowsTargetObjectFile::getExecutableRelativeSymbol(
if (GOLHS->isThreadLocal())
return nullptr;
- return MCSymbolRefExpr::Create(TM.getSymbol(GOLHS, Mang),
+ return MCSymbolRefExpr::create(TM.getSymbol(GOLHS, Mang),
MCSymbolRefExpr::VK_COFF_IMGREL32,
getContext());
}
diff --git a/contrib/llvm/lib/Target/X86/X86WinEHState.cpp b/contrib/llvm/lib/Target/X86/X86WinEHState.cpp
index 4efaada..ce69ea7 100644
--- a/contrib/llvm/lib/Target/X86/X86WinEHState.cpp
+++ b/contrib/llvm/lib/Target/X86/X86WinEHState.cpp
@@ -16,6 +16,7 @@
#include "X86.h"
#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Dominators.h"
@@ -59,30 +60,49 @@ public:
private:
void emitExceptionRegistrationRecord(Function *F);
- void linkExceptionRegistration(IRBuilder<> &Builder, Value *RegNode,
- Value *Handler);
- void unlinkExceptionRegistration(IRBuilder<> &Builder, Value *RegNode);
+ void linkExceptionRegistration(IRBuilder<> &Builder, Value *Handler);
+ void unlinkExceptionRegistration(IRBuilder<> &Builder);
+ void addCXXStateStores(Function &F, MachineModuleInfo &MMI);
+ void addCXXStateStoresToFunclet(Value *ParentRegNode, WinEHFuncInfo &FuncInfo,
+ Function &F, int BaseState);
+ void insertStateNumberStore(Value *ParentRegNode, Instruction *IP, int State);
Value *emitEHLSDA(IRBuilder<> &Builder, Function *F);
Function *generateLSDAInEAXThunk(Function *ParentFunc);
+ int escapeRegNode(Function &F);
+
// Module-level type getters.
- Type *getEHRegistrationType();
- Type *getSEH3RegistrationType();
- Type *getSEH4RegistrationType();
- Type *getCXXEH3RegistrationType();
+ Type *getEHLinkRegistrationType();
+ Type *getSEHRegistrationType();
+ Type *getCXXEHRegistrationType();
// Per-module data.
Module *TheModule = nullptr;
- StructType *EHRegistrationTy = nullptr;
- StructType *CXXEH3RegistrationTy = nullptr;
- StructType *SEH3RegistrationTy = nullptr;
- StructType *SEH4RegistrationTy = nullptr;
+ StructType *EHLinkRegistrationTy = nullptr;
+ StructType *CXXEHRegistrationTy = nullptr;
+ StructType *SEHRegistrationTy = nullptr;
+ Function *FrameRecover = nullptr;
+ Function *FrameAddress = nullptr;
+ Function *FrameEscape = nullptr;
// Per-function state
EHPersonality Personality = EHPersonality::Unknown;
Function *PersonalityFn = nullptr;
+
+ /// The stack allocation containing all EH data, including the link in the
+ /// fs:00 chain and the current state.
+ AllocaInst *RegNode = nullptr;
+
+ /// Struct type of RegNode. Used for GEPing.
+ Type *RegNodeTy = nullptr;
+
+ /// The index of the state field of RegNode.
+ int StateFieldIndex = ~0U;
+
+ /// The linked list node subobject inside of RegNode.
+ Value *Link = nullptr;
};
}
@@ -92,16 +112,21 @@ char WinEHStatePass::ID = 0;
bool WinEHStatePass::doInitialization(Module &M) {
TheModule = &M;
+ FrameEscape = Intrinsic::getDeclaration(TheModule, Intrinsic::frameescape);
+ FrameRecover = Intrinsic::getDeclaration(TheModule, Intrinsic::framerecover);
+ FrameAddress = Intrinsic::getDeclaration(TheModule, Intrinsic::frameaddress);
return false;
}
bool WinEHStatePass::doFinalization(Module &M) {
assert(TheModule == &M);
TheModule = nullptr;
- EHRegistrationTy = nullptr;
- CXXEH3RegistrationTy = nullptr;
- SEH3RegistrationTy = nullptr;
- SEH4RegistrationTy = nullptr;
+ EHLinkRegistrationTy = nullptr;
+ CXXEHRegistrationTy = nullptr;
+ SEHRegistrationTy = nullptr;
+ FrameEscape = nullptr;
+ FrameRecover = nullptr;
+ FrameAddress = nullptr;
return false;
}
@@ -136,8 +161,19 @@ bool WinEHStatePass::runOnFunction(Function &F) {
if (!isMSVCEHPersonality(Personality))
return false;
+ // Disable frame pointer elimination in this function.
+ // FIXME: Do the nested handlers need to keep the parent ebp in ebp, or can we
+ // use an arbitrary register?
+ F.addFnAttr("no-frame-pointer-elim", "true");
+
emitExceptionRegistrationRecord(&F);
- // FIXME: State insertion.
+
+ auto *MMIPtr = getAnalysisIfAvailable<MachineModuleInfo>();
+ assert(MMIPtr && "MachineModuleInfo should always be available");
+ MachineModuleInfo &MMI = *MMIPtr;
+ if (Personality == EHPersonality::MSVC_CXX) {
+ addCXXStateStores(F, MMI);
+ }
// Reset per-function state.
PersonalityFn = nullptr;
@@ -152,17 +188,17 @@ bool WinEHStatePass::runOnFunction(Function &F) {
/// EHRegistrationNode *Next;
/// PEXCEPTION_ROUTINE Handler;
/// };
-Type *WinEHStatePass::getEHRegistrationType() {
- if (EHRegistrationTy)
- return EHRegistrationTy;
+Type *WinEHStatePass::getEHLinkRegistrationType() {
+ if (EHLinkRegistrationTy)
+ return EHLinkRegistrationTy;
LLVMContext &Context = TheModule->getContext();
- EHRegistrationTy = StructType::create(Context, "EHRegistrationNode");
+ EHLinkRegistrationTy = StructType::create(Context, "EHRegistrationNode");
Type *FieldTys[] = {
- EHRegistrationTy->getPointerTo(0), // EHRegistrationNode *Next
+ EHLinkRegistrationTy->getPointerTo(0), // EHRegistrationNode *Next
Type::getInt8PtrTy(Context) // EXCEPTION_DISPOSITION (*Handler)(...)
};
- EHRegistrationTy->setBody(FieldTys, false);
- return EHRegistrationTy;
+ EHLinkRegistrationTy->setBody(FieldTys, false);
+ return EHLinkRegistrationTy;
}
/// The __CxxFrameHandler3 registration node:
@@ -171,40 +207,21 @@ Type *WinEHStatePass::getEHRegistrationType() {
/// EHRegistrationNode SubRecord;
/// int32_t TryLevel;
/// };
-Type *WinEHStatePass::getCXXEH3RegistrationType() {
- if (CXXEH3RegistrationTy)
- return CXXEH3RegistrationTy;
+Type *WinEHStatePass::getCXXEHRegistrationType() {
+ if (CXXEHRegistrationTy)
+ return CXXEHRegistrationTy;
LLVMContext &Context = TheModule->getContext();
Type *FieldTys[] = {
Type::getInt8PtrTy(Context), // void *SavedESP
- getEHRegistrationType(), // EHRegistrationNode SubRecord
+ getEHLinkRegistrationType(), // EHRegistrationNode SubRecord
Type::getInt32Ty(Context) // int32_t TryLevel
};
- CXXEH3RegistrationTy =
+ CXXEHRegistrationTy =
StructType::create(FieldTys, "CXXExceptionRegistration");
- return CXXEH3RegistrationTy;
-}
-
-/// The _except_handler3 registration node:
-/// struct EH3ExceptionRegistration {
-/// EHRegistrationNode SubRecord;
-/// void *ScopeTable;
-/// int32_t TryLevel;
-/// };
-Type *WinEHStatePass::getSEH3RegistrationType() {
- if (SEH3RegistrationTy)
- return SEH3RegistrationTy;
- LLVMContext &Context = TheModule->getContext();
- Type *FieldTys[] = {
- getEHRegistrationType(), // EHRegistrationNode SubRecord
- Type::getInt8PtrTy(Context), // void *ScopeTable
- Type::getInt32Ty(Context) // int32_t TryLevel
- };
- SEH3RegistrationTy = StructType::create(FieldTys, "EH3ExceptionRegistration");
- return SEH3RegistrationTy;
+ return CXXEHRegistrationTy;
}
-/// The _except_handler4 registration node:
+/// The _except_handler3/4 registration node:
/// struct EH4ExceptionRegistration {
/// void *SavedESP;
/// _EXCEPTION_POINTERS *ExceptionPointers;
@@ -212,19 +229,19 @@ Type *WinEHStatePass::getSEH3RegistrationType() {
/// int32_t EncodedScopeTable;
/// int32_t TryLevel;
/// };
-Type *WinEHStatePass::getSEH4RegistrationType() {
- if (SEH4RegistrationTy)
- return SEH4RegistrationTy;
+Type *WinEHStatePass::getSEHRegistrationType() {
+ if (SEHRegistrationTy)
+ return SEHRegistrationTy;
LLVMContext &Context = TheModule->getContext();
Type *FieldTys[] = {
Type::getInt8PtrTy(Context), // void *SavedESP
Type::getInt8PtrTy(Context), // void *ExceptionPointers
- getEHRegistrationType(), // EHRegistrationNode SubRecord
+ getEHLinkRegistrationType(), // EHRegistrationNode SubRecord
Type::getInt32Ty(Context), // int32_t EncodedScopeTable
Type::getInt32Ty(Context) // int32_t TryLevel
};
- SEH4RegistrationTy = StructType::create(FieldTys, "EH4ExceptionRegistration");
- return SEH4RegistrationTy;
+ SEHRegistrationTy = StructType::create(FieldTys, "SEHExceptionRegistration");
+ return SEHRegistrationTy;
}
// Emit an exception registration record. These are stack allocations with the
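Collected from the doc comments above, the three on-stack records this pass materializes have the following shape (a plain-C reference sketch assembled from those comments, not code in the patch):

#include <stdint.h>

struct EHRegistrationNode {            // the raw fs:00 chain link
  struct EHRegistrationNode *Next;
  void *Handler;                       // EXCEPTION_DISPOSITION (*)(...)
};

struct CXXExceptionRegistration {      // __CxxFrameHandler3
  void *SavedESP;
  struct EHRegistrationNode SubRecord;
  int32_t TryLevel;
};

struct SEHExceptionRegistration {      // _except_handler3 / _except_handler4
  void *SavedESP;
  void *ExceptionPointers;             // _EXCEPTION_POINTERS *
  struct EHRegistrationNode SubRecord;
  int32_t EncodedScopeTable;           // XORed with __security_cookie for EH4
  int32_t TryLevel;
};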
@@ -238,62 +255,63 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) {
StringRef PersonalityName = PersonalityFn->getName();
IRBuilder<> Builder(&F->getEntryBlock(), F->getEntryBlock().begin());
Type *Int8PtrType = Builder.getInt8PtrTy();
- Value *SubRecord = nullptr;
- if (PersonalityName == "__CxxFrameHandler3") {
- Type *RegNodeTy = getCXXEH3RegistrationType();
- Value *RegNode = Builder.CreateAlloca(RegNodeTy);
+ if (Personality == EHPersonality::MSVC_CXX) {
+ RegNodeTy = getCXXEHRegistrationType();
+ RegNode = Builder.CreateAlloca(RegNodeTy);
// FIXME: We can skip this in -GS- mode, when we figure that out.
// SavedESP = llvm.stacksave()
Value *SP = Builder.CreateCall(
Intrinsic::getDeclaration(TheModule, Intrinsic::stacksave), {});
Builder.CreateStore(SP, Builder.CreateStructGEP(RegNodeTy, RegNode, 0));
// TryLevel = -1
- Builder.CreateStore(Builder.getInt32(-1),
- Builder.CreateStructGEP(RegNodeTy, RegNode, 2));
+ StateFieldIndex = 2;
+ insertStateNumberStore(RegNode, Builder.GetInsertPoint(), -1);
// Handler = __ehhandler$F
Function *Trampoline = generateLSDAInEAXThunk(F);
- SubRecord = Builder.CreateStructGEP(RegNodeTy, RegNode, 1);
- linkExceptionRegistration(Builder, SubRecord, Trampoline);
- } else if (PersonalityName == "_except_handler3") {
- Type *RegNodeTy = getSEH3RegistrationType();
- Value *RegNode = Builder.CreateAlloca(RegNodeTy);
- // TryLevel = -1
- Builder.CreateStore(Builder.getInt32(-1),
- Builder.CreateStructGEP(RegNodeTy, RegNode, 2));
- // ScopeTable = llvm.x86.seh.lsda(F)
- Value *LSDA = emitEHLSDA(Builder, F);
- Builder.CreateStore(LSDA, Builder.CreateStructGEP(RegNodeTy, RegNode, 1));
- SubRecord = Builder.CreateStructGEP(RegNodeTy, RegNode, 0);
- linkExceptionRegistration(Builder, SubRecord, PersonalityFn);
- } else if (PersonalityName == "_except_handler4") {
- Type *RegNodeTy = getSEH4RegistrationType();
- Value *RegNode = Builder.CreateAlloca(RegNodeTy);
+ Link = Builder.CreateStructGEP(RegNodeTy, RegNode, 1);
+ linkExceptionRegistration(Builder, Trampoline);
+ } else if (Personality == EHPersonality::MSVC_X86SEH) {
+ // If _except_handler4 is in use, some additional guard checks and
+ // prologue setup are required.
+ bool UseStackGuard = (PersonalityName == "_except_handler4");
+ RegNodeTy = getSEHRegistrationType();
+ RegNode = Builder.CreateAlloca(RegNodeTy);
// SavedESP = llvm.stacksave()
Value *SP = Builder.CreateCall(
Intrinsic::getDeclaration(TheModule, Intrinsic::stacksave), {});
Builder.CreateStore(SP, Builder.CreateStructGEP(RegNodeTy, RegNode, 0));
- // TryLevel = -2
- Builder.CreateStore(Builder.getInt32(-2),
- Builder.CreateStructGEP(RegNodeTy, RegNode, 4));
- // FIXME: XOR the LSDA with __security_cookie.
+ // TryLevel = -2 / -1
+ StateFieldIndex = 4;
+ insertStateNumberStore(RegNode, Builder.GetInsertPoint(),
+ UseStackGuard ? -2 : -1);
// ScopeTable = llvm.x86.seh.lsda(F)
Value *FI8 = Builder.CreateBitCast(F, Int8PtrType);
Value *LSDA = Builder.CreateCall(
Intrinsic::getDeclaration(TheModule, Intrinsic::x86_seh_lsda), FI8);
- Builder.CreateStore(LSDA, Builder.CreateStructGEP(RegNodeTy, RegNode, 1));
- SubRecord = Builder.CreateStructGEP(RegNodeTy, RegNode, 2);
- linkExceptionRegistration(Builder, SubRecord, PersonalityFn);
+ Type *Int32Ty = Type::getInt32Ty(TheModule->getContext());
+ LSDA = Builder.CreatePtrToInt(LSDA, Int32Ty);
+ // If using _except_handler4, xor the address of the table with
+ // __security_cookie.
+ if (UseStackGuard) {
+ Value *Cookie =
+ TheModule->getOrInsertGlobal("__security_cookie", Int32Ty);
+ Value *Val = Builder.CreateLoad(Int32Ty, Cookie);
+ LSDA = Builder.CreateXor(LSDA, Val);
+ }
+ Builder.CreateStore(LSDA, Builder.CreateStructGEP(RegNodeTy, RegNode, 3));
+ Link = Builder.CreateStructGEP(RegNodeTy, RegNode, 2);
+ linkExceptionRegistration(Builder, PersonalityFn);
} else {
llvm_unreachable("unexpected personality function");
}
- // FIXME: Insert an unlink before all returns.
+ // Insert an unlink before all returns.
for (BasicBlock &BB : *F) {
TerminatorInst *T = BB.getTerminator();
if (!isa<ReturnInst>(T))
continue;
Builder.SetInsertPoint(T);
- unlinkExceptionRegistration(Builder, SubRecord);
+ unlinkExceptionRegistration(Builder);
}
}
@@ -342,33 +360,122 @@ Function *WinEHStatePass::generateLSDAInEAXThunk(Function *ParentFunc) {
}
void WinEHStatePass::linkExceptionRegistration(IRBuilder<> &Builder,
- Value *RegNode, Value *Handler) {
- Type *RegNodeTy = getEHRegistrationType();
+ Value *Handler) {
+ Type *LinkTy = getEHLinkRegistrationType();
// Handler = Handler
Handler = Builder.CreateBitCast(Handler, Builder.getInt8PtrTy());
- Builder.CreateStore(Handler, Builder.CreateStructGEP(RegNodeTy, RegNode, 1));
+ Builder.CreateStore(Handler, Builder.CreateStructGEP(LinkTy, Link, 1));
// Next = [fs:00]
Constant *FSZero =
- Constant::getNullValue(RegNodeTy->getPointerTo()->getPointerTo(257));
+ Constant::getNullValue(LinkTy->getPointerTo()->getPointerTo(257));
Value *Next = Builder.CreateLoad(FSZero);
- Builder.CreateStore(Next, Builder.CreateStructGEP(RegNodeTy, RegNode, 0));
- // [fs:00] = RegNode
- Builder.CreateStore(RegNode, FSZero);
+ Builder.CreateStore(Next, Builder.CreateStructGEP(LinkTy, Link, 0));
+ // [fs:00] = Link
+ Builder.CreateStore(Link, FSZero);
}
-void WinEHStatePass::unlinkExceptionRegistration(IRBuilder<> &Builder,
- Value *RegNode) {
- // Clone RegNode into the current BB for better address mode folding.
- if (auto *GEP = dyn_cast<GetElementPtrInst>(RegNode)) {
+void WinEHStatePass::unlinkExceptionRegistration(IRBuilder<> &Builder) {
+ // Clone Link into the current BB for better address mode folding.
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(Link)) {
GEP = cast<GetElementPtrInst>(GEP->clone());
Builder.Insert(GEP);
- RegNode = GEP;
+ Link = GEP;
}
- Type *RegNodeTy = getEHRegistrationType();
- // [fs:00] = RegNode->Next
+ Type *LinkTy = getEHLinkRegistrationType();
+ // [fs:00] = Link->Next
Value *Next =
- Builder.CreateLoad(Builder.CreateStructGEP(RegNodeTy, RegNode, 0));
+ Builder.CreateLoad(Builder.CreateStructGEP(LinkTy, Link, 0));
Constant *FSZero =
- Constant::getNullValue(RegNodeTy->getPointerTo()->getPointerTo(257));
+ Constant::getNullValue(LinkTy->getPointerTo()->getPointerTo(257));
Builder.CreateStore(Next, FSZero);
}
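At runtime, the code emitted by linkExceptionRegistration and unlinkExceptionRegistration amounts to a push and pop on the thread's fs:00 handler chain. In MSVC-intrinsic pseudo-C (illustrative only; EHRegistrationNode is the link type sketched earlier, and <intrin.h> provides the fs-segment accessors on 32-bit MSVC):

static void linkAtRuntime(struct EHRegistrationNode *Link, void *Handler) {
  Link->Handler = Handler;
  // Next = [fs:00]
  Link->Next = (struct EHRegistrationNode *)__readfsdword(0);
  // [fs:00] = Link
  __writefsdword(0, (unsigned long)Link);
}

static void unlinkAtRuntime(struct EHRegistrationNode *Link) {
  // [fs:00] = Link->Next
  __writefsdword(0, (unsigned long)Link->Next);
}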
+
+void WinEHStatePass::addCXXStateStores(Function &F, MachineModuleInfo &MMI) {
+ WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(&F);
+ calculateWinCXXEHStateNumbers(&F, FuncInfo);
+
+ // The base state for the parent is -1.
+ addCXXStateStoresToFunclet(RegNode, FuncInfo, F, -1);
+
+ // Set up RegNodeEscapeIndex
+ int RegNodeEscapeIndex = escapeRegNode(F);
+
+ // Only insert stores in catch handlers.
+ Constant *FI8 =
+ ConstantExpr::getBitCast(&F, Type::getInt8PtrTy(TheModule->getContext()));
+ for (auto P : FuncInfo.HandlerBaseState) {
+ Function *Handler = const_cast<Function *>(P.first);
+ int BaseState = P.second;
+ IRBuilder<> Builder(&Handler->getEntryBlock(),
+ Handler->getEntryBlock().begin());
+ // FIXME: Find and reuse such a call if present.
+ Value *ParentFP = Builder.CreateCall(FrameAddress, {Builder.getInt32(1)});
+ Value *RecoveredRegNode = Builder.CreateCall(
+ FrameRecover, {FI8, ParentFP, Builder.getInt32(RegNodeEscapeIndex)});
+ RecoveredRegNode =
+ Builder.CreateBitCast(RecoveredRegNode, RegNodeTy->getPointerTo(0));
+ addCXXStateStoresToFunclet(RecoveredRegNode, FuncInfo, *Handler, BaseState);
+ }
+}
+
+/// Escape RegNode so that we can access it from child handlers. Find the call
+/// to frameescape, if any, in the entry block and append RegNode to the list
+/// of arguments.
+int WinEHStatePass::escapeRegNode(Function &F) {
+ // Find the call to frameescape and extract its arguments.
+ IntrinsicInst *EscapeCall = nullptr;
+ for (Instruction &I : F.getEntryBlock()) {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
+ if (II && II->getIntrinsicID() == Intrinsic::frameescape) {
+ EscapeCall = II;
+ break;
+ }
+ }
+ SmallVector<Value *, 8> Args;
+ if (EscapeCall) {
+ auto Ops = EscapeCall->arg_operands();
+ Args.append(Ops.begin(), Ops.end());
+ }
+ Args.push_back(RegNode);
+
+ // Replace the call (if it exists) with new one. Otherwise, insert at the end
+ // of the entry block.
+ IRBuilder<> Builder(&F.getEntryBlock(),
+ EscapeCall ? EscapeCall : F.getEntryBlock().end());
+ Builder.CreateCall(FrameEscape, Args);
+ if (EscapeCall)
+ EscapeCall->eraseFromParent();
+ return Args.size() - 1;
+}
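Conceptually, the escape/recover pairing lets a child handler recompute the address of a parent-frame alloca from the parent's frame pointer plus a per-index offset recorded by frameescape. A hand-written model of the effect (names and the offset table are illustrative; the real computation happens when the intrinsics are lowered):

// EscapedOffsets[i] models the frame-pointer-relative offset that
// llvm.frameescape recorded for its i-th argument.
static void *frameRecoverModel(char *ParentFP, const int EscapedOffsets[],
                               int Index) {
  return ParentFP + EscapedOffsets[Index];  // address of the escaped alloca
}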
+
+void WinEHStatePass::addCXXStateStoresToFunclet(Value *ParentRegNode,
+ WinEHFuncInfo &FuncInfo,
+ Function &F, int BaseState) {
+ // Iterate all the instructions and emit state number stores.
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ if (auto *CI = dyn_cast<CallInst>(&I)) {
+ // Possibly throwing call instructions have no actions to take after
+ // an unwind. Ensure they are in the -1 state.
+ if (CI->doesNotThrow())
+ continue;
+ insertStateNumberStore(ParentRegNode, CI, BaseState);
+ } else if (auto *II = dyn_cast<InvokeInst>(&I)) {
+ // Look up the state number of the landingpad this unwinds to.
+ LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst();
+ // FIXME: Why does this assertion fail?
+ //assert(FuncInfo.LandingPadStateMap.count(LPI) && "LP has no state!");
+ int State = FuncInfo.LandingPadStateMap[LPI];
+ insertStateNumberStore(ParentRegNode, II, State);
+ }
+ }
+ }
+}
+
+void WinEHStatePass::insertStateNumberStore(Value *ParentRegNode,
+ Instruction *IP, int State) {
+ IRBuilder<> Builder(IP);
+ Value *StateField =
+ Builder.CreateStructGEP(RegNodeTy, ParentRegNode, StateFieldIndex);
+ Builder.CreateStore(Builder.getInt32(State), StateField);
+}