summaryrefslogtreecommitdiffstats
path: root/lib/Target/X86
diff options
context:
space:
mode:
authordim <dim@FreeBSD.org>2011-02-20 12:57:14 +0000
committerdim <dim@FreeBSD.org>2011-02-20 12:57:14 +0000
commitcbb70ce070d220642b038ea101d9c0f9fbf860d6 (patch)
treed2b61ce94e654cb01a254d2195259db5f9cc3f3c /lib/Target/X86
parent4ace901e87dac5bbbac78ed325e75462e48e386e (diff)
downloadFreeBSD-src-cbb70ce070d220642b038ea101d9c0f9fbf860d6.zip
FreeBSD-src-cbb70ce070d220642b038ea101d9c0f9fbf860d6.tar.gz
Vendor import of llvm trunk r126079:
http://llvm.org/svn/llvm-project/llvm/trunk@126079
Diffstat (limited to 'lib/Target/X86')
-rw-r--r--lib/Target/X86/AsmParser/X86AsmLexer.cpp9
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParser.cpp437
-rw-r--r--lib/Target/X86/CMakeLists.txt14
-rw-r--r--lib/Target/X86/Disassembler/CMakeLists.txt2
-rw-r--r--lib/Target/X86/Disassembler/X86Disassembler.cpp15
-rw-r--r--lib/Target/X86/Disassembler/X86Disassembler.h2
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoder.c31
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoder.h4
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h3
-rw-r--r--lib/Target/X86/InstPrinter/CMakeLists.txt (renamed from lib/Target/X86/AsmPrinter/CMakeLists.txt)0
-rw-r--r--lib/Target/X86/InstPrinter/Makefile (renamed from lib/Target/X86/AsmPrinter/Makefile)0
-rw-r--r--lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp (renamed from lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp)2
-rw-r--r--lib/Target/X86/InstPrinter/X86ATTInstPrinter.h (renamed from lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h)0
-rw-r--r--lib/Target/X86/InstPrinter/X86InstComments.cpp (renamed from lib/Target/X86/AsmPrinter/X86InstComments.cpp)2
-rw-r--r--lib/Target/X86/InstPrinter/X86InstComments.h (renamed from lib/Target/X86/AsmPrinter/X86InstComments.h)0
-rw-r--r--lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp (renamed from lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp)3
-rw-r--r--lib/Target/X86/InstPrinter/X86IntelInstPrinter.h (renamed from lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h)0
-rw-r--r--lib/Target/X86/Makefile2
-rw-r--r--lib/Target/X86/README-SSE.txt50
-rw-r--r--lib/Target/X86/README-X86-64.txt44
-rw-r--r--lib/Target/X86/README.txt335
-rw-r--r--lib/Target/X86/Utils/CMakeLists.txt6
-rw-r--r--lib/Target/X86/Utils/Makefile15
-rw-r--r--lib/Target/X86/Utils/X86ShuffleDecode.cpp (renamed from lib/Target/X86/X86ShuffleDecode.h)53
-rw-r--r--lib/Target/X86/Utils/X86ShuffleDecode.h69
-rw-r--r--lib/Target/X86/X86.h10
-rw-r--r--lib/Target/X86/X86.td28
-rw-r--r--lib/Target/X86/X86AsmBackend.cpp270
-rw-r--r--lib/Target/X86/X86AsmPrinter.cpp97
-rw-r--r--lib/Target/X86/X86AsmPrinter.h2
-rw-r--r--lib/Target/X86/X86CallingConv.td67
-rw-r--r--lib/Target/X86/X86CodeEmitter.cpp21
-rw-r--r--lib/Target/X86/X86ELFWriterInfo.cpp55
-rw-r--r--lib/Target/X86/X86ELFWriterInfo.h19
-rw-r--r--lib/Target/X86/X86FastISel.cpp300
-rw-r--r--lib/Target/X86/X86FixupKinds.h16
-rw-r--r--lib/Target/X86/X86FloatingPoint.cpp129
-rw-r--r--lib/Target/X86/X86FrameLowering.cpp994
-rw-r--r--lib/Target/X86/X86FrameLowering.h65
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp200
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp3146
-rw-r--r--lib/Target/X86/X86ISelLowering.h243
-rw-r--r--lib/Target/X86/X86Instr3DNow.td77
-rw-r--r--lib/Target/X86/X86Instr64bit.td2250
-rw-r--r--lib/Target/X86/X86InstrArithmetic.td1125
-rw-r--r--lib/Target/X86/X86InstrBuilder.h37
-rw-r--r--lib/Target/X86/X86InstrCMovSetCC.td104
-rw-r--r--lib/Target/X86/X86InstrCompiler.td1626
-rw-r--r--lib/Target/X86/X86InstrControl.td294
-rw-r--r--lib/Target/X86/X86InstrExtension.td172
-rw-r--r--lib/Target/X86/X86InstrFPStack.td82
-rw-r--r--lib/Target/X86/X86InstrFormats.td24
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td107
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp448
-rw-r--r--lib/Target/X86/X86InstrInfo.h84
-rw-r--r--lib/Target/X86/X86InstrInfo.td4842
-rw-r--r--lib/Target/X86/X86InstrMMX.td607
-rw-r--r--lib/Target/X86/X86InstrSSE.td571
-rw-r--r--lib/Target/X86/X86InstrShiftRotate.td746
-rw-r--r--lib/Target/X86/X86InstrSystem.td390
-rw-r--r--lib/Target/X86/X86InstrVMX.td54
-rw-r--r--lib/Target/X86/X86JITInfo.cpp16
-rw-r--r--lib/Target/X86/X86MCAsmInfo.cpp15
-rw-r--r--lib/Target/X86/X86MCCodeEmitter.cpp149
-rw-r--r--lib/Target/X86/X86MCInstLower.cpp117
-rw-r--r--lib/Target/X86/X86MCInstLower.h2
-rw-r--r--lib/Target/X86/X86MachObjectWriter.cpp32
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp955
-rw-r--r--lib/Target/X86/X86RegisterInfo.h17
-rw-r--r--lib/Target/X86/X86RegisterInfo.td100
-rw-r--r--lib/Target/X86/X86SelectionDAGInfo.cpp52
-rw-r--r--lib/Target/X86/X86SelectionDAGInfo.h9
-rw-r--r--lib/Target/X86/X86Subtarget.cpp18
-rw-r--r--lib/Target/X86/X86Subtarget.h36
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp55
-rw-r--r--lib/Target/X86/X86TargetMachine.h75
76 files changed, 11446 insertions, 10602 deletions
diff --git a/lib/Target/X86/AsmParser/X86AsmLexer.cpp b/lib/Target/X86/AsmParser/X86AsmLexer.cpp
index 26797ab..ec73087 100644
--- a/lib/Target/X86/AsmParser/X86AsmLexer.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmLexer.cpp
@@ -65,9 +65,10 @@ public:
}
};
-}
+} // end anonymous namespace
-static unsigned MatchRegisterName(StringRef Name);
+#define GET_REGISTER_MATCHER
+#include "X86GenAsmMatcher.inc"
AsmToken X86AsmLexer::LexTokenATT() {
AsmToken lexedToken = lexDefinite();
@@ -162,7 +163,3 @@ extern "C" void LLVMInitializeX86AsmLexer() {
RegisterAsmLexer<X86AsmLexer> X(TheX86_32Target);
RegisterAsmLexer<X86AsmLexer> Y(TheX86_64Target);
}
-
-#define REGISTERS_ONLY
-#include "X86GenAsmMatcher.inc"
-#undef REGISTERS_ONLY
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index f8588d8..1cac07a 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -10,20 +10,21 @@
#include "llvm/Target/TargetAsmParser.h"
#include "X86.h"
#include "X86Subtarget.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Twine.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Target/TargetAsmParser.h"
using namespace llvm;
namespace {
@@ -43,35 +44,32 @@ private:
bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
- bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
-
X86Operand *ParseOperand();
X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
bool ParseDirectiveWord(unsigned Size, SMLoc L);
- bool MatchInstruction(SMLoc IDLoc,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCInst &Inst);
+ bool MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out);
/// @name Auto-generated Matcher Functions
/// {
- unsigned ComputeAvailableFeatures(const X86Subtarget *Subtarget) const;
-
- bool MatchInstructionImpl(
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCInst &Inst);
+#define GET_ASSEMBLER_HEADER
+#include "X86GenAsmMatcher.inc"
/// }
public:
- X86ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
- : TargetAsmParser(T), Parser(_Parser), TM(TM) {
+ X86ATTAsmParser(const Target &T, MCAsmParser &parser, TargetMachine &TM)
+ : TargetAsmParser(T), Parser(parser), TM(TM) {
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(
&TM.getSubtarget<X86Subtarget>()));
}
+ virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
@@ -81,16 +79,16 @@ public:
class X86_32ATTAsmParser : public X86ATTAsmParser {
public:
- X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
- : X86ATTAsmParser(T, _Parser, TM) {
+ X86_32ATTAsmParser(const Target &T, MCAsmParser &Parser, TargetMachine &TM)
+ : X86ATTAsmParser(T, Parser, TM) {
Is64Bit = false;
}
};
class X86_64ATTAsmParser : public X86ATTAsmParser {
public:
- X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
- : X86ATTAsmParser(T, _Parser, TM) {
+ X86_64ATTAsmParser(const Target &T, MCAsmParser &Parser, TargetMachine &TM)
+ : X86ATTAsmParser(T, Parser, TM) {
Is64Bit = true;
}
};
@@ -375,14 +373,18 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
// validation later, so maybe there is no need for this here.
RegNo = MatchRegisterName(Tok.getString());
+ // If the match failed, try the register name as lowercase.
+ if (RegNo == 0)
+ RegNo = MatchRegisterName(LowercaseString(Tok.getString()));
+
// FIXME: This should be done using Requires<In32BitMode> and
// Requires<In64BitMode> so "eiz" usage in 64-bit instructions
// can be also checked.
if (RegNo == X86::RIZ && !Is64Bit)
return Error(Tok.getLoc(), "riz register in 64-bit mode only");
- // Parse %st(1) and "%st" as "%st(0)"
- if (RegNo == 0 && Tok.getString() == "st") {
+ // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
+ if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
RegNo = X86::ST0;
EndLoc = Tok.getLoc();
Parser.Lex(); // Eat 'st'
@@ -617,88 +619,13 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
bool X86ATTAsmParser::
ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // The various flavors of pushf and popf use Requires<In32BitMode> and
- // Requires<In64BitMode>, but the assembler doesn't yet implement that.
- // For now, just do a manual check to prevent silent misencoding.
- if (Is64Bit) {
- if (Name == "popfl")
- return Error(NameLoc, "popfl cannot be encoded in 64-bit mode");
- else if (Name == "pushfl")
- return Error(NameLoc, "pushfl cannot be encoded in 64-bit mode");
- else if (Name == "pusha")
- return Error(NameLoc, "pusha cannot be encoded in 64-bit mode");
- } else {
- if (Name == "popfq")
- return Error(NameLoc, "popfq cannot be encoded in 32-bit mode");
- else if (Name == "pushfq")
- return Error(NameLoc, "pushfq cannot be encoded in 32-bit mode");
- }
-
- // The "Jump if rCX Zero" form jcxz is not allowed in 64-bit mode and
- // the form jrcxz is not allowed in 32-bit mode.
- if (Is64Bit) {
- if (Name == "jcxz")
- return Error(NameLoc, "jcxz cannot be encoded in 64-bit mode");
- } else {
- if (Name == "jrcxz")
- return Error(NameLoc, "jrcxz cannot be encoded in 32-bit mode");
- }
-
- // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to
- // represent alternative syntaxes in the .td file, without requiring
- // instruction duplication.
- StringRef PatchedName = StringSwitch<StringRef>(Name)
- .Case("sal", "shl")
- .Case("salb", "shlb")
- .Case("sall", "shll")
- .Case("salq", "shlq")
- .Case("salw", "shlw")
- .Case("repe", "rep")
- .Case("repz", "rep")
- .Case("repnz", "repne")
- .Case("pushf", Is64Bit ? "pushfq" : "pushfl")
- .Case("popf", Is64Bit ? "popfq" : "popfl")
- .Case("retl", Is64Bit ? "retl" : "ret")
- .Case("retq", Is64Bit ? "ret" : "retq")
- .Case("setz", "sete")
- .Case("setnz", "setne")
- .Case("jz", "je")
- .Case("jnz", "jne")
- .Case("jc", "jb")
- // FIXME: in 32-bit mode jcxz requires an AdSize prefix. In 64-bit mode
- // jecxz requires an AdSize prefix but jecxz does not have a prefix in
- // 32-bit mode.
- .Case("jecxz", "jcxz")
- .Case("jrcxz", "jcxz")
- .Case("jna", "jbe")
- .Case("jnae", "jb")
- .Case("jnb", "jae")
- .Case("jnbe", "ja")
- .Case("jnc", "jae")
- .Case("jng", "jle")
- .Case("jnge", "jl")
- .Case("jnl", "jge")
- .Case("jnle", "jg")
- .Case("jpe", "jp")
- .Case("jpo", "jnp")
- .Case("cmovcl", "cmovbl")
- .Case("cmovcl", "cmovbl")
- .Case("cmovnal", "cmovbel")
- .Case("cmovnbl", "cmovael")
- .Case("cmovnbel", "cmoval")
- .Case("cmovncl", "cmovael")
- .Case("cmovngl", "cmovlel")
- .Case("cmovnl", "cmovgel")
- .Case("cmovngl", "cmovlel")
- .Case("cmovngel", "cmovll")
- .Case("cmovnll", "cmovgel")
- .Case("cmovnlel", "cmovgl")
- .Case("cmovnzl", "cmovnel")
- .Case("cmovzl", "cmovel")
- .Case("fwait", "wait")
- .Case("movzx", "movzb")
- .Default(Name);
+ StringRef PatchedName = Name;
+ // FIXME: Hack to recognize setneb as setne.
+ if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
+ PatchedName != "setb" && PatchedName != "setnb")
+ PatchedName = PatchedName.substr(0, Name.size()-1);
+
// FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
const MCExpr *ExtraImmOp = 0;
if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
@@ -773,12 +700,26 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
PatchedName = "vpclmulqdq";
}
}
+
Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
if (ExtraImmOp)
Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
+
+ // Determine whether this is an instruction prefix.
+ bool isPrefix =
+ Name == "lock" || Name == "rep" ||
+ Name == "repe" || Name == "repz" ||
+ Name == "repne" || Name == "repnz" ||
+ Name == "rex64" || Name == "data16";
+
+
+ // This does the actual operand parsing. Don't parse any more if we have a
+ // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
+ // just want to parse the "lock" as the first instruction and the "incl" as
+ // the next one.
+ if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
// Parse '*' modifier.
if (getLexer().is(AsmToken::Star)) {
@@ -790,8 +731,10 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
// Read the first operand.
if (X86Operand *Op = ParseOperand())
Operands.push_back(Op);
- else
+ else {
+ Parser.EatToEndOfStatement();
return true;
+ }
while (getLexer().is(AsmToken::Comma)) {
Parser.Lex(); // Eat the comma.
@@ -799,23 +742,27 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
// Parse and remember the operand.
if (X86Operand *Op = ParseOperand())
Operands.push_back(Op);
- else
+ else {
+ Parser.EatToEndOfStatement();
return true;
+ }
}
- }
- // FIXME: Hack to handle recognizing s{hr,ar,hl}? $1.
- if ((Name.startswith("shr") || Name.startswith("sar") ||
- Name.startswith("shl")) &&
- Operands.size() == 3 &&
- static_cast<X86Operand*>(Operands[1])->isImm() &&
- isa<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm()) &&
- cast<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm())->getValue() == 1) {
- delete Operands[1];
- Operands.erase(Operands.begin() + 1);
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.EatToEndOfStatement();
+ return Error(Loc, "unexpected token in argument list");
+ }
}
- // FIXME: Hack to handle "out[bwl]? %al, (%dx)" -> "outb %al, %dx".
+ if (getLexer().is(AsmToken::EndOfStatement))
+ Parser.Lex(); // Consume the EndOfStatement
+ else if (isPrefix && getLexer().is(AsmToken::Slash))
+ Parser.Lex(); // Consume the prefix separator Slash
+
+ // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
+ // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
+ // documented form in various unofficial manuals, so a lot of code uses it.
if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
Operands.size() == 3) {
X86Operand &Op = *(X86Operand*)Operands.back();
@@ -829,76 +776,80 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
}
}
- // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as
- // "f{mul*,add*,sub*,div*} $op"
- if ((Name.startswith("fmul") || Name.startswith("fadd") ||
- Name.startswith("fsub") || Name.startswith("fdiv")) &&
- Operands.size() == 3 &&
- static_cast<X86Operand*>(Operands[2])->isReg() &&
- static_cast<X86Operand*>(Operands[2])->getReg() == X86::ST0) {
- delete Operands[2];
- Operands.erase(Operands.begin() + 2);
- }
-
- // FIXME: Hack to handle "imul <imm>, B" which is an alias for "imul <imm>, B,
- // B".
- if (Name.startswith("imul") && Operands.size() == 3 &&
- static_cast<X86Operand*>(Operands[1])->isImm() &&
- static_cast<X86Operand*>(Operands.back())->isReg()) {
- X86Operand *Op = static_cast<X86Operand*>(Operands.back());
- Operands.push_back(X86Operand::CreateReg(Op->getReg(), Op->getStartLoc(),
- Op->getEndLoc()));
- }
-
- return false;
-}
-
-bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
- StringRef IDVal = DirectiveID.getIdentifier();
- if (IDVal == ".word")
- return ParseDirectiveWord(2, DirectiveID.getLoc());
- return true;
-}
-
-/// ParseDirectiveWord
-/// ::= .word [ expression (, expression)* ]
-bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- for (;;) {
- const MCExpr *Value;
- if (getParser().ParseExpression(Value))
- return true;
-
- getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
-
- if (getLexer().is(AsmToken::EndOfStatement))
- break;
-
- // FIXME: Improve diagnostic.
- if (getLexer().isNot(AsmToken::Comma))
- return Error(L, "unexpected token in directive");
- Parser.Lex();
+ // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to
+ // "shift <op>".
+ if ((Name.startswith("shr") || Name.startswith("sar") ||
+ Name.startswith("shl") || Name.startswith("sal") ||
+ Name.startswith("rcl") || Name.startswith("rcr") ||
+ Name.startswith("rol") || Name.startswith("ror")) &&
+ Operands.size() == 3) {
+ X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+ if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
+ cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
+ delete Operands[1];
+ Operands.erase(Operands.begin() + 1);
}
}
- Parser.Lex();
return false;
}
-
-bool
-X86ATTAsmParser::MatchInstruction(SMLoc IDLoc,
- const SmallVectorImpl<MCParsedAsmOperand*>
- &Operands,
- MCInst &Inst) {
+bool X86ATTAsmParser::
+MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out) {
assert(!Operands.empty() && "Unexpect empty operand list!");
-
X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
assert(Op->isToken() && "Leading operand should always be a mnemonic!");
+ // First, handle aliases that expand to multiple instructions.
+ // FIXME: This should be replaced with a real .td file alias mechanism.
+ // Also, MatchInstructionImpl should do actually *do* the EmitInstruction
+ // call.
+ if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
+ Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
+ Op->getToken() == "finit" || Op->getToken() == "fsave" ||
+ Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
+ MCInst Inst;
+ Inst.setOpcode(X86::WAIT);
+ Out.EmitInstruction(Inst);
+
+ const char *Repl =
+ StringSwitch<const char*>(Op->getToken())
+ .Case("finit", "fninit")
+ .Case("fsave", "fnsave")
+ .Case("fstcw", "fnstcw")
+ .Case("fstcww", "fnstcw")
+ .Case("fstenv", "fnstenv")
+ .Case("fstsw", "fnstsw")
+ .Case("fstsww", "fnstsw")
+ .Case("fclex", "fnclex")
+ .Default(0);
+ assert(Repl && "Unknown wait-prefixed instruction");
+ delete Operands[0];
+ Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
+ }
+
+ bool WasOriginallyInvalidOperand = false;
+ unsigned OrigErrorInfo;
+ MCInst Inst;
+
// First, try a direct match.
- if (!MatchInstructionImpl(Operands, Inst))
+ switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo)) {
+ case Match_Success:
+ Out.EmitInstruction(Inst);
return false;
+ case Match_MissingFeature:
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ return true;
+ case Match_ConversionFail:
+ return Error(IDLoc, "unable to convert operands to instruction");
+ case Match_InvalidOperand:
+ WasOriginallyInvalidOperand = true;
+ break;
+ case Match_MnemonicFail:
+ break;
+ }
// FIXME: Ideally, we would only attempt suffix matches for things which are
// valid prefixes, and we could just infer the right unambiguous
@@ -912,15 +863,26 @@ X86ATTAsmParser::MatchInstruction(SMLoc IDLoc,
Tmp += ' ';
Op->setTokenValue(Tmp.str());
+ // If this instruction starts with an 'f', then it is a floating point stack
+ // instruction. These come in up to three forms for 32-bit, 64-bit, and
+ // 80-bit floating point, which use the suffixes s,l,t respectively.
+ //
+ // Otherwise, we assume that this may be an integer instruction, which comes
+ // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
+ const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
+
// Check for the various suffix matches.
- Tmp[Base.size()] = 'b';
- bool MatchB = MatchInstructionImpl(Operands, Inst);
- Tmp[Base.size()] = 'w';
- bool MatchW = MatchInstructionImpl(Operands, Inst);
- Tmp[Base.size()] = 'l';
- bool MatchL = MatchInstructionImpl(Operands, Inst);
- Tmp[Base.size()] = 'q';
- bool MatchQ = MatchInstructionImpl(Operands, Inst);
+ Tmp[Base.size()] = Suffixes[0];
+ unsigned ErrorInfoIgnore;
+ MatchResultTy Match1, Match2, Match3, Match4;
+
+ Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
+ Tmp[Base.size()] = Suffixes[1];
+ Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
+ Tmp[Base.size()] = Suffixes[2];
+ Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
+ Tmp[Base.size()] = Suffixes[3];
+ Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
// Restore the old token.
Op->setTokenValue(Base);
@@ -928,24 +890,25 @@ X86ATTAsmParser::MatchInstruction(SMLoc IDLoc,
// If exactly one matched, then we treat that as a successful match (and the
// instruction will already have been filled in correctly, since the failing
// matches won't have modified it).
- if (MatchB + MatchW + MatchL + MatchQ == 3)
+ unsigned NumSuccessfulMatches =
+ (Match1 == Match_Success) + (Match2 == Match_Success) +
+ (Match3 == Match_Success) + (Match4 == Match_Success);
+ if (NumSuccessfulMatches == 1) {
+ Out.EmitInstruction(Inst);
return false;
+ }
- // Otherwise, the match failed.
+ // Otherwise, the match failed, try to produce a decent error message.
// If we had multiple suffix matches, then identify this as an ambiguous
// match.
- if (MatchB + MatchW + MatchL + MatchQ != 4) {
+ if (NumSuccessfulMatches > 1) {
char MatchChars[4];
unsigned NumMatches = 0;
- if (!MatchB)
- MatchChars[NumMatches++] = 'b';
- if (!MatchW)
- MatchChars[NumMatches++] = 'w';
- if (!MatchL)
- MatchChars[NumMatches++] = 'l';
- if (!MatchQ)
- MatchChars[NumMatches++] = 'q';
+ if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
+ if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
+ if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
+ if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
SmallString<126> Msg;
raw_svector_ostream OS(Msg);
@@ -959,14 +922,90 @@ X86ATTAsmParser::MatchInstruction(SMLoc IDLoc,
}
OS << ")";
Error(IDLoc, OS.str());
- } else {
- // FIXME: We should give nicer diagnostics about the exact failure.
- Error(IDLoc, "unrecognized instruction");
+ return true;
}
+ // Okay, we know that none of the variants matched successfully.
+
+ // If all of the instructions reported an invalid mnemonic, then the original
+ // mnemonic was invalid.
+ if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
+ (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
+ if (!WasOriginallyInvalidOperand) {
+ Error(IDLoc, "invalid instruction mnemonic '" + Base + "'");
+ return true;
+ }
+
+ // Recover location info for the operand if we know which was the problem.
+ SMLoc ErrorLoc = IDLoc;
+ if (OrigErrorInfo != ~0U) {
+ if (OrigErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = ((X86Operand*)Operands[OrigErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ }
+
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+
+ // If one instruction matched with a missing feature, report this as a
+ // missing feature.
+ if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
+ (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ return true;
+ }
+
+ // If one instruction matched with an invalid operand, report this as an
+ // operand failure.
+ if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
+ (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
+ Error(IDLoc, "invalid operand for instruction");
+ return true;
+ }
+
+ // If all of these were an outright failure, report it in a useless way.
+ // FIXME: We should give nicer diagnostics about the exact failure.
+ Error(IDLoc, "unknown use of instruction mnemonic without a size suffix");
+ return true;
+}
+
+
+bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if (IDVal == ".word")
+ return ParseDirectiveWord(2, DirectiveID.getLoc());
return true;
}
+/// ParseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
+bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().ParseExpression(Value))
+ return true;
+
+ getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+ }
+ }
+
+ Parser.Lex();
+ return false;
+}
+
+
+
extern "C" void LLVMInitializeX86AsmLexer();
@@ -977,4 +1016,6 @@ extern "C" void LLVMInitializeX86AsmParser() {
LLVMInitializeX86AsmLexer();
}
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
#include "X86GenAsmMatcher.inc"
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index e9399f5..b5fa94f 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -24,10 +24,12 @@ set(sources
X86ELFWriterInfo.cpp
X86FastISel.cpp
X86FloatingPoint.cpp
+ X86FrameLowering.cpp
X86ISelDAGToDAG.cpp
X86ISelLowering.cpp
X86InstrInfo.cpp
X86JITInfo.cpp
+ X86MachObjectWriter.cpp
X86MCAsmInfo.cpp
X86MCCodeEmitter.cpp
X86MCInstLower.cpp
@@ -39,14 +41,24 @@ set(sources
)
if( CMAKE_CL_64 )
+ # A workaround for a bug in cmake 2.8.3. See PR 8885.
+ if( CMAKE_VERSION STREQUAL "2.8.3" )
+ include(CMakeDetermineCompilerId)
+ endif()
+ # end of workaround.
enable_language(ASM_MASM)
ADD_CUSTOM_COMMAND(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj
+ MAIN_DEPENDENCY X86CompilationCallback_Win64.asm
COMMAND ${CMAKE_ASM_MASM_COMPILER} /Fo ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj /c ${CMAKE_CURRENT_SOURCE_DIR}/X86CompilationCallback_Win64.asm
- DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/X86CompilationCallback_Win64.asm
)
set(sources ${sources} ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj)
endif()
add_llvm_target(X86CodeGen ${sources})
+add_subdirectory(AsmParser)
+add_subdirectory(Disassembler)
+add_subdirectory(InstPrinter)
+add_subdirectory(TargetInfo)
+add_subdirectory(Utils)
diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt b/lib/Target/X86/Disassembler/CMakeLists.txt
index 97589c0..972a0d9 100644
--- a/lib/Target/X86/Disassembler/CMakeLists.txt
+++ b/lib/Target/X86/Disassembler/CMakeLists.txt
@@ -5,7 +5,7 @@ add_llvm_library(LLVMX86Disassembler
X86DisassemblerDecoder.c
)
# workaround for hanging compilation on MSVC9 and 10
-if( MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )
+if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )
set_property(
SOURCE X86Disassembler.cpp
PROPERTY COMPILE_FLAGS "/Od"
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 09f1584..691e2d7 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -157,9 +157,8 @@ static void translateRegister(MCInst &mcInst, Reg reg) {
/// @param immediate - The immediate value to append.
/// @param operand - The operand, as stored in the descriptor table.
/// @param insn - The internal instruction.
-static void translateImmediate(MCInst &mcInst,
- uint64_t immediate,
- OperandSpecifier &operand,
+static void translateImmediate(MCInst &mcInst, uint64_t immediate,
+ const OperandSpecifier &operand,
InternalInstruction &insn) {
// Sign-extend the immediate if necessary.
@@ -392,9 +391,8 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) {
/// @param insn - The instruction to extract Mod, R/M, and SIB fields
/// from.
/// @return - 0 on success; nonzero otherwise
-static bool translateRM(MCInst &mcInst,
- OperandSpecifier &operand,
- InternalInstruction &insn) {
+static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
+ InternalInstruction &insn) {
switch (operand.type) {
default:
debug("Unexpected type for a R/M operand");
@@ -461,9 +459,8 @@ static bool translateFPRegister(MCInst &mcInst,
/// @param operand - The operand, as stored in the descriptor table.
/// @param insn - The internal instruction.
/// @return - false on success; true otherwise.
-static bool translateOperand(MCInst &mcInst,
- OperandSpecifier &operand,
- InternalInstruction &insn) {
+static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
+ InternalInstruction &insn) {
switch (operand.encoding) {
default:
debug("Unhandled operand encoding during translation");
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h
index 9c54262..550cf9d 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.h
+++ b/lib/Target/X86/Disassembler/X86Disassembler.h
@@ -78,7 +78,7 @@
const char* name;
#define INSTRUCTION_IDS \
- InstrUID* instructionIDs;
+ const InstrUID *instructionIDs;
#include "X86DisassemblerDecoderCommon.h"
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index 6c3ff6b..b6546fc 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -27,12 +27,6 @@
typedef int8_t bool;
-#ifdef __GNUC__
-#define NORETURN __attribute__((noreturn))
-#else
-#define NORETURN
-#endif
-
#ifndef NDEBUG
#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
#else
@@ -103,7 +97,7 @@ static InstrUID decode(OpcodeType type,
InstructionContext insnContext,
uint8_t opcode,
uint8_t modRM) {
- struct ModRMDecision* dec;
+ const struct ModRMDecision* dec;
switch (type) {
default:
@@ -147,7 +141,7 @@ static InstrUID decode(OpcodeType type,
* decode(); specifierForUID will not check bounds.
* @return - A pointer to the specification for that instruction.
*/
-static struct InstructionSpecifier* specifierForUID(InstrUID uid) {
+static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
return &INSTRUCTIONS_SYM[uid];
}
@@ -296,7 +290,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
BOOL isPrefix = TRUE;
BOOL prefixGroups[4] = { FALSE };
uint64_t prefixLocation;
- uint8_t byte;
+ uint8_t byte = 0;
BOOL hasAdSize = FALSE;
BOOL hasOpSize = FALSE;
@@ -394,6 +388,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
}
} else {
unconsumeByte(insn);
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
if (insn->mode == MODE_16BIT) {
@@ -405,7 +400,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
insn->registerSize = (hasOpSize ? 2 : 4);
insn->addressSize = (hasAdSize ? 2 : 4);
insn->displacementSize = (hasAdSize ? 2 : 4);
- insn->immediateSize = (hasAdSize ? 2 : 4);
+ insn->immediateSize = (hasOpSize ? 2 : 4);
} else if (insn->mode == MODE_64BIT) {
if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
insn->registerSize = 8;
@@ -517,7 +512,8 @@ static int getIDWithAttrMask(uint16_t* instructionID,
insn->opcode);
if (hasModRMExtension) {
- readModRM(insn);
+ if (readModRM(insn))
+ return -1;
*instructionID = decode(insn->opcodeType,
instructionClass,
@@ -632,9 +628,9 @@ static int getID(struct InternalInstruction* insn) {
* instead of F2 changes a 32 to a 64, we adopt the new encoding.
*/
- struct InstructionSpecifier* spec;
+ const struct InstructionSpecifier *spec;
uint16_t instructionIDWithREXw;
- struct InstructionSpecifier* specWithREXw;
+ const struct InstructionSpecifier *specWithREXw;
spec = specifierForUID(instructionID);
@@ -672,9 +668,9 @@ static int getID(struct InternalInstruction* insn) {
* in the right place we check if there's a 16-bit operation.
*/
- struct InstructionSpecifier* spec;
+ const struct InstructionSpecifier *spec;
uint16_t instructionIDWithOpsize;
- struct InstructionSpecifier* specWithOpsize;
+ const struct InstructionSpecifier *specWithOpsize;
spec = specifierForUID(instructionID);
@@ -866,7 +862,8 @@ static int readModRM(struct InternalInstruction* insn) {
if (insn->consumedModRM)
return 0;
- consumeByte(insn, &insn->modRM);
+ if (consumeByte(insn, &insn->modRM))
+ return -1;
insn->consumedModRM = TRUE;
mod = modFromModRM(insn->modRM);
@@ -1067,7 +1064,7 @@ GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
* invalid for its class.
*/
static int fixupReg(struct InternalInstruction *insn,
- struct OperandSpecifier *op) {
+ const struct OperandSpecifier *op) {
uint8_t valid;
dbgprintf(insn, "fixupReg()");
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index 28ba86b..4f4fbcd 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -24,7 +24,7 @@ extern "C" {
const char* name;
#define INSTRUCTION_IDS \
- InstrUID* instructionIDs;
+ const InstrUID *instructionIDs;
#include "X86DisassemblerDecoderCommon.h"
@@ -423,7 +423,7 @@ struct InternalInstruction {
/* The instruction ID, extracted from the decode table */
uint16_t instructionID;
/* The specifier for the instruction, from the instruction info table */
- struct InstructionSpecifier* spec;
+ const struct InstructionSpecifier *spec;
/* state for additional bytes, consumed during operand decode. Pattern:
consumed___ indicates that the byte was already consumed and does not
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index 0f33f52..1425b86 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -22,7 +22,7 @@
#ifndef X86DISASSEMBLERDECODERCOMMON_H
#define X86DISASSEMBLERDECODERCOMMON_H
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
#define INSTRUCTIONS_SYM x86DisassemblerInstrSpecifiers
#define CONTEXTS_SYM x86DisassemblerContexts
@@ -248,6 +248,7 @@ struct ContextDecision {
ENUM_ENTRY(TYPE_M64, "8-byte") \
ENUM_ENTRY(TYPE_LEA, "Effective address") \
ENUM_ENTRY(TYPE_M128, "16-byte (SSE/SSE2)") \
+ ENUM_ENTRY(TYPE_M256, "256-byte (AVX)") \
ENUM_ENTRY(TYPE_M1616, "2+2-byte segment+offset address") \
ENUM_ENTRY(TYPE_M1632, "2+4-byte") \
ENUM_ENTRY(TYPE_M1664, "2+8-byte") \
diff --git a/lib/Target/X86/AsmPrinter/CMakeLists.txt b/lib/Target/X86/InstPrinter/CMakeLists.txt
index 033973e..033973e 100644
--- a/lib/Target/X86/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/X86/InstPrinter/CMakeLists.txt
diff --git a/lib/Target/X86/AsmPrinter/Makefile b/lib/Target/X86/InstPrinter/Makefile
index c82aa33..c82aa33 100644
--- a/lib/Target/X86/AsmPrinter/Makefile
+++ b/lib/Target/X86/InstPrinter/Makefile
diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index 554b96c..d6950f4 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -25,10 +25,8 @@
using namespace llvm;
// Include the auto-generated portion of the assembly writer.
-#define MachineInstr MCInst
#define GET_INSTRUCTION_NAME
#include "X86GenAsmWriter.inc"
-#undef MachineInstr
void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) {
printInstruction(MI, OS);
diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index eb98664..eb98664 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
diff --git a/lib/Target/X86/AsmPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index da9d5a3..12144e3 100644
--- a/lib/Target/X86/AsmPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -16,7 +16,7 @@
#include "X86GenInstrNames.inc"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/raw_ostream.h"
-#include "../X86ShuffleDecode.h"
+#include "../Utils/X86ShuffleDecode.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/AsmPrinter/X86InstComments.h b/lib/Target/X86/InstPrinter/X86InstComments.h
index 6b86db4..6b86db4 100644
--- a/lib/Target/X86/AsmPrinter/X86InstComments.h
+++ b/lib/Target/X86/InstPrinter/X86InstComments.h
diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index 5625b0e..0484529 100644
--- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -21,13 +21,12 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "X86GenInstrNames.inc"
+#include <cctype>
using namespace llvm;
// Include the auto-generated portion of the assembly writer.
-#define MachineInstr MCInst
#define GET_INSTRUCTION_NAME
#include "X86GenAsmWriter1.inc"
-#undef MachineInstr
void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) {
printInstruction(MI, OS);
diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index 6f12032..6f12032 100644
--- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile
index f4ff894..12fb090 100644
--- a/lib/Target/X86/Makefile
+++ b/lib/Target/X86/Makefile
@@ -20,6 +20,6 @@ BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \
X86GenCallingConv.inc X86GenSubtarget.inc \
X86GenEDInfo.inc
-DIRS = AsmPrinter AsmParser Disassembler TargetInfo
+DIRS = InstPrinter AsmParser Disassembler TargetInfo Utils
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index f96b22f..f16ec02 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -20,7 +20,28 @@ __m128i shift_right(__m128i value, unsigned long offset) {
//===---------------------------------------------------------------------===//
SSE has instructions for doing operations on complex numbers, we should pattern
-match them. Compiling this:
+match them. For example, this should turn into a horizontal add:
+
+typedef float __attribute__((vector_size(16))) v4f32;
+float f32(v4f32 A) {
+ return A[0]+A[1]+A[2]+A[3];
+}
+
+Instead we get this:
+
+_f32: ## @f32
+ pshufd $1, %xmm0, %xmm1 ## xmm1 = xmm0[1,0,0,0]
+ addss %xmm0, %xmm1
+ pshufd $3, %xmm0, %xmm2 ## xmm2 = xmm0[3,0,0,0]
+ movhlps %xmm0, %xmm0 ## xmm0 = xmm0[1,1]
+ movaps %xmm0, %xmm3
+ addss %xmm1, %xmm3
+ movdqa %xmm2, %xmm0
+ addss %xmm3, %xmm0
+ ret
+
+Also, there are cases where some simple local SLP would improve codegen a bit.
+compiling this:
_Complex float f32(_Complex float A, _Complex float B) {
return A+B;
@@ -28,19 +49,17 @@ _Complex float f32(_Complex float A, _Complex float B) {
into:
-_f32:
+_f32: ## @f32
movdqa %xmm0, %xmm2
addss %xmm1, %xmm2
- pshufd $16, %xmm2, %xmm2
- pshufd $1, %xmm1, %xmm1
- pshufd $1, %xmm0, %xmm0
- addss %xmm1, %xmm0
- pshufd $16, %xmm0, %xmm1
- movdqa %xmm2, %xmm0
- unpcklps %xmm1, %xmm0
+ pshufd $1, %xmm1, %xmm1 ## xmm1 = xmm1[1,0,0,0]
+ pshufd $1, %xmm0, %xmm3 ## xmm3 = xmm0[1,0,0,0]
+ addss %xmm1, %xmm3
+ movaps %xmm2, %xmm0
+ unpcklps %xmm3, %xmm0 ## xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
ret
-seems silly.
+seems silly when it could just be one addps.
//===---------------------------------------------------------------------===//
@@ -904,4 +923,15 @@ The insertps's of $0 are pointless complex copies.
//===---------------------------------------------------------------------===//
+If SSE4.1 is available we should inline rounding functions instead of emitting
+a libcall.
+
+floor: roundsd $0x01, %xmm, %xmm
+ceil: roundsd $0x02, %xmm, %xmm
+and likewise for the single precision versions.
+
+Currently, SelectionDAGBuilder doesn't turn calls to these functions into the
+corresponding nodes and some targets (including X86) aren't ready for them.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt
index 78c4dc0..e21d69a 100644
--- a/lib/Target/X86/README-X86-64.txt
+++ b/lib/Target/X86/README-X86-64.txt
@@ -41,50 +41,6 @@ saved a few instructions.
//===---------------------------------------------------------------------===//
-Poor codegen:
-
-int X[2];
-int b;
-void test(void) {
- memset(X, b, 2*sizeof(X[0]));
-}
-
-llc:
- movq _b@GOTPCREL(%rip), %rax
- movzbq (%rax), %rax
- movq %rax, %rcx
- shlq $8, %rcx
- orq %rax, %rcx
- movq %rcx, %rax
- shlq $16, %rax
- orq %rcx, %rax
- movq %rax, %rcx
- shlq $32, %rcx
- movq _X@GOTPCREL(%rip), %rdx
- orq %rax, %rcx
- movq %rcx, (%rdx)
- ret
-
-gcc:
- movq _b@GOTPCREL(%rip), %rax
- movabsq $72340172838076673, %rdx
- movzbq (%rax), %rax
- imulq %rdx, %rax
- movq _X@GOTPCREL(%rip), %rdx
- movq %rax, (%rdx)
- ret
-
-And the codegen is even worse for the following
-(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33103):
- void fill1(char *s, int a)
- {
- __builtin_memset(s, a, 15);
- }
-
-For this version, we duplicate the computation of the constant to store.
-
-//===---------------------------------------------------------------------===//
-
It's not possible to reference AH, BH, CH, and DH registers in an instruction
requiring REX prefix. However, divb and mulb both produce results in AH. If isel
emits a CopyFromReg which gets turned into a movb and that can be allocated a
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index a305ae6..c10e170 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -67,19 +67,6 @@ cmovs, we should expand to a conditional branch like GCC produces.
//===---------------------------------------------------------------------===//
-Compile this:
-_Bool f(_Bool a) { return a!=1; }
-
-into:
- movzbl %dil, %eax
- xorl $1, %eax
- ret
-
-(Although note that this isn't a legal way to express the code that llvm-gcc
-currently generates for that function.)
-
-//===---------------------------------------------------------------------===//
-
Some isel ideas:
1. Dynamic programming based approach when compile time if not an
@@ -109,6 +96,37 @@ It appears icc use push for parameter passing. Need to investigate.
//===---------------------------------------------------------------------===//
+This:
+
+void foo(void);
+void bar(int x, int *P) {
+ x >>= 2;
+ if (x)
+ foo();
+ *P = x;
+}
+
+compiles into:
+
+ movq %rsi, %rbx
+ movl %edi, %r14d
+ sarl $2, %r14d
+ testl %r14d, %r14d
+ je LBB0_2
+
+Instead of doing an explicit test, we can use the flags off the sar. This
+occurs in a bigger testcase like this, which is pretty common:
+
+#include <vector>
+int test1(std::vector<int> &X) {
+ int Sum = 0;
+ for (long i = 0, e = X.size(); i != e; ++i)
+ X[i] = 0;
+ return Sum;
+}
+
+//===---------------------------------------------------------------------===//
+
Only use inc/neg/not instructions on processors where they are faster than
add/sub/xor. They are slower on the P4 due to only updating some processor
flags.
@@ -394,72 +412,8 @@ boundary to improve performance.
//===---------------------------------------------------------------------===//
-Codegen:
-
-int f(int a, int b) {
- if (a == 4 || a == 6)
- b++;
- return b;
-}
-
-
-as:
-
-or eax, 2
-cmp eax, 6
-jz label
-
-//===---------------------------------------------------------------------===//
-
GCC's ix86_expand_int_movcc function (in i386.c) has a ton of interesting
-simplifications for integer "x cmp y ? a : b". For example, instead of:
-
-int G;
-void f(int X, int Y) {
- G = X < 0 ? 14 : 13;
-}
-
-compiling to:
-
-_f:
- movl $14, %eax
- movl $13, %ecx
- movl 4(%esp), %edx
- testl %edx, %edx
- cmovl %eax, %ecx
- movl %ecx, _G
- ret
-
-it could be:
-_f:
- movl 4(%esp), %eax
- sarl $31, %eax
- notl %eax
- addl $14, %eax
- movl %eax, _G
- ret
-
-etc.
-
-Another is:
-int usesbb(unsigned int a, unsigned int b) {
- return (a < b ? -1 : 0);
-}
-to:
-_usesbb:
- movl 8(%esp), %eax
- cmpl %eax, 4(%esp)
- sbbl %eax, %eax
- ret
-
-instead of:
-_usesbb:
- xorl %eax, %eax
- movl 8(%esp), %ecx
- cmpl %ecx, 4(%esp)
- movl $4294967295, %ecx
- cmovb %ecx, %eax
- ret
+simplifications for integer "x cmp y ? a : b".
//===---------------------------------------------------------------------===//
@@ -756,23 +710,17 @@ This:
{ return !full_add(a, b).second; }
Should compile to:
+ addl %esi, %edi
+ setae %al
+ movzbl %al, %eax
+ ret
-
- _Z11no_overflowjj:
- addl %edi, %esi
- setae %al
- ret
-
-FIXME: That code looks wrong; bool return is normally defined as zext.
-
-on x86-64, not:
-
-__Z11no_overflowjj:
- addl %edi, %esi
- cmpl %edi, %esi
- setae %al
- movzbl %al, %eax
- ret
+on x86-64, instead of the rather stupid-looking:
+ addl %esi, %edi
+ setb %al
+ xorb $1, %al
+ movzbl %al, %eax
+ ret
//===---------------------------------------------------------------------===//
@@ -1040,10 +988,10 @@ _foo:
instead of:
_foo:
- movl $255, %eax
- orl 4(%esp), %eax
- andl $65535, %eax
- ret
+ movl $65280, %eax
+ andl 4(%esp), %eax
+ orl $255, %eax
+ ret
//===---------------------------------------------------------------------===//
@@ -1165,58 +1113,6 @@ abs:
//===---------------------------------------------------------------------===//
-Consider:
-int test(unsigned long a, unsigned long b) { return -(a < b); }
-
-We currently compile this to:
-
-define i32 @test(i32 %a, i32 %b) nounwind {
- %tmp3 = icmp ult i32 %a, %b ; <i1> [#uses=1]
- %tmp34 = zext i1 %tmp3 to i32 ; <i32> [#uses=1]
- %tmp5 = sub i32 0, %tmp34 ; <i32> [#uses=1]
- ret i32 %tmp5
-}
-
-and
-
-_test:
- movl 8(%esp), %eax
- cmpl %eax, 4(%esp)
- setb %al
- movzbl %al, %eax
- negl %eax
- ret
-
-Several deficiencies here. First, we should instcombine zext+neg into sext:
-
-define i32 @test2(i32 %a, i32 %b) nounwind {
- %tmp3 = icmp ult i32 %a, %b ; <i1> [#uses=1]
- %tmp34 = sext i1 %tmp3 to i32 ; <i32> [#uses=1]
- ret i32 %tmp34
-}
-
-However, before we can do that, we have to fix the bad codegen that we get for
-sext from bool:
-
-_test2:
- movl 8(%esp), %eax
- cmpl %eax, 4(%esp)
- setb %al
- movzbl %al, %eax
- shll $31, %eax
- sarl $31, %eax
- ret
-
-This code should be at least as good as the code above. Once this is fixed, we
-can optimize this specific case even more to:
-
- movl 8(%esp), %eax
- xorl %ecx, %ecx
- cmpl %eax, 4(%esp)
- sbbl %ecx, %ecx
-
-//===---------------------------------------------------------------------===//
-
Take the following code (from
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16541):
@@ -1605,6 +1501,8 @@ loop, the value comes into the loop as two values, and
RegsForValue::getCopyFromRegs doesn't know how to put an AssertSext on the
constructed BUILD_PAIR which represents the cast value.
+This can be handled by making CodeGenPrepare sink the cast.
+
//===---------------------------------------------------------------------===//
Test instructions can be eliminated by using EFLAGS values from arithmetic
@@ -1736,46 +1634,6 @@ Ideal output:
//===---------------------------------------------------------------------===//
-Testcase:
-int x(int a) { return (a & 0x80) ? 0x100 : 0; }
-int y(int a) { return (a & 0x80) *2; }
-
-Current:
- testl $128, 4(%esp)
- setne %al
- movzbl %al, %eax
- shll $8, %eax
- ret
-
-Better:
- movl 4(%esp), %eax
- addl %eax, %eax
- andl $256, %eax
- ret
-
-This is another general instcombine transformation that is profitable on all
-targets. In LLVM IR, these functions look like this:
-
-define i32 @x(i32 %a) nounwind readnone {
-entry:
- %0 = and i32 %a, 128
- %1 = icmp eq i32 %0, 0
- %iftmp.0.0 = select i1 %1, i32 0, i32 256
- ret i32 %iftmp.0.0
-}
-
-define i32 @y(i32 %a) nounwind readnone {
-entry:
- %0 = shl i32 %a, 1
- %1 = and i32 %0, 256
- ret i32 %1
-}
-
-Replacing an icmp+select with a shift should always be considered profitable in
-instcombine.
-
-//===---------------------------------------------------------------------===//
-
Re-implement atomic builtins __sync_add_and_fetch() and __sync_sub_and_fetch
properly.
@@ -1960,3 +1818,100 @@ load, making it non-trivial to determine if there's anything between
the load and the store which would prohibit narrowing.
//===---------------------------------------------------------------------===//
+
+This code:
+void foo(unsigned x) {
+ if (x == 0) bar();
+ else if (x == 1) qux();
+}
+
+currently compiles into:
+_foo:
+ movl 4(%esp), %eax
+ cmpl $1, %eax
+ je LBB0_3
+ testl %eax, %eax
+ jne LBB0_4
+
+the testl could be removed:
+_foo:
+ movl 4(%esp), %eax
+ cmpl $1, %eax
+ je LBB0_3
+ jb LBB0_4
+
+0 is the only unsigned number < 1.
+
+//===---------------------------------------------------------------------===//
+
+This code:
+
+%0 = type { i32, i1 }
+
+define i32 @add32carry(i32 %sum, i32 %x) nounwind readnone ssp {
+entry:
+ %uadd = tail call %0 @llvm.uadd.with.overflow.i32(i32 %sum, i32 %x)
+ %cmp = extractvalue %0 %uadd, 1
+ %inc = zext i1 %cmp to i32
+ %add = add i32 %x, %sum
+ %z.0 = add i32 %add, %inc
+ ret i32 %z.0
+}
+
+declare %0 @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
+
+compiles to:
+
+_add32carry: ## @add32carry
+ addl %esi, %edi
+ sbbl %ecx, %ecx
+ movl %edi, %eax
+ subl %ecx, %eax
+ ret
+
+But it could be:
+
+_add32carry:
+ leal (%rsi,%rdi), %eax
+ cmpl %esi, %eax
+ adcl $0, %eax
+ ret
+
+//===---------------------------------------------------------------------===//
+
+This:
+char t(char c) {
+ return c/3;
+}
+
+Compiles to: $clang t.c -S -o - -O3 -mkernel -fomit-frame-pointer
+
+_t: ## @t
+ movslq %edi, %rax
+ imulq $-1431655765, %rax, %rcx ## imm = 0xFFFFFFFFAAAAAAAB
+ shrq $32, %rcx
+ addl %ecx, %eax
+ movl %eax, %ecx
+ shrl $31, %ecx
+ shrl %eax
+ addl %ecx, %eax
+ movsbl %al, %eax
+ ret
+
+GCC gets:
+
+_t:
+ movl $86, %eax
+ imulb %dil
+ shrw $8, %ax
+ sarb $7, %dil
+ subb %dil, %al
+ movsbl %al,%eax
+ ret
+
+which is nicer. This also happens for int, not just char.
+
+//===---------------------------------------------------------------------===//
+
+
+
diff --git a/lib/Target/X86/Utils/CMakeLists.txt b/lib/Target/X86/Utils/CMakeLists.txt
new file mode 100644
index 0000000..3ad5f99
--- /dev/null
+++ b/lib/Target/X86/Utils/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMX86Utils
+ X86ShuffleDecode.cpp
+ )
+add_dependencies(LLVMX86Utils X86CodeGenTable_gen)
diff --git a/lib/Target/X86/Utils/Makefile b/lib/Target/X86/Utils/Makefile
new file mode 100644
index 0000000..1df6f0f
--- /dev/null
+++ b/lib/Target/X86/Utils/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/X86/Utils/Makefile -----------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMX86Utils
+
+# Hack: we need to include 'main' x86 target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index df04052..1287977 100644
--- a/lib/Target/X86/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -12,21 +12,14 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86_SHUFFLE_DECODE_H
-#define X86_SHUFFLE_DECODE_H
-
-#include "llvm/ADT/SmallVector.h"
-using namespace llvm;
+#include "X86ShuffleDecode.h"
//===----------------------------------------------------------------------===//
// Vector Mask Decoding
//===----------------------------------------------------------------------===//
-enum {
- SM_SentinelZero = ~0U
-};
+namespace llvm {
-static inline
void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) {
// Defaults the copying the dest value.
ShuffleMask.push_back(0);
@@ -51,8 +44,8 @@ void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) {
}
// <3,1> or <6,7,2,3>
-static void DecodeMOVHLPSMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeMOVHLPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
for (unsigned i = NElts/2; i != NElts; ++i)
ShuffleMask.push_back(NElts+i);
@@ -61,8 +54,8 @@ static void DecodeMOVHLPSMask(unsigned NElts,
}
// <0,2> or <0,1,4,5>
-static void DecodeMOVLHPSMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeMOVLHPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
for (unsigned i = 0; i != NElts/2; ++i)
ShuffleMask.push_back(i);
@@ -70,16 +63,16 @@ static void DecodeMOVLHPSMask(unsigned NElts,
ShuffleMask.push_back(NElts+i);
}
-static void DecodePSHUFMask(unsigned NElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodePSHUFMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
for (unsigned i = 0; i != NElts; ++i) {
ShuffleMask.push_back(Imm % NElts);
Imm /= NElts;
}
}
-static void DecodePSHUFHWMask(unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodePSHUFHWMask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
ShuffleMask.push_back(0);
ShuffleMask.push_back(1);
ShuffleMask.push_back(2);
@@ -90,8 +83,8 @@ static void DecodePSHUFHWMask(unsigned Imm,
}
}
-static void DecodePSHUFLWMask(unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodePSHUFLWMask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
for (unsigned i = 0; i != 4; ++i) {
ShuffleMask.push_back((Imm & 3));
Imm >>= 2;
@@ -102,24 +95,24 @@ static void DecodePSHUFLWMask(unsigned Imm,
ShuffleMask.push_back(7);
}
-static void DecodePUNPCKLMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodePUNPCKLMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
for (unsigned i = 0; i != NElts/2; ++i) {
ShuffleMask.push_back(i);
ShuffleMask.push_back(i+NElts);
}
}
-static void DecodePUNPCKHMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodePUNPCKHMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
for (unsigned i = 0; i != NElts/2; ++i) {
ShuffleMask.push_back(i+NElts/2);
ShuffleMask.push_back(i+NElts+NElts/2);
}
}
-static void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
// Part that reads from dest.
for (unsigned i = 0; i != NElts/2; ++i) {
ShuffleMask.push_back(Imm % NElts);
@@ -132,8 +125,8 @@ static void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
}
}
-static void DecodeUNPCKHPMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeUNPCKHPMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
for (unsigned i = 0; i != NElts/2; ++i) {
ShuffleMask.push_back(i+NElts/2); // Reads from dest
ShuffleMask.push_back(i+NElts+NElts/2); // Reads from src
@@ -144,12 +137,12 @@ static void DecodeUNPCKHPMask(unsigned NElts,
/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
/// etc. NElts indicates the number of elements in the vector allowing it to
/// handle different datatypes and vector widths.
-static void DecodeUNPCKLPMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeUNPCKLPMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
for (unsigned i = 0; i != NElts/2; ++i) {
ShuffleMask.push_back(i); // Reads from dest
ShuffleMask.push_back(i+NElts); // Reads from src
}
}
-#endif
+} // llvm namespace
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
new file mode 100644
index 0000000..50d9ccb
--- /dev/null
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -0,0 +1,69 @@
+//===-- X86ShuffleDecode.h - X86 shuffle decode logic -----------*-C++-*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Define several functions to decode x86 specific shuffle semantics into a
+// generic vector mask.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_SHUFFLE_DECODE_H
+#define X86_SHUFFLE_DECODE_H
+
+#include "llvm/ADT/SmallVector.h"
+
+//===----------------------------------------------------------------------===//
+// Vector Mask Decoding
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+enum {
+ SM_SentinelZero = ~0U
+};
+
+void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask);
+
+// <3,1> or <6,7,2,3>
+void DecodeMOVHLPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+// <0,2> or <0,1,4,5>
+void DecodeMOVLHPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePSHUFMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePSHUFHWMask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePSHUFLWMask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKHMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeUNPCKHPMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+
+/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
+/// etc. NElts indicates the number of elements in the vector allowing it to
+/// handle different datatypes and vector widths.
+void DecodeUNPCKLPMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+} // llvm namespace
+
+#endif
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 27e8850..0ca4366 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -15,6 +15,7 @@
#ifndef TARGET_X86_H
#define TARGET_X86_H
+#include "llvm/Support/DataTypes.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -23,11 +24,13 @@ class FunctionPass;
class JITCodeEmitter;
class MCCodeEmitter;
class MCContext;
+class MCObjectWriter;
class MachineCodeEmitter;
class Target;
class TargetAsmBackend;
class X86TargetMachine;
class formatted_raw_ostream;
+class raw_ostream;
/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
@@ -74,6 +77,13 @@ FunctionPass *createEmitX86CodeToMemory();
///
FunctionPass *createX86MaxStackAlignmentHeuristicPass();
+
+/// createX86MachObjectWriter - Construct an X86 Mach-O object writer.
+MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype);
+
extern Target TheX86_32Target, TheX86_64Target;
} // End llvm namespace
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index a19f1ac..efb6c8c 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -23,6 +23,9 @@ include "llvm/Target/Target.td"
def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
"Enable conditional move instructions">;
+def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
+ "Support POPCNT instruction">;
+
def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",
"Enable MMX instructions">;
@@ -45,7 +48,7 @@ def FeatureSSE41 : SubtargetFeature<"sse41", "X86SSELevel", "SSE41",
[FeatureSSSE3]>;
def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42",
"Enable SSE 4.2 instructions",
- [FeatureSSE41]>;
+ [FeatureSSE41, FeaturePOPCNT]>;
def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
"Enable 3DNow! instructions">;
def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
@@ -63,7 +66,8 @@ def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem",
"IsUAMemFast", "true",
"Fast unaligned memory access">;
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
- "Support SSE 4a instructions">;
+ "Support SSE 4a instructions",
+ [FeaturePOPCNT]>;
def FeatureAVX : SubtargetFeature<"avx", "HasAVX", "true",
"Enable AVX instructions">;
@@ -112,11 +116,13 @@ def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
FeatureFastUAMem]>;
// Westmere is a similar machine to nehalem with some additional features.
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
-def : Proc<"westmere", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
- FeatureFastUAMem, FeatureAES]>;
-// Sandy Bridge does not have FMA
-// FIXME: Wikipedia says it does... it should have AES as well.
-def : Proc<"sandybridge", [FeatureSSE42, FeatureAVX, Feature64Bit]>;
+def : Proc<"westmere", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
+ FeatureFastUAMem, FeatureAES, FeatureCLMUL]>;
+// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
+// rather than a superset.
+// FIXME: Disabling AVX for now since it's not ready.
+def : Proc<"sandybridge", [FeatureSSE42, Feature64Bit,
+ FeatureAES, FeatureCLMUL]>;
def : Proc<"k6", [FeatureMMX]>;
def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;
@@ -176,7 +182,7 @@ include "X86CallingConv.td"
//===----------------------------------------------------------------------===//
-// Assembly Printers
+// Assembly Parser
//===----------------------------------------------------------------------===//
// Currently the X86 assembly parser only supports ATT syntax.
@@ -191,15 +197,21 @@ def ATTAsmParser : AsmParser {
string RegisterPrefix = "%";
}
+//===----------------------------------------------------------------------===//
+// Assembly Printers
+//===----------------------------------------------------------------------===//
+
// The X86 target supports two different syntaxes for emitting machine code.
// This is controlled by the -x86-asm-syntax={att|intel}
def ATTAsmWriter : AsmWriter {
string AsmWriterClassName = "ATTInstPrinter";
int Variant = 0;
+ bit isMCAsmWriter = 1;
}
def IntelAsmWriter : AsmWriter {
string AsmWriterClassName = "IntelInstPrinter";
int Variant = 1;
+ bit isMCAsmWriter = 1;
}
def X86 : Target {
diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp
index 69dc967..da5f5b1 100644
--- a/lib/Target/X86/X86AsmBackend.cpp
+++ b/lib/Target/X86/X86AsmBackend.cpp
@@ -11,50 +11,83 @@
#include "X86.h"
#include "X86FixupKinds.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/MC/ELFObjectWriter.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MachObjectWriter.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegistry.h"
#include "llvm/Target/TargetAsmBackend.h"
using namespace llvm;
-
static unsigned getFixupKindLog2Size(unsigned Kind) {
switch (Kind) {
default: assert(0 && "invalid fixup kind!");
- case X86::reloc_pcrel_1byte:
+ case FK_PCRel_1:
case FK_Data_1: return 0;
- case X86::reloc_pcrel_2byte:
+ case FK_PCRel_2:
case FK_Data_2: return 1;
- case X86::reloc_pcrel_4byte:
+ case FK_PCRel_4:
case X86::reloc_riprel_4byte:
case X86::reloc_riprel_4byte_movq_load:
+ case X86::reloc_signed_4byte:
+ case X86::reloc_global_offset_table:
case FK_Data_4: return 2;
+ case FK_PCRel_8:
case FK_Data_8: return 3;
}
}
namespace {
+
+class X86ELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ X86ELFObjectWriter(bool is64Bit, Triple::OSType OSType, uint16_t EMachine,
+ bool HasRelocationAddend)
+ : MCELFObjectTargetWriter(is64Bit, OSType, EMachine, HasRelocationAddend) {}
+};
+
class X86AsmBackend : public TargetAsmBackend {
public:
X86AsmBackend(const Target &T)
- : TargetAsmBackend(T) {}
+ : TargetAsmBackend() {}
+
+ unsigned getNumFixupKinds() const {
+ return X86::NumTargetFixupKinds;
+ }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
+ { "reloc_riprel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel },
+ { "reloc_riprel_4byte_movq_load", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel},
+ { "reloc_signed_4byte", 0, 4 * 8, 0},
+ { "reloc_global_offset_table", 0, 4 * 8, 0}
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return TargetAsmBackend::getFixupKindInfo(Kind);
- void ApplyFixup(const MCFixup &Fixup, MCDataFragment &DF,
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value) const {
unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind());
- assert(Fixup.getOffset() + Size <= DF.getContents().size() &&
+ assert(Fixup.getOffset() + Size <= DataSize &&
"Invalid fixup offset!");
for (unsigned i = 0; i != Size; ++i)
- DF.getContents()[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
+ Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
}
bool MayNeedRelaxation(const MCInst &Inst) const;
@@ -63,9 +96,9 @@ public:
bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
};
-} // end anonymous namespace
+} // end anonymous namespace
-static unsigned getRelaxedOpcode(unsigned Op) {
+static unsigned getRelaxedOpcodeBranch(unsigned Op) {
switch (Op) {
default:
return Op;
@@ -90,16 +123,104 @@ static unsigned getRelaxedOpcode(unsigned Op) {
}
}
+static unsigned getRelaxedOpcodeArith(unsigned Op) {
+ switch (Op) {
+ default:
+ return Op;
+
+ // IMUL
+ case X86::IMUL16rri8: return X86::IMUL16rri;
+ case X86::IMUL16rmi8: return X86::IMUL16rmi;
+ case X86::IMUL32rri8: return X86::IMUL32rri;
+ case X86::IMUL32rmi8: return X86::IMUL32rmi;
+ case X86::IMUL64rri8: return X86::IMUL64rri32;
+ case X86::IMUL64rmi8: return X86::IMUL64rmi32;
+
+ // AND
+ case X86::AND16ri8: return X86::AND16ri;
+ case X86::AND16mi8: return X86::AND16mi;
+ case X86::AND32ri8: return X86::AND32ri;
+ case X86::AND32mi8: return X86::AND32mi;
+ case X86::AND64ri8: return X86::AND64ri32;
+ case X86::AND64mi8: return X86::AND64mi32;
+
+ // OR
+ case X86::OR16ri8: return X86::OR16ri;
+ case X86::OR16mi8: return X86::OR16mi;
+ case X86::OR32ri8: return X86::OR32ri;
+ case X86::OR32mi8: return X86::OR32mi;
+ case X86::OR64ri8: return X86::OR64ri32;
+ case X86::OR64mi8: return X86::OR64mi32;
+
+ // XOR
+ case X86::XOR16ri8: return X86::XOR16ri;
+ case X86::XOR16mi8: return X86::XOR16mi;
+ case X86::XOR32ri8: return X86::XOR32ri;
+ case X86::XOR32mi8: return X86::XOR32mi;
+ case X86::XOR64ri8: return X86::XOR64ri32;
+ case X86::XOR64mi8: return X86::XOR64mi32;
+
+ // ADD
+ case X86::ADD16ri8: return X86::ADD16ri;
+ case X86::ADD16mi8: return X86::ADD16mi;
+ case X86::ADD32ri8: return X86::ADD32ri;
+ case X86::ADD32mi8: return X86::ADD32mi;
+ case X86::ADD64ri8: return X86::ADD64ri32;
+ case X86::ADD64mi8: return X86::ADD64mi32;
+
+ // SUB
+ case X86::SUB16ri8: return X86::SUB16ri;
+ case X86::SUB16mi8: return X86::SUB16mi;
+ case X86::SUB32ri8: return X86::SUB32ri;
+ case X86::SUB32mi8: return X86::SUB32mi;
+ case X86::SUB64ri8: return X86::SUB64ri32;
+ case X86::SUB64mi8: return X86::SUB64mi32;
+
+ // CMP
+ case X86::CMP16ri8: return X86::CMP16ri;
+ case X86::CMP16mi8: return X86::CMP16mi;
+ case X86::CMP32ri8: return X86::CMP32ri;
+ case X86::CMP32mi8: return X86::CMP32mi;
+ case X86::CMP64ri8: return X86::CMP64ri32;
+ case X86::CMP64mi8: return X86::CMP64mi32;
+
+ // PUSH
+ case X86::PUSHi8: return X86::PUSHi32;
+ }
+}
+
+static unsigned getRelaxedOpcode(unsigned Op) {
+ unsigned R = getRelaxedOpcodeArith(Op);
+ if (R != Op)
+ return R;
+ return getRelaxedOpcodeBranch(Op);
+}
+
bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
+ // Branches can always be relaxed.
+ if (getRelaxedOpcodeBranch(Inst.getOpcode()) != Inst.getOpcode())
+ return true;
+
// Check if this instruction is ever relaxable.
- if (getRelaxedOpcode(Inst.getOpcode()) == Inst.getOpcode())
+ if (getRelaxedOpcodeArith(Inst.getOpcode()) == Inst.getOpcode())
return false;
- // If so, just assume it can be relaxed. Once we support relaxing more complex
- // instructions we should check that the instruction actually has symbolic
- // operands before doing this, but we need to be careful about things like
- // PCrel.
- return true;
+
+ // Check if it has an expression and is not RIP relative.
+ bool hasExp = false;
+ bool hasRIP = false;
+ for (unsigned i = 0; i < Inst.getNumOperands(); ++i) {
+ const MCOperand &Op = Inst.getOperand(i);
+ if (Op.isExpr())
+ hasExp = true;
+
+ if (Op.isReg() && Op.getReg() == X86::RIP)
+ hasRIP = true;
+ }
+
+ // FIXME: Why exactly do we need the !hasRIP? Is it just a limitation on
+ // how we do relaxations?
+ return hasExp && !hasRIP;
}
// FIXME: Can tblgen help at all here to verify there aren't other instructions
@@ -123,10 +244,8 @@ void X86AsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
/// WriteNopData - Write optimal nops to the output file for the \arg Count
/// bytes. This returns the number of bytes written. It may return 0 if
/// the \arg Count is more than the maximum optimal nops.
-///
-/// FIXME this is X86 32-bit specific and should move to a better place.
bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
- static const uint8_t Nops[16][16] = {
+ static const uint8_t Nops[10][10] = {
// nop
{0x90},
// xchg %ax,%ax
@@ -147,32 +266,16 @@ bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
{0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
// nopw %cs:0L(%[re]ax,%[re]ax,1)
{0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
- // nopl 0(%[re]ax,%[re]ax,1)
- // nopw 0(%[re]ax,%[re]ax,1)
- {0x0f, 0x1f, 0x44, 0x00, 0x00,
- 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
- // nopw 0(%[re]ax,%[re]ax,1)
- // nopw 0(%[re]ax,%[re]ax,1)
- {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00,
- 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
- // nopw 0(%[re]ax,%[re]ax,1)
- // nopl 0L(%[re]ax) */
- {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00,
- 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
- // nopl 0L(%[re]ax)
- // nopl 0L(%[re]ax)
- {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00,
- 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
- // nopl 0L(%[re]ax)
- // nopl 0L(%[re]ax,%[re]ax,1)
- {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00,
- 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}
};
// Write an optimal sequence for the first 15 bytes.
- uint64_t OptimalCount = (Count < 16) ? Count : 15;
- for (uint64_t i = 0, e = OptimalCount; i != e; i++)
- OW->Write8(Nops[OptimalCount - 1][i]);
+ const uint64_t OptimalCount = (Count < 16) ? Count : 15;
+ const uint64_t Prefixes = OptimalCount <= 10 ? 0 : OptimalCount - 10;
+ for (uint64_t i = 0, e = Prefixes; i != e; i++)
+ OW->Write8(0x66);
+ const uint64_t Rest = OptimalCount - Prefixes;
+ for (uint64_t i = 0, e = Rest; i != e; i++)
+ OW->Write8(Nops[Rest - 1][i]);
// Finish with single byte nops.
for (uint64_t i = OptimalCount, e = Count; i != e; ++i)
@@ -186,75 +289,60 @@ bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
namespace {
class ELFX86AsmBackend : public X86AsmBackend {
public:
- ELFX86AsmBackend(const Target &T)
- : X86AsmBackend(T) {
- HasAbsolutizedSet = true;
- HasScatteredSymbols = true;
+ Triple::OSType OSType;
+ ELFX86AsmBackend(const Target &T, Triple::OSType _OSType)
+ : X86AsmBackend(T), OSType(_OSType) {
+ HasReliableSymbolDifference = true;
}
- bool isVirtualSection(const MCSection &Section) const {
- const MCSectionELF &SE = static_cast<const MCSectionELF&>(Section);
- return SE.getType() == MCSectionELF::SHT_NOBITS;;
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+ const MCSectionELF &ES = static_cast<const MCSectionELF&>(Section);
+ return ES.getFlags() & ELF::SHF_MERGE;
}
};
class ELFX86_32AsmBackend : public ELFX86AsmBackend {
public:
- ELFX86_32AsmBackend(const Target &T)
- : ELFX86AsmBackend(T) {}
+ ELFX86_32AsmBackend(const Target &T, Triple::OSType OSType)
+ : ELFX86AsmBackend(T, OSType) {}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return new ELFObjectWriter(OS, /*Is64Bit=*/false,
- /*IsLittleEndian=*/true,
- /*HasRelocationAddend=*/false);
+ return createELFObjectWriter(new X86ELFObjectWriter(false, OSType,
+ ELF::EM_386, false),
+ OS, /*IsLittleEndian*/ true);
}
};
class ELFX86_64AsmBackend : public ELFX86AsmBackend {
public:
- ELFX86_64AsmBackend(const Target &T)
- : ELFX86AsmBackend(T) {}
+ ELFX86_64AsmBackend(const Target &T, Triple::OSType OSType)
+ : ELFX86AsmBackend(T, OSType) {}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return new ELFObjectWriter(OS, /*Is64Bit=*/true,
- /*IsLittleEndian=*/true,
- /*HasRelocationAddend=*/true);
+ return createELFObjectWriter(new X86ELFObjectWriter(true, OSType,
+ ELF::EM_X86_64, true),
+ OS, /*IsLittleEndian*/ true);
}
};
class WindowsX86AsmBackend : public X86AsmBackend {
bool Is64Bit;
+
public:
WindowsX86AsmBackend(const Target &T, bool is64Bit)
: X86AsmBackend(T)
, Is64Bit(is64Bit) {
- HasScatteredSymbols = true;
}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
return createWinCOFFObjectWriter(OS, Is64Bit);
}
-
- bool isVirtualSection(const MCSection &Section) const {
- const MCSectionCOFF &SE = static_cast<const MCSectionCOFF&>(Section);
- return SE.getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
- }
};
class DarwinX86AsmBackend : public X86AsmBackend {
public:
DarwinX86AsmBackend(const Target &T)
- : X86AsmBackend(T) {
- HasAbsolutizedSet = true;
- HasScatteredSymbols = true;
- }
-
- bool isVirtualSection(const MCSection &Section) const {
- const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
- return (SMO.getType() == MCSectionMachO::S_ZEROFILL ||
- SMO.getType() == MCSectionMachO::S_GB_ZEROFILL ||
- SMO.getType() == MCSectionMachO::S_THREAD_LOCAL_ZEROFILL);
- }
+ : X86AsmBackend(T) { }
};
class DarwinX86_32AsmBackend : public DarwinX86AsmBackend {
@@ -263,7 +351,9 @@ public:
: DarwinX86AsmBackend(T) {}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return new MachObjectWriter(OS, /*Is64Bit=*/false);
+ return createX86MachObjectWriter(OS, /*Is64Bit=*/false,
+ object::mach::CTM_i386,
+ object::mach::CSX86_ALL);
}
};
@@ -275,7 +365,9 @@ public:
}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return new MachObjectWriter(OS, /*Is64Bit=*/true);
+ return createX86MachObjectWriter(OS, /*Is64Bit=*/true,
+ object::mach::CTM_x86_64,
+ object::mach::CSX86_ALL);
}
virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
@@ -312,7 +404,7 @@ public:
}
};
-} // end anonymous namespace
+} // end anonymous namespace
TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
const std::string &TT) {
@@ -322,9 +414,12 @@ TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
case Triple::MinGW32:
case Triple::Cygwin:
case Triple::Win32:
- return new WindowsX86AsmBackend(T, false);
+ if (Triple(TT).getEnvironment() == Triple::MachO)
+ return new DarwinX86_32AsmBackend(T);
+ else
+ return new WindowsX86AsmBackend(T, false);
default:
- return new ELFX86_32AsmBackend(T);
+ return new ELFX86_32AsmBackend(T, Triple(TT).getOS());
}
}
@@ -333,11 +428,14 @@ TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
switch (Triple(TT).getOS()) {
case Triple::Darwin:
return new DarwinX86_64AsmBackend(T);
- case Triple::MinGW64:
+ case Triple::MinGW32:
case Triple::Cygwin:
case Triple::Win32:
- return new WindowsX86AsmBackend(T, true);
+ if (Triple(TT).getEnvironment() == Triple::MachO)
+ return new DarwinX86_64AsmBackend(T);
+ else
+ return new WindowsX86AsmBackend(T, true);
default:
- return new ELFX86_64AsmBackend(T);
+ return new ELFX86_64AsmBackend(T, Triple(TT).getOS());
}
}
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 20110ad..99b4479 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
#include "X86AsmPrinter.h"
-#include "AsmPrinter/X86ATTInstPrinter.h"
-#include "AsmPrinter/X86IntelInstPrinter.h"
+#include "InstPrinter/X86ATTInstPrinter.h"
+#include "InstPrinter/X86IntelInstPrinter.h"
#include "X86MCInstLower.h"
#include "X86.h"
#include "X86COFFMachineModuleInfo.h"
@@ -48,21 +48,15 @@ using namespace llvm;
// Primitive Helper Functions.
//===----------------------------------------------------------------------===//
-void X86AsmPrinter::PrintPICBaseSymbol(raw_ostream &O) const {
- const TargetLowering *TLI = TM.getTargetLowering();
- O << *static_cast<const X86TargetLowering*>(TLI)->getPICBaseSymbol(MF,
- OutContext);
-}
-
/// runOnMachineFunction - Emit the function body.
///
bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
SetupMachineFunction(MF);
- if (Subtarget->isTargetCOFF()) {
+ if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) {
bool Intrn = MF.getFunction()->hasInternalLinkage();
OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
- OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::IMAGE_SYM_CLASS_STATIC
+ OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::IMAGE_SYM_CLASS_STATIC
: COFF::IMAGE_SYM_CLASS_EXTERNAL);
OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
<< COFF::SCT_COMPLEX_TYPE_SHIFT);
@@ -95,7 +89,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
break;
case MachineOperand::MO_GlobalAddress: {
const GlobalValue *GV = MO.getGlobal();
-
+
MCSymbol *GVSym;
if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB)
GVSym = GetSymbolWithGlobalValueBase(GV, "$stub");
@@ -109,11 +103,11 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
// Handle dllimport linkage.
if (MO.getTargetFlags() == X86II::MO_DLLIMPORT)
GVSym = OutContext.GetOrCreateSymbol(Twine("__imp_") + GVSym->getName());
-
+
if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) {
MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
- MachineModuleInfoImpl::StubValueTy &StubSym =
+ MachineModuleInfoImpl::StubValueTy &StubSym =
MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(Sym);
if (StubSym.getPointer() == 0)
StubSym = MachineModuleInfoImpl::
@@ -133,7 +127,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
StubSym = MachineModuleInfoImpl::
StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
}
-
+
// If the name begins with a dollar-sign, enclose it in parens. We do this
// to avoid having it look like an integer immediate to the assembler.
if (GVSym->getName()[0] != '$')
@@ -149,7 +143,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
SmallString<128> TempNameStr;
TempNameStr += StringRef(MO.getSymbolName());
TempNameStr += StringRef("$stub");
-
+
MCSymbol *Sym = GetExternalSymbolSymbol(TempNameStr.str());
MachineModuleInfoImpl::StubValueTy &StubSym =
MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
@@ -163,17 +157,17 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
} else {
SymToPrint = GetExternalSymbolSymbol(MO.getSymbolName());
}
-
+
// If the name begins with a dollar-sign, enclose it in parens. We do this
// to avoid having it look like an integer immediate to the assembler.
- if (SymToPrint->getName()[0] != '$')
+ if (SymToPrint->getName()[0] != '$')
O << *SymToPrint;
else
O << '(' << *SymToPrint << '(';
break;
}
}
-
+
switch (MO.getTargetFlags()) {
default:
llvm_unreachable("Unknown target flag on GV operand");
@@ -185,15 +179,12 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
// These affect the name of the symbol, not any suffix.
break;
case X86II::MO_GOT_ABSOLUTE_ADDRESS:
- O << " + [.-";
- PrintPICBaseSymbol(O);
- O << ']';
- break;
+ O << " + [.-" << *MF->getPICBaseSymbol() << ']';
+ break;
case X86II::MO_PIC_BASE_OFFSET:
case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
- O << '-';
- PrintPICBaseSymbol(O);
+ O << '-' << *MF->getPICBaseSymbol();
break;
case X86II::MO_TLSGD: O << "@TLSGD"; break;
case X86II::MO_GOTTPOFF: O << "@GOTTPOFF"; break;
@@ -206,8 +197,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
case X86II::MO_PLT: O << "@PLT"; break;
case X86II::MO_TLVP: O << "@TLVP"; break;
case X86II::MO_TLVP_PIC_BASE:
- O << "@TLVP" << '-';
- PrintPICBaseSymbol(O);
+ O << "@TLVP" << '-' << *MF->getPICBaseSymbol();
break;
}
}
@@ -262,7 +252,7 @@ void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
case MachineOperand::MO_JumpTableIndex:
case MachineOperand::MO_ConstantPoolIndex:
- case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_GlobalAddress:
case MachineOperand::MO_ExternalSymbol: {
O << '$';
printSymbolOperand(MO, O);
@@ -298,10 +288,10 @@ void X86AsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op,
if (HasBaseReg && Modifier && !strcmp(Modifier, "no-rip") &&
BaseReg.getReg() == X86::RIP)
HasBaseReg = false;
-
+
// HasParenPart - True if we will print out the () part of the mem ref.
bool HasParenPart = IndexReg.getReg() || HasBaseReg;
-
+
if (DispSpec.isImm()) {
int DispVal = DispSpec.getImm();
if (DispVal || !HasParenPart)
@@ -312,6 +302,9 @@ void X86AsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op,
printSymbolOperand(MI->getOperand(Op+3), O);
}
+ if (Modifier && strcmp(Modifier, "H") == 0)
+ O << "+8";
+
if (HasParenPart) {
assert(IndexReg.getReg() != X86::ESP &&
"X86 doesn't allow scaling by ESP");
@@ -344,10 +337,8 @@ void X86AsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op,
void X86AsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op,
raw_ostream &O) {
- PrintPICBaseSymbol(O);
- O << '\n';
- PrintPICBaseSymbol(O);
- O << ':';
+ O << *MF->getPICBaseSymbol() << '\n';
+ O << *MF->getPICBaseSymbol() << ':';
}
bool X86AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode,
@@ -386,14 +377,14 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
if (ExtraCode[1] != 0) return true; // Unknown modifier.
const MachineOperand &MO = MI->getOperand(OpNo);
-
+
switch (ExtraCode[0]) {
default: return true; // Unknown modifier.
case 'a': // This is an address. Currently only 'i' and 'r' are expected.
if (MO.isImm()) {
O << MO.getImm();
return false;
- }
+ }
if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol()) {
printSymbolOperand(MO, O);
if (Subtarget->isPICStyleRIPRel())
@@ -470,6 +461,9 @@ bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
case 'q': // Print SImode register
// These only apply to registers, ignore on mem.
break;
+ case 'H':
+ printMemReference(MI, OpNo, O, "H");
+ return false;
case 'P': // Don't print @PLT, but do print as memory.
printMemReference(MI, OpNo, O, "no-rip");
return false;
@@ -480,23 +474,23 @@ bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
}
void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
- if (Subtarget->isTargetDarwin())
+ if (Subtarget->isTargetEnvMacho())
OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
}
void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
- if (Subtarget->isTargetDarwin()) {
+ if (Subtarget->isTargetEnvMacho()) {
// All darwin targets use mach-o.
MachineModuleInfoMachO &MMIMacho =
MMI->getObjFileInfo<MachineModuleInfoMachO>();
-
+
// Output stubs for dynamically-linked functions.
MachineModuleInfoMachO::SymbolListTy Stubs;
Stubs = MMIMacho.GetFnStubList();
if (!Stubs.empty()) {
- const MCSection *TheSection =
+ const MCSection *TheSection =
OutContext.getMachOSection("__IMPORT", "__jump_table",
MCSectionMachO::S_SYMBOL_STUBS |
MCSectionMachO::S_ATTR_SELF_MODIFYING_CODE |
@@ -514,7 +508,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
const char HltInsts[] = { -12, -12, -12, -12, -12 };
OutStreamer.EmitBytes(StringRef(HltInsts, 5), 0/*addrspace*/);
}
-
+
Stubs.clear();
OutStreamer.AddBlankLine();
}
@@ -522,7 +516,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
// Output stubs for external and common global variables.
Stubs = MMIMacho.GetGVStubList();
if (!Stubs.empty()) {
- const MCSection *TheSection =
+ const MCSection *TheSection =
OutContext.getMachOSection("__IMPORT", "__pointers",
MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS,
SectionKind::getMetadata());
@@ -580,7 +574,14 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
}
- if (Subtarget->isTargetCOFF()) {
+ if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing() &&
+ MMI->callsExternalVAFunctionWithFloatingPointArguments()) {
+ StringRef SymbolName = Subtarget->is64Bit() ? "_fltused" : "__fltused";
+ MCSymbol *S = MMI->getContext().GetOrCreateSymbol(SymbolName);
+ OutStreamer.EmitSymbolAttribute(S, MCSA_Global);
+ }
+
+ if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) {
X86COFFMachineModuleInfo &COFFMMI =
MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
@@ -661,12 +662,12 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
}
}
-MachineLocation
+MachineLocation
X86AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
MachineLocation Location;
assert (MI->getNumOperands() == 7 && "Invalid no. of machine operands!");
// Frame address. Currently handles register +- offset only.
-
+
if (MI->getOperand(0).isReg() && MI->getOperand(3).isImm())
Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm());
else {
@@ -690,9 +691,9 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
O << V.getName();
O << " <- ";
// Frame address. Currently handles register +- offset only.
- O << '[';
+ O << '[';
if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg())
- printOperand(MI, 0, O);
+ printOperand(MI, 0, O);
else
O << "undef";
O << '+'; printOperand(MI, 3, O);
@@ -718,10 +719,10 @@ static MCInstPrinter *createX86MCInstPrinter(const Target &T,
}
// Force static initialization.
-extern "C" void LLVMInitializeX86AsmPrinter() {
+extern "C" void LLVMInitializeX86AsmPrinter() {
RegisterAsmPrinter<X86AsmPrinter> X(TheX86_32Target);
RegisterAsmPrinter<X86AsmPrinter> Y(TheX86_64Target);
-
+
TargetRegistry::RegisterMCInstPrinter(TheX86_32Target,createX86MCInstPrinter);
TargetRegistry::RegisterMCInstPrinter(TheX86_64Target,createX86MCInstPrinter);
}
diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index e61be66..3a50435 100644
--- a/lib/Target/X86/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -75,8 +75,6 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
void printPICLabel(const MachineInstr *MI, unsigned Op, raw_ostream &O);
- void PrintPICBaseSymbol(raw_ostream &O) const;
-
bool runOnMachineFunction(MachineFunction &F);
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index e3409ef..a44fb69 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -48,7 +48,7 @@ def RetCC_X86Common : CallingConv<[
// MMX vector types are always returned in MM0. If the target doesn't have
// MM0, it doesn't support these vector types.
- CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToReg<[MM0]>>,
+ CCIfType<[x86mmx, v1i64], CCAssignToReg<[MM0]>>,
// Long double types are always returned in ST0 (even with SSE).
CCIfType<[f80], CCAssignToReg<[ST0, ST1]>>
@@ -61,7 +61,7 @@ def RetCC_X86_32_C : CallingConv<[
// weirdly; this is really the sse-regparm calling convention) in which
// case they use XMM0, otherwise it is the same as the common X86 calling
// conv.
- CCIfInReg<CCIfSubtarget<"hasSSE2()",
+ CCIfInReg<CCIfSubtarget<"hasXMMInt()",
CCIfType<[f32, f64], CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
CCIfType<[f32,f64], CCAssignToReg<[ST0, ST1]>>,
CCDelegateTo<RetCC_X86Common>
@@ -73,8 +73,8 @@ def RetCC_X86_32_Fast : CallingConv<[
// SSE2.
// This can happen when a float, 2 x float, or 3 x float vector is split by
// target lowering, and is returned in 1-3 sse regs.
- CCIfType<[f32], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
- CCIfType<[f64], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
+ CCIfType<[f32], CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
+ CCIfType<[f64], CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
// For integers, ECX can be used as an extra return register
CCIfType<[i8], CCAssignToReg<[AL, DL, CL]>>,
@@ -95,14 +95,14 @@ def RetCC_X86_64_C : CallingConv<[
// returned in RAX. This disagrees with ABI documentation but is bug
// compatible with gcc.
CCIfType<[v1i64], CCAssignToReg<[RAX]>>,
- CCIfType<[v8i8, v4i16, v2i32], CCAssignToReg<[XMM0, XMM1]>>,
+ CCIfType<[x86mmx], CCAssignToReg<[XMM0, XMM1]>>,
CCDelegateTo<RetCC_X86Common>
]>;
// X86-Win64 C return-value convention.
def RetCC_X86_Win64_C : CallingConv<[
// The X86-Win64 calling convention always returns __m64 values in RAX.
- CCIfType<[v8i8, v4i16, v2i32, v1i64], CCBitConvertToType<i64>>,
+ CCIfType<[x86mmx, v1i64], CCBitConvertToType<i64>>,
// And FP in XMM0 only.
CCIfType<[f32], CCAssignToReg<[XMM0]>>,
@@ -161,14 +161,14 @@ def CC_X86_64_C : CallingConv<[
// The first 8 MMX (except for v1i64) vector arguments are passed in XMM
// registers on Darwin.
- CCIfType<[v8i8, v4i16, v2i32],
+ CCIfType<[x86mmx],
CCIfSubtarget<"isTargetDarwin()",
- CCIfSubtarget<"hasSSE2()",
+ CCIfSubtarget<"hasXMMInt()",
CCPromoteToType<v2i64>>>>,
// The first 8 FP/Vector arguments are passed in XMM registers.
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCIfSubtarget<"hasSSE1()",
+ CCIfSubtarget<"hasXMM()",
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
// The first 8 256-bit vector arguments are passed in YMM registers.
@@ -192,7 +192,7 @@ def CC_X86_64_C : CallingConv<[
CCAssignToStack<32, 32>>,
// __m64 vectors get 8-byte stack slots that are 8-byte aligned.
- CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
+ CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 8>>
]>;
// Calling convention used on Win64
@@ -210,8 +210,7 @@ def CC_X86_Win64_C : CallingConv<[
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>,
// The first 4 MMX vector arguments are passed in GPRs.
- CCIfType<[v8i8, v4i16, v2i32, v1i64],
- CCBitConvertToType<i64>>,
+ CCIfType<[x86mmx, v1i64], CCBitConvertToType<i64>>,
// The first 4 integer arguments are passed in integer registers.
CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ],
@@ -233,7 +232,7 @@ def CC_X86_Win64_C : CallingConv<[
CCIfType<[f80], CCAssignToStack<0, 0>>,
// __m64 vectors get 8-byte stack slots that are 8-byte aligned.
- CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
+ CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 8>>
]>;
def CC_X86_64_GHC : CallingConv<[
@@ -246,7 +245,7 @@ def CC_X86_64_GHC : CallingConv<[
// Pass in STG registers: F1, F2, F3, F4, D1, D2
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCIfSubtarget<"hasSSE1()",
+ CCIfSubtarget<"hasXMM()",
CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>
]>;
@@ -264,12 +263,12 @@ def CC_X86_32_Common : CallingConv<[
// The first 3 float or double arguments, if marked 'inreg' and if the call
// is not a vararg call and if SSE2 is available, are passed in SSE registers.
CCIfNotVarArg<CCIfInReg<CCIfType<[f32,f64],
- CCIfSubtarget<"hasSSE2()",
+ CCIfSubtarget<"hasXMMInt()",
CCAssignToReg<[XMM0,XMM1,XMM2]>>>>>,
// The first 3 __m64 (except for v1i64) vector arguments are passed in mmx
// registers if the call is not a vararg call.
- CCIfNotVarArg<CCIfType<[v8i8, v4i16, v2i32],
+ CCIfNotVarArg<CCIfType<[x86mmx],
CCAssignToReg<[MM0, MM1, MM2]>>>,
// Integer/Float values get stored in stack slots that are 4 bytes in
@@ -300,7 +299,7 @@ def CC_X86_32_Common : CallingConv<[
// __m64 vectors get 8-byte stack slots that are 4-byte aligned. They are
// passed in the parameter area.
- CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 4>>]>;
+ CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 4>>]>;
def CC_X86_32_C : CallingConv<[
// Promote i8/i16 arguments to i32.
@@ -363,7 +362,7 @@ def CC_X86_32_FastCC : CallingConv<[
// The first 3 float or double arguments, if the call is not a vararg
// call and if SSE2 is available, are passed in SSE registers.
CCIfNotVarArg<CCIfType<[f32,f64],
- CCIfSubtarget<"hasSSE2()",
+ CCIfSubtarget<"hasXMMInt()",
CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
// Doubles get 8-byte slots that are 8-byte aligned.
@@ -380,3 +379,35 @@ def CC_X86_32_GHC : CallingConv<[
// Pass in STG registers: Base, Sp, Hp, R1
CCIfType<[i32], CCAssignToReg<[EBX, EBP, EDI, ESI]>>
]>;
+
+//===----------------------------------------------------------------------===//
+// X86 Root Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// This is the root argument convention for the X86-32 backend.
+def CC_X86_32 : CallingConv<[
+ CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo<CC_X86_32_FastCall>>,
+ CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,
+ CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>,
+ CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
+
+ // Otherwise, drop to normal X86-32 CC
+ CCDelegateTo<CC_X86_32_C>
+]>;
+
+// This is the root argument convention for the X86-64 backend.
+def CC_X86_64 : CallingConv<[
+ CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_64_GHC>>,
+
+ // Mingw64 and native Win64 use Win64 CC
+ CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
+
+ // Otherwise, drop to normal X86-64 CC
+ CCDelegateTo<CC_X86_64_C>
+]>;
+
+// This is the argument convention used for the entire X86 backend.
+def CC_X86 : CallingConv<[
+ CCIfSubtarget<"is64Bit()", CCDelegateTo<CC_X86_64>>,
+ CCDelegateTo<CC_X86_32>
+]>;
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index 824021c..60d9d4a 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -68,8 +68,7 @@ namespace {
return "X86 Machine Code Emitter";
}
- void emitInstruction(const MachineInstr &MI,
- const TargetInstrDesc *Desc);
+ void emitInstruction(MachineInstr &MI, const TargetInstrDesc *Desc);
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -131,7 +130,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
MBB != E; ++MBB) {
MCE.StartMachineBasicBlock(MBB);
- for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
I != E; ++I) {
const TargetInstrDesc &Desc = I->getDesc();
emitInstruction(*I, &Desc);
@@ -598,9 +597,23 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
}
template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
+void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
const TargetInstrDesc *Desc) {
DEBUG(dbgs() << MI);
+
+ // If this is a pseudo instruction, lower it.
+ switch (Desc->getOpcode()) {
+ case X86::ADD16rr_DB: Desc = &II->get(X86::OR16rr); MI.setDesc(*Desc);break;
+ case X86::ADD32rr_DB: Desc = &II->get(X86::OR32rr); MI.setDesc(*Desc);break;
+ case X86::ADD64rr_DB: Desc = &II->get(X86::OR64rr); MI.setDesc(*Desc);break;
+ case X86::ADD16ri_DB: Desc = &II->get(X86::OR16ri); MI.setDesc(*Desc);break;
+ case X86::ADD32ri_DB: Desc = &II->get(X86::OR32ri); MI.setDesc(*Desc);break;
+ case X86::ADD64ri32_DB:Desc = &II->get(X86::OR64ri32);MI.setDesc(*Desc);break;
+ case X86::ADD16ri8_DB: Desc = &II->get(X86::OR16ri8);MI.setDesc(*Desc);break;
+ case X86::ADD32ri8_DB: Desc = &II->get(X86::OR32ri8);MI.setDesc(*Desc);break;
+ case X86::ADD64ri8_DB: Desc = &II->get(X86::OR64ri8);MI.setDesc(*Desc);break;
+ }
+
MCE.processDebugLoc(MI.getDebugLoc(), true);
diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp
index f84995d..f1d7ede 100644
--- a/lib/Target/X86/X86ELFWriterInfo.cpp
+++ b/lib/Target/X86/X86ELFWriterInfo.cpp
@@ -14,6 +14,7 @@
#include "X86ELFWriterInfo.h"
#include "X86Relocations.h"
#include "llvm/Function.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
@@ -24,8 +25,8 @@ using namespace llvm;
// Implementation of the X86ELFWriterInfo class
//===----------------------------------------------------------------------===//
-X86ELFWriterInfo::X86ELFWriterInfo(TargetMachine &TM)
- : TargetELFWriterInfo(TM) {
+X86ELFWriterInfo::X86ELFWriterInfo(bool is64Bit_, bool isLittleEndian_)
+ : TargetELFWriterInfo(is64Bit_, isLittleEndian_) {
EMachine = is64Bit ? EM_X86_64 : EM_386;
}
@@ -35,13 +36,13 @@ unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
if (is64Bit) {
switch(MachineRelTy) {
case X86::reloc_pcrel_word:
- return R_X86_64_PC32;
+ return ELF::R_X86_64_PC32;
case X86::reloc_absolute_word:
- return R_X86_64_32;
+ return ELF::R_X86_64_32;
case X86::reloc_absolute_word_sext:
- return R_X86_64_32S;
+ return ELF::R_X86_64_32S;
case X86::reloc_absolute_dword:
- return R_X86_64_64;
+ return ELF::R_X86_64_64;
case X86::reloc_picrel_word:
default:
llvm_unreachable("unknown x86_64 machine relocation type");
@@ -49,9 +50,9 @@ unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
} else {
switch(MachineRelTy) {
case X86::reloc_pcrel_word:
- return R_386_PC32;
+ return ELF::R_386_PC32;
case X86::reloc_absolute_word:
- return R_386_32;
+ return ELF::R_386_32;
case X86::reloc_absolute_word_sext:
case X86::reloc_absolute_dword:
case X86::reloc_picrel_word:
@@ -66,18 +67,18 @@ long int X86ELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
long int Modifier) const {
if (is64Bit) {
switch(RelTy) {
- case R_X86_64_PC32: return Modifier - 4;
- case R_X86_64_32:
- case R_X86_64_32S:
- case R_X86_64_64:
+ case ELF::R_X86_64_PC32: return Modifier - 4;
+ case ELF::R_X86_64_32:
+ case ELF::R_X86_64_32S:
+ case ELF::R_X86_64_64:
return Modifier;
default:
llvm_unreachable("unknown x86_64 relocation type");
}
} else {
switch(RelTy) {
- case R_386_PC32: return Modifier - 4;
- case R_386_32: return Modifier;
+ case ELF::R_386_PC32: return Modifier - 4;
+ case ELF::R_386_32: return Modifier;
default:
llvm_unreachable("unknown x86 relocation type");
}
@@ -88,19 +89,19 @@ long int X86ELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
unsigned X86ELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
if (is64Bit) {
switch(RelTy) {
- case R_X86_64_PC32:
- case R_X86_64_32:
- case R_X86_64_32S:
+ case ELF::R_X86_64_PC32:
+ case ELF::R_X86_64_32:
+ case ELF::R_X86_64_32S:
return 32;
- case R_X86_64_64:
+ case ELF::R_X86_64_64:
return 64;
default:
llvm_unreachable("unknown x86_64 relocation type");
}
} else {
switch(RelTy) {
- case R_386_PC32:
- case R_386_32:
+ case ELF::R_386_PC32:
+ case ELF::R_386_32:
return 32;
default:
llvm_unreachable("unknown x86 relocation type");
@@ -112,20 +113,20 @@ unsigned X86ELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
bool X86ELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
if (is64Bit) {
switch(RelTy) {
- case R_X86_64_PC32:
+ case ELF::R_X86_64_PC32:
return true;
- case R_X86_64_32:
- case R_X86_64_32S:
- case R_X86_64_64:
+ case ELF::R_X86_64_32:
+ case ELF::R_X86_64_32S:
+ case ELF::R_X86_64_64:
return false;
default:
llvm_unreachable("unknown x86_64 relocation type");
}
} else {
switch(RelTy) {
- case R_386_PC32:
+ case ELF::R_386_PC32:
return true;
- case R_386_32:
+ case ELF::R_386_32:
return false;
default:
llvm_unreachable("unknown x86 relocation type");
@@ -143,7 +144,7 @@ long int X86ELFWriterInfo::computeRelocation(unsigned SymOffset,
unsigned RelOffset,
unsigned RelTy) const {
- if (RelTy == R_X86_64_PC32 || RelTy == R_386_PC32)
+ if (RelTy == ELF::R_X86_64_PC32 || RelTy == ELF::R_386_PC32)
return SymOffset - (RelOffset + 4);
else
assert("computeRelocation unknown for this relocation type");
diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h
index 342e6e6..a45b5bb 100644
--- a/lib/Target/X86/X86ELFWriterInfo.h
+++ b/lib/Target/X86/X86ELFWriterInfo.h
@@ -20,25 +20,8 @@ namespace llvm {
class X86ELFWriterInfo : public TargetELFWriterInfo {
- // ELF Relocation types for X86
- enum X86RelocationType {
- R_386_NONE = 0,
- R_386_32 = 1,
- R_386_PC32 = 2
- };
-
- // ELF Relocation types for X86_64
- enum X86_64RelocationType {
- R_X86_64_NONE = 0,
- R_X86_64_64 = 1,
- R_X86_64_PC32 = 2,
- R_X86_64_32 = 10,
- R_X86_64_32S = 11,
- R_X86_64_PC64 = 24
- };
-
public:
- X86ELFWriterInfo(TargetMachine &TM);
+ X86ELFWriterInfo(bool is64Bit_, bool isLittleEndian_);
virtual ~X86ELFWriterInfo();
/// getRelocationType - Returns the target specific ELF Relocation type.
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 0c70eec..9d42ac2 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -36,7 +36,7 @@
using namespace llvm;
namespace {
-
+
class X86FastISel : public FastISel {
/// Subtarget - Keep a pointer to the X86Subtarget around so that we can
/// make the right decision when generating code for different targets.
@@ -46,7 +46,7 @@ class X86FastISel : public FastISel {
///
unsigned StackPtr;
- /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
+ /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
/// floating point ops.
/// When SSE is available, use it for f32 operations.
/// When SSE2 is available, use it for f64 operations.
@@ -63,11 +63,18 @@ public:
virtual bool TargetSelectInstruction(const Instruction *I);
+ /// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+ /// vreg is being provided by the specified load instruction. If possible,
+ /// try to fold the load as an operand to the instruction, returning true if
+ /// possible.
+ virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI);
+
#include "X86GenFastISel.inc"
private:
bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT);
-
+
bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
bool X86FastEmitStore(EVT VT, const Value *Val,
@@ -77,12 +84,12 @@ private:
bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
unsigned &ResultReg);
-
+
bool X86SelectAddress(const Value *V, X86AddressMode &AM);
bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);
bool X86SelectLoad(const Instruction *I);
-
+
bool X86SelectStore(const Instruction *I);
bool X86SelectRet(const Instruction *I);
@@ -98,7 +105,7 @@ private:
bool X86SelectSelect(const Instruction *I);
bool X86SelectTrunc(const Instruction *I);
-
+
bool X86SelectFPExt(const Instruction *I);
bool X86SelectFPTrunc(const Instruction *I);
@@ -107,9 +114,6 @@ private:
bool X86VisitIntrinsicCall(const IntrinsicInst &I);
bool X86SelectCall(const Instruction *I);
- CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isTailCall = false);
- CCAssignFn *CCAssignFnForRet(CallingConv::ID CC, bool isTailCall = false);
-
const X86InstrInfo *getInstrInfo() const {
return getTargetMachine()->getInstrInfo();
}
@@ -128,17 +132,18 @@ private:
(VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
}
- bool isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1 = false);
+ bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false);
};
-
+
} // end anonymous namespace.
-bool X86FastISel::isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1) {
- VT = TLI.getValueType(Ty, /*HandleUnknown=*/true);
- if (VT == MVT::Other || !VT.isSimple())
+bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) {
+ EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
+ if (evt == MVT::Other || !evt.isSimple())
// Unhandled type. Halt "fast" selection and bail.
return false;
-
+
+ VT = evt.getSimpleVT();
// For now, require SSE/SSE2 for performing floating-point operations,
// since x87 requires additional work.
if (VT == MVT::f64 && !X86ScalarSSEf64)
@@ -157,45 +162,6 @@ bool X86FastISel::isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1) {
#include "X86GenCallingConv.inc"
-/// CCAssignFnForCall - Selects the correct CCAssignFn for a given calling
-/// convention.
-CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC,
- bool isTaillCall) {
- if (Subtarget->is64Bit()) {
- if (CC == CallingConv::GHC)
- return CC_X86_64_GHC;
- else if (Subtarget->isTargetWin64())
- return CC_X86_Win64_C;
- else
- return CC_X86_64_C;
- }
-
- if (CC == CallingConv::X86_FastCall)
- return CC_X86_32_FastCall;
- else if (CC == CallingConv::X86_ThisCall)
- return CC_X86_32_ThisCall;
- else if (CC == CallingConv::Fast)
- return CC_X86_32_FastCC;
- else if (CC == CallingConv::GHC)
- return CC_X86_32_GHC;
- else
- return CC_X86_32_C;
-}
-
-/// CCAssignFnForRet - Selects the correct CCAssignFn for a given calling
-/// convention.
-CCAssignFn *X86FastISel::CCAssignFnForRet(CallingConv::ID CC,
- bool isTaillCall) {
- if (Subtarget->is64Bit()) {
- if (Subtarget->isTargetWin64())
- return RetCC_X86_Win64_C;
- else
- return RetCC_X86_64_C;
- }
-
- return RetCC_X86_32_C;
-}
-
/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
@@ -284,7 +250,7 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val,
Opc = Subtarget->hasSSE2() ? X86::MOVSDmr : X86::ST_Fp64m;
break;
}
-
+
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
DL, TII.get(Opc)), AM).addReg(Val);
return true;
@@ -295,7 +261,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
// Handle 'null' like i32/i64 0.
if (isa<ConstantPointerNull>(Val))
Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));
-
+
// If this is a store of a simple constant, fold the constant into the store.
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
unsigned Opc = 0;
@@ -312,7 +278,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
Opc = X86::MOV64mi32;
break;
}
-
+
if (Opc) {
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
DL, TII.get(Opc)), AM)
@@ -321,11 +287,11 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
return true;
}
}
-
+
unsigned ValReg = getRegForValue(Val);
if (ValReg == 0)
- return false;
-
+ return false;
+
return X86FastEmitStore(VT, ValReg, AM);
}
@@ -337,7 +303,7 @@ bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
unsigned &ResultReg) {
unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
Src, /*TODO: Kill=*/false);
-
+
if (RR != 0) {
ResultReg = RR;
return true;
@@ -354,11 +320,11 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
// Don't walk into other basic blocks; it's possible we haven't
// visited them yet, so the instructions may not yet be assigned
// virtual registers.
- if (FuncInfo.MBBMap[I->getParent()] != FuncInfo.MBB)
- return false;
-
- Opcode = I->getOpcode();
- U = I;
+ if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
+ FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+ Opcode = I->getOpcode();
+ U = I;
+ }
} else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
Opcode = C->getOpcode();
U = C;
@@ -472,7 +438,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
AM.Disp = (uint32_t)Disp;
if (X86SelectAddress(U->getOperand(0), AM))
return true;
-
+
// If we couldn't merge the sub value into this addr mode, revert back to
// our address and just match the value instead of completely failing.
AM = SavedAM;
@@ -501,7 +467,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
// Okay, we've committed to selecting this global. Set up the basic address.
AM.GV = GV;
-
+
// Allow the subtarget to classify the global.
unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
@@ -510,7 +476,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
// FIXME: How do we know Base.Reg is free??
AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
}
-
+
// Unless the ABI requires an extra load, return a direct reference to
// the global.
if (!isGlobalStubReference(GVFlags)) {
@@ -523,7 +489,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
AM.GVOpFlags = GVFlags;
return true;
}
-
+
// Ok, we need to do a load from a stub. If we've already loaded from this
// stub, reuse the loaded pointer, otherwise emit the load now.
DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
@@ -545,14 +511,14 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
if (TLI.getPointerTy() == MVT::i64) {
Opc = X86::MOV64rm;
RC = X86::GR64RegisterClass;
-
+
if (Subtarget->isPICStyleRIPRel())
StubAM.Base.Reg = X86::RIP;
} else {
Opc = X86::MOV32rm;
RC = X86::GR32RegisterClass;
}
-
+
LoadReg = createResultReg(RC);
MachineInstrBuilder LoadMI =
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
@@ -564,7 +530,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
// Prevent loading GV stub multiple times in same MBB.
LocalValueMap[V] = LoadReg;
}
-
+
// Now construct the final address. Note that the Disp, Scale,
// and Index values may already be set here.
AM.Base.Reg = LoadReg;
@@ -638,7 +604,7 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
// Okay, we've committed to selecting this global. Set up the basic address.
AM.GV = GV;
-
+
// No ABI requires an extra load for anything other than DLLImport, which
// we rejected above. Return a direct reference to the global.
if (Subtarget->isPICStyleRIPRel()) {
@@ -651,7 +617,7 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
} else if (Subtarget->isPICStyleGOT()) {
AM.GVOpFlags = X86II::MO_GOTOFF;
}
-
+
return true;
}
@@ -674,7 +640,7 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
/// X86SelectStore - Select and emit code to implement store instructions.
bool X86FastISel::X86SelectStore(const Instruction *I) {
- EVT VT;
+ MVT VT;
if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
return false;
@@ -724,7 +690,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext());
- CCInfo.AnalyzeReturn(Outs, CCAssignFnForRet(CC));
+ CCInfo.AnalyzeReturn(Outs, RetCC_X86);
const Value *RV = Ret->getOperand(0);
unsigned Reg = getRegForValue(RV);
@@ -736,7 +702,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
return false;
CCValAssign &VA = ValLocs[0];
-
+
// Don't bother handling odd stuff for now.
if (VA.getLocInfo() != CCValAssign::Full)
return false;
@@ -745,7 +711,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
return false;
// TODO: For now, don't try to handle cases where getLocInfo()
// says Full but the types don't match.
- if (VA.getValVT() != TLI.getValueType(RV->getType()))
+ if (TLI.getValueType(RV->getType()) != VA.getValVT())
return false;
// The calling-convention tables for x87 returns don't tell
@@ -775,7 +741,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
/// X86SelectLoad - Select and emit code to implement load instructions.
///
bool X86FastISel::X86SelectLoad(const Instruction *I) {
- EVT VT;
+ MVT VT;
if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
return false;
@@ -826,11 +792,11 @@ bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
EVT VT) {
unsigned Op0Reg = getRegForValue(Op0);
if (Op0Reg == 0) return false;
-
+
// Handle 'null' like i32/i64 0.
if (isa<ConstantPointerNull>(Op1))
Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getContext()));
-
+
// We have two options: compare with register or immediate. If the RHS of
// the compare is an immediate that we can fold into this compare, use
// CMPri, otherwise use CMPrr.
@@ -842,23 +808,23 @@ bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
return true;
}
}
-
+
unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
if (CompareOpc == 0) return false;
-
+
unsigned Op1Reg = getRegForValue(Op1);
if (Op1Reg == 0) return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareOpc))
.addReg(Op0Reg)
.addReg(Op1Reg);
-
+
return true;
}
bool X86FastISel::X86SelectCmp(const Instruction *I) {
const CmpInst *CI = cast<CmpInst>(I);
- EVT VT;
+ MVT VT;
if (!isTypeLegal(I->getOperand(0)->getType(), VT))
return false;
@@ -869,13 +835,13 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
case CmpInst::FCMP_OEQ: {
if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
return false;
-
+
unsigned EReg = createResultReg(&X86::GR8RegClass);
unsigned NPReg = createResultReg(&X86::GR8RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETEr), EReg);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(X86::SETNPr), NPReg);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg);
UpdateValueMap(I, ResultReg);
return true;
@@ -908,7 +874,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
case CmpInst::FCMP_UGE: SwapArgs = true; SetCCOpc = X86::SETBEr; break;
case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break;
case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
-
+
case CmpInst::ICMP_EQ: SwapArgs = false; SetCCOpc = X86::SETEr; break;
case CmpInst::ICMP_NE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr; break;
@@ -930,7 +896,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
// Emit a compare of Op0/Op1.
if (!X86FastEmitCompare(Op0, Op1, VT))
return false;
-
+
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(SetCCOpc), ResultReg);
UpdateValueMap(I, ResultReg);
return true;
@@ -995,7 +961,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE_4; break;
case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break;
case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
-
+
case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE_4; break;
case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4; break;
@@ -1009,7 +975,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
default:
return false;
}
-
+
const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
if (SwapArgs)
std::swap(Op0, Op1);
@@ -1017,7 +983,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
// Emit a compare of the LHS and RHS, setting the flags.
if (!X86FastEmitCompare(Op0, Op1, VT))
return false;
-
+
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BranchOpc))
.addMBB(TrueMBB);
@@ -1070,8 +1036,8 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
}
const TargetInstrDesc &TID = MI.getDesc();
- if (TID.hasUnmodeledSideEffects() ||
- TID.hasImplicitDefOfPhysReg(X86::EFLAGS))
+ if (TID.hasImplicitDefOfPhysReg(X86::EFLAGS) ||
+ MI.hasUnmodeledSideEffects())
break;
}
@@ -1147,22 +1113,22 @@ bool X86FastISel::X86SelectShift(const Instruction *I) {
return false;
}
- EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
- if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT))
return false;
unsigned Op0Reg = getRegForValue(I->getOperand(0));
if (Op0Reg == 0) return false;
-
+
// Fold immediate in shl(x,3).
if (const ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
unsigned ResultReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpImm),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpImm),
ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff);
UpdateValueMap(I, ResultReg);
return true;
}
-
+
unsigned Op1Reg = getRegForValue(I->getOperand(1));
if (Op1Reg == 0) return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
@@ -1183,23 +1149,26 @@ bool X86FastISel::X86SelectShift(const Instruction *I) {
}
bool X86FastISel::X86SelectSelect(const Instruction *I) {
- EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
- if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT))
return false;
-
+
+ // We only use cmov here, if we don't have a cmov instruction bail.
+ if (!Subtarget->hasCMov()) return false;
+
unsigned Opc = 0;
const TargetRegisterClass *RC = NULL;
- if (VT.getSimpleVT() == MVT::i16) {
+ if (VT == MVT::i16) {
Opc = X86::CMOVE16rr;
RC = &X86::GR16RegClass;
- } else if (VT.getSimpleVT() == MVT::i32) {
+ } else if (VT == MVT::i32) {
Opc = X86::CMOVE32rr;
RC = &X86::GR32RegClass;
- } else if (VT.getSimpleVT() == MVT::i64) {
+ } else if (VT == MVT::i64) {
Opc = X86::CMOVE64rr;
RC = &X86::GR64RegClass;
} else {
- return false;
+ return false;
}
unsigned Op0Reg = getRegForValue(I->getOperand(0));
@@ -1264,7 +1233,7 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
return false;
EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
EVT DstVT = TLI.getValueType(I->getType());
-
+
// This code only handles truncation to byte right now.
if (DstVT != MVT::i8 && DstVT != MVT::i1)
// All other cases should be handled by the tblgen generated code.
@@ -1335,21 +1304,21 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
// Grab the frame index.
X86AddressMode AM;
if (!X86SelectAddress(Slot, AM)) return false;
-
+
if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
-
+
return true;
}
case Intrinsic::objectsize: {
ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
const Type *Ty = I.getCalledFunction()->getReturnType();
-
+
assert(CI && "Non-constant type in Intrinsic::objectsize?");
-
- EVT VT;
+
+ MVT VT;
if (!isTypeLegal(Ty, VT))
return false;
-
+
unsigned OpC = 0;
if (VT == MVT::i32)
OpC = X86::MOV32ri;
@@ -1357,7 +1326,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
OpC = X86::MOV64ri;
else
return false;
-
+
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg).
addImm(CI->isZero() ? -1ULL : 0);
@@ -1392,7 +1361,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
const Type *RetTy =
cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));
- EVT VT;
+ MVT VT;
if (!isTypeLegal(RetTy, VT))
return false;
@@ -1429,7 +1398,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
ResultReg = DestReg1+1;
else
ResultReg = createResultReg(TLI.getRegClassFor(MVT::i8));
-
+
unsigned Opc = X86::SETBr;
if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow)
Opc = X86::SETOr;
@@ -1476,7 +1445,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
// Handle *simple* calls for now.
const Type *RetTy = CS.getType();
- EVT RetVT;
+ MVT RetVT;
if (RetTy->isVoidTy())
RetVT = MVT::isVoid;
else if (!isTypeLegal(RetTy, RetVT, true))
@@ -1506,7 +1475,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
// Deal with call operands first.
SmallVector<const Value *, 8> ArgVals;
SmallVector<unsigned, 8> Args;
- SmallVector<EVT, 8> ArgVTs;
+ SmallVector<MVT, 8> ArgVTs;
SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
Args.reserve(CS.arg_size());
ArgVals.reserve(CS.arg_size());
@@ -1532,7 +1501,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
return false;
const Type *ArgTy = (*i)->getType();
- EVT ArgVT;
+ MVT ArgVT;
if (!isTypeLegal(ArgTy, ArgVT))
return false;
unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
@@ -1547,13 +1516,13 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, false, TM, ArgLocs, I->getParent()->getContext());
-
+
// Allocate shadow area for Win64
- if (Subtarget->isTargetWin64()) {
- CCInfo.AllocateStack(32, 8);
+ if (Subtarget->isTargetWin64()) {
+ CCInfo.AllocateStack(32, 8);
}
- CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC));
+ CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_X86);
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
@@ -1570,7 +1539,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
CCValAssign &VA = ArgLocs[i];
unsigned Arg = Args[VA.getValNo()];
EVT ArgVT = ArgVTs[VA.getValNo()];
-
+
// Promote the value if needed.
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
@@ -1578,20 +1547,21 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
case CCValAssign::SExt: {
bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
Arg, ArgVT, Arg);
- assert(Emitted && "Failed to emit a sext!"); Emitted=Emitted;
- Emitted = true;
+ assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
ArgVT = VA.getLocVT();
break;
}
case CCValAssign::ZExt: {
bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
Arg, ArgVT, Arg);
- assert(Emitted && "Failed to emit a zext!"); Emitted=Emitted;
- Emitted = true;
+ assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
ArgVT = VA.getLocVT();
break;
}
case CCValAssign::AExt: {
+ // We don't handle MMX parameters yet.
+ if (VA.getLocVT().isVector() && VA.getLocVT().getSizeInBits() == 128)
+ return false;
bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
Arg, ArgVT, Arg);
if (!Emitted)
@@ -1600,21 +1570,21 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (!Emitted)
Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
Arg, ArgVT, Arg);
-
- assert(Emitted && "Failed to emit a aext!"); Emitted=Emitted;
+
+ assert(Emitted && "Failed to emit a aext!"); (void)Emitted;
ArgVT = VA.getLocVT();
break;
}
case CCValAssign::BCvt: {
- unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT().getSimpleVT(),
- ISD::BIT_CONVERT, Arg, /*TODO: Kill=*/false);
+ unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT(),
+ ISD::BITCAST, Arg, /*TODO: Kill=*/false);
assert(BC != 0 && "Failed to emit a bitcast!");
Arg = BC;
ArgVT = VA.getLocVT();
break;
}
}
-
+
if (VA.isRegLoc()) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
VA.getLocReg()).addReg(Arg);
@@ -1625,7 +1595,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
AM.Base.Reg = StackPtr;
AM.Disp = LocMemOffset;
const Value *ArgVal = ArgVals[VA.getValNo()];
-
+
// If this is a really simple value, emit this with the Value* version of
// X86FastEmitStore. If it isn't simple, we don't want to do this, as it
// can cause us to reevaluate the argument.
@@ -1637,13 +1607,13 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
}
// ELF / PIC requires GOT in the EBX register before function calls via PLT
- // GOT pointer.
+ // GOT pointer.
if (Subtarget->isPICStyleGOT()) {
unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
X86::EBX).addReg(Base);
}
-
+
// Issue the call.
MachineInstrBuilder MIB;
if (CalleeOp) {
@@ -1657,7 +1627,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
CallOpc = X86::CALL32r;
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
.addReg(CalleeOp);
-
+
} else {
// Direct call.
assert(GV && "Not a direct call");
@@ -1668,10 +1638,10 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
CallOpc = X86::CALL64pcrel32;
else
CallOpc = X86::CALLpcrel32;
-
+
// See if we need any target-specific flags on the GV operand.
unsigned char OpFlags = 0;
-
+
// On ELF targets, in both X86-64 and X86-32 mode, direct calls to
// external symbols most go through the PLT in PIC mode. If the symbol
// has hidden or protected visibility, or if it is static or local, then
@@ -1688,8 +1658,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
// automatically synthesizes these stubs.
OpFlags = X86II::MO_DARWIN_STUB;
}
-
-
+
+
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
.addGlobalAddress(GV, 0, OpFlags);
}
@@ -1709,7 +1679,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
// Now handle call return value (if any).
SmallVector<unsigned, 4> UsedRegs;
- if (RetVT.getSimpleVT().SimpleTy != MVT::isVoid) {
+ if (RetVT != MVT::isVoid) {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CC, false, TM, RVLocs, I->getParent()->getContext());
CCInfo.AnalyzeCallResult(RetVT, RetCC_X86);
@@ -1718,7 +1688,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
assert(RVLocs.size() == 1 && "Can't handle multi-value calls!");
EVT CopyVT = RVLocs[0].getValVT();
TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
-
+
// If this is a call to a function that returns an fp value on the x87 fp
// stack, but where we prefer to use the value in xmm registers, copy it
// out as F80 and use a truncate to move it from fp stack reg to xmm reg.
@@ -1756,7 +1726,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (AndToI1) {
// Mask out all but lowest bit for some call which produces an i1.
unsigned AndResult = createResultReg(X86::GR8RegisterClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(X86::AND8ri), AndResult).addReg(ResultReg).addImm(1);
ResultReg = AndResult;
}
@@ -1823,14 +1793,14 @@ X86FastISel::TargetSelectInstruction(const Instruction *I) {
}
unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
- EVT VT;
+ MVT VT;
if (!isTypeLegal(C->getType(), VT))
return false;
-
+
// Get opcode and regclass of the output for the given load instruction.
unsigned Opc = 0;
const TargetRegisterClass *RC = NULL;
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.SimpleTy) {
default: return false;
case MVT::i8:
Opc = X86::MOV8rm;
@@ -1871,7 +1841,7 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
// No f80 support yet.
return false;
}
-
+
// Materialize addresses with LEA instructions.
if (isa<GlobalValue>(C)) {
X86AddressMode AM;
@@ -1887,14 +1857,14 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
}
return 0;
}
-
+
// MachineConstantPool wants an explicit alignment.
unsigned Align = TD.getPrefTypeAlignment(C->getType());
if (Align == 0) {
// Alignment of vector types. FIXME!
Align = TD.getTypeAllocSize(C->getType());
}
-
+
// x86-32 PIC requires a PIC base register for constant pools.
unsigned PICBase = 0;
unsigned char OpFlag = 0;
@@ -1941,6 +1911,34 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
return ResultReg;
}
+/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+/// vreg is being provided by the specified load instruction. If possible,
+/// try to fold the load as an operand to the instruction, returning true if
+/// possible.
+bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI) {
+ X86AddressMode AM;
+ if (!X86SelectAddress(LI->getOperand(0), AM))
+ return false;
+
+ X86InstrInfo &XII = (X86InstrInfo&)TII;
+
+ unsigned Size = TD.getTypeAllocSize(LI->getType());
+ unsigned Alignment = LI->getAlignment();
+
+ SmallVector<MachineOperand, 8> AddrOps;
+ AM.getFullAddress(AddrOps);
+
+ MachineInstr *Result =
+ XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
+ if (Result == 0) return false;
+
+ FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
+ MI->eraseFromParent();
+ return true;
+}
+
+
namespace llvm {
llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
return new X86FastISel(funcInfo);
diff --git a/lib/Target/X86/X86FixupKinds.h b/lib/Target/X86/X86FixupKinds.h
index 96e0aae..17d242a 100644
--- a/lib/Target/X86/X86FixupKinds.h
+++ b/lib/Target/X86/X86FixupKinds.h
@@ -15,11 +15,17 @@
namespace llvm {
namespace X86 {
enum Fixups {
- reloc_pcrel_4byte = FirstTargetFixupKind, // 32-bit pcrel, e.g. a branch.
- reloc_pcrel_1byte, // 8-bit pcrel, e.g. branch_1
- reloc_pcrel_2byte, // 16-bit pcrel, e.g. callw
- reloc_riprel_4byte, // 32-bit rip-relative
- reloc_riprel_4byte_movq_load // 32-bit rip-relative in movq
+ reloc_riprel_4byte = FirstTargetFixupKind, // 32-bit rip-relative
+ reloc_riprel_4byte_movq_load, // 32-bit rip-relative in movq
+ reloc_signed_4byte, // 32-bit signed. Unlike FK_Data_4
+ // this will be sign extended at
+ // runtime.
+ reloc_global_offset_table, // 32-bit, relative to the start
+ // of the instruction. Used only
+ // for _GLOBAL_OFFSET_TABLE_.
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
};
}
}
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index e6ebf66..3aaa693 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -32,6 +32,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -51,6 +52,7 @@ namespace {
struct FPS : public MachineFunctionPass {
static char ID;
FPS() : MachineFunctionPass(ID) {
+ initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
// This is really only to keep valgrind quiet.
// The logic in isLive() is too much for it.
memset(Stack, 0, sizeof(Stack));
@@ -59,6 +61,7 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+ AU.addRequired<EdgeBundles>();
AU.addPreservedID(MachineLoopInfoID);
AU.addPreservedID(MachineDominatorsID);
MachineFunctionPass::getAnalysisUsage(AU);
@@ -94,7 +97,7 @@ namespace {
// FixStack[i] == getStackEntry(i) for all i < FixCount.
unsigned char FixStack[8];
- LiveBundle(unsigned m = 0) : Mask(m), FixCount(0) {}
+ LiveBundle() : Mask(0), FixCount(0) {}
// Have the live registers been assigned a stack order yet?
bool isFixed() const { return !Mask || FixCount; }
@@ -104,10 +107,8 @@ namespace {
// with no live FP registers.
SmallVector<LiveBundle, 8> LiveBundles;
- // Map each MBB in the current function to an (ingoing, outgoing) index into
- // LiveBundles. Blocks with no FP registers live in or out map to (0, 0)
- // and are not actually stored in the map.
- DenseMap<MachineBasicBlock*, std::pair<unsigned, unsigned> > BlockBundle;
+ // The edge bundle analysis provides indices into the LiveBundles vector.
+ EdgeBundles *Bundles;
// Return a bitmask of FP registers in block's live-in list.
unsigned calcLiveInMask(MachineBasicBlock *MBB) {
@@ -167,7 +168,8 @@ namespace {
/// getStackEntry - Return the X86::FP<n> register in register ST(i).
unsigned getStackEntry(unsigned STi) const {
- assert(STi < StackTop && "Access past stack top!");
+ if (STi >= StackTop)
+ report_fatal_error("Access past stack top!");
return Stack[StackTop-1-STi];
}
@@ -180,7 +182,8 @@ namespace {
// pushReg - Push the specified FP<n> register onto the stack.
void pushReg(unsigned Reg) {
assert(Reg < 8 && "Register number out of range!");
- assert(StackTop < 8 && "Stack overflow!");
+ if (StackTop >= 8)
+ report_fatal_error("Stack overflow!");
Stack[StackTop] = Reg;
RegMap[Reg] = StackTop++;
}
@@ -197,7 +200,8 @@ namespace {
std::swap(RegMap[RegNo], RegMap[RegOnTop]);
// Swap stack slot contents.
- assert(RegMap[RegOnTop] < StackTop);
+ if (RegMap[RegOnTop] >= StackTop)
+ report_fatal_error("Access past stack top!");
std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
// Emit an fxch to update the runtime processors version of the state.
@@ -281,6 +285,7 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
// Early exit.
if (!FPIsUsed) return false;
+ Bundles = &getAnalysis<EdgeBundles>();
TII = MF.getTarget().getInstrInfo();
// Prepare cross-MBB liveness.
@@ -305,7 +310,6 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
if (Processed.insert(BB))
Changed |= processBasicBlock(MF, *BB);
- BlockBundle.clear();
LiveBundles.clear();
return Changed;
@@ -318,90 +322,16 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
/// registers may be implicitly defined, or not used by all successors.
void FPS::bundleCFG(MachineFunction &MF) {
assert(LiveBundles.empty() && "Stale data in LiveBundles");
- assert(BlockBundle.empty() && "Stale data in BlockBundle");
- SmallPtrSet<MachineBasicBlock*, 8> PropDown, PropUp;
+ LiveBundles.resize(Bundles->getNumBundles());
- // LiveBundle[0] is the empty live-in set.
- LiveBundles.resize(1);
-
- // First gather the actual live-in masks for all MBBs.
+ // Gather the actual live-in masks for all MBBs.
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
MachineBasicBlock *MBB = I;
const unsigned Mask = calcLiveInMask(MBB);
if (!Mask)
continue;
- // Ingoing bundle index.
- unsigned &Idx = BlockBundle[MBB].first;
- // Already assigned an ingoing bundle?
- if (Idx)
- continue;
- // Allocate a new LiveBundle struct for this block's live-ins.
- const unsigned BundleIdx = Idx = LiveBundles.size();
- DEBUG(dbgs() << "Creating LB#" << BundleIdx << ": in:BB#"
- << MBB->getNumber());
- LiveBundles.push_back(Mask);
- LiveBundle &Bundle = LiveBundles.back();
-
- // Make sure all predecessors have the same live-out set.
- PropUp.insert(MBB);
-
- // Keep pushing liveness up and down the CFG until convergence.
- // Only critical edges cause iteration here, but when they do, multiple
- // blocks can be assigned to the same LiveBundle index.
- do {
- // Assign BundleIdx as liveout from predecessors in PropUp.
- for (SmallPtrSet<MachineBasicBlock*, 16>::iterator I = PropUp.begin(),
- E = PropUp.end(); I != E; ++I) {
- MachineBasicBlock *MBB = *I;
- for (MachineBasicBlock::const_pred_iterator LinkI = MBB->pred_begin(),
- LinkE = MBB->pred_end(); LinkI != LinkE; ++LinkI) {
- MachineBasicBlock *PredMBB = *LinkI;
- // PredMBB's liveout bundle should be set to LIIdx.
- unsigned &Idx = BlockBundle[PredMBB].second;
- if (Idx) {
- assert(Idx == BundleIdx && "Inconsistent CFG");
- continue;
- }
- Idx = BundleIdx;
- DEBUG(dbgs() << " out:BB#" << PredMBB->getNumber());
- // Propagate to siblings.
- if (PredMBB->succ_size() > 1)
- PropDown.insert(PredMBB);
- }
- }
- PropUp.clear();
-
- // Assign BundleIdx as livein to successors in PropDown.
- for (SmallPtrSet<MachineBasicBlock*, 16>::iterator I = PropDown.begin(),
- E = PropDown.end(); I != E; ++I) {
- MachineBasicBlock *MBB = *I;
- for (MachineBasicBlock::const_succ_iterator LinkI = MBB->succ_begin(),
- LinkE = MBB->succ_end(); LinkI != LinkE; ++LinkI) {
- MachineBasicBlock *SuccMBB = *LinkI;
- // LinkMBB's livein bundle should be set to BundleIdx.
- unsigned &Idx = BlockBundle[SuccMBB].first;
- if (Idx) {
- assert(Idx == BundleIdx && "Inconsistent CFG");
- continue;
- }
- Idx = BundleIdx;
- DEBUG(dbgs() << " in:BB#" << SuccMBB->getNumber());
- // Propagate to siblings.
- if (SuccMBB->pred_size() > 1)
- PropUp.insert(SuccMBB);
- // Also accumulate the bundle liveness mask from the liveins here.
- Bundle.Mask |= calcLiveInMask(SuccMBB);
- }
- }
- PropDown.clear();
- } while (!PropUp.empty());
- DEBUG({
- dbgs() << " live:";
- for (unsigned i = 0; i < 8; ++i)
- if (Bundle.Mask & (1<<i))
- dbgs() << " %FP" << i;
- dbgs() << '\n';
- });
+ // Update MBB ingoing bundle mask.
+ LiveBundles[Bundles->getBundle(MBB->getNumber(), false)].Mask |= Mask;
}
}
@@ -489,13 +419,15 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
return Changed;
}
-/// setupBlockStack - Use the BlockBundle map to set up our model of the stack
+/// setupBlockStack - Use the live bundles to set up our model of the stack
/// to match predecessors' live out stack.
void FPS::setupBlockStack() {
DEBUG(dbgs() << "\nSetting up live-ins for BB#" << MBB->getNumber()
<< " derived from " << MBB->getName() << ".\n");
StackTop = 0;
- const LiveBundle &Bundle = LiveBundles[BlockBundle.lookup(MBB).first];
+ // Get the live-in bundle for MBB.
+ const LiveBundle &Bundle =
+ LiveBundles[Bundles->getBundle(MBB->getNumber(), false)];
if (!Bundle.Mask) {
DEBUG(dbgs() << "Block has no FP live-ins.\n");
@@ -532,7 +464,8 @@ void FPS::finishBlockStack() {
DEBUG(dbgs() << "Setting up live-outs for BB#" << MBB->getNumber()
<< " derived from " << MBB->getName() << ".\n");
- unsigned BundleIdx = BlockBundle.lookup(MBB).second;
+ // Get MBB's live-out bundle.
+ unsigned BundleIdx = Bundles->getBundle(MBB->getNumber(), true);
LiveBundle &Bundle = LiveBundles[BundleIdx];
// We may need to kill and define some registers to match successors.
@@ -572,7 +505,8 @@ namespace {
friend bool operator<(const TableEntry &TE, unsigned V) {
return TE.from < V;
}
- friend bool ATTRIBUTE_USED operator<(unsigned V, const TableEntry &TE) {
+ friend bool LLVM_ATTRIBUTE_USED operator<(unsigned V,
+ const TableEntry &TE) {
return V < TE.from;
}
};
@@ -824,7 +758,8 @@ void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
MachineInstr* MI = I;
DebugLoc dl = MI->getDebugLoc();
ASSERT_SORTED(PopTable);
- assert(StackTop > 0 && "Cannot pop empty stack!");
+ if (StackTop == 0)
+ report_fatal_error("Cannot pop empty stack!");
RegMap[Stack[--StackTop]] = ~0; // Update state
// Check to see if there is a popping version of this instruction...
@@ -1016,7 +951,8 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
MI->getOpcode() == X86::ISTT_FP32m ||
MI->getOpcode() == X86::ISTT_FP64m ||
MI->getOpcode() == X86::ST_FP80m) {
- assert(StackTop > 0 && "Stack empty??");
+ if (StackTop == 0)
+ report_fatal_error("Stack empty??");
--StackTop;
} else if (KillsSrc) { // Last use of operand?
popStackAfter(I);
@@ -1047,7 +983,8 @@ void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
// If this is the last use of the source register, just make sure it's on
// the top of the stack.
moveToTop(Reg, I);
- assert(StackTop > 0 && "Stack cannot be empty!");
+ if (StackTop == 0)
+ report_fatal_error("Stack cannot be empty!");
--StackTop;
pushReg(getFPReg(MI->getOperand(0)));
} else {
@@ -1300,7 +1237,6 @@ void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
///
void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
MachineInstr *MI = I;
- DebugLoc dl = MI->getDebugLoc();
switch (MI->getOpcode()) {
default: llvm_unreachable("Unknown SpecialFP instruction!");
case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type!
@@ -1341,7 +1277,8 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
std::swap(RegMap[RegNo], RegMap[RegOnTop]);
// Swap stack slot contents.
- assert(RegMap[RegOnTop] < StackTop);
+ if (RegMap[RegOnTop] >= StackTop)
+ report_fatal_error("Access past stack top!");
std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
break;
}
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
new file mode 100644
index 0000000..0a3f931
--- /dev/null
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -0,0 +1,994 @@
+//=======- X86FrameLowering.cpp - X86 Frame Information ------------*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86FrameLowering.h"
+#include "X86InstrBuilder.h"
+#include "X86InstrInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86TargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SmallSet.h"
+
+using namespace llvm;
+
+// FIXME: completely move here.
+extern cl::opt<bool> ForceStackAlign;
+
+bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+/// hasFP - Return true if the specified function should have a dedicated frame
+/// pointer register. This is true if the function has variable sized allocas
+/// or if frame pointer elimination is disabled.
+bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineModuleInfo &MMI = MF.getMMI();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+
+ return (DisableFramePointerElim(MF) ||
+ RI->needsStackRealignment(MF) ||
+ MFI->hasVarSizedObjects() ||
+ MFI->isFrameAddressTaken() ||
+ MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
+ MMI.callsUnwindInit());
+}
+
+static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
+ if (is64Bit) {
+ if (isInt<8>(Imm))
+ return X86::SUB64ri8;
+ return X86::SUB64ri32;
+ } else {
+ if (isInt<8>(Imm))
+ return X86::SUB32ri8;
+ return X86::SUB32ri;
+ }
+}
+
+static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
+ if (is64Bit) {
+ if (isInt<8>(Imm))
+ return X86::ADD64ri8;
+ return X86::ADD64ri32;
+ } else {
+ if (isInt<8>(Imm))
+ return X86::ADD32ri8;
+ return X86::ADD32ri;
+ }
+}
+
+/// findDeadCallerSavedReg - Return a caller-saved register that isn't live
+/// when it reaches the "return" instruction. We can then pop a stack object
+/// to this register without worry about clobbering it.
+static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ const TargetRegisterInfo &TRI,
+ bool Is64Bit) {
+ const MachineFunction *MF = MBB.getParent();
+ const Function *F = MF->getFunction();
+ if (!F || MF->getMMI().callsEHReturn())
+ return 0;
+
+ static const unsigned CallerSavedRegs32Bit[] = {
+ X86::EAX, X86::EDX, X86::ECX
+ };
+
+ static const unsigned CallerSavedRegs64Bit[] = {
+ X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,
+ X86::R8, X86::R9, X86::R10, X86::R11
+ };
+
+ unsigned Opc = MBBI->getOpcode();
+ switch (Opc) {
+ default: return 0;
+ case X86::RET:
+ case X86::RETI:
+ case X86::TCRETURNdi:
+ case X86::TCRETURNri:
+ case X86::TCRETURNmi:
+ case X86::TCRETURNdi64:
+ case X86::TCRETURNri64:
+ case X86::TCRETURNmi64:
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64: {
+ SmallSet<unsigned, 8> Uses;
+ for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MBBI->getOperand(i);
+ if (!MO.isReg() || MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ for (const unsigned *AsI = TRI.getOverlaps(Reg); *AsI; ++AsI)
+ Uses.insert(*AsI);
+ }
+
+ const unsigned *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit;
+ for (; *CS; ++CS)
+ if (!Uses.count(*CS))
+ return *CS;
+ }
+ }
+
+ return 0;
+}
+
+
+/// emitSPUpdate - Emit a series of instructions to increment / decrement the
+/// stack pointer by a constant value.
+static
+void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr, int64_t NumBytes,
+ bool Is64Bit, const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI) {
+ bool isSub = NumBytes < 0;
+ uint64_t Offset = isSub ? -NumBytes : NumBytes;
+ unsigned Opc = isSub ?
+ getSUBriOpcode(Is64Bit, Offset) :
+ getADDriOpcode(Is64Bit, Offset);
+ uint64_t Chunk = (1LL << 31) - 1;
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+
+ while (Offset) {
+ uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
+ if (ThisVal == (Is64Bit ? 8 : 4)) {
+ // Use push / pop instead.
+ unsigned Reg = isSub
+ ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
+ : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
+ if (Reg) {
+ Opc = isSub
+ ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
+ : (Is64Bit ? X86::POP64r : X86::POP32r);
+ BuildMI(MBB, MBBI, DL, TII.get(Opc))
+ .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
+ Offset -= ThisVal;
+ continue;
+ }
+ }
+
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr)
+ .addImm(ThisVal);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+ Offset -= ThisVal;
+ }
+}
+
+/// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator.
+static
+void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr, uint64_t *NumBytes = NULL) {
+ if (MBBI == MBB.begin()) return;
+
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ unsigned Opc = PI->getOpcode();
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes += PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes -= PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ }
+}
+
+/// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower iterator.
+static
+void mergeSPUpdatesDown(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr, uint64_t *NumBytes = NULL) {
+ // FIXME: THIS ISN'T RUN!!!
+ return;
+
+ if (MBBI == MBB.end()) return;
+
+ MachineBasicBlock::iterator NI = llvm::next(MBBI);
+ if (NI == MBB.end()) return;
+
+ unsigned Opc = NI->getOpcode();
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ NI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes -= NI->getOperand(2).getImm();
+ MBB.erase(NI);
+ MBBI = NI;
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+ NI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes += NI->getOperand(2).getImm();
+ MBB.erase(NI);
+ MBBI = NI;
+ }
+}
+
+/// mergeSPUpdates - Checks the instruction before/after the passed
+/// instruction. If it is an ADD/SUB instruction it is deleted argument and the
+/// stack adjustment is returned as a positive value for ADD and a negative for
+/// SUB.
+static int mergeSPUpdates(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr,
+ bool doMergeWithPrevious) {
+ if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
+ (!doMergeWithPrevious && MBBI == MBB.end()))
+ return 0;
+
+ MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
+ MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : llvm::next(MBBI);
+ unsigned Opc = PI->getOpcode();
+ int Offset = 0;
+
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr){
+ Offset += PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ if (!doMergeWithPrevious) MBBI = NI;
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ Offset -= PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ if (!doMergeWithPrevious) MBBI = NI;
+ }
+
+ return Offset;
+}
+
+static bool isEAXLiveIn(MachineFunction &MF) {
+ for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
+ EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
+ unsigned Reg = II->first;
+
+ if (Reg == X86::EAX || Reg == X86::AX ||
+ Reg == X86::AH || Reg == X86::AL)
+ return true;
+ }
+
+ return false;
+}
+
+void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
+ MCSymbol *Label,
+ unsigned FramePtr) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo &MMI = MF.getMMI();
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ if (CSI.empty()) return;
+
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ const TargetData *TD = TM.getTargetData();
+ bool HasFP = hasFP(MF);
+
+ // Calculate amount of bytes used for return address storing.
+ int stackGrowth = -TD->getPointerSize();
+
+ // FIXME: This is dirty hack. The code itself is pretty mess right now.
+ // It should be rewritten from scratch and generalized sometimes.
+
+ // Determine maximum offset (minumum due to stack growth).
+ int64_t MaxOffset = 0;
+ for (std::vector<CalleeSavedInfo>::const_iterator
+ I = CSI.begin(), E = CSI.end(); I != E; ++I)
+ MaxOffset = std::min(MaxOffset,
+ MFI->getObjectOffset(I->getFrameIdx()));
+
+ // Calculate offsets.
+ int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth;
+ for (std::vector<CalleeSavedInfo>::const_iterator
+ I = CSI.begin(), E = CSI.end(); I != E; ++I) {
+ int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
+ unsigned Reg = I->getReg();
+ Offset = MaxOffset - Offset + saveAreaOffset;
+
+ // Don't output a new machine move if we're re-saving the frame
+ // pointer. This happens when the PrologEpilogInserter has inserted an extra
+ // "PUSH" of the frame pointer -- the "emitPrologue" method automatically
+ // generates one when frame pointers are used. If we generate a "machine
+ // move" for this extra "PUSH", the linker will lose track of the fact that
+ // the frame pointer should have the value of the first "PUSH" when it's
+ // trying to unwind.
+ //
+ // FIXME: This looks inelegant. It's possibly correct, but it's covering up
+ // another bug. I.e., one where we generate a prolog like this:
+ //
+ // pushl %ebp
+ // movl %esp, %ebp
+ // pushl %ebp
+ // pushl %esi
+ // ...
+ //
+ // The immediate re-push of EBP is unnecessary. At the least, it's an
+ // optimization bug. EBP can be used as a scratch register in certain
+ // cases, but probably not when we have a frame pointer.
+ if (HasFP && FramePtr == Reg)
+ continue;
+
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(Label, CSDst, CSSrc));
+ }
+}
+
+/// emitPrologue - Push callee-saved registers onto the stack, which
+/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
+/// space for local variables. Also emit labels used by the exception handler to
+/// generate the exception handling frames.
+void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const Function *Fn = MF.getFunction();
+ const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+ const X86InstrInfo &TII = *TM.getInstrInfo();
+ MachineModuleInfo &MMI = MF.getMMI();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ bool needsFrameMoves = MMI.hasDebugInfo() ||
+ !Fn->doesNotThrow() || UnwindTablesMandatory;
+ uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
+ uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
+ bool HasFP = hasFP(MF);
+ bool Is64Bit = STI.is64Bit();
+ bool IsWin64 = STI.isTargetWin64();
+ unsigned StackAlign = getStackAlignment();
+ unsigned SlotSize = RegInfo->getSlotSize();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned StackPtr = RegInfo->getStackRegister();
+
+ DebugLoc DL;
+
+ // If we're forcing a stack realignment we can't rely on just the frame
+ // info, we need to know the ABI stack alignment as well in case we
+ // have a call out. Otherwise just make sure we have some alignment - we'll
+ // go with the minimum SlotSize.
+ if (ForceStackAlign) {
+ if (MFI->hasCalls())
+ MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
+ else if (MaxAlign < SlotSize)
+ MaxAlign = SlotSize;
+ }
+
+ // Add RETADDR move area to callee saved frame size.
+ int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+ if (TailCallReturnAddrDelta < 0)
+ X86FI->setCalleeSavedFrameSize(
+ X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
+
+ // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
+ // function, and use up to 128 bytes of stack space, don't have a frame
+ // pointer, calls, or dynamic alloca then we do not need to adjust the
+ // stack pointer (we fit in the Red Zone).
+ if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
+ !RegInfo->needsStackRealignment(MF) &&
+ !MFI->hasVarSizedObjects() && // No dynamic alloca.
+ !MFI->adjustsStack() && // No calls.
+ !IsWin64) { // Win64 has no Red Zone
+ uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
+ if (HasFP) MinSize += SlotSize;
+ StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
+ MFI->setStackSize(StackSize);
+ }
+
+ // Insert stack pointer adjustment for later moving of return addr. Only
+ // applies to tail call optimized functions where the callee argument stack
+ // size is bigger than the callers.
+ if (TailCallReturnAddrDelta < 0) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL,
+ TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)),
+ StackPtr)
+ .addReg(StackPtr)
+ .addImm(-TailCallReturnAddrDelta);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+ }
+
+ // Mapping for machine moves:
+ //
+ // DST: VirtualFP AND
+ // SRC: VirtualFP => DW_CFA_def_cfa_offset
+ // ELSE => DW_CFA_def_cfa
+ //
+ // SRC: VirtualFP AND
+ // DST: Register => DW_CFA_def_cfa_register
+ //
+ // ELSE
+ // OFFSET < 0 => DW_CFA_offset_extended_sf
+ // REG < 64 => DW_CFA_offset + Reg
+ // ELSE => DW_CFA_offset_extended
+
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ const TargetData *TD = MF.getTarget().getTargetData();
+ uint64_t NumBytes = 0;
+ int stackGrowth = -TD->getPointerSize();
+
+ if (HasFP) {
+ // Calculate required stack adjustment.
+ uint64_t FrameSize = StackSize - SlotSize;
+ if (RegInfo->needsStackRealignment(MF))
+ FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
+
+ NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
+
+ // Get the offset of the stack slot for the EBP register, which is
+ // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
+ // Update the frame offset adjustment.
+ MFI->setOffsetAdjustment(-NumBytes);
+
+ // Save EBP/RBP into the appropriate stack slot.
+ BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
+ .addReg(FramePtr, RegState::Kill);
+
+ if (needsFrameMoves) {
+ // Mark the place where EBP/RBP was saved.
+ MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
+
+ // Define the current CFA rule to use the provided offset.
+ if (StackSize) {
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, 2 * stackGrowth);
+ Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+ } else {
+ MachineLocation SPDst(StackPtr);
+ MachineLocation SPSrc(StackPtr, stackGrowth);
+ Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+ }
+
+ // Change the rule for the FramePtr to be an "offset" rule.
+ MachineLocation FPDst(MachineLocation::VirtualFP, 2 * stackGrowth);
+ MachineLocation FPSrc(FramePtr);
+ Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
+ }
+
+ // Update EBP with the new base value...
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
+ .addReg(StackPtr);
+
+ if (needsFrameMoves) {
+ // Mark effective beginning of when frame pointer becomes valid.
+ MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
+
+ // Define the current CFA to use the EBP/RBP register.
+ MachineLocation FPDst(FramePtr);
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
+ }
+
+ // Mark the FramePtr as live-in in every block except the entry.
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+ I != E; ++I)
+ I->addLiveIn(FramePtr);
+
+ // Realign stack
+ if (RegInfo->needsStackRealignment(MF)) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri),
+ StackPtr).addReg(StackPtr).addImm(-MaxAlign);
+
+ // The EFLAGS implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
+ } else {
+ NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
+ }
+
+ // Skip the callee-saved push instructions.
+ bool PushedRegs = false;
+ int StackOffset = 2 * stackGrowth;
+
+ while (MBBI != MBB.end() &&
+ (MBBI->getOpcode() == X86::PUSH32r ||
+ MBBI->getOpcode() == X86::PUSH64r)) {
+ PushedRegs = true;
+ ++MBBI;
+
+ if (!HasFP && needsFrameMoves) {
+ // Mark callee-saved push instruction.
+ MCSymbol *Label = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
+
+ // Define the current CFA rule to use the provided offset.
+ unsigned Ptr = StackSize ?
+ MachineLocation::VirtualFP : StackPtr;
+ MachineLocation SPDst(Ptr);
+ MachineLocation SPSrc(Ptr, StackOffset);
+ Moves.push_back(MachineMove(Label, SPDst, SPSrc));
+ StackOffset += stackGrowth;
+ }
+ }
+
+ DL = MBB.findDebugLoc(MBBI);
+
+ // If there is an SUB32ri of ESP immediately before this instruction, merge
+ // the two. This can be the case when tail call elimination is enabled and
+ // the callee has more arguments then the caller.
+ NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
+
+ // If there is an ADD32ri or SUB32ri of ESP immediately after this
+ // instruction, merge the two instructions.
+ mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
+
+ // Adjust stack pointer: ESP -= numbytes.
+
+ // Windows and cygwin/mingw require a prologue helper routine when allocating
+ // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
+ // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
+ // stack and adjust the stack pointer in one go. The 64-bit version of
+ // __chkstk is only responsible for probing the stack. The 64-bit prologue is
+ // responsible for adjusting the stack pointer. Touching the stack at 4K
+ // increments is necessary to ensure that the guard pages used by the OS
+ // virtual memory manager are allocated in correct sequence.
+ if (NumBytes >= 4096 &&
+ (STI.isTargetCygMing() || STI.isTargetWin32()) &&
+ !STI.isTargetEnvMacho()) {
+ // Check whether EAX is livein for this function.
+ bool isEAXAlive = isEAXLiveIn(MF);
+
+ const char *StackProbeSymbol =
+ STI.isTargetWindows() ? "_chkstk" : "_alloca";
+ if (Is64Bit && STI.isTargetCygMing())
+ StackProbeSymbol = "__chkstk";
+ unsigned CallOp = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
+ if (!isEAXAlive) {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
+ .addImm(NumBytes);
+ BuildMI(MBB, MBBI, DL, TII.get(CallOp))
+ .addExternalSymbol(StackProbeSymbol)
+ .addReg(StackPtr, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+ } else {
+ // Save EAX
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
+ .addReg(X86::EAX, RegState::Kill);
+
+ // Allocate NumBytes-4 bytes on stack. We'll also use 4 already
+ // allocated bytes for EAX.
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
+ .addImm(NumBytes - 4);
+ BuildMI(MBB, MBBI, DL, TII.get(CallOp))
+ .addExternalSymbol(StackProbeSymbol)
+ .addReg(StackPtr, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+
+ // Restore EAX
+ MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
+ X86::EAX),
+ StackPtr, false, NumBytes - 4);
+ MBB.insert(MBBI, MI);
+ }
+ } else if (NumBytes >= 4096 &&
+ STI.isTargetWin64() &&
+ !STI.isTargetEnvMacho()) {
+ // Sanity check that EAX is not livein for this function. It should
+ // not be, so throw an assert.
+ assert(!isEAXLiveIn(MF) && "EAX is livein in the Win64 case!");
+
+ // Handle the 64-bit Windows ABI case where we need to call __chkstk.
+ // Function prologue is responsible for adjusting the stack pointer.
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
+ .addImm(NumBytes);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::WINCALL64pcrel32))
+ .addExternalSymbol("__chkstk")
+ .addReg(StackPtr, RegState::Define | RegState::Implicit);
+ emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
+ TII, *RegInfo);
+ } else if (NumBytes)
+ emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
+ TII, *RegInfo);
+
+ if ((NumBytes || PushedRegs) && needsFrameMoves) {
+ // Mark end of stack pointer adjustment.
+ MCSymbol *Label = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
+
+ if (!HasFP && NumBytes) {
+ // Define the current CFA rule to use the provided offset.
+ if (StackSize) {
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP,
+ -StackSize + stackGrowth);
+ Moves.push_back(MachineMove(Label, SPDst, SPSrc));
+ } else {
+ MachineLocation SPDst(StackPtr);
+ MachineLocation SPSrc(StackPtr, stackGrowth);
+ Moves.push_back(MachineMove(Label, SPDst, SPSrc));
+ }
+ }
+
+ // Emit DWARF info specifying the offsets of the callee-saved registers.
+ if (PushedRegs)
+ emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr);
+ }
+}
+
+void X86FrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+ const X86InstrInfo &TII = *TM.getInstrInfo();
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI != MBB.end() && "Returning block has no instructions");
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc DL = MBBI->getDebugLoc();
+ bool Is64Bit = STI.is64Bit();
+ unsigned StackAlign = getStackAlignment();
+ unsigned SlotSize = RegInfo->getSlotSize();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned StackPtr = RegInfo->getStackRegister();
+
+ switch (RetOpcode) {
+ default:
+ llvm_unreachable("Can only insert epilog into returning blocks");
+ case X86::RET:
+ case X86::RETI:
+ case X86::TCRETURNdi:
+ case X86::TCRETURNri:
+ case X86::TCRETURNmi:
+ case X86::TCRETURNdi64:
+ case X86::TCRETURNri64:
+ case X86::TCRETURNmi64:
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64:
+ break; // These are ok
+ }
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ uint64_t StackSize = MFI->getStackSize();
+ uint64_t MaxAlign = MFI->getMaxAlignment();
+ unsigned CSSize = X86FI->getCalleeSavedFrameSize();
+ uint64_t NumBytes = 0;
+
+ // If we're forcing a stack realignment we can't rely on just the frame
+ // info, we need to know the ABI stack alignment as well in case we
+ // have a call out. Otherwise just make sure we have some alignment - we'll
+ // go with the minimum.
+ if (ForceStackAlign) {
+ if (MFI->hasCalls())
+ MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
+ else
+ MaxAlign = MaxAlign ? MaxAlign : 4;
+ }
+
+ if (hasFP(MF)) {
+ // Calculate required stack adjustment.
+ uint64_t FrameSize = StackSize - SlotSize;
+ if (RegInfo->needsStackRealignment(MF))
+ FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;
+
+ NumBytes = FrameSize - CSSize;
+
+ // Pop EBP.
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr);
+ } else {
+ NumBytes = StackSize - CSSize;
+ }
+
+ // Skip the callee-saved pop instructions.
+ MachineBasicBlock::iterator LastCSPop = MBBI;
+ while (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ unsigned Opc = PI->getOpcode();
+
+ if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
+ !PI->getDesc().isTerminator())
+ break;
+
+ --MBBI;
+ }
+
+ DL = MBBI->getDebugLoc();
+
+ // If there is an ADD32ri or SUB32ri of ESP immediately before this
+ // instruction, merge the two instructions.
+ if (NumBytes || MFI->hasVarSizedObjects())
+ mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
+
+ // If dynamic alloca is used, then reset esp to point to the last callee-saved
+ // slot before popping them off! Same applies for the case, when stack was
+ // realigned.
+ if (RegInfo->needsStackRealignment(MF)) {
+ // We cannot use LEA here, because stack pointer was realigned. We need to
+ // deallocate local frame back.
+ if (CSSize) {
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo);
+ MBBI = prior(LastCSPop);
+ }
+
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
+ StackPtr).addReg(FramePtr);
+ } else if (MFI->hasVarSizedObjects()) {
+ if (CSSize) {
+ unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
+ MachineInstr *MI =
+ addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
+ FramePtr, false, -CSSize);
+ MBB.insert(MBBI, MI);
+ } else {
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr)
+ .addReg(FramePtr);
+ }
+ } else if (NumBytes) {
+ // Adjust stack pointer back: ESP += numbytes.
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo);
+ }
+
+ // We're returning from function via eh_return.
+ if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &DestAddr = MBBI->getOperand(0);
+ assert(DestAddr.isReg() && "Offset should be in register!");
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
+ StackPtr).addReg(DestAddr.getReg());
+ } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
+ RetOpcode == X86::TCRETURNmi ||
+ RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 ||
+ RetOpcode == X86::TCRETURNmi64) {
+ bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
+ // Tail call return: adjust the stack pointer and jump to callee.
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
+ assert(StackAdjust.isImm() && "Expecting immediate value.");
+
+ // Adjust stack pointer.
+ int StackAdj = StackAdjust.getImm();
+ int MaxTCDelta = X86FI->getTCReturnAddrDelta();
+ int Offset = 0;
+ assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
+
+ // Incoporate the retaddr area.
+ Offset = StackAdj-MaxTCDelta;
+ assert(Offset >= 0 && "Offset should never be negative");
+
+ if (Offset) {
+ // Check for possible merge with preceeding ADD instruction.
+ Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
+ emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII, *RegInfo);
+ }
+
+ // Jump to label or value in register.
+ if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
+ ? X86::TAILJMPd : X86::TAILJMPd64));
+ if (JumpTarget.isGlobal())
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ else {
+ assert(JumpTarget.isSymbol());
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
+ } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
+ ? X86::TAILJMPm : X86::TAILJMPm64));
+ for (unsigned i = 0; i != 5; ++i)
+ MIB.addOperand(MBBI->getOperand(i));
+ } else if (RetOpcode == X86::TCRETURNri64) {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ } else {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ }
+
+ MachineInstr *NewMI = prior(MBBI);
+ for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
+ NewMI->addOperand(MBBI->getOperand(i));
+
+ // Delete the pseudo instruction TCRETURN.
+ MBB.erase(MBBI);
+ } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) &&
+ (X86FI->getTCReturnAddrDelta() < 0)) {
+ // Add the return addr area delta back since we are not tail calling.
+ int delta = -1*X86FI->getTCReturnAddrDelta();
+ MBBI = MBB.getLastNonDebugInstr();
+
+ // Check for possible merge with preceeding ADD instruction.
+ delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
+ emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII, *RegInfo);
+ }
+}
+
+void
+X86FrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) const {
+ // Calculate amount of bytes used for return address storing
+ int stackGrowth = (STI.is64Bit() ? -8 : -4);
+ const X86RegisterInfo *RI = TM.getRegisterInfo();
+
+ // Initial state of the frame pointer is esp+stackGrowth.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(RI->getStackRegister(), stackGrowth);
+ Moves.push_back(MachineMove(0, Dst, Src));
+
+ // Add return address to move list
+ MachineLocation CSDst(RI->getStackRegister(), stackGrowth);
+ MachineLocation CSSrc(RI->getRARegister());
+ Moves.push_back(MachineMove(0, CSDst, CSSrc));
+}
+
+int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) const {
+ const X86RegisterInfo *RI =
+ static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
+ uint64_t StackSize = MFI->getStackSize();
+
+ if (RI->needsStackRealignment(MF)) {
+ if (FI < 0) {
+ // Skip the saved EBP.
+ Offset += RI->getSlotSize();
+ } else {
+ unsigned Align = MFI->getObjectAlignment(FI);
+ assert((-(Offset + StackSize)) % Align == 0);
+ Align = 0;
+ return Offset + StackSize;
+ }
+ // FIXME: Support tail calls
+ } else {
+ if (!hasFP(MF))
+ return Offset + StackSize;
+
+ // Skip the saved EBP.
+ Offset += RI->getSlotSize();
+
+ // Skip the RETADDR move area
+ const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+ if (TailCallReturnAddrDelta < 0)
+ Offset -= TailCallReturnAddrDelta;
+ }
+
+ return Offset;
+}
+
+bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL = MBB.findDebugLoc(MI);
+
+ MachineFunction &MF = *MBB.getParent();
+
+ bool isWin64 = STI.isTargetWin64();
+ unsigned SlotSize = STI.is64Bit() ? 8 : 4;
+ unsigned FPReg = TRI->getFrameRegister(MF);
+ unsigned CalleeFrameSize = 0;
+
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+
+ unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ if (Reg == FPReg)
+ // X86RegisterInfo::emitPrologue will handle spilling of frame register.
+ continue;
+ if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
+ CalleeFrameSize += SlotSize;
+ BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill);
+ } else {
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
+ RC, TRI);
+ }
+ }
+
+ X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
+ return true;
+}
+
+bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL = MBB.findDebugLoc(MI);
+
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ unsigned FPReg = TRI->getFrameRegister(MF);
+ bool isWin64 = STI.isTargetWin64();
+ unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (Reg == FPReg)
+ // X86RegisterInfo::emitEpilogue will handle restoring of frame register.
+ continue;
+ if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
+ BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
+ } else {
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
+ RC, TRI);
+ }
+ }
+ return true;
+}
+
+void
+X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+ unsigned SlotSize = RegInfo->getSlotSize();
+
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+
+ if (TailCallReturnAddrDelta < 0) {
+ // create RETURNADDR area
+ // arg
+ // arg
+ // RETADDR
+ // { ...
+ // RETADDR area
+ // ...
+ // }
+ // [EBP]
+ MFI->CreateFixedObject(-TailCallReturnAddrDelta,
+ (-1U*SlotSize)+TailCallReturnAddrDelta, true);
+ }
+
+ if (hasFP(MF)) {
+ assert((TailCallReturnAddrDelta <= 0) &&
+ "The Delta should always be zero or negative");
+ const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering();
+
+ // Create a frame entry for the EBP register that must be saved.
+ int FrameIdx = MFI->CreateFixedObject(SlotSize,
+ -(int)SlotSize +
+ TFI.getOffsetOfLocalArea() +
+ TailCallReturnAddrDelta,
+ true);
+ assert(FrameIdx == MFI->getObjectIndexBegin() &&
+ "Slot for EBP register must be last in order to be found!");
+ FrameIdx = 0;
+ }
+}
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
new file mode 100644
index 0000000..d71108c
--- /dev/null
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -0,0 +1,65 @@
+//=-- X86TargetFrameLowering.h - Define frame lowering for X86 ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements X86-specific bits of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_FRAMELOWERING_H
+#define X86_FRAMELOWERING_H
+
+#include "X86Subtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class MCSymbol;
+ class X86TargetMachine;
+
+class X86FrameLowering : public TargetFrameLowering {
+ const X86TargetMachine &TM;
+ const X86Subtarget &STI;
+public:
+ explicit X86FrameLowering(const X86TargetMachine &tm, const X86Subtarget &sti)
+ : TargetFrameLowering(StackGrowsDown,
+ sti.getStackAlignment(),
+ (sti.is64Bit() ? -8 : -4)),
+ TM(tm), STI(sti) {
+ }
+
+ void emitCalleeSavedFrameMoves(MachineFunction &MF, MCSymbol *Label,
+ unsigned FramePtr) const;
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index c523441..9b0ec6e 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -190,20 +190,19 @@ namespace {
SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);
- bool MatchSegmentBaseAddress(SDValue N, X86ISelAddressMode &AM);
- bool MatchLoad(SDValue N, X86ISelAddressMode &AM);
+ bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth);
bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
- bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base,
+ bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
SDValue &Segment);
- bool SelectLEAAddr(SDNode *Op, SDValue N, SDValue &Base,
+ bool SelectLEAAddr(SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
SDValue &Segment);
- bool SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
+ bool SelectTLSADDRAddr(SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
SDValue &Segment);
bool SelectScalarSSELoad(SDNode *Root, SDValue N,
@@ -264,12 +263,6 @@ namespace {
return CurDAG->getTargetConstant(Imm, MVT::i8);
}
- /// getI16Imm - Return a target constant with the specified value, of type
- /// i16.
- inline SDValue getI16Imm(unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i16);
- }
-
/// getI32Imm - Return a target constant with the specified value, of type
/// i32.
inline SDValue getI32Imm(unsigned Imm) {
@@ -511,10 +504,11 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
// FIXME: optimize the case where the src/dest is a load or store?
SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
N->getOperand(0),
- MemTmp, NULL, 0, MemVT,
+ MemTmp, MachinePointerInfo(), MemVT,
false, false, 0);
- SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, DstVT, dl, Store, MemTmp,
- NULL, 0, MemVT, false, false, 0);
+ SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
+ MachinePointerInfo(),
+ MemVT, false, false, 0);
// We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
// extload we created. This will cause general havok on the dag because
@@ -536,9 +530,12 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
MachineFrameInfo *MFI) {
const TargetInstrInfo *TII = TM.getInstrInfo();
- if (Subtarget->isTargetCygMing())
+ if (Subtarget->isTargetCygMing()) {
+ unsigned CallOp =
+ Subtarget->is64Bit() ? X86::WINCALL64pcrel32 : X86::CALLpcrel32;
BuildMI(BB, DebugLoc(),
- TII->get(X86::CALLpcrel32)).addExternalSymbol("__main");
+ TII->get(CallOp)).addExternalSymbol("__main");
+ }
}
void X86DAGToDAGISel::EmitFunctionEntryCode() {
@@ -549,29 +546,27 @@ void X86DAGToDAGISel::EmitFunctionEntryCode() {
}
-bool X86DAGToDAGISel::MatchSegmentBaseAddress(SDValue N,
- X86ISelAddressMode &AM) {
- assert(N.getOpcode() == X86ISD::SegmentBaseAddress);
- SDValue Segment = N.getOperand(0);
-
- if (AM.Segment.getNode() == 0) {
- AM.Segment = Segment;
- return false;
- }
-
- return true;
-}
-
-bool X86DAGToDAGISel::MatchLoad(SDValue N, X86ISelAddressMode &AM) {
+bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
+ SDValue Address = N->getOperand(1);
+
+ // load gs:0 -> GS segment register.
+ // load fs:0 -> FS segment register.
+ //
// This optimization is valid because the GNU TLS model defines that
// gs:0 (or fs:0 on X86-64) contains its own address.
// For more information see http://people.redhat.com/drepper/tls.pdf
-
- SDValue Address = N.getOperand(1);
- if (Address.getOpcode() == X86ISD::SegmentBaseAddress &&
- !MatchSegmentBaseAddress (Address, AM))
- return false;
-
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
+ if (C->getSExtValue() == 0 && AM.Segment.getNode() == 0 &&
+ Subtarget->isTargetELF())
+ switch (N->getPointerInfo().getAddrSpace()) {
+ case 256:
+ AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
+ return false;
+ case 257:
+ AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
+ return false;
+ }
+
return true;
}
@@ -690,25 +685,6 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
return false;
}
-/// isLogicallyAddWithConstant - Return true if this node is semantically an
-/// add of a value with a constantint.
-static bool isLogicallyAddWithConstant(SDValue V, SelectionDAG *CurDAG) {
- // Check for (add x, Cst)
- if (V->getOpcode() == ISD::ADD)
- return isa<ConstantSDNode>(V->getOperand(1));
-
- // Check for (or x, Cst), where Cst & x == 0.
- if (V->getOpcode() != ISD::OR ||
- !isa<ConstantSDNode>(V->getOperand(1)))
- return false;
-
- // Handle "X | C" as "X + C" iff X is known to have C bits clear.
- ConstantSDNode *CN = cast<ConstantSDNode>(V->getOperand(1));
-
- // Check to see if the LHS & C is zero.
- return CurDAG->MaskedValueIsZero(V->getOperand(0), CN->getAPIntValue());
-}
-
bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth) {
bool is64Bit = Subtarget->is64Bit();
@@ -756,11 +732,6 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
break;
}
- case X86ISD::SegmentBaseAddress:
- if (!MatchSegmentBaseAddress(N, AM))
- return false;
- break;
-
case X86ISD::Wrapper:
case X86ISD::WrapperRIP:
if (!MatchWrapper(N, AM))
@@ -768,7 +739,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
break;
case ISD::LOAD:
- if (!MatchLoad(N, AM))
+ if (!MatchLoadInAddress(cast<LoadSDNode>(N), AM))
return false;
break;
@@ -799,7 +770,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// Okay, we know that we have a scale by now. However, if the scaled
// value is an add of something and a constant, we can fold the
// constant into the disp field here.
- if (isLogicallyAddWithConstant(ShVal, CurDAG)) {
+ if (CurDAG->isBaseWithConstantOffset(ShVal)) {
AM.IndexReg = ShVal.getNode()->getOperand(0);
ConstantSDNode *AddVal =
cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
@@ -943,24 +914,18 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// Add an artificial use to this node so that we can keep track of
// it if it gets CSE'd with a different node.
HandleSDNode Handle(N);
- SDValue LHS = Handle.getValue().getNode()->getOperand(0);
- SDValue RHS = Handle.getValue().getNode()->getOperand(1);
X86ISelAddressMode Backup = AM;
- if (!MatchAddressRecursively(LHS, AM, Depth+1) &&
- !MatchAddressRecursively(RHS, AM, Depth+1))
+ if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
+ !MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
return false;
AM = Backup;
- LHS = Handle.getValue().getNode()->getOperand(0);
- RHS = Handle.getValue().getNode()->getOperand(1);
-
+
// Try again after commuting the operands.
- if (!MatchAddressRecursively(RHS, AM, Depth+1) &&
- !MatchAddressRecursively(LHS, AM, Depth+1))
+ if (!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)&&
+ !MatchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
return false;
AM = Backup;
- LHS = Handle.getValue().getNode()->getOperand(0);
- RHS = Handle.getValue().getNode()->getOperand(1);
// If we couldn't fold both operands into the address at the same time,
// see if we can just put each operand into a register and fold at least
@@ -968,17 +933,19 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
if (AM.BaseType == X86ISelAddressMode::RegBase &&
!AM.Base_Reg.getNode() &&
!AM.IndexReg.getNode()) {
- AM.Base_Reg = LHS;
- AM.IndexReg = RHS;
+ N = Handle.getValue();
+ AM.Base_Reg = N.getOperand(0);
+ AM.IndexReg = N.getOperand(1);
AM.Scale = 1;
return false;
}
+ N = Handle.getValue();
break;
}
case ISD::OR:
// Handle "X | C" as "X + C" iff X is known to have C bits clear.
- if (isLogicallyAddWithConstant(N, CurDAG)) {
+ if (CurDAG->isBaseWithConstantOffset(N)) {
X86ISelAddressMode Backup = AM;
ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));
uint64_t Offset = CN->getSExtValue();
@@ -1148,10 +1115,30 @@ bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
/// SelectAddr - returns true if it is able pattern match an addressing mode.
/// It returns the operands which make up the maximal addressing mode it can
/// match by reference.
-bool X86DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base,
+///
+/// Parent is the parent node of the addr operand that is being matched. It
+/// is always a load, store, atomic node, or null. It is only null when
+/// checking memory operands for inline asm nodes.
+bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
X86ISelAddressMode AM;
+
+ if (Parent &&
+ // This list of opcodes are all the nodes that have an "addr:$ptr" operand
+ // that are not a MemSDNode, and thus don't have proper addrspace info.
+ Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
+ Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
+ Parent->getOpcode() != X86ISD::TLSCALL) { // Fixme
+ unsigned AddrSpace =
+ cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
+ // AddrSpace 256 -> GS, 257 -> FS.
+ if (AddrSpace == 256)
+ AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
+ if (AddrSpace == 257)
+ AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
+ }
+
if (MatchAddress(N, AM))
return false;
@@ -1187,7 +1174,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
- if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp,Segment))
+ if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
return false;
return true;
}
@@ -1205,7 +1192,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
// Okay, this is a zero extending load. Fold it.
LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
- if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
+ if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
return false;
PatternNodeWithChain = SDValue(LD, 0);
return true;
@@ -1216,7 +1203,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
/// mode it matches can be cost effectively emitted as an LEA instruction.
-bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N,
+bool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment) {
@@ -1278,7 +1265,7 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N,
}
/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
-bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
+bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
@@ -1311,7 +1298,8 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
!IsLegalToFold(N, P, P, OptLevel))
return false;
- return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment);
+ return SelectAddr(N.getNode(),
+ N.getOperand(1), Base, Scale, Index, Disp, Segment);
}
/// getGlobalBaseReg - Return an SDNode that returns the value of
@@ -1329,7 +1317,7 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
SDValue In2L = Node->getOperand(2);
SDValue In2H = Node->getOperand(3);
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- if (!SelectAddr(In1.getNode(), In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+ if (!SelectAddr(Node, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
return NULL;
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
@@ -1355,7 +1343,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
SDValue Ptr = Node->getOperand(1);
SDValue Val = Node->getOperand(2);
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- if (!SelectAddr(Ptr.getNode(), Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+ if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
return 0;
bool isInc = false, isDec = false, isSub = false, isCN = false;
@@ -1592,7 +1580,32 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
return RetVal;
break;
}
-
+ case X86ISD::UMUL: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ unsigned LoReg;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: LoReg = X86::AL; Opc = X86::MUL8r; break;
+ case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break;
+ case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
+ case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
+ }
+
+ SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
+ N0, SDValue()).getValue(1);
+
+ SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
+ SDValue Ops[] = {N1, InFlag};
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2);
+
+ ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
+ ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
+ ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2));
+ return NULL;
+ }
+
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI: {
SDValue N0 = Node->getOperand(0);
@@ -1642,14 +1655,15 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
InFlag };
SDNode *CNode =
- CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
+ CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops,
array_lengthof(Ops));
InFlag = SDValue(CNode, 1);
+
// Update the chain.
ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
} else {
- InFlag =
- SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag);
+ InFlag = SDValue(CNode, 0);
}
// Prevent use of AH in a REX instruction by referencing AX instead.
@@ -1688,7 +1702,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
ReplaceUses(SDValue(Node, 1), Result);
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
-
+
return NULL;
}
@@ -1773,7 +1787,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
if (isSigned && !signBitIsZero) {
// Sign extend the low part into the high part.
InFlag =
- SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Flag, InFlag),0);
+ SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
} else {
// Zero out the high part, effectively zero extending the input.
SDValue ClrNode =
@@ -1787,14 +1801,14 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
InFlag };
SDNode *CNode =
- CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
+ CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops,
array_lengthof(Ops));
InFlag = SDValue(CNode, 1);
// Update the chain.
ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
} else {
InFlag =
- SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
+ SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
}
// Prevent use of AH in a REX instruction by referencing AX instead.
@@ -1971,7 +1985,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
case 'v': // not offsetable ??
default: return true;
case 'm': // memory
- if (!SelectAddr(Op.getNode(), Op, Op0, Op1, Op2, Op3, Op4))
+ if (!SelectAddr(0, Op, Op0, Op1, Op2, Op3, Op4))
return true;
break;
}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index a6db979..27024b4 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -16,9 +16,9 @@
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
-#include "X86ShuffleDecode.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
+#include "Utils/X86ShuffleDecode.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
@@ -28,6 +28,7 @@
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -56,39 +57,172 @@ using namespace dwarf;
STATISTIC(NumTailCalls, "Number of tail calls");
static cl::opt<bool>
-DisableMMX("disable-mmx", cl::Hidden, cl::desc("Disable use of MMX"));
+Disable256Bit("disable-256bit", cl::Hidden,
+ cl::desc("Disable use of 256-bit vectors"));
// Forward declarations.
static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
SDValue V2);
+static SDValue Insert128BitVector(SDValue Result,
+ SDValue Vec,
+ SDValue Idx,
+ SelectionDAG &DAG,
+ DebugLoc dl);
+
+static SDValue Extract128BitVector(SDValue Vec,
+ SDValue Idx,
+ SelectionDAG &DAG,
+ DebugLoc dl);
+
+static SDValue ConcatVectors(SDValue Lower, SDValue Upper, SelectionDAG &DAG);
+
+
+/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
+/// sets things up to match to an AVX VEXTRACTF128 instruction or a
+/// simple subregister reference. Idx is an index in the 128 bits we
+/// want. It need not be aligned to a 128-bit bounday. That makes
+/// lowering EXTRACT_VECTOR_ELT operations easier.
+static SDValue Extract128BitVector(SDValue Vec,
+ SDValue Idx,
+ SelectionDAG &DAG,
+ DebugLoc dl) {
+ EVT VT = Vec.getValueType();
+ assert(VT.getSizeInBits() == 256 && "Unexpected vector size!");
+
+ EVT ElVT = VT.getVectorElementType();
+
+ int Factor = VT.getSizeInBits() / 128;
+
+ EVT ResultVT = EVT::getVectorVT(*DAG.getContext(),
+ ElVT,
+ VT.getVectorNumElements() / Factor);
+
+ // Extract from UNDEF is UNDEF.
+ if (Vec.getOpcode() == ISD::UNDEF)
+ return DAG.getNode(ISD::UNDEF, dl, ResultVT);
+
+ if (isa<ConstantSDNode>(Idx)) {
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ // Extract the relevant 128 bits. Generate an EXTRACT_SUBVECTOR
+ // we can match to VEXTRACTF128.
+ unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits();
+
+ // This is the index of the first element of the 128-bit chunk
+ // we want.
+ unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
+ * ElemsPerChunk);
+
+ SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
+
+ SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
+ VecIdx);
+
+ return Result;
+ }
+
+ return SDValue();
+}
+
+/// Generate a DAG to put 128-bits into a vector > 128 bits. This
+/// sets things up to match to an AVX VINSERTF128 instruction or a
+/// simple superregister reference. Idx is an index in the 128 bits
+/// we want. It need not be aligned to a 128-bit bounday. That makes
+/// lowering INSERT_VECTOR_ELT operations easier.
+static SDValue Insert128BitVector(SDValue Result,
+ SDValue Vec,
+ SDValue Idx,
+ SelectionDAG &DAG,
+ DebugLoc dl) {
+ if (isa<ConstantSDNode>(Idx)) {
+ EVT VT = Vec.getValueType();
+ assert(VT.getSizeInBits() == 128 && "Unexpected vector size!");
+
+ EVT ElVT = VT.getVectorElementType();
+
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ EVT ResultVT = Result.getValueType();
+
+ // Insert the relevant 128 bits.
+ unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits();
+
+ // This is the index of the first element of the 128-bit chunk
+ // we want.
+ unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
+ * ElemsPerChunk);
+
+ SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
+
+ Result = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
+ VecIdx);
+ return Result;
+ }
+
+ return SDValue();
+}
+
+/// Given two vectors, concat them.
+static SDValue ConcatVectors(SDValue Lower, SDValue Upper, SelectionDAG &DAG) {
+ DebugLoc dl = Lower.getDebugLoc();
+
+ assert(Lower.getValueType() == Upper.getValueType() && "Mismatched vectors!");
+
+ EVT VT = EVT::getVectorVT(*DAG.getContext(),
+ Lower.getValueType().getVectorElementType(),
+ Lower.getValueType().getVectorNumElements() * 2);
+
+ // TODO: Generalize to arbitrary vector length (this assumes 256-bit vectors).
+ assert(VT.getSizeInBits() == 256 && "Unsupported vector concat!");
+
+ // Insert the upper subvector.
+ SDValue Vec = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Upper,
+ DAG.getConstant(
+ // This is half the length of the result
+ // vector. Start inserting the upper 128
+ // bits here.
+ Lower.getValueType().getVectorNumElements(),
+ MVT::i32),
+ DAG, dl);
+
+ // Insert the lower subvector.
+ Vec = Insert128BitVector(Vec, Lower, DAG.getConstant(0, MVT::i32), DAG, dl);
+ return Vec;
+}
+
static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
-
- bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
-
- if (TM.getSubtarget<X86Subtarget>().isTargetDarwin()) {
- if (is64Bit) return new X8664_MachoTargetObjectFile();
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+ bool is64Bit = Subtarget->is64Bit();
+
+ if (Subtarget->isTargetEnvMacho()) {
+ if (is64Bit)
+ return new X8664_MachoTargetObjectFile();
return new TargetLoweringObjectFileMachO();
- } else if (TM.getSubtarget<X86Subtarget>().isTargetELF() ){
- if (is64Bit) return new X8664_ELFTargetObjectFile(TM);
+ }
+
+ if (Subtarget->isTargetELF()) {
+ if (is64Bit)
+ return new X8664_ELFTargetObjectFile(TM);
return new X8632_ELFTargetObjectFile(TM);
- } else if (TM.getSubtarget<X86Subtarget>().isTargetCOFF()) {
+ }
+ if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho())
return new TargetLoweringObjectFileCOFF();
- }
llvm_unreachable("unknown subtarget type");
}
X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
: TargetLowering(TM, createTLOF(TM)) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
- X86ScalarSSEf64 = Subtarget->hasSSE2();
- X86ScalarSSEf32 = Subtarget->hasSSE1();
+ X86ScalarSSEf64 = Subtarget->hasXMMInt();
+ X86ScalarSSEf32 = Subtarget->hasXMM();
X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
RegInfo = TM.getRegisterInfo();
TD = getTargetData();
// Set up the TargetLowering object.
+ static MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
// X86 is weird, it always uses i8 for shift amounts and setcc results.
setShiftAmountType(MVT::i8);
@@ -96,6 +230,18 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setSchedulingPreference(Sched::RegPressure);
setStackPointerRegisterToSaveRestore(X86StackPtr);
+ if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) {
+ // Setup Windows compiler runtime calls.
+ setLibcallName(RTLIB::SDIV_I64, "_alldiv");
+ setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
+ setLibcallName(RTLIB::FPTOUINT_F64_I64, "_ftol2");
+ setLibcallName(RTLIB::FPTOUINT_F32_I64, "_ftol2");
+ setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::C);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::C);
+ }
+
if (Subtarget->isTargetDarwin()) {
// Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
setUseUnderscoreSetJmp(false);
@@ -213,16 +359,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
- if (!X86ScalarSSEf64) {
- setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand);
- setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);
+ if (!X86ScalarSSEf64) {
+ setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
+ setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
if (Subtarget->is64Bit()) {
- setOperationAction(ISD::BIT_CONVERT , MVT::f64 , Expand);
- // Without SSE, i64->f64 goes through memory; i64->MMX is Legal.
- if (Subtarget->hasMMX() && !DisableMMX)
- setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Custom);
- else
- setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Expand);
+ setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
+ // Without SSE, i64->f64 goes through memory.
+ setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
}
}
@@ -236,30 +379,21 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// (low) operations are left as Legal, as there are single-result
// instructions for this in x86. Using the two-result multiply instructions
// when both high and low results are needed must be arranged by dagcombine.
- setOperationAction(ISD::MULHS , MVT::i8 , Expand);
- setOperationAction(ISD::MULHU , MVT::i8 , Expand);
- setOperationAction(ISD::SDIV , MVT::i8 , Expand);
- setOperationAction(ISD::UDIV , MVT::i8 , Expand);
- setOperationAction(ISD::SREM , MVT::i8 , Expand);
- setOperationAction(ISD::UREM , MVT::i8 , Expand);
- setOperationAction(ISD::MULHS , MVT::i16 , Expand);
- setOperationAction(ISD::MULHU , MVT::i16 , Expand);
- setOperationAction(ISD::SDIV , MVT::i16 , Expand);
- setOperationAction(ISD::UDIV , MVT::i16 , Expand);
- setOperationAction(ISD::SREM , MVT::i16 , Expand);
- setOperationAction(ISD::UREM , MVT::i16 , Expand);
- setOperationAction(ISD::MULHS , MVT::i32 , Expand);
- setOperationAction(ISD::MULHU , MVT::i32 , Expand);
- setOperationAction(ISD::SDIV , MVT::i32 , Expand);
- setOperationAction(ISD::UDIV , MVT::i32 , Expand);
- setOperationAction(ISD::SREM , MVT::i32 , Expand);
- setOperationAction(ISD::UREM , MVT::i32 , Expand);
- setOperationAction(ISD::MULHS , MVT::i64 , Expand);
- setOperationAction(ISD::MULHU , MVT::i64 , Expand);
- setOperationAction(ISD::SDIV , MVT::i64 , Expand);
- setOperationAction(ISD::UDIV , MVT::i64 , Expand);
- setOperationAction(ISD::SREM , MVT::i64 , Expand);
- setOperationAction(ISD::UREM , MVT::i64 , Expand);
+ for (unsigned i = 0, e = 4; i != e; ++i) {
+ MVT VT = IntVTs[i];
+ setOperationAction(ISD::MULHS, VT, Expand);
+ setOperationAction(ISD::MULHU, VT, Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+
+ // Add/Sub overflow ops with MVT::Glues are lowered to EFLAGS dependences.
+ setOperationAction(ISD::ADDC, VT, Custom);
+ setOperationAction(ISD::ADDE, VT, Custom);
+ setOperationAction(ISD::SUBC, VT, Custom);
+ setOperationAction(ISD::SUBE, VT, Custom);
+ }
setOperationAction(ISD::BR_JT , MVT::Other, Expand);
setOperationAction(ISD::BRCOND , MVT::Other, Custom);
@@ -276,21 +410,27 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FREM , MVT::f80 , Expand);
setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
- setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
setOperationAction(ISD::CTTZ , MVT::i8 , Custom);
setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
- setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
- setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
if (Subtarget->is64Bit()) {
- setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
}
+ if (Subtarget->hasPOPCNT()) {
+ setOperationAction(ISD::CTPOP , MVT::i8 , Promote);
+ } else {
+ setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
+ setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
+ setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
+ }
+
setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
@@ -298,7 +438,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SELECT , MVT::i1 , Promote);
// X86 wants to expand cmov itself.
setOperationAction(ISD::SELECT , MVT::i8 , Custom);
- setOperationAction(ISD::SELECT , MVT::i16 , Custom);
+ setOperationAction(ISD::SELECT , MVT::i16 , Custom);
setOperationAction(ISD::SELECT , MVT::i32 , Custom);
setOperationAction(ISD::SELECT , MVT::f32 , Custom);
setOperationAction(ISD::SELECT , MVT::f64 , Custom);
@@ -341,12 +481,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom);
}
- if (Subtarget->hasSSE1())
+ if (Subtarget->hasXMM())
setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
// We may not have a libcall for MEMBARRIER so we should lower this.
setOperationAction(ISD::MEMBARRIER , MVT::Other, Custom);
-
+
// On X86 and X86-64, atomic operations are lowered to locked instructions.
// Locked instructions, in turn, have implicit fence semantics (all memory
// operations are flushed before issuing the locked instruction, and they
@@ -355,15 +495,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setShouldFoldAtomicFences(true);
// Expand certain atomics
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Custom);
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Custom);
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
-
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
+ for (unsigned i = 0, e = 4; i != e; ++i) {
+ MVT VT = IntVTs[i];
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
+ }
if (!Subtarget->is64Bit()) {
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
@@ -415,7 +551,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
if (Subtarget->is64Bit())
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
- if (Subtarget->isTargetCygMing())
+ if (Subtarget->isTargetCygMing() || Subtarget->isTargetWindows())
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
else
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
@@ -512,13 +648,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::UNDEF, MVT::f80, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
{
- bool ignored;
- APFloat TmpFlt(+0.0);
- TmpFlt.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
- &ignored);
+ APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended);
addLegalFPImmediate(TmpFlt); // FLD0
TmpFlt.changeSign();
addLegalFPImmediate(TmpFlt); // FLD0/FCHS
+
+ bool ignored;
APFloat TmpFlt2(+1.0);
TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
&ignored);
@@ -564,8 +699,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::EXTRACT_VECTOR_ELT,(MVT::SimpleValueType)VT,Expand);
- setOperationAction(ISD::EXTRACT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT,(MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand);
+ setOperationAction(ISD::INSERT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand);
setOperationAction(ISD::FABS, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand);
@@ -613,91 +749,44 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
// with -msoft-float, disable use of MMX as well.
- if (!UseSoftFloat && !DisableMMX && Subtarget->hasMMX()) {
- addRegisterClass(MVT::v8i8, X86::VR64RegisterClass, false);
- addRegisterClass(MVT::v4i16, X86::VR64RegisterClass, false);
- addRegisterClass(MVT::v2i32, X86::VR64RegisterClass, false);
-
- addRegisterClass(MVT::v1i64, X86::VR64RegisterClass, false);
-
- setOperationAction(ISD::ADD, MVT::v8i8, Legal);
- setOperationAction(ISD::ADD, MVT::v4i16, Legal);
- setOperationAction(ISD::ADD, MVT::v2i32, Legal);
- setOperationAction(ISD::ADD, MVT::v1i64, Legal);
-
- setOperationAction(ISD::SUB, MVT::v8i8, Legal);
- setOperationAction(ISD::SUB, MVT::v4i16, Legal);
- setOperationAction(ISD::SUB, MVT::v2i32, Legal);
- setOperationAction(ISD::SUB, MVT::v1i64, Legal);
-
- setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
- setOperationAction(ISD::MUL, MVT::v4i16, Legal);
-
- setOperationAction(ISD::AND, MVT::v8i8, Promote);
- AddPromotedToType (ISD::AND, MVT::v8i8, MVT::v1i64);
- setOperationAction(ISD::AND, MVT::v4i16, Promote);
- AddPromotedToType (ISD::AND, MVT::v4i16, MVT::v1i64);
- setOperationAction(ISD::AND, MVT::v2i32, Promote);
- AddPromotedToType (ISD::AND, MVT::v2i32, MVT::v1i64);
- setOperationAction(ISD::AND, MVT::v1i64, Legal);
-
- setOperationAction(ISD::OR, MVT::v8i8, Promote);
- AddPromotedToType (ISD::OR, MVT::v8i8, MVT::v1i64);
- setOperationAction(ISD::OR, MVT::v4i16, Promote);
- AddPromotedToType (ISD::OR, MVT::v4i16, MVT::v1i64);
- setOperationAction(ISD::OR, MVT::v2i32, Promote);
- AddPromotedToType (ISD::OR, MVT::v2i32, MVT::v1i64);
- setOperationAction(ISD::OR, MVT::v1i64, Legal);
-
- setOperationAction(ISD::XOR, MVT::v8i8, Promote);
- AddPromotedToType (ISD::XOR, MVT::v8i8, MVT::v1i64);
- setOperationAction(ISD::XOR, MVT::v4i16, Promote);
- AddPromotedToType (ISD::XOR, MVT::v4i16, MVT::v1i64);
- setOperationAction(ISD::XOR, MVT::v2i32, Promote);
- AddPromotedToType (ISD::XOR, MVT::v2i32, MVT::v1i64);
- setOperationAction(ISD::XOR, MVT::v1i64, Legal);
-
- setOperationAction(ISD::LOAD, MVT::v8i8, Promote);
- AddPromotedToType (ISD::LOAD, MVT::v8i8, MVT::v1i64);
- setOperationAction(ISD::LOAD, MVT::v4i16, Promote);
- AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64);
- setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
- AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64);
- setOperationAction(ISD::LOAD, MVT::v1i64, Legal);
-
- setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);
-
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);
-
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
-
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i16, Custom);
-
- setOperationAction(ISD::SELECT, MVT::v8i8, Promote);
- setOperationAction(ISD::SELECT, MVT::v4i16, Promote);
- setOperationAction(ISD::SELECT, MVT::v2i32, Promote);
- setOperationAction(ISD::SELECT, MVT::v1i64, Custom);
- setOperationAction(ISD::VSETCC, MVT::v8i8, Custom);
- setOperationAction(ISD::VSETCC, MVT::v4i16, Custom);
- setOperationAction(ISD::VSETCC, MVT::v2i32, Custom);
-
- if (!X86ScalarSSEf64 && Subtarget->is64Bit()) {
- setOperationAction(ISD::BIT_CONVERT, MVT::v8i8, Custom);
- setOperationAction(ISD::BIT_CONVERT, MVT::v4i16, Custom);
- setOperationAction(ISD::BIT_CONVERT, MVT::v2i32, Custom);
- setOperationAction(ISD::BIT_CONVERT, MVT::v1i64, Custom);
- }
- }
-
- if (!UseSoftFloat && Subtarget->hasSSE1()) {
+ if (!UseSoftFloat && Subtarget->hasMMX()) {
+ addRegisterClass(MVT::x86mmx, X86::VR64RegisterClass);
+ // No operations on x86mmx supported, everything uses intrinsics.
+ }
+
+ // MMX-sized vectors (other than x86mmx) are expected to be expanded
+ // into smaller operations.
+ setOperationAction(ISD::MULHS, MVT::v8i8, Expand);
+ setOperationAction(ISD::MULHS, MVT::v4i16, Expand);
+ setOperationAction(ISD::MULHS, MVT::v2i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::v1i64, Expand);
+ setOperationAction(ISD::AND, MVT::v8i8, Expand);
+ setOperationAction(ISD::AND, MVT::v4i16, Expand);
+ setOperationAction(ISD::AND, MVT::v2i32, Expand);
+ setOperationAction(ISD::AND, MVT::v1i64, Expand);
+ setOperationAction(ISD::OR, MVT::v8i8, Expand);
+ setOperationAction(ISD::OR, MVT::v4i16, Expand);
+ setOperationAction(ISD::OR, MVT::v2i32, Expand);
+ setOperationAction(ISD::OR, MVT::v1i64, Expand);
+ setOperationAction(ISD::XOR, MVT::v8i8, Expand);
+ setOperationAction(ISD::XOR, MVT::v4i16, Expand);
+ setOperationAction(ISD::XOR, MVT::v2i32, Expand);
+ setOperationAction(ISD::XOR, MVT::v1i64, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Expand);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v1i64, Expand);
+ setOperationAction(ISD::SELECT, MVT::v8i8, Expand);
+ setOperationAction(ISD::SELECT, MVT::v4i16, Expand);
+ setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::v1i64, Expand);
+ setOperationAction(ISD::BITCAST, MVT::v8i8, Expand);
+ setOperationAction(ISD::BITCAST, MVT::v4i16, Expand);
+ setOperationAction(ISD::BITCAST, MVT::v2i32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::v1i64, Expand);
+
+ if (!UseSoftFloat && Subtarget->hasXMM()) {
addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
setOperationAction(ISD::FADD, MVT::v4f32, Legal);
@@ -714,7 +803,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::VSETCC, MVT::v4f32, Custom);
}
- if (!UseSoftFloat && Subtarget->hasSSE2()) {
+ if (!UseSoftFloat && Subtarget->hasXMMInt()) {
addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
// FIXME: Unfortunately -soft-float and -no-implicit-float means XMM
@@ -795,7 +884,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// Do not attempt to promote non-128-bit vectors
if (!VT.is128BitVector())
continue;
-
+
setOperationAction(ISD::AND, SVT, Promote);
AddPromotedToType (ISD::AND, SVT, MVT::v2i64);
setOperationAction(ISD::OR, SVT, Promote);
@@ -818,10 +907,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
- if (!DisableMMX && Subtarget->hasMMX()) {
- setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
- }
}
if (Subtarget->hasSSE41()) {
@@ -863,9 +948,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
}
- if (Subtarget->hasSSE42()) {
+ if (Subtarget->hasSSE42())
setOperationAction(ISD::VSETCC, MVT::v2i64, Custom);
- }
if (!UseSoftFloat && Subtarget->hasAVX()) {
addRegisterClass(MVT::v8f32, X86::VR256RegisterClass);
@@ -878,27 +962,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::LOAD, MVT::v8i32, Legal);
setOperationAction(ISD::LOAD, MVT::v4f64, Legal);
setOperationAction(ISD::LOAD, MVT::v4i64, Legal);
+
setOperationAction(ISD::FADD, MVT::v8f32, Legal);
setOperationAction(ISD::FSUB, MVT::v8f32, Legal);
setOperationAction(ISD::FMUL, MVT::v8f32, Legal);
setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v8f32, Custom);
- //setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Custom);
- //setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8f32, Custom);
- //setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
- //setOperationAction(ISD::VSETCC, MVT::v8f32, Custom);
-
- // Operations to consider commented out -v16i16 v32i8
- //setOperationAction(ISD::ADD, MVT::v16i16, Legal);
- setOperationAction(ISD::ADD, MVT::v8i32, Custom);
- setOperationAction(ISD::ADD, MVT::v4i64, Custom);
- //setOperationAction(ISD::SUB, MVT::v32i8, Legal);
- //setOperationAction(ISD::SUB, MVT::v16i16, Legal);
- setOperationAction(ISD::SUB, MVT::v8i32, Custom);
- setOperationAction(ISD::SUB, MVT::v4i64, Custom);
- //setOperationAction(ISD::MUL, MVT::v16i16, Legal);
+
setOperationAction(ISD::FADD, MVT::v4f64, Legal);
setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
@@ -906,85 +977,66 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
- setOperationAction(ISD::VSETCC, MVT::v4f64, Custom);
- // setOperationAction(ISD::VSETCC, MVT::v32i8, Custom);
- // setOperationAction(ISD::VSETCC, MVT::v16i16, Custom);
- setOperationAction(ISD::VSETCC, MVT::v8i32, Custom);
-
- // setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i8, Custom);
- // setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i16, Custom);
- // setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i16, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i32, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f32, Custom);
-
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4i64, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f64, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i64, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f64, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f64, Custom);
-
-#if 0
- // Not sure we want to do this since there are no 256-bit integer
- // operations in AVX
-
- // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
- // This includes 256-bit vectors
- for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; ++i) {
- EVT VT = (MVT::SimpleValueType)i;
-
- // Do not attempt to custom lower non-power-of-2 vectors
- if (!isPowerOf2_32(VT.getVectorNumElements()))
+ // Custom lower build_vector, vector_shuffle, scalar_to_vector,
+ // insert_vector_elt extract_subvector and extract_vector_elt for
+ // 256-bit types.
+ for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE;
+ ++i) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
+ // Do not attempt to custom lower non-256-bit vectors
+ if (!isPowerOf2_32(MVT(VT).getVectorNumElements())
+ || (MVT(VT).getSizeInBits() < 256))
continue;
-
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
- }
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+ }
+ // Custom-lower insert_subvector and extract_subvector based on
+ // the result type.
+ for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE;
+ ++i) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
+ // Do not attempt to custom lower non-256-bit vectors
+ if (!isPowerOf2_32(MVT(VT).getVectorNumElements()))
+ continue;
- if (Subtarget->is64Bit()) {
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i64, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i64, Custom);
+ if (MVT(VT).getSizeInBits() == 128) {
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+ }
+ else if (MVT(VT).getSizeInBits() == 256) {
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+ }
}
-#endif
-#if 0
- // Not sure we want to do this since there are no 256-bit integer
- // operations in AVX
-
- // Promote v32i8, v16i16, v8i32 load, select, and, or, xor to v4i64.
- // Including 256-bit vectors
- for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; i++) {
- EVT VT = (MVT::SimpleValueType)i;
+ // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
+ // Don't promote loads because we need them for VPERM vector index versions.
- if (!VT.is256BitVector()) {
+ for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE;
+ VT++) {
+ if (!isPowerOf2_32(MVT((MVT::SimpleValueType)VT).getVectorNumElements())
+ || (MVT((MVT::SimpleValueType)VT).getSizeInBits() < 256))
continue;
- }
- setOperationAction(ISD::AND, VT, Promote);
- AddPromotedToType (ISD::AND, VT, MVT::v4i64);
- setOperationAction(ISD::OR, VT, Promote);
- AddPromotedToType (ISD::OR, VT, MVT::v4i64);
- setOperationAction(ISD::XOR, VT, Promote);
- AddPromotedToType (ISD::XOR, VT, MVT::v4i64);
- setOperationAction(ISD::LOAD, VT, Promote);
- AddPromotedToType (ISD::LOAD, VT, MVT::v4i64);
- setOperationAction(ISD::SELECT, VT, Promote);
- AddPromotedToType (ISD::SELECT, VT, MVT::v4i64);
+ setOperationAction(ISD::AND, (MVT::SimpleValueType)VT, Promote);
+ AddPromotedToType (ISD::AND, (MVT::SimpleValueType)VT, MVT::v4i64);
+ setOperationAction(ISD::OR, (MVT::SimpleValueType)VT, Promote);
+ AddPromotedToType (ISD::OR, (MVT::SimpleValueType)VT, MVT::v4i64);
+ setOperationAction(ISD::XOR, (MVT::SimpleValueType)VT, Promote);
+ AddPromotedToType (ISD::XOR, (MVT::SimpleValueType)VT, MVT::v4i64);
+ //setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Promote);
+ //AddPromotedToType (ISD::LOAD, (MVT::SimpleValueType)VT, MVT::v4i64);
+ setOperationAction(ISD::SELECT, (MVT::SimpleValueType)VT, Promote);
+ AddPromotedToType (ISD::SELECT, (MVT::SimpleValueType)VT, MVT::v4i64);
}
-
- setTruncStoreAction(MVT::f64, MVT::f32, Expand);
-#endif
}
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
- // Add/Sub/Mul with overflow operations are custom lowered.
- setOperationAction(ISD::SADDO, MVT::i32, Custom);
- setOperationAction(ISD::UADDO, MVT::i32, Custom);
- setOperationAction(ISD::SSUBO, MVT::i32, Custom);
- setOperationAction(ISD::USUBO, MVT::i32, Custom);
- setOperationAction(ISD::SMULO, MVT::i32, Custom);
// Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
// handle type legalization for these operations here.
@@ -992,14 +1044,21 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// FIXME: We really should do custom legalization for addition and
// subtraction on x86-32 once PR3203 is fixed. We really can't do much better
// than generic legalization for 64-bit multiplication-with-overflow, though.
- if (Subtarget->is64Bit()) {
- setOperationAction(ISD::SADDO, MVT::i64, Custom);
- setOperationAction(ISD::UADDO, MVT::i64, Custom);
- setOperationAction(ISD::SSUBO, MVT::i64, Custom);
- setOperationAction(ISD::USUBO, MVT::i64, Custom);
- setOperationAction(ISD::SMULO, MVT::i64, Custom);
+ for (unsigned i = 0, e = 3+Subtarget->is64Bit(); i != e; ++i) {
+ // Add/Sub/Mul with overflow operations are custom lowered.
+ MVT VT = IntVTs[i];
+ setOperationAction(ISD::SADDO, VT, Custom);
+ setOperationAction(ISD::UADDO, VT, Custom);
+ setOperationAction(ISD::SSUBO, VT, Custom);
+ setOperationAction(ISD::USUBO, VT, Custom);
+ setOperationAction(ISD::SMULO, VT, Custom);
+ setOperationAction(ISD::UMULO, VT, Custom);
}
+ // There are no 8-bit 3-address imul/mul instructions
+ setOperationAction(ISD::SMULO, MVT::i8, Expand);
+ setOperationAction(ISD::UMULO, MVT::i8, Expand);
+
if (!Subtarget->is64Bit()) {
// These libcalls are not available in 32-bit.
setLibcallName(RTLIB::SHL_I128, 0);
@@ -1016,6 +1075,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::OR);
+ setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::ZERO_EXTEND);
if (Subtarget->is64Bit())
@@ -1023,11 +1085,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
computeRegisterProperties();
- // FIXME: These should be based on subtarget info. Plus, the values should
- // be smaller when we are in optimizing for size mode.
+ // On Darwin, -Os means optimize for size without hurting performance,
+ // do not reduce the limit.
maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
+ maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
maxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
- maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores
+ maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ maxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
+ maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
setPrefLoopAlignment(16);
benefitFromCodePlacementOpt = true;
}
@@ -1078,7 +1143,7 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
}
unsigned Align = 4;
- if (Subtarget->hasSSE1())
+ if (Subtarget->hasXMM())
getMaxByValAlign(Ty, Align);
return Align;
}
@@ -1119,7 +1184,7 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
} else if (!MemcpyStrSrc && Size >= 8 &&
!Subtarget->is64Bit() &&
Subtarget->getStackAlignment() >= 8 &&
- Subtarget->hasSSE2()) {
+ Subtarget->hasXMMInt()) {
// Do not use f64 to lower memcpy if source is string constant. It's
// better to use i32 to avoid the loads.
return MVT::f64;
@@ -1139,21 +1204,11 @@ unsigned X86TargetLowering::getJumpTableEncoding() const {
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
Subtarget->isPICStyleGOT())
return MachineJumpTableInfo::EK_Custom32;
-
+
// Otherwise, use the normal jump table encoding heuristics.
return TargetLowering::getJumpTableEncoding();
}
-/// getPICBaseSymbol - Return the X86-32 PIC base.
-MCSymbol *
-X86TargetLowering::getPICBaseSymbol(const MachineFunction *MF,
- MCContext &Ctx) const {
- const MCAsmInfo &MAI = *getTargetMachine().getMCAsmInfo();
- return Ctx.GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix())+
- Twine(MF->getFunctionNumber())+"$pb");
-}
-
-
const MCExpr *
X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB,
@@ -1188,7 +1243,7 @@ getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
// Otherwise, the reference is relative to the PIC base.
- return MCSymbolRefExpr::Create(getPICBaseSymbol(MF, Ctx), Ctx);
+ return MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
}
/// getFunctionAlignment - Return the Log2 alignment of this function.
@@ -1196,6 +1251,7 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const {
return F->hasFnAttr(Attribute::OptimizeForSize) ? 0 : 4;
}
+// FIXME: Why this routine is here? Move to RegInfo!
std::pair<const TargetRegisterClass*, uint8_t>
X86TargetLowering::findRepresentativeClass(EVT VT) const{
const TargetRegisterClass *RRC = 0;
@@ -1207,8 +1263,7 @@ X86TargetLowering::findRepresentativeClass(EVT VT) const{
RRC = (Subtarget->is64Bit()
? X86::GR64RegisterClass : X86::GR32RegisterClass);
break;
- case MVT::v8i8: case MVT::v4i16:
- case MVT::v2i32: case MVT::v1i64:
+ case MVT::x86mmx:
RRC = X86::VR64RegisterClass;
break;
case MVT::f32: case MVT::f64:
@@ -1222,10 +1277,13 @@ X86TargetLowering::findRepresentativeClass(EVT VT) const{
return std::make_pair(RRC, Cost);
}
+// FIXME: Why this routine is here? Move to RegInfo!
unsigned
X86TargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
- unsigned FPDiff = RegInfo->hasFP(MF) ? 1 : 0;
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
switch (RC->getID()) {
default:
return 0;
@@ -1267,7 +1325,7 @@ bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
#include "X86GenCallingConv.inc"
-bool
+bool
X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
@@ -1312,16 +1370,18 @@ X86TargetLowering::LowerReturn(SDValue Chain,
SDValue ValToCopy = OutVals[i];
EVT ValVT = ValToCopy.getValueType();
- // If this is x86-64, and we disabled SSE, we can't return FP values
- if ((ValVT == MVT::f32 || ValVT == MVT::f64) &&
- (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
+ // If this is x86-64, and we disabled SSE, we can't return FP values,
+ // or SSE or MMX vectors.
+ if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
+ VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
+ (Subtarget->is64Bit() && !Subtarget->hasXMM())) {
report_fatal_error("SSE register return with SSE disabled");
}
// Likewise we can't return F64 values with SSE1 only. gcc does so, but
// llvm-gcc has never done it right and no one has noticed, so this
// should be OK for now.
if (ValVT == MVT::f64 &&
- (Subtarget->is64Bit() && !Subtarget->hasSSE2()))
+ (Subtarget->is64Bit() && !Subtarget->hasXMMInt()))
report_fatal_error("SSE2 register return with SSE2 disabled");
// Returns in ST0/ST1 are handled specially: these are pushed as operands to
@@ -1340,20 +1400,19 @@ X86TargetLowering::LowerReturn(SDValue Chain,
// 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
// which is returned in RAX / RDX.
if (Subtarget->is64Bit()) {
- if (ValVT.isVector() && ValVT.getSizeInBits() == 64) {
- ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy);
+ if (ValVT == MVT::x86mmx) {
if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
+ ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ValToCopy);
ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
ValToCopy);
-
// If we don't have SSE2 available, convert to v4f32 so the generated
// register is legal.
if (!Subtarget->hasSSE2())
- ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,ValToCopy);
+ ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,ValToCopy);
}
}
}
-
+
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
Flag = Chain.getValue(1);
}
@@ -1367,7 +1426,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
unsigned Reg = FuncInfo->getSRetReturnReg();
- assert(Reg &&
+ assert(Reg &&
"SRetReturnReg should have been set in LowerFormalArguments().");
SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
@@ -1388,6 +1447,28 @@ X86TargetLowering::LowerReturn(SDValue Chain,
MVT::Other, &RetOps[0], RetOps.size());
}
+bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const {
+ if (N->getNumValues() != 1)
+ return false;
+ if (!N->hasNUsesOfValue(1, 0))
+ return false;
+
+ SDNode *Copy = *N->use_begin();
+ if (Copy->getOpcode() != ISD::CopyToReg &&
+ Copy->getOpcode() != ISD::FP_EXTEND)
+ return false;
+
+ bool HasRet = false;
+ for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() != X86ISD::RET_FLAG)
+ return false;
+ HasRet = true;
+ }
+
+ return HasRet;
+}
+
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
///
@@ -1412,7 +1493,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
// If this is x86-64, and we disabled SSE, we can't return FP values
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
- ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
+ ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasXMM())) {
report_fatal_error("SSE register return with SSE disabled");
}
@@ -1433,7 +1514,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
if (CopyVT == MVT::f64) Opc = isST0 ? X86::FpGET_ST0_64:X86::FpGET_ST1_64;
if (CopyVT == MVT::f80) Opc = isST0 ? X86::FpGET_ST0_80:X86::FpGET_ST1_80;
SDValue Ops[] = { Chain, InFlag };
- Chain = SDValue(DAG.getMachineNode(Opc, dl, CopyVT, MVT::Other, MVT::Flag,
+ Chain = SDValue(DAG.getMachineNode(Opc, dl, CopyVT, MVT::Other, MVT::Glue,
Ops, 2), 1);
Val = Chain.getValue(0);
@@ -1456,7 +1537,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
MVT::i64, InFlag).getValue(1);
Val = Chain.getValue(0);
}
- Val = DAG.getNode(ISD::BIT_CONVERT, dl, CopyVT, Val);
+ Val = DAG.getNode(ISD::BITCAST, dl, CopyVT, Val);
} else {
Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
CopyVT, InFlag).getValue(1);
@@ -1499,30 +1580,6 @@ ArgsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
return Ins[0].Flags.isSRet();
}
-/// CCAssignFnForNode - Selects the correct CCAssignFn for a the
-/// given CallingConvention value.
-CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
- if (Subtarget->is64Bit()) {
- if (CC == CallingConv::GHC)
- return CC_X86_64_GHC;
- else if (Subtarget->isTargetWin64())
- return CC_X86_Win64_C;
- else
- return CC_X86_64_C;
- }
-
- if (CC == CallingConv::X86_FastCall)
- return CC_X86_32_FastCall;
- else if (CC == CallingConv::X86_ThisCall)
- return CC_X86_32_ThisCall;
- else if (CC == CallingConv::Fast)
- return CC_X86_32_FastCC;
- else if (CC == CallingConv::GHC)
- return CC_X86_32_GHC;
- else
- return CC_X86_32_C;
-}
-
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" with size and alignment information specified by
/// the specific parameter attribute. The copy will be passed as a byval
@@ -1531,10 +1588,11 @@ static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
DebugLoc dl) {
- SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
/*isVolatile*/false, /*AlwaysInline=*/true,
- NULL, 0, NULL, 0);
+ MachinePointerInfo(), MachinePointerInfo());
}
/// IsTailCallConvention - Return true if the calling convention is one that
@@ -1583,7 +1641,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
VA.getLocMemOffset(), isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
return DAG.getLoad(ValVT, dl, Chain, FIN,
- PseudoSourceValue::getFixedStack(FI), 0,
+ MachinePointerInfo::getFixedStack(FI),
false, false, 0);
}
}
@@ -1617,7 +1675,13 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
ArgLocs, *DAG.getContext());
- CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv));
+
+ // Allocate shadow area for Win64
+ if (IsWin64) {
+ CCInfo.AllocateStack(32, 8);
+ }
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
unsigned LastVal = ~0U;
SDValue ArgValue;
@@ -1644,12 +1708,12 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
RC = X86::VR256RegisterClass;
else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
RC = X86::VR128RegisterClass;
- else if (RegVT.isVector() && RegVT.getSizeInBits() == 64)
+ else if (RegVT == MVT::x86mmx)
RC = X86::VR64RegisterClass;
else
llvm_unreachable("Unknown argument type!");
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
// If this is an 8 or 16-bit value, it is really passed promoted to 32
@@ -1662,14 +1726,13 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
DAG.getValueType(VA.getValVT()));
else if (VA.getLocInfo() == CCValAssign::BCvt)
- ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
+ ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
if (VA.isExtInLoc()) {
// Handle MMX values passed in XMM regs.
if (RegVT.isVector()) {
- ArgValue = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
- ArgValue, DAG.getConstant(0, MVT::i64));
- ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
+ ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(),
+ ArgValue);
} else
ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
}
@@ -1680,8 +1743,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// If value is passed via pointer - do a load.
if (VA.getLocInfo() == CCValAssign::Indirect)
- ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, NULL, 0,
- false, false, 0);
+ ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue,
+ MachinePointerInfo(), false, false, 0);
InVals.push_back(ArgValue);
}
@@ -1708,8 +1771,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
- if (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
- CallConv != CallingConv::X86_ThisCall)) {
+ if (!IsWin64 && (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
+ CallConv != CallingConv::X86_ThisCall))) {
FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,true));
}
if (Is64Bit) {
@@ -1719,9 +1782,6 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
static const unsigned GPR64ArgRegsWin64[] = {
X86::RCX, X86::RDX, X86::R8, X86::R9
};
- static const unsigned XMMArgRegsWin64[] = {
- X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
- };
static const unsigned GPR64ArgRegs64Bit[] = {
X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
};
@@ -1729,40 +1789,52 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
- const unsigned *GPR64ArgRegs, *XMMArgRegs;
+ const unsigned *GPR64ArgRegs;
+ unsigned NumXMMRegs = 0;
if (IsWin64) {
- TotalNumIntRegs = 4; TotalNumXMMRegs = 4;
+ // The XMM registers which might contain var arg parameters are shadowed
+ // in their paired GPR. So we only need to save the GPR to their home
+ // slots.
+ TotalNumIntRegs = 4;
GPR64ArgRegs = GPR64ArgRegsWin64;
- XMMArgRegs = XMMArgRegsWin64;
} else {
TotalNumIntRegs = 6; TotalNumXMMRegs = 8;
GPR64ArgRegs = GPR64ArgRegs64Bit;
- XMMArgRegs = XMMArgRegs64Bit;
+
+ NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs64Bit, TotalNumXMMRegs);
}
unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
TotalNumIntRegs);
- unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs,
- TotalNumXMMRegs);
bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
- assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
+ assert(!(NumXMMRegs && !Subtarget->hasXMM()) &&
"SSE register cannot be used when SSE is disabled!");
assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) &&
"SSE register cannot be used when SSE is disabled!");
- if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1())
+ if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasXMM())
// Kernel mode asks for SSE to be disabled, so don't push them
// on the stack.
TotalNumXMMRegs = 0;
- // For X86-64, if there are vararg parameters that are passed via
- // registers, then we must store them to their spots on the stack so they
- // may be loaded by deferencing the result of va_next.
- FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
- FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16);
- FuncInfo->setRegSaveFrameIndex(
- MFI->CreateStackObject(TotalNumIntRegs * 8 + TotalNumXMMRegs * 16, 16,
+ if (IsWin64) {
+ const TargetFrameLowering &TFI = *getTargetMachine().getFrameLowering();
+ // Get to the caller-allocated home save location. Add 8 to account
+ // for the return address.
+ int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
+ FuncInfo->setRegSaveFrameIndex(
+ MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
+ FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
+ } else {
+ // For X86-64, if there are vararg parameters that are passed via
+ // registers, then we must store them to their spots on the stack so they
+ // may be loaded by deferencing the result of va_next.
+ FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
+ FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16);
+ FuncInfo->setRegSaveFrameIndex(
+ MFI->CreateStackObject(TotalNumIntRegs * 8 + TotalNumXMMRegs * 16, 16,
false));
+ }
// Store the integer parameter registers.
SmallVector<SDValue, 8> MemOps;
@@ -1773,13 +1845,13 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
DAG.getIntPtrConstant(Offset));
unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
- X86::GR64RegisterClass);
+ X86::GR64RegisterClass, dl);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN,
- PseudoSourceValue::getFixedStack(
- FuncInfo->getRegSaveFrameIndex()),
- Offset, false, false, 0);
+ MachinePointerInfo::getFixedStack(
+ FuncInfo->getRegSaveFrameIndex(), Offset),
+ false, false, 0);
MemOps.push_back(Store);
Offset += 8;
}
@@ -1789,7 +1861,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
SmallVector<SDValue, 11> SaveXMMOps;
SaveXMMOps.push_back(Chain);
- unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass);
+ unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass, dl);
SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8);
SaveXMMOps.push_back(ALVal);
@@ -1799,8 +1871,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
FuncInfo->getVarArgsFPOffset()));
for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
- unsigned VReg = MF.addLiveIn(XMMArgRegs[NumXMMRegs],
- X86::VR128RegisterClass);
+ unsigned VReg = MF.addLiveIn(XMMArgRegs64Bit[NumXMMRegs],
+ X86::VR128RegisterClass, dl);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32);
SaveXMMOps.push_back(Val);
}
@@ -1843,15 +1915,14 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
DebugLoc dl, SelectionDAG &DAG,
const CCValAssign &VA,
ISD::ArgFlagsTy Flags) const {
- const unsigned FirstStackArgOffset = (Subtarget->isTargetWin64() ? 32 : 0);
- unsigned LocMemOffset = FirstStackArgOffset + VA.getLocMemOffset();
+ unsigned LocMemOffset = VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
- if (Flags.isByVal()) {
+ if (Flags.isByVal())
return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
- }
+
return DAG.getStore(Chain, dl, Arg, PtrOff,
- PseudoSourceValue::getStack(), LocMemOffset,
+ MachinePointerInfo::getStack(LocMemOffset),
false, false, 0);
}
@@ -1867,7 +1938,8 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
OutRetAddr = getReturnAddressFrameIndex(DAG);
// Load the "old" Return address.
- OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, NULL, 0, false, false, 0);
+ OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(),
+ false, false, 0);
return SDValue(OutRetAddr.getNode(), 1);
}
@@ -1886,7 +1958,7 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
- PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0,
+ MachinePointerInfo::getFixedStack(NewReturnAddrFI),
false, false, 0);
return Chain;
}
@@ -1902,6 +1974,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
bool Is64Bit = Subtarget->is64Bit();
+ bool IsWin64 = Subtarget->isTargetWin64();
bool IsStructRet = CallIsStructReturn(Outs);
bool IsSibcall = false;
@@ -1927,7 +2000,13 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
ArgLocs, *DAG.getContext());
- CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
+
+ // Allocate shadow area for Win64
+ if (IsWin64) {
+ CCInfo.AllocateStack(32, 8);
+ }
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_X86);
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
@@ -1986,21 +2065,21 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
case CCValAssign::AExt:
if (RegVT.isVector() && RegVT.getSizeInBits() == 128) {
// Special case: passing MMX values in XMM registers.
- Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
+ Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
} else
Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
break;
case CCValAssign::BCvt:
- Arg = DAG.getNode(ISD::BIT_CONVERT, dl, RegVT, Arg);
+ Arg = DAG.getNode(ISD::BITCAST, dl, RegVT, Arg);
break;
case CCValAssign::Indirect: {
// Store the argument.
SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
- PseudoSourceValue::getFixedStack(FI), 0,
+ MachinePointerInfo::getFixedStack(FI),
false, false, 0);
Arg = SpillSlot;
break;
@@ -2009,7 +2088,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- if (isVarArg && Subtarget->isTargetWin64()) {
+ if (isVarArg && IsWin64) {
// Win64 ABI requires argument XMM reg to be copied to the corresponding
// shadow reg if callee is a varargs function.
unsigned ShadowReg = 0;
@@ -2075,7 +2154,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
}
}
- if (Is64Bit && isVarArg && !Subtarget->isTargetWin64()) {
+ if (Is64Bit && isVarArg && !IsWin64) {
// From AMD64 ABI document:
// For calls that may call functions that use varargs or stdargs
// (prototype-less calls or calls to functions containing ellipsis (...) in
@@ -2090,7 +2169,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
- assert((Subtarget->hasSSE1() || !NumXMMRegs)
+ assert((Subtarget->hasXMM() || !NumXMMRegs)
&& "SSE registers cannot be used when SSE is disabled");
Chain = DAG.getCopyToReg(Chain, dl, X86::AL,
@@ -2143,7 +2222,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Store relative to framepointer.
MemOpChains2.push_back(
DAG.getStore(ArgChain, dl, Arg, FIN,
- PseudoSourceValue::getFixedStack(FI), 0,
+ MachinePointerInfo::getFixedStack(FI),
false, false, 0));
}
}
@@ -2192,8 +2271,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
OpFlags = X86II::MO_PLT;
} else if (Subtarget->isPICStyleStubAny() &&
- (GV->isDeclaration() || GV->isWeakForLinker()) &&
- Subtarget->getDarwinVers() < 9) {
+ (GV->isDeclaration() || GV->isWeakForLinker()) &&
+ Subtarget->getDarwinVers() < 9) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
@@ -2206,13 +2285,13 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
unsigned char OpFlags = 0;
- // On ELF targets, in either X86-64 or X86-32 mode, direct calls to external
- // symbols should go through the PLT.
+ // On ELF targets, in either X86-64 or X86-32 mode, direct calls to
+ // external symbols should go through the PLT.
if (Subtarget->isTargetELF() &&
getTargetMachine().getRelocationModel() == Reloc::PIC_) {
OpFlags = X86II::MO_PLT;
} else if (Subtarget->isPICStyleStubAny() &&
- Subtarget->getDarwinVers() < 9) {
+ Subtarget->getDarwinVers() < 9) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
@@ -2224,7 +2303,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
}
// Returns a chain & a flag for retval copy to use.
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SmallVector<SDValue, 8> Ops;
if (!IsSibcall && isTailCall) {
@@ -2250,7 +2329,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
// Add an implicit use of AL for non-Windows x86 64-bit vararg functions.
- if (Is64Bit && isVarArg && !Subtarget->isTargetWin64())
+ if (Is64Bit && isVarArg && !IsWin64)
Ops.push_back(DAG.getRegister(X86::AL, MVT::i8));
if (InFlag.getNode())
@@ -2337,7 +2416,7 @@ X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
SelectionDAG& DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
const TargetMachine &TM = MF.getTarget();
- const TargetFrameInfo &TFI = *TM.getFrameInfo();
+ const TargetFrameLowering &TFI = *TM.getFrameLowering();
unsigned StackAlignment = TFI.getStackAlignment();
uint64_t AlignMask = StackAlignment - 1;
int64_t Offset = StackSize;
@@ -2364,7 +2443,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
int FI = INT_MAX;
if (Arg.getOpcode() == ISD::CopyFromReg) {
unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
- if (!VR || TargetRegisterInfo::isPhysicalRegister(VR))
+ if (!TargetRegisterInfo::isVirtualRegister(VR))
return false;
MachineInstr *Def = MRI->getVRegDef(VR);
if (!Def)
@@ -2510,14 +2589,17 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(),
ArgLocs, *DAG.getContext());
- CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC));
+
+ // Allocate shadow area for Win64
+ if (Subtarget->isTargetWin64()) {
+ CCInfo.AllocateStack(32, 8);
+ }
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_X86);
if (CCInfo.getNextStackOffset()) {
MachineFunction &MF = DAG.getMachineFunction();
if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn())
return false;
- if (Subtarget->isTargetWin64())
- // Win64 ABI has additional complications.
- return false;
// Check if the arguments are already laid out in the right way as
// the caller's fixed stack objects.
@@ -2564,6 +2646,11 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
}
}
+ // An stdcall caller is expected to clean up its arguments; the callee
+ // isn't going to do that.
+ if (!CCMatch && CallerCC==CallingConv::X86_StdCall)
+ return false;
+
return true;
}
@@ -2592,6 +2679,7 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
case X86ISD::SHUFPD:
+ case X86ISD::PALIGN:
case X86ISD::SHUFPS:
case X86ISD::MOVLHPS:
case X86ISD::MOVLHPD:
@@ -2600,6 +2688,7 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::MOVLPD:
case X86ISD::MOVSHDUP:
case X86ISD::MOVSLDUP:
+ case X86ISD::MOVDDUP:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
case X86ISD::UNPCKLPS:
@@ -2625,6 +2714,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
default: llvm_unreachable("Unknown x86 shuffle node");
case X86ISD::MOVSHDUP:
case X86ISD::MOVSLDUP:
+ case X86ISD::MOVDDUP:
return DAG.getNode(Opc, dl, VT, V1);
}
@@ -2648,6 +2738,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
SDValue V1, SDValue V2, unsigned TargetMask, SelectionDAG &DAG) {
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
+ case X86ISD::PALIGN:
case X86ISD::SHUFPD:
case X86ISD::SHUFPS:
return DAG.getNode(Opc, dl, VT, V1, V2,
@@ -2770,8 +2861,8 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
// First determine if it is required or is profitable to flip the operands.
// If LHS is a foldable load, but RHS is not, flip the condition.
- if ((ISD::isNON_EXTLoad(LHS.getNode()) && LHS.hasOneUse()) &&
- !(ISD::isNON_EXTLoad(RHS.getNode()) && RHS.hasOneUse())) {
+ if (ISD::isNON_EXTLoad(LHS.getNode()) &&
+ !ISD::isNON_EXTLoad(RHS.getNode())) {
SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
std::swap(LHS, RHS);
}
@@ -2865,7 +2956,7 @@ static bool isUndefOrEqual(int Val, int CmpVal) {
/// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference
/// the second operand.
static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, EVT VT) {
- if (VT == MVT::v4f32 || VT == MVT::v4i32 || VT == MVT::v4i16)
+ if (VT == MVT::v4f32 || VT == MVT::v4i32 )
return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
if (VT == MVT::v2f64 || VT == MVT::v2i64)
return (Mask[0] < 2 && Mask[1] < 2);
@@ -2933,15 +3024,15 @@ bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) {
static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
bool hasSSSE3) {
int i, e = VT.getVectorNumElements();
-
+
// Do not handle v2i64 / v2f64 shuffles with palignr.
if (e < 4 || !hasSSSE3)
return false;
-
+
for (i = 0; i != e; ++i)
if (Mask[i] >= 0)
break;
-
+
// All undef, not a palignr.
if (i == e)
return false;
@@ -2952,13 +3043,13 @@ static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
bool NeedsUnary = false;
int s = Mask[i] - i;
-
+
// Check the rest of the elements to see if they are consecutive.
for (++i; i != e; ++i) {
int m = Mask[i];
- if (m < 0)
+ if (m < 0)
continue;
-
+
Unary = Unary && (m < (int)e);
NeedsUnary = NeedsUnary || (m < s);
@@ -3046,10 +3137,10 @@ bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) {
/// <2, 3, 2, 3>
bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
unsigned NumElems = N->getValueType(0).getVectorNumElements();
-
+
if (NumElems != 4)
return false;
-
+
return isUndefOrEqual(N->getMaskElt(0), 2) &&
isUndefOrEqual(N->getMaskElt(1), 3) &&
isUndefOrEqual(N->getMaskElt(2), 2) &&
@@ -3320,6 +3411,44 @@ bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) {
return true;
}
+/// isVEXTRACTF128Index - Return true if the specified
+/// EXTRACT_SUBVECTOR operand specifies a vector extract that is
+/// suitable for input to VEXTRACTF128.
+bool X86::isVEXTRACTF128Index(SDNode *N) {
+ if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
+ return false;
+
+ // The index should be aligned on a 128-bit boundary.
+ uint64_t Index =
+ cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
+
+ unsigned VL = N->getValueType(0).getVectorNumElements();
+ unsigned VBits = N->getValueType(0).getSizeInBits();
+ unsigned ElSize = VBits / VL;
+ bool Result = (Index * ElSize) % 128 == 0;
+
+ return Result;
+}
+
+/// isVINSERTF128Index - Return true if the specified INSERT_SUBVECTOR
+/// operand specifies a subvector insert that is suitable for input to
+/// VINSERTF128.
+bool X86::isVINSERTF128Index(SDNode *N) {
+ if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
+ return false;
+
+ // The index should be aligned on a 128-bit boundary.
+ uint64_t Index =
+ cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
+
+ unsigned VL = N->getValueType(0).getVectorNumElements();
+ unsigned VBits = N->getValueType(0).getSizeInBits();
+ unsigned ElSize = VBits / VL;
+ bool Result = (Index * ElSize) % 128 == 0;
+
+ return Result;
+}
+
/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions.
unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
@@ -3388,6 +3517,42 @@ unsigned X86::getShufflePALIGNRImmediate(SDNode *N) {
return (Val - i) * EltSize;
}
+/// getExtractVEXTRACTF128Immediate - Return the appropriate immediate
+/// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF128
+/// instructions.
+unsigned X86::getExtractVEXTRACTF128Immediate(SDNode *N) {
+ if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
+ llvm_unreachable("Illegal extract subvector for VEXTRACTF128");
+
+ uint64_t Index =
+ cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
+
+ EVT VecVT = N->getOperand(0).getValueType();
+ EVT ElVT = VecVT.getVectorElementType();
+
+ unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
+
+ return Index / NumElemsPerChunk;
+}
+
+/// getInsertVINSERTF128Immediate - Return the appropriate immediate
+/// to insert at the specified INSERT_SUBVECTOR index with VINSERTF128
+/// instructions.
+unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) {
+ if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
+ llvm_unreachable("Illegal insert subvector for VINSERTF128");
+
+ uint64_t Index =
+ cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
+
+ EVT VecVT = N->getValueType(0);
+ EVT ElVT = VecVT.getVectorElementType();
+
+ unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
+
+ return Index / NumElemsPerChunk;
+}
+
/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
bool X86::isZeroNode(SDValue Elt) {
@@ -3537,13 +3702,10 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
- // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted
+ // Always build SSE zero vectors as <4 x i32> bitcasted
// to their dest type. This ensures they get CSE'd.
SDValue Vec;
- if (VT.getSizeInBits() == 64) { // MMX
- SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
- } else if (VT.getSizeInBits() == 128) {
+ if (VT.getSizeInBits() == 128) { // SSE
if (HasSSE2) { // SSE2
SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
@@ -3559,7 +3721,7 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8);
}
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
}
/// getOnesVector - Returns a vector of specified type with all bits set.
@@ -3571,11 +3733,8 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
// type. This ensures they get CSE'd.
SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
SDValue Vec;
- if (VT.getSizeInBits() == 64) // MMX
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
- else // SSE
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
}
@@ -3640,9 +3799,6 @@ static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
/// PromoteSplat - Promote a splat of v4i32, v8i16 or v16i8 to v4f32.
static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
- if (SV->getValueType(0).getVectorNumElements() <= 4)
- return SDValue(SV, 0);
-
EVT PVT = MVT::v4f32;
EVT VT = SV->getValueType(0);
DebugLoc dl = SV->getDebugLoc();
@@ -3663,9 +3819,9 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
// Perform the splat.
int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
- V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1);
+ V1 = DAG.getNode(ISD::BITCAST, dl, PVT, V1);
V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), &SplatMask[0]);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, V1);
+ return DAG.getNode(ISD::BITCAST, dl, VT, V1);
}
/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
@@ -3789,7 +3945,7 @@ SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
}
// Actual nodes that may contain scalar elements
- if (Opcode == ISD::BIT_CONVERT) {
+ if (Opcode == ISD::BITCAST) {
V = V.getOperand(0);
EVT SrcVT = V.getValueType();
unsigned NumElems = VT.getVectorNumElements();
@@ -3978,7 +4134,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
}
}
- return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V);
}
/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
@@ -4017,11 +4173,10 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
unsigned NumBits, SelectionDAG &DAG,
const TargetLowering &TLI, DebugLoc dl) {
- bool isMMX = VT.getSizeInBits() == 64;
- EVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64;
+ EVT ShVT = MVT::v2i64;
unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL;
- SrcOp = DAG.getNode(ISD::BIT_CONVERT, dl, ShVT, SrcOp);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
+ return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(Opc, dl, ShVT, SrcOp,
DAG.getConstant(NumBits, TLI.getShiftAmountTy())));
}
@@ -4029,7 +4184,7 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
SDValue
X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
SelectionDAG &DAG) const {
-
+
// Check if the scalar load can be widened into a vector load. And if
// the address is "base + cst" see if the cst can be "absorbed" into
// the shuffle mask.
@@ -4046,8 +4201,7 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
FI = FINode->getIndex();
Offset = 0;
- } else if (Ptr.getOpcode() == ISD::ADD &&
- isa<ConstantSDNode>(Ptr.getOperand(1)) &&
+ } else if (DAG.isBaseWithConstantOffset(Ptr) &&
isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
Offset = Ptr.getConstantOperandVal(1);
@@ -4084,41 +4238,42 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
int EltNo = (Offset - StartOffset) >> 2;
int Mask[4] = { EltNo, EltNo, EltNo, EltNo };
EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32;
- SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0,
+ SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(StartOffset),
false, false, 0);
// Canonicalize it to a v4i32 shuffle.
- V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, V1);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
+ return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getVectorShuffle(MVT::v4i32, dl, V1,
- DAG.getUNDEF(MVT::v4i32), &Mask[0]));
+ DAG.getUNDEF(MVT::v4i32),&Mask[0]));
}
return SDValue();
}
-/// EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a
-/// vector of type 'VT', see if the elements can be replaced by a single large
+/// EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a
+/// vector of type 'VT', see if the elements can be replaced by a single large
/// load which has the same value as a build_vector whose operands are 'elts'.
///
/// Example: <load i32 *a, load i32 *a+4, undef, undef> -> zextload a
-///
+///
/// FIXME: we'd also like to handle the case where the last elements are zero
/// rather than undef via VZEXT_LOAD, but we do not detect that case today.
/// There's even a handy isZeroNode for that purpose.
static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
- DebugLoc &dl, SelectionDAG &DAG) {
+ DebugLoc &DL, SelectionDAG &DAG) {
EVT EltVT = VT.getVectorElementType();
unsigned NumElems = Elts.size();
-
+
LoadSDNode *LDBase = NULL;
unsigned LastLoadedElt = -1U;
-
+
// For each element in the initializer, see if we've found a load or an undef.
- // If we don't find an initial load element, or later load elements are
+ // If we don't find an initial load element, or later load elements are
// non-consecutive, bail out.
for (unsigned i = 0; i < NumElems; ++i) {
SDValue Elt = Elts[i];
-
+
if (!Elt.getNode() ||
(Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
return SDValue();
@@ -4143,18 +4298,20 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
// consecutive loads for the low half, generate a vzext_load node.
if (LastLoadedElt == NumElems - 1) {
if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
- return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(),
- LDBase->getSrcValue(), LDBase->getSrcValueOffset(),
+ return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
+ LDBase->getPointerInfo(),
LDBase->isVolatile(), LDBase->isNonTemporal(), 0);
- return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(),
- LDBase->getSrcValue(), LDBase->getSrcValueOffset(),
+ return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
+ LDBase->getPointerInfo(),
LDBase->isVolatile(), LDBase->isNonTemporal(),
LDBase->getAlignment());
} else if (NumElems == 4 && LastLoadedElt == 1) {
SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
- SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode);
+ SDValue ResNode = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys,
+ Ops, 2, MVT::i32,
+ LDBase->getMemOperand());
+ return DAG.getNode(ISD::BITCAST, DL, VT, ResNode);
}
return SDValue();
}
@@ -4162,6 +4319,35 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
+
+ EVT VT = Op.getValueType();
+ EVT ExtVT = VT.getVectorElementType();
+
+ unsigned NumElems = Op.getNumOperands();
+
+ // For AVX-length vectors, build the individual 128-bit pieces and
+ // use shuffles to put them in place.
+ if (VT.getSizeInBits() > 256 &&
+ Subtarget->hasAVX() &&
+ !Disable256Bit &&
+ !ISD::isBuildVectorAllZeros(Op.getNode())) {
+ SmallVector<SDValue, 8> V;
+ V.resize(NumElems);
+ for (unsigned i = 0; i < NumElems; ++i) {
+ V[i] = Op.getOperand(i);
+ }
+
+ EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);
+
+ // Build the lower subvector.
+ SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[0], NumElems/2);
+ // Build the upper subvector.
+ SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[NumElems / 2],
+ NumElems/2);
+
+ return ConcatVectors(Lower, Upper, DAG);
+ }
+
// All zero's are handled with pxor in SSE2 and above, xorps in SSE1.
// All one's are handled with pcmpeqd. In AVX, zero's are handled with
// vpxor in 128-bit and xor{pd,ps} in 256-bit, but no 256 version of pcmpeqd
@@ -4169,10 +4355,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
(Op.getValueType().getSizeInBits() != 256 &&
ISD::isBuildVectorAllOnes(Op.getNode()))) {
- // Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to
+ // Canonicalize this to <4 x i32> (SSE) to
// 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
// eliminated on x86-32 hosts.
- if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v2i32)
+ if (Op.getValueType() == MVT::v4i32)
return Op;
if (ISD::isBuildVectorAllOnes(Op.getNode()))
@@ -4180,11 +4366,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG, dl);
}
- EVT VT = Op.getValueType();
- EVT ExtVT = VT.getVectorElementType();
unsigned EVTBits = ExtVT.getSizeInBits();
- unsigned NumElems = Op.getNumOperands();
unsigned NumZero = 0;
unsigned NumNonZero = 0;
unsigned NonZeros = 0;
@@ -4223,9 +4406,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (ExtVT == MVT::i64 && !Subtarget->is64Bit() &&
(!IsAllConstants || Idx == 0)) {
if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
- // Handle MMX and SSE both.
- EVT VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32;
- unsigned VecElts = VT == MVT::v2i64 ? 4 : 2;
+ // Handle SSE only.
+ assert(VT == MVT::v2i64 && "Expected an SSE value type!");
+ EVT VecVT = MVT::v4i32;
+ unsigned VecElts = 4;
// Truncate the value (which may itself be a constant) to i32, and
// convert it to a vector with movd (S2V+shuffle to zero extend).
@@ -4245,7 +4429,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DAG.getUNDEF(Item.getValueType()),
&Mask[0]);
}
- return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Item);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Item);
}
}
@@ -4264,11 +4448,12 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DAG);
} else if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
- EVT MiddleVT = VT.getSizeInBits() == 64 ? MVT::v2i32 : MVT::v4i32;
+ assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!");
+ EVT MiddleVT = MVT::v4i32;
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item);
Item = getShuffleVectorZeroOrUndef(Item, 0, true,
Subtarget->hasSSE2(), DAG);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Item);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Item);
}
}
@@ -4394,20 +4579,20 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// Check for a build vector of consecutive loads.
for (unsigned i = 0; i < NumElems; ++i)
V[i] = Op.getOperand(i);
-
+
// Check for elements which are consecutive loads.
SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG);
if (LD.getNode())
return LD;
-
- // For SSE 4.1, use insertps to put the high elements into the low element.
+
+ // For SSE 4.1, use insertps to put the high elements into the low element.
if (getSubtarget()->hasSSE41()) {
SDValue Result;
if (Op.getOperand(0).getOpcode() != ISD::UNDEF)
Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
else
Result = DAG.getUNDEF(VT);
-
+
for (unsigned i = 1; i < NumElems; ++i) {
if (Op.getOperand(i).getOpcode() == ISD::UNDEF) continue;
Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result,
@@ -4415,7 +4600,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
}
return Result;
}
-
+
// Otherwise, expand into a number of unpckl*, start by extending each of
// our (non-undef) elements to the full vector width with the element in the
// bottom slot of the vector (which generates no code for SSE).
@@ -4441,7 +4626,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (V[i+EltStride].getOpcode() == ISD::UNDEF &&
EltStride == NumElems/2)
continue;
-
+
V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + EltStride]);
}
EltStride >>= 1;
@@ -4461,21 +4646,21 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
assert(ResVT == MVT::v2i64 || ResVT == MVT::v4i32 ||
ResVT == MVT::v8i16 || ResVT == MVT::v16i8);
int Mask[2];
- SDValue InVec = DAG.getNode(ISD::BIT_CONVERT,dl, MVT::v1i64, Op.getOperand(0));
+ SDValue InVec = DAG.getNode(ISD::BITCAST,dl, MVT::v1i64, Op.getOperand(0));
SDValue VecOp = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec);
InVec = Op.getOperand(1);
if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
unsigned NumElts = ResVT.getVectorNumElements();
- VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, ResVT, VecOp);
+ VecOp = DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp);
VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ResVT, VecOp,
InVec.getOperand(0), DAG.getIntPtrConstant(NumElts/2+1));
} else {
- InVec = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v1i64, InVec);
+ InVec = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, InVec);
SDValue VecOp2 = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec);
Mask[0] = 0; Mask[1] = 2;
VecOp = DAG.getVectorShuffle(MVT::v2i64, dl, VecOp, VecOp2, Mask);
}
- return DAG.getNode(ISD::BIT_CONVERT, dl, ResVT, VecOp);
+ return DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp);
}
// v8i16 shuffles - Prefer shuffles in the following order:
@@ -4557,9 +4742,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
MaskV.push_back(BestLoQuad < 0 ? 0 : BestLoQuad);
MaskV.push_back(BestHiQuad < 0 ? 1 : BestHiQuad);
NewV = DAG.getVectorShuffle(MVT::v2i64, dl,
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V1),
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V2), &MaskV[0]);
- NewV = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, NewV);
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2), &MaskV[0]);
+ NewV = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, NewV);
// Rewrite the MaskVals and assign NewV to V1 if NewV now contains all the
// source words for the shuffle, to aid later transformations.
@@ -4628,12 +4813,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
pshufbMask.push_back(DAG.getConstant(EltIdx+1, MVT::i8));
}
- V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V1);
+ V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V1);
V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
DAG.getNode(ISD::BUILD_VECTOR, dl,
MVT::v16i8, &pshufbMask[0], 16));
if (!TwoInputs)
- return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
// Calculate the shuffle mask for the second input, shuffle it, and
// OR it with the first shuffled input.
@@ -4648,12 +4833,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
pshufbMask.push_back(DAG.getConstant(EltIdx - 16, MVT::i8));
pshufbMask.push_back(DAG.getConstant(EltIdx - 15, MVT::i8));
}
- V2 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V2);
+ V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V2);
V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
DAG.getNode(ISD::BUILD_VECTOR, dl,
MVT::v16i8, &pshufbMask[0], 16));
V1 = DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
- return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
}
// If BestLoQuad >= 0, generate a pshuflw to put the low elements in order,
@@ -4820,8 +5005,8 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
// No SSSE3 - Calculate in place words and then fix all out of place words
// With 0-16 extracts & inserts. Worst case is 16 bytes out of order from
// the 16 different words that comprise the two doublequadword input vectors.
- V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1);
- V2 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V2);
+ V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
+ V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
SDValue NewV = V2Only ? V2 : V1;
for (int i = 0; i != 8; ++i) {
int Elt0 = MaskVals[i*2];
@@ -4883,25 +5068,23 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, InsElt,
DAG.getIntPtrConstant(i));
}
- return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, NewV);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, NewV);
}
/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
-/// ones, or rewriting v4i32 / v2i32 as 2 wide ones if possible. This can be
+/// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be
/// done when every pair / quad of shuffle mask elements point to elements in
/// the right sequence. e.g.
-/// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15>
+/// vector_shuffle X, Y, <2, 3, | 10, 11, | 0, 1, | 14, 15>
static
SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
- SelectionDAG &DAG,
- const TargetLowering &TLI, DebugLoc dl) {
+ SelectionDAG &DAG, DebugLoc dl) {
EVT VT = SVOp->getValueType(0);
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
unsigned NumElems = VT.getVectorNumElements();
unsigned NewWidth = (NumElems == 4) ? 2 : 4;
- EVT MaskVT = (NewWidth == 4) ? MVT::v4i16 : MVT::v2i32;
- EVT NewVT = MaskVT;
+ EVT NewVT;
switch (VT.getSimpleVT().SimpleTy) {
default: assert(false && "Unexpected!");
case MVT::v4f32: NewVT = MVT::v2f64; break;
@@ -4910,12 +5093,6 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
case MVT::v16i8: NewVT = MVT::v4i32; break;
}
- if (NewWidth == 2) {
- if (VT.isInteger())
- NewVT = MVT::v2i64;
- else
- NewVT = MVT::v2f64;
- }
int Scale = NumElems / NewWidth;
SmallVector<int, 8> MaskVec;
for (unsigned i = 0; i < NumElems; i += Scale) {
@@ -4935,8 +5112,8 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
MaskVec.push_back(StartIdx / Scale);
}
- V1 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V1);
- V2 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V2);
+ V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, V1);
+ V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, V2);
return DAG.getVectorShuffle(NewVT, dl, V1, V2, &MaskVec[0]);
}
@@ -4953,13 +5130,13 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT,
// movssrr and movsdrr do not clear top bits. Try to use movd, movq
// instead.
MVT ExtVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32;
- if ((ExtVT.SimpleTy != MVT::i64 || Subtarget->is64Bit()) &&
+ if ((ExtVT != MVT::i64 || Subtarget->is64Bit()) &&
SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
- SrcOp.getOperand(0).getOpcode() == ISD::BIT_CONVERT &&
+ SrcOp.getOperand(0).getOpcode() == ISD::BITCAST &&
SrcOp.getOperand(0).getOperand(0).getValueType() == ExtVT) {
// PR2108
OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32;
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
OpVT,
@@ -4969,9 +5146,9 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT,
}
}
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
- DAG.getNode(ISD::BIT_CONVERT, dl,
+ DAG.getNode(ISD::BITCAST, dl,
OpVT, SrcOp)));
}
@@ -5125,7 +5302,7 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
}
static bool MayFoldVectorLoad(SDValue V) {
- if (V.hasOneUse() && V.getOpcode() == ISD::BIT_CONVERT)
+ if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
V = V.getOperand(0);
if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
V = V.getOperand(0);
@@ -5134,6 +5311,110 @@ static bool MayFoldVectorLoad(SDValue V) {
return false;
}
+// FIXME: the version above should always be used. Since there's
+// a bug where several vector shuffles can't be folded because the
+// DAG is not updated during lowering and a node claims to have two
+// uses while it only has one, use this version, and let isel match
+// another instruction if the load really happens to have more than
+// one use. Remove this version after this bug get fixed.
+// rdar://8434668, PR8156
+static bool RelaxedMayFoldVectorLoad(SDValue V) {
+ if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+ if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ V = V.getOperand(0);
+ if (ISD::isNormalLoad(V.getNode()))
+ return true;
+ return false;
+}
+
+/// CanFoldShuffleIntoVExtract - Check if the current shuffle is used by
+/// a vector extract, and if both can be later optimized into a single load.
+/// This is done in visitEXTRACT_VECTOR_ELT and the conditions are checked
+/// here because otherwise a target specific shuffle node is going to be
+/// emitted for this shuffle, and the optimization not done.
+/// FIXME: This is probably not the best approach, but fix the problem
+/// until the right path is decided.
+static
+bool CanXFormVExtractWithShuffleIntoLoad(SDValue V, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ EVT VT = V.getValueType();
+ ShuffleVectorSDNode *SVOp = dyn_cast<ShuffleVectorSDNode>(V);
+
+ // Be sure that the vector shuffle is present in a pattern like this:
+ // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), c) -> (f32 load $addr)
+ if (!V.hasOneUse())
+ return false;
+
+ SDNode *N = *V.getNode()->use_begin();
+ if (N->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return false;
+
+ SDValue EltNo = N->getOperand(1);
+ if (!isa<ConstantSDNode>(EltNo))
+ return false;
+
+ // If the bit convert changed the number of elements, it is unsafe
+ // to examine the mask.
+ bool HasShuffleIntoBitcast = false;
+ if (V.getOpcode() == ISD::BITCAST) {
+ EVT SrcVT = V.getOperand(0).getValueType();
+ if (SrcVT.getVectorNumElements() != VT.getVectorNumElements())
+ return false;
+ V = V.getOperand(0);
+ HasShuffleIntoBitcast = true;
+ }
+
+ // Select the input vector, guarding against out of range extract vector.
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ int Idx = (Elt > NumElems) ? -1 : SVOp->getMaskElt(Elt);
+ V = (Idx < (int)NumElems) ? V.getOperand(0) : V.getOperand(1);
+
+ // Skip one more bit_convert if necessary
+ if (V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+
+ if (ISD::isNormalLoad(V.getNode())) {
+ // Is the original load suitable?
+ LoadSDNode *LN0 = cast<LoadSDNode>(V);
+
+ // FIXME: avoid the multi-use bug that is preventing lots of
+ // of foldings to be detected, this is still wrong of course, but
+ // give the temporary desired behavior, and if it happens that
+ // the load has real more uses, during isel it will not fold, and
+ // will generate poor code.
+ if (!LN0 || LN0->isVolatile()) // || !LN0->hasOneUse()
+ return false;
+
+ if (!HasShuffleIntoBitcast)
+ return true;
+
+ // If there's a bitcast before the shuffle, check if the load type and
+ // alignment is valid.
+ unsigned Align = LN0->getAlignment();
+ unsigned NewAlign =
+ TLI.getTargetData()->getABITypeAlignment(
+ VT.getTypeForEVT(*DAG.getContext()));
+
+ if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
+ return false;
+ }
+
+ return true;
+}
+
+static
+SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+
+ // Canonizalize to v2f64.
+ V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ getTargetShuffleNode(X86ISD::MOVDDUP, dl, MVT::v2f64,
+ V1, DAG));
+}
+
static
SDValue getMOVLowToHigh(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG,
bool HasSSE2) {
@@ -5191,6 +5472,10 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op))
CanFoldLoad = true;
+ // Both of them can't be memory operations though.
+ if (MayFoldVectorLoad(V1) && MayFoldVectorLoad(V2))
+ CanFoldLoad = false;
+
if (CanFoldLoad) {
if (HasSSE2 && NumElems == 2)
return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG);
@@ -5228,7 +5513,7 @@ static inline unsigned getUNPCKLOpcode(EVT VT) {
case MVT::v16i8: return X86ISD::PUNPCKLBW;
case MVT::v8i16: return X86ISD::PUNPCKLWD;
default:
- llvm_unreachable("Unknow type for unpckl");
+ llvm_unreachable("Unknown type for unpckl");
}
return 0;
}
@@ -5242,63 +5527,111 @@ static inline unsigned getUNPCKHOpcode(EVT VT) {
case MVT::v16i8: return X86ISD::PUNPCKHBW;
case MVT::v8i16: return X86ISD::PUNPCKHWD;
default:
- llvm_unreachable("Unknow type for unpckh");
+ llvm_unreachable("Unknown type for unpckh");
}
return 0;
}
-SDValue
-X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
+static
+SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ const X86Subtarget *Subtarget) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
- SDValue V1 = Op.getOperand(0);
- SDValue V2 = Op.getOperand(1);
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
- unsigned NumElems = VT.getVectorNumElements();
- bool isMMX = VT.getSizeInBits() == 64;
- bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
- bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
- bool V1IsSplat = false;
- bool V2IsSplat = false;
- bool HasSSE2 = Subtarget->hasSSE2() || Subtarget->hasAVX();
- bool HasSSE3 = Subtarget->hasSSE3() || Subtarget->hasAVX();
- MachineFunction &MF = DAG.getMachineFunction();
- bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
if (isZeroShuffle(SVOp))
return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);
- // Promote splats to v4f32.
+ // Handle splat operations
if (SVOp->isSplat()) {
- if (isMMX || NumElems < 4)
+ // Special case, this is the only place now where it's
+ // allowed to return a vector_shuffle operation without
+ // using a target specific node, because *hopefully* it
+ // will be optimized away by the dag combiner.
+ if (VT.getVectorNumElements() <= 4 &&
+ CanXFormVExtractWithShuffleIntoLoad(Op, DAG, TLI))
return Op;
+
+ // Handle splats by matching through known masks
+ if (VT.getVectorNumElements() <= 4)
+ return SDValue();
+
+ // Canonicalize all of the remaining to v4f32.
return PromoteSplat(SVOp, DAG);
}
// If the shuffle can be profitably rewritten as a narrower shuffle, then
// do it!
if (VT == MVT::v8i16 || VT == MVT::v16i8) {
- SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
if (NewOp.getNode())
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
- LowerVECTOR_SHUFFLE(NewOp, DAG));
+ return DAG.getNode(ISD::BITCAST, dl, VT, NewOp);
} else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
// FIXME: Figure out a cleaner way to do this.
// Try to make use of movq to zero out the top part.
if (ISD::isBuildVectorAllZeros(V2.getNode())) {
- SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
if (NewOp.getNode()) {
if (isCommutedMOVL(cast<ShuffleVectorSDNode>(NewOp), true, false))
return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(0),
DAG, Subtarget, dl);
}
} else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
- SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
if (NewOp.getNode() && X86::isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)))
return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1),
DAG, Subtarget, dl);
}
}
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned NumElems = VT.getVectorNumElements();
+ bool isMMX = VT.getSizeInBits() == 64;
+ bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
+ bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
+ bool V1IsSplat = false;
+ bool V2IsSplat = false;
+ bool HasSSE2 = Subtarget->hasSSE2() || Subtarget->hasAVX();
+ bool HasSSE3 = Subtarget->hasSSE3() || Subtarget->hasAVX();
+ bool HasSSSE3 = Subtarget->hasSSSE3() || Subtarget->hasAVX();
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+
+ // Shuffle operations on MMX not supported.
+ if (isMMX)
+ return Op;
+
+ // Vector shuffle lowering takes 3 steps:
+ //
+ // 1) Normalize the input vectors. Here splats, zeroed vectors, profitable
+ // narrowing and commutation of operands should be handled.
+ // 2) Matching of shuffles with known shuffle masks to x86 target specific
+ // shuffle nodes.
+ // 3) Rewriting of unmatched masks into new generic shuffle operations,
+ // so the shuffle can be broken into other shuffles and the legalizer can
+ // try the lowering again.
+ //
+ // The general ideia is that no vector_shuffle operation should be left to
+ // be matched during isel, all of them must be converted to a target specific
+ // node here.
+
+ // Normalize the input vectors. Here splats, zeroed vectors, profitable
+ // narrowing and commutation of operands should be handled. The actual code
+ // doesn't include all of those, work in progress...
+ SDValue NewOp = NormalizeVectorShuffle(Op, DAG, *this, Subtarget);
+ if (NewOp.getNode())
+ return NewOp;
// NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
// unpckh_undef). Only use pshufd if speed is more important than size.
@@ -5309,6 +5642,18 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (VT != MVT::v2i64 && VT != MVT::v2f64)
return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+ if (X86::isMOVDDUPMask(SVOp) && HasSSE3 && V2IsUndef &&
+ RelaxedMayFoldVectorLoad(V1))
+ return getMOVDDup(Op, dl, V1, DAG);
+
+ if (X86::isMOVHLPS_v_undef_Mask(SVOp))
+ return getMOVHighToLow(Op, dl, DAG);
+
+ // Use to match splats
+ if (HasSSE2 && X86::isUNPCKHMask(SVOp) && V2IsUndef &&
+ (VT == MVT::v2f64 || VT == MVT::v2i64))
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+
if (X86::isPSHUFDMask(SVOp)) {
// The actual implementation will match the mask in the if above and then
// during isel it can match several different instructions, not only pshufd
@@ -5349,7 +5694,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return V2;
if (ISD::isBuildVectorAllZeros(V1.getNode()))
return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl);
- if (!isMMX && !X86::isMOVLPMask(SVOp)) {
+ if (!X86::isMOVLPMask(SVOp)) {
if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64))
return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
@@ -5359,22 +5704,20 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
}
// FIXME: fold these into legal mask.
- if (!isMMX) {
- if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp))
- return getMOVLowToHigh(Op, dl, DAG, HasSSE2);
+ if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp))
+ return getMOVLowToHigh(Op, dl, DAG, HasSSE2);
- if (X86::isMOVHLPSMask(SVOp))
- return getMOVHighToLow(Op, dl, DAG);
+ if (X86::isMOVHLPSMask(SVOp))
+ return getMOVHighToLow(Op, dl, DAG);
- if (X86::isMOVSHDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
- return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG);
+ if (X86::isMOVSHDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
+ return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG);
- if (X86::isMOVSLDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
- return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG);
+ if (X86::isMOVSLDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
+ return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG);
- if (X86::isMOVLPMask(SVOp))
- return getMOVLP(Op, dl, DAG, HasSSE2);
- }
+ if (X86::isMOVLPMask(SVOp))
+ return getMOVLP(Op, dl, DAG, HasSSE2);
if (ShouldXformToMOVHLPS(SVOp) ||
ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp))
@@ -5414,13 +5757,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getMOVL(DAG, dl, VT, V2, V1);
}
- if (X86::isUNPCKL_v_undef_Mask(SVOp) || X86::isUNPCKLMask(SVOp))
- return (isMMX) ?
- Op : getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG);
+ if (X86::isUNPCKLMask(SVOp))
+ return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG);
- if (X86::isUNPCKH_v_undef_Mask(SVOp) || X86::isUNPCKHMask(SVOp))
- return (isMMX) ?
- Op : getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG);
+ if (X86::isUNPCKHMask(SVOp))
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG);
if (V2IsSplat) {
// Normalize mask so all entries that point to V2 points to its first
@@ -5443,19 +5784,15 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
SDValue NewOp = CommuteVectorShuffle(SVOp, DAG);
ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
- if (X86::isUNPCKL_v_undef_Mask(NewSVOp) || X86::isUNPCKLMask(NewSVOp))
- return (isMMX) ?
- NewOp : getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG);
+ if (X86::isUNPCKLMask(NewSVOp))
+ return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG);
- if (X86::isUNPCKH_v_undef_Mask(NewSVOp) || X86::isUNPCKHMask(NewSVOp))
- return (isMMX) ?
- NewOp : getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG);
+ if (X86::isUNPCKHMask(NewSVOp))
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG);
}
- // FIXME: for mmx, bitcast v2i32 to v4i16 for shuffle.
-
// Normalize the node to match x86 shuffle ops if needed
- if (!isMMX && V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp))
+ if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp))
return CommuteVectorShuffle(SVOp, DAG);
// The checks below are all present in isShuffleMaskLegal, but they are
@@ -5464,15 +5801,18 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
SmallVector<int, 16> M;
SVOp->getMask(M);
- // Very little shuffling can be done for 64-bit vectors right now.
- if (VT.getSizeInBits() == 64)
- return isPALIGNRMask(M, VT, Subtarget->hasSSSE3()) ? Op : SDValue();
+ if (isPALIGNRMask(M, VT, HasSSSE3))
+ return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2,
+ X86::getShufflePALIGNRImmediate(SVOp),
+ DAG);
- // FIXME: pshufb, blends, shifts.
- if (VT.getVectorNumElements() == 2 ||
- ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
- isPALIGNRMask(M, VT, Subtarget->hasSSSE3()))
- return Op;
+ if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
+ SVOp->getSplatIndex() == 0 && V2IsUndef) {
+ if (VT == MVT::v2f64)
+ return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG);
+ if (VT == MVT::v2i64)
+ return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG);
+ }
if (isPSHUFHWMask(M, VT))
return getTargetShuffleNode(X86ISD::PSHUFHW, dl, VT, V1,
@@ -5494,6 +5834,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
TargetMask, DAG);
}
+ if (X86::isUNPCKL_v_undef_Mask(SVOp))
+ if (VT != MVT::v2i64 && VT != MVT::v2f64)
+ return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
+ if (X86::isUNPCKH_v_undef_Mask(SVOp))
+ if (VT != MVT::v2i64 && VT != MVT::v2f64)
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+
// Handle v8i16 specifically since SSE can do byte extraction and insertion.
if (VT == MVT::v8i16) {
SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(Op, DAG);
@@ -5507,8 +5854,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return NewOp;
}
- // Handle all 4 wide cases with a number of shuffles except for MMX.
- if (NumElems == 4 && !isMMX)
+ // Handle all 4 wide cases with a number of shuffles.
+ if (NumElems == 4)
return LowerVECTOR_SHUFFLE_4wide(SVOp, DAG);
return SDValue();
@@ -5531,7 +5878,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
if (Idx == 0)
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
- DAG.getNode(ISD::BIT_CONVERT, dl,
+ DAG.getNode(ISD::BITCAST, dl,
MVT::v4i32,
Op.getOperand(0)),
Op.getOperand(1)));
@@ -5552,14 +5899,14 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
if ((User->getOpcode() != ISD::STORE ||
(isa<ConstantSDNode>(Op.getOperand(1)) &&
cast<ConstantSDNode>(Op.getOperand(1))->isNullValue())) &&
- (User->getOpcode() != ISD::BIT_CONVERT ||
+ (User->getOpcode() != ISD::BITCAST ||
User->getValueType(0) != MVT::i32))
return SDValue();
SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32,
Op.getOperand(0)),
Op.getOperand(1));
- return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Extract);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Extract);
} else if (VT == MVT::i32) {
// ExtractPS works with constant index.
if (isa<ConstantSDNode>(Op.getOperand(1)))
@@ -5575,6 +5922,38 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
if (!isa<ConstantSDNode>(Op.getOperand(1)))
return SDValue();
+ SDValue Vec = Op.getOperand(0);
+ EVT VecVT = Vec.getValueType();
+
+ // If this is a 256-bit vector result, first extract the 128-bit
+ // vector and then extract from the 128-bit vector.
+ if (VecVT.getSizeInBits() > 128) {
+ DebugLoc dl = Op.getNode()->getDebugLoc();
+ unsigned NumElems = VecVT.getVectorNumElements();
+ SDValue Idx = Op.getOperand(1);
+
+ if (!isa<ConstantSDNode>(Idx))
+ return SDValue();
+
+ unsigned ExtractNumElems = NumElems / (VecVT.getSizeInBits() / 128);
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ // Get the 128-bit vector.
+ bool Upper = IdxVal >= ExtractNumElems;
+ Vec = Extract128BitVector(Vec, Idx, DAG, dl);
+
+ // Extract from it.
+ SDValue ScaledIdx = Idx;
+ if (Upper)
+ ScaledIdx = DAG.getNode(ISD::SUB, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(ExtractNumElems,
+ Idx.getValueType()));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
+ ScaledIdx);
+ }
+
+ assert(Vec.getValueSizeInBits() <= 128 && "Unexpected vector length");
+
if (Subtarget->hasSSE41()) {
SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
if (Res.getNode())
@@ -5590,7 +5969,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
if (Idx == 0)
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
- DAG.getNode(ISD::BIT_CONVERT, dl,
+ DAG.getNode(ISD::BITCAST, dl,
MVT::v4i32, Vec),
Op.getOperand(1)));
// Transform it so it match pextrw which produces a 32-bit result.
@@ -5650,8 +6029,6 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op,
unsigned Opc;
if (VT == MVT::v8i16)
Opc = X86ISD::PINSRW;
- else if (VT == MVT::v4i16)
- Opc = X86ISD::MMX_PINSRW;
else if (VT == MVT::v16i8)
Opc = X86ISD::PINSRB;
else
@@ -5689,17 +6066,45 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2 = Op.getOperand(2);
+
+ // If this is a 256-bit vector result, first insert into a 128-bit
+ // vector and then insert into the 256-bit vector.
+ if (VT.getSizeInBits() > 128) {
+ if (!isa<ConstantSDNode>(N2))
+ return SDValue();
+
+ // Get the 128-bit vector.
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned IdxVal = cast<ConstantSDNode>(N2)->getZExtValue();
+ bool Upper = IdxVal >= NumElems / 2;
+
+ SDValue SubN0 = Extract128BitVector(N0, N2, DAG, dl);
+
+ // Insert into it.
+ SDValue ScaledN2 = N2;
+ if (Upper)
+ ScaledN2 = DAG.getNode(ISD::SUB, dl, N2.getValueType(), N2,
+ DAG.getConstant(NumElems /
+ (VT.getSizeInBits() / 128),
+ N2.getValueType()));
+ Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubN0.getValueType(), SubN0,
+ N1, ScaledN2);
+
+ // Insert the 128-bit vector
+ // FIXME: Why UNDEF?
+ return Insert128BitVector(N0, Op, N2, DAG, dl);
+ }
+
if (Subtarget->hasSSE41())
return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
if (EltVT == MVT::i8)
return SDValue();
- DebugLoc dl = Op.getDebugLoc();
- SDValue N0 = Op.getOperand(0);
- SDValue N1 = Op.getOperand(1);
- SDValue N2 = Op.getOperand(2);
-
if (EltVT.getSizeInBits() == 16 && isa<ConstantSDNode>(N2)) {
// Transform it so it match pinsrw which expects a 16-bit value in a GR32
// as its second argument.
@@ -5707,31 +6112,79 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
if (N2.getValueType() != MVT::i32)
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
- return DAG.getNode(VT == MVT::v8i16 ? X86ISD::PINSRW : X86ISD::MMX_PINSRW,
- dl, VT, N0, N1, N2);
+ return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2);
}
return SDValue();
}
SDValue
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const {
+ LLVMContext *Context = DAG.getContext();
DebugLoc dl = Op.getDebugLoc();
-
+ EVT OpVT = Op.getValueType();
+
+ // If this is a 256-bit vector result, first insert into a 128-bit
+ // vector and then insert into the 256-bit vector.
+ if (OpVT.getSizeInBits() > 128) {
+ // Insert into a 128-bit vector.
+ EVT VT128 = EVT::getVectorVT(*Context,
+ OpVT.getVectorElementType(),
+ OpVT.getVectorNumElements() / 2);
+
+ Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT128, Op.getOperand(0));
+
+ // Insert the 128-bit vector.
+ return Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, OpVT), Op,
+ DAG.getConstant(0, MVT::i32),
+ DAG, dl);
+ }
+
if (Op.getValueType() == MVT::v1i64 &&
Op.getOperand(0).getValueType() == MVT::i64)
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0));
SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
- EVT VT = MVT::v2i32;
- switch (Op.getValueType().getSimpleVT().SimpleTy) {
- default: break;
- case MVT::v16i8:
- case MVT::v8i16:
- VT = MVT::v4i32;
- break;
+ assert(Op.getValueType().getSimpleVT().getSizeInBits() == 128 &&
+ "Expected an SSE type!");
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(),
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,AnyExt));
+}
+
+// Lower a node with an EXTRACT_SUBVECTOR opcode. This may result in
+// a simple subregister reference or explicit instructions to grab
+// upper bits of a vector.
+SDValue
+X86TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const {
+ if (Subtarget->hasAVX()) {
+ DebugLoc dl = Op.getNode()->getDebugLoc();
+ SDValue Vec = Op.getNode()->getOperand(0);
+ SDValue Idx = Op.getNode()->getOperand(1);
+
+ if (Op.getNode()->getValueType(0).getSizeInBits() == 128
+ && Vec.getNode()->getValueType(0).getSizeInBits() == 256) {
+ return Extract128BitVector(Vec, Idx, DAG, dl);
+ }
}
- return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(),
- DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, AnyExt));
+ return SDValue();
+}
+
+// Lower a node with an INSERT_SUBVECTOR opcode. This may result in a
+// simple superregister reference or explicit instructions to insert
+// the upper bits of a vector.
+SDValue
+X86TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const {
+ if (Subtarget->hasAVX()) {
+ DebugLoc dl = Op.getNode()->getDebugLoc();
+ SDValue Vec = Op.getNode()->getOperand(0);
+ SDValue SubVec = Op.getNode()->getOperand(1);
+ SDValue Idx = Op.getNode()->getOperand(2);
+
+ if (Op.getNode()->getValueType(0).getSizeInBits() == 256
+ && SubVec.getNode()->getValueType(0).getSizeInBits() == 128) {
+ return Insert128BitVector(Vec, SubVec, Idx, DAG, dl);
+ }
+ }
+ return SDValue();
}
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
@@ -5797,12 +6250,11 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
- if (OpFlag) {
+ if (OpFlag)
Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg,
DebugLoc(), getPointerTy()),
Result);
- }
return Result;
}
@@ -5906,7 +6358,7 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
// load.
if (isGlobalStubReference(OpFlags))
Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
- PseudoSourceValue::getGOT(), 0, false, false, 0);
+ MachinePointerInfo::getGOT(), false, false, 0);
// If there was a non-zero offset that we didn't fold, create an explicit
// addition for it.
@@ -5929,7 +6381,7 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg,
unsigned char OperandFlags) {
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
DebugLoc dl = GA->getDebugLoc();
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
GA->getValueType(0),
@@ -5978,14 +6430,14 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const EVT PtrVT, TLSModel::Model model,
bool is64Bit) {
DebugLoc dl = GA->getDebugLoc();
- // Get the Thread Pointer
- SDValue Base = DAG.getNode(X86ISD::SegmentBaseAddress,
- DebugLoc(), PtrVT,
- DAG.getRegister(is64Bit? X86::FS : X86::GS,
- MVT::i32));
- SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Base,
- NULL, 0, false, false, 0);
+ // Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit).
+ Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(*DAG.getContext(),
+ is64Bit ? 257 : 256));
+
+ SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
+ DAG.getIntPtrConstant(0),
+ MachinePointerInfo(Ptr), false, false, 0);
unsigned char OperandFlags = 0;
// Most TLS accesses are not RIP relative, even on x86-64. One exception is
@@ -6004,14 +6456,14 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
// emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
// exec)
- SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
+ SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
GA->getValueType(0),
GA->getOffset(), OperandFlags);
SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
if (model == TLSModel::InitialExec)
Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
- PseudoSourceValue::getGOT(), 0, false, false, 0);
+ MachinePointerInfo::getGOT(), false, false, 0);
// The address of the thread local variable is the add of the thread
// pointer with the offset of the variable.
@@ -6020,29 +6472,29 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
SDValue
X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
-
+
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GA->getGlobal();
if (Subtarget->isTargetELF()) {
// TODO: implement the "local dynamic" model
// TODO: implement the "initial exec"model for pic executables
-
+
// If GV is an alias then use the aliasee for determining
// thread-localness.
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
GV = GA->resolveAliasedGlobal(false);
-
- TLSModel::Model model
+
+ TLSModel::Model model
= getTLSModel(GV, getTargetMachine().getRelocationModel());
-
+
switch (model) {
case TLSModel::GeneralDynamic:
case TLSModel::LocalDynamic: // not implemented
if (Subtarget->is64Bit())
return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
-
+
case TLSModel::InitialExec:
case TLSModel::LocalExec:
return LowerToTLSExecModel(GA, DAG, getPointerTy(), model,
@@ -6053,7 +6505,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
unsigned char OpFlag = 0;
unsigned WrapperKind = Subtarget->isPICStyleRIPRel() ?
X86ISD::WrapperRIP : X86ISD::Wrapper;
-
+
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
// global base reg.
bool PIC32 = (getTargetMachine().getRelocationModel() == Reloc::PIC_) &&
@@ -6062,24 +6514,26 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
OpFlag = X86II::MO_TLVP_PIC_BASE;
else
OpFlag = X86II::MO_TLVP;
- DebugLoc DL = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
- getPointerTy(),
+ GA->getValueType(0),
GA->getOffset(), OpFlag);
SDValue Offset = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
-
+
// With PIC32, the address is actually $g + Offset.
if (PIC32)
Offset = DAG.getNode(ISD::ADD, DL, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg,
DebugLoc(), getPointerTy()),
Offset);
-
+
// Lowering the machine isd will make sure everything is in the right
// location.
- SDValue Args[] = { Offset };
- SDValue Chain = DAG.getNode(X86ISD::TLSCALL, DL, MVT::Other, Args, 1);
-
+ SDValue Chain = DAG.getEntryNode();
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Args[] = { Chain, Offset };
+ Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args, 2);
+
// TLSCALL will be codegen'ed as call. Inform MFI that function has calls.
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setAdjustsStack(true);
@@ -6089,7 +6543,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy());
}
-
+
assert(false &&
"TLS not implemented for this target.");
@@ -6148,12 +6602,8 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
EVT SrcVT = Op.getOperand(0).getValueType();
- if (SrcVT.isVector()) {
- if (SrcVT == MVT::v2i32 && Op.getValueType() == MVT::v2f64) {
- return Op;
- }
+ if (SrcVT.isVector())
return SDValue();
- }
assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 &&
"Unknown SINT_TO_FP to lower!");
@@ -6174,25 +6624,36 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
StackSlot,
- PseudoSourceValue::getFixedStack(SSFI), 0,
+ MachinePointerInfo::getFixedStack(SSFI),
false, false, 0);
return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
}
SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
- SDValue StackSlot,
+ SDValue StackSlot,
SelectionDAG &DAG) const {
// Build the FILD
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
SDVTList Tys;
bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType());
if (useSSE)
- Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
+ Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Glue);
else
Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
+
+ unsigned ByteSize = SrcVT.getSizeInBits()/8;
+
+ int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex();
+ MachineMemOperand *MMO =
+ DAG.getMachineFunction()
+ .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOLoad, ByteSize, ByteSize);
+
SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(SrcVT) };
- SDValue Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG : X86ISD::FILD, dl,
- Tys, Ops, array_lengthof(Ops));
+ SDValue Result = DAG.getMemIntrinsicNode(useSSE ? X86ISD::FILD_FLAG :
+ X86ISD::FILD, DL,
+ Tys, Ops, array_lengthof(Ops),
+ SrcVT, MMO);
if (useSSE) {
Chain = Result.getValue(1);
@@ -6202,15 +6663,23 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
// shouldn't be necessary except that RFP cannot be live across
// multiple blocks. When stackifier is fixed, they can be uncoupled.
MachineFunction &MF = DAG.getMachineFunction();
- int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
+ unsigned SSFISize = Op.getValueType().getSizeInBits()/8;
+ int SSFI = MF.getFrameInfo()->CreateStackObject(SSFISize, SSFISize, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
Tys = DAG.getVTList(MVT::Other);
SDValue Ops[] = {
Chain, Result, StackSlot, DAG.getValueType(Op.getValueType()), InFlag
};
- Chain = DAG.getNode(X86ISD::FST, dl, Tys, Ops, array_lengthof(Ops));
- Result = DAG.getLoad(Op.getValueType(), dl, Chain, StackSlot,
- PseudoSourceValue::getFixedStack(SSFI), 0,
+ MachineMemOperand *MMO =
+ DAG.getMachineFunction()
+ .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOStore, SSFISize, SSFISize);
+
+ Chain = DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys,
+ Ops, array_lengthof(Ops),
+ Op.getValueType(), MMO);
+ Result = DAG.getLoad(Op.getValueType(), DL, Chain, StackSlot,
+ MachinePointerInfo::getFixedStack(SSFI),
false, false, 0);
}
@@ -6284,12 +6753,12 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
DAG.getIntPtrConstant(0)));
SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2);
SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 16);
SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0);
- SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Unpck2);
+ SDValue XR2F = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Unpck2);
SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 16);
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
@@ -6317,19 +6786,19 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
DAG.getIntPtrConstant(0)));
Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Load),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Load),
DAG.getIntPtrConstant(0));
// Or the load with the bias.
SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64,
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
MVT::v2f64, Load)),
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
MVT::v2f64, Bias)));
Or = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Or),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Or),
DAG.getIntPtrConstant(0));
// Subtract the bias.
@@ -6374,24 +6843,34 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl,
getPointerTy(), StackSlot, WordOff);
SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
- StackSlot, NULL, 0, false, false, 0);
+ StackSlot, MachinePointerInfo(),
+ false, false, 0);
SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32),
- OffsetSlot, NULL, 0, false, false, 0);
+ OffsetSlot, MachinePointerInfo(),
+ false, false, 0);
SDValue Fild = BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
return Fild;
}
assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
- StackSlot, NULL, 0, false, false, 0);
+ StackSlot, MachinePointerInfo(),
+ false, false, 0);
// For i64 source, we need to add the appropriate power of 2 if the input
// was negative. This is the same as the optimization in
// DAGTypeLegalizer::ExpandIntOp_UNIT_TO_FP, and for it to be safe here,
// we must be careful to do the computation in x87 extended precision, not
// in SSE. (The generic code can't know it's OK to do this, or how to.)
+ int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex();
+ MachineMemOperand *MMO =
+ DAG.getMachineFunction()
+ .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOLoad, 8, 8);
+
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) };
- SDValue Fild = DAG.getNode(X86ISD::FILD, dl, Tys, Ops, 3);
+ SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, 3,
+ MVT::i64, MMO);
APInt FF(32, 0x5F800000ULL);
@@ -6414,9 +6893,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
// Load the value out, extending it from f32 to f80.
// FIXME: Avoid the extend by constructing the right constant pool?
- SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, MVT::f80, dl, DAG.getEntryNode(),
- FudgePtr, PseudoSourceValue::getConstantPool(),
- 0, MVT::f32, false, false, 4);
+ SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(),
+ FudgePtr, MachinePointerInfo::getConstantPool(),
+ MVT::f32, false, false, 4);
// Extend everything to 80 bits to force it to be done on x87.
SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0));
@@ -6424,7 +6903,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
std::pair<SDValue,SDValue> X86TargetLowering::
FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const {
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
EVT DstTy = Op.getValueType();
@@ -6453,6 +6932,8 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const {
int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+
+
unsigned Opc;
switch (DstTy.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
@@ -6463,37 +6944,43 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const {
SDValue Chain = DAG.getEntryNode();
SDValue Value = Op.getOperand(0);
- if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) {
+ EVT TheVT = Op.getOperand(0).getValueType();
+ if (isScalarFPTypeInSSEReg(TheVT)) {
assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
- Chain = DAG.getStore(Chain, dl, Value, StackSlot,
- PseudoSourceValue::getFixedStack(SSFI), 0,
+ Chain = DAG.getStore(Chain, DL, Value, StackSlot,
+ MachinePointerInfo::getFixedStack(SSFI),
false, false, 0);
SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
SDValue Ops[] = {
- Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType())
+ Chain, StackSlot, DAG.getValueType(TheVT)
};
- Value = DAG.getNode(X86ISD::FLD, dl, Tys, Ops, 3);
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOLoad, MemSize, MemSize);
+ Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, 3,
+ DstTy, MMO);
Chain = Value.getValue(1);
SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
}
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOStore, MemSize, MemSize);
+
// Build the FP_TO_INT*_IN_MEM
SDValue Ops[] = { Chain, Value, StackSlot };
- SDValue FIST = DAG.getNode(Opc, dl, MVT::Other, Ops, 3);
+ SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
+ Ops, 3, DstTy, MMO);
return std::make_pair(FIST, StackSlot);
}
SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
SelectionDAG &DAG) const {
- if (Op.getValueType().isVector()) {
- if (Op.getValueType() == MVT::v2i32 &&
- Op.getOperand(0).getValueType() == MVT::v2f64) {
- return Op;
- }
+ if (Op.getValueType().isVector())
return SDValue();
- }
std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, true);
SDValue FIST = Vals.first, StackSlot = Vals.second;
@@ -6502,7 +6989,7 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
// Load the result.
return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
- FIST, StackSlot, NULL, 0, false, false, 0);
+ FIST, StackSlot, MachinePointerInfo(), false, false, 0);
}
SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
@@ -6513,7 +7000,7 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
// Load the result.
return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
- FIST, StackSlot, NULL, 0, false, false, 0);
+ FIST, StackSlot, MachinePointerInfo(), false, false, 0);
}
SDValue X86TargetLowering::LowerFABS(SDValue Op,
@@ -6539,7 +7026,7 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op,
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 16);
return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask);
}
@@ -6566,14 +7053,14 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 16);
if (VT.isVector()) {
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(ISD::XOR, dl, MVT::v2i64,
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
Op.getOperand(0)),
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, Mask)));
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Mask)));
} else {
return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask);
}
@@ -6615,7 +7102,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 16);
SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1);
@@ -6625,7 +7112,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, SignBit);
SignBit = DAG.getNode(X86ISD::FSRL, dl, MVT::v2f64, SignBit,
DAG.getConstant(32, MVT::i32));
- SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, SignBit);
+ SignBit = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, SignBit);
SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, SignBit,
DAG.getIntPtrConstant(0));
}
@@ -6644,7 +7131,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
C = ConstantVector::get(CV);
CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 16);
SDValue Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Mask2);
@@ -6884,8 +7371,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
// Lower (X & (1 << N)) == 0 to BT(X, N).
// Lower ((X >>u N) & 1) != 0 to BT(X, N).
// Lower ((X >>s N) & 1) != 0 to BT(X, N).
- if (Op0.getOpcode() == ISD::AND &&
- Op0.hasOneUse() &&
+ if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() &&
Op1.getOpcode() == ISD::Constant &&
cast<ConstantSDNode>(Op1)->isNullValue() &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
@@ -6894,19 +7380,25 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
return NewSetCC;
}
- // Look for "(setcc) == / != 1" to avoid unncessary setcc.
- if (Op0.getOpcode() == X86ISD::SETCC &&
- Op1.getOpcode() == ISD::Constant &&
+ // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
+ // these.
+ if (Op1.getOpcode() == ISD::Constant &&
(cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
cast<ConstantSDNode>(Op1)->isNullValue()) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
- X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
- bool Invert = (CC == ISD::SETNE) ^
- cast<ConstantSDNode>(Op1)->isNullValue();
- if (Invert)
+
+ // If the input is a setcc, then reuse the input setcc or use a new one with
+ // the inverted condition.
+ if (Op0.getOpcode() == X86ISD::SETCC) {
+ X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
+ bool Invert = (CC == ISD::SETNE) ^
+ cast<ConstantSDNode>(Op1)->isNullValue();
+ if (!Invert) return Op0;
+
CCode = X86::GetOppositeBranchCondition(CCode);
- return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1));
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1));
+ }
}
bool isFP = Op1.getValueType().isFloatingPoint();
@@ -6914,17 +7406,9 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
if (X86CC == X86::COND_INVALID)
return SDValue();
- SDValue Cond = EmitCmp(Op0, Op1, X86CC, DAG);
-
- // Use sbb x, x to materialize carry bit into a GPR.
- if (X86CC == X86::COND_B)
- return DAG.getNode(ISD::AND, dl, MVT::i8,
- DAG.getNode(X86ISD::SETCC_CARRY, dl, MVT::i8,
- DAG.getConstant(X86CC, MVT::i8), Cond),
- DAG.getConstant(1, MVT::i8));
-
+ SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG);
return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(X86CC, MVT::i8), Cond);
+ DAG.getConstant(X86CC, MVT::i8), EFLAGS);
}
SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
@@ -6996,11 +7480,8 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
switch (VT.getSimpleVT().SimpleTy) {
default: break;
- case MVT::v8i8:
case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break;
- case MVT::v4i16:
case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break;
- case MVT::v2i32:
case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break;
case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break;
}
@@ -7051,6 +7532,8 @@ static bool isX86LogicalCmp(SDValue Op) {
if (Op.getResNo() == 1 &&
(Opc == X86ISD::ADD ||
Opc == X86ISD::SUB ||
+ Opc == X86ISD::ADC ||
+ Opc == X86ISD::SBB ||
Opc == X86ISD::SMUL ||
Opc == X86ISD::UMUL ||
Opc == X86ISD::INC ||
@@ -7060,13 +7543,28 @@ static bool isX86LogicalCmp(SDValue Op) {
Opc == X86ISD::AND))
return true;
+ if (Op.getResNo() == 2 && Opc == X86ISD::UMUL)
+ return true;
+
return false;
}
+static bool isZero(SDValue V) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
+ return C && C->isNullValue();
+}
+
+static bool isAllOnes(SDValue V) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
+ return C && C->isAllOnesValue();
+}
+
SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
bool addTest = true;
SDValue Cond = Op.getOperand(0);
- DebugLoc dl = Op.getDebugLoc();
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+ DebugLoc DL = Op.getDebugLoc();
SDValue CC;
if (Cond.getOpcode() == ISD::SETCC) {
@@ -7075,34 +7573,44 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
Cond = NewCond;
}
- // (select (x == 0), -1, 0) -> (sign_bit (x - 1))
- SDValue Op1 = Op.getOperand(1);
- SDValue Op2 = Op.getOperand(2);
+ // (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y
+ // (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y
+ // (select (x != 0), y, -1) -> (sign_bit (x - 1)) | y
+ // (select (x != 0), -1, y) -> ~(sign_bit (x - 1)) | y
if (Cond.getOpcode() == X86ISD::SETCC &&
- cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue() == X86::COND_E) {
+ Cond.getOperand(1).getOpcode() == X86ISD::CMP &&
+ isZero(Cond.getOperand(1).getOperand(1))) {
SDValue Cmp = Cond.getOperand(1);
- if (Cmp.getOpcode() == X86ISD::CMP) {
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(Op1);
+
+ unsigned CondCode =cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue();
+
+ if ((isAllOnes(Op1) || isAllOnes(Op2)) &&
+ (CondCode == X86::COND_E || CondCode == X86::COND_NE)) {
+ SDValue Y = isAllOnes(Op2) ? Op1 : Op2;
+
+ SDValue CmpOp0 = Cmp.getOperand(0);
+ Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32,
+ CmpOp0, DAG.getConstant(1, CmpOp0.getValueType()));
+
+ SDValue Res = // Res = 0 or -1.
+ DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
+ DAG.getConstant(X86::COND_B, MVT::i8), Cmp);
+
+ if (isAllOnes(Op1) != (CondCode == X86::COND_E))
+ Res = DAG.getNOT(DL, Res, Res.getValueType());
+
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(Op2);
- ConstantSDNode *RHSC =
- dyn_cast<ConstantSDNode>(Cmp.getOperand(1).getNode());
- if (N1C && N1C->isAllOnesValue() &&
- N2C && N2C->isNullValue() &&
- RHSC && RHSC->isNullValue()) {
- SDValue CmpOp0 = Cmp.getOperand(0);
- Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
- CmpOp0, DAG.getConstant(1, CmpOp0.getValueType()));
- return DAG.getNode(X86ISD::SETCC_CARRY, dl, Op.getValueType(),
- DAG.getConstant(X86::COND_B, MVT::i8), Cmp);
- }
+ if (N2C == 0 || !N2C->isNullValue())
+ Res = DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y);
+ return Res;
}
}
- // Look pass (and (setcc_carry (cmp ...)), 1).
+ // Look past (and (setcc_carry (cmp ...)), 1).
if (Cond.getOpcode() == ISD::AND &&
Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
- if (C && C->getAPIntValue() == 1)
+ if (C && C->getAPIntValue() == 1)
Cond = Cond.getOperand(0);
}
@@ -7135,8 +7643,8 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// We know the result of AND is compared against zero. Try to match
// it to BT.
- if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
- SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
+ if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
+ SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, DL, DAG);
if (NewSetCC.getNode()) {
CC = NewSetCC.getOperand(0);
Cond = NewSetCC.getOperand(1);
@@ -7150,11 +7658,28 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
Cond = EmitTest(Cond, X86::COND_NE, DAG);
}
+ // a < b ? -1 : 0 -> RES = ~setcc_carry
+ // a < b ? 0 : -1 -> RES = setcc_carry
+ // a >= b ? -1 : 0 -> RES = setcc_carry
+ // a >= b ? 0 : -1 -> RES = ~setcc_carry
+ if (Cond.getOpcode() == X86ISD::CMP) {
+ unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue();
+
+ if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) &&
+ (isAllOnes(Op1) || isAllOnes(Op2)) && (isZero(Op1) || isZero(Op2))) {
+ SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
+ DAG.getConstant(X86::COND_B, MVT::i8), Cond);
+ if (isAllOnes(Op1) != (CondCode == X86::COND_B))
+ return DAG.getNOT(DL, Res, Res.getValueType());
+ return Res;
+ }
+ }
+
// X86ISD::CMOV means set the result (which is operand 1) to the RHS if
// condition is true.
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag);
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
SDValue Ops[] = { Op2, Op1, CC, Cond };
- return DAG.getNode(X86ISD::CMOV, dl, VTs, Ops, array_lengthof(Ops));
+ return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops));
}
// isAndOrOfSingleUseSetCCs - Return true if node is an ISD::AND or
@@ -7209,7 +7734,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
if (Cond.getOpcode() == ISD::AND &&
Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
- if (C && C->getAPIntValue() == 1)
+ if (C && C->getAPIntValue() == 1)
Cond = Cond.getOperand(0);
}
@@ -7310,7 +7835,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
// We know the result of AND is compared against zero. Try to match
// it to BT.
- if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
+ if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
if (NewSetCC.getNode()) {
CC = NewSetCC.getOperand(0);
@@ -7337,8 +7862,8 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
- assert(Subtarget->isTargetCygMing() &&
- "This should be used only on Cygwin/Mingw targets");
+ assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows()) &&
+ "This should be used only on Windows targets");
DebugLoc dl = Op.getDebugLoc();
// Get the inputs.
@@ -7353,9 +7878,9 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag);
Flag = Chain.getValue(1);
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- Chain = DAG.getNode(X86ISD::MINGW_ALLOCA, dl, NodeTys, Chain, Flag);
+ Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
Flag = Chain.getValue(1);
Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1);
@@ -7369,15 +7894,15 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
- if (!Subtarget->is64Bit()) {
+ if (!Subtarget->is64Bit() || Subtarget->isTargetWin64()) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
getPointerTy());
- return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0,
- false, false, 0);
+ return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
+ MachinePointerInfo(SV), false, false, 0);
}
// __va_list_tag:
@@ -7388,48 +7913,107 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
SmallVector<SDValue, 8> MemOps;
SDValue FIN = Op.getOperand(1);
// Store gp_offset
- SDValue Store = DAG.getStore(Op.getOperand(0), dl,
+ SDValue Store = DAG.getStore(Op.getOperand(0), DL,
DAG.getConstant(FuncInfo->getVarArgsGPOffset(),
MVT::i32),
- FIN, SV, 0, false, false, 0);
+ FIN, MachinePointerInfo(SV), false, false, 0);
MemOps.push_back(Store);
// Store fp_offset
- FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
FIN, DAG.getIntPtrConstant(4));
- Store = DAG.getStore(Op.getOperand(0), dl,
+ Store = DAG.getStore(Op.getOperand(0), DL,
DAG.getConstant(FuncInfo->getVarArgsFPOffset(),
MVT::i32),
- FIN, SV, 4, false, false, 0);
+ FIN, MachinePointerInfo(SV, 4), false, false, 0);
MemOps.push_back(Store);
// Store ptr to overflow_arg_area
- FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
FIN, DAG.getIntPtrConstant(4));
SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
getPointerTy());
- Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 8,
+ Store = DAG.getStore(Op.getOperand(0), DL, OVFIN, FIN,
+ MachinePointerInfo(SV, 8),
false, false, 0);
MemOps.push_back(Store);
// Store ptr to reg_save_area.
- FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
FIN, DAG.getIntPtrConstant(8));
SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
getPointerTy());
- Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 16,
- false, false, 0);
+ Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN,
+ MachinePointerInfo(SV, 16), false, false, 0);
MemOps.push_back(Store);
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
&MemOps[0], MemOps.size());
}
SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
- // X86-64 va_list is a struct { i32, i32, i8*, i8* }.
- assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!");
+ assert(Subtarget->is64Bit() &&
+ "LowerVAARG only handles 64-bit va_arg!");
+ assert((Subtarget->isTargetLinux() ||
+ Subtarget->isTargetDarwin()) &&
+ "Unhandled target in LowerVAARG");
+ assert(Op.getNode()->getNumOperands() == 4);
+ SDValue Chain = Op.getOperand(0);
+ SDValue SrcPtr = Op.getOperand(1);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ unsigned Align = Op.getConstantOperandVal(3);
+ DebugLoc dl = Op.getDebugLoc();
- report_fatal_error("VAArgInst is not yet implemented for x86-64!");
- return SDValue();
+ EVT ArgVT = Op.getNode()->getValueType(0);
+ const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ uint32_t ArgSize = getTargetData()->getTypeAllocSize(ArgTy);
+ uint8_t ArgMode;
+
+ // Decide which area this value should be read from.
+ // TODO: Implement the AMD64 ABI in its entirety. This simple
+ // selection mechanism works only for the basic types.
+ if (ArgVT == MVT::f80) {
+ llvm_unreachable("va_arg for f80 not yet implemented");
+ } else if (ArgVT.isFloatingPoint() && ArgSize <= 16 /*bytes*/) {
+ ArgMode = 2; // Argument passed in XMM register. Use fp_offset.
+ } else if (ArgVT.isInteger() && ArgSize <= 32 /*bytes*/) {
+ ArgMode = 1; // Argument passed in GPR64 register(s). Use gp_offset.
+ } else {
+ llvm_unreachable("Unhandled argument type in LowerVAARG");
+ }
+
+ if (ArgMode == 2) {
+ // Sanity Check: Make sure using fp_offset makes sense.
+ assert(!UseSoftFloat &&
+ !(DAG.getMachineFunction()
+ .getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) &&
+ Subtarget->hasXMM());
+ }
+
+ // Insert VAARG_64 node into the DAG
+ // VAARG_64 returns two values: Variable Argument Address, Chain
+ SmallVector<SDValue, 11> InstOps;
+ InstOps.push_back(Chain);
+ InstOps.push_back(SrcPtr);
+ InstOps.push_back(DAG.getConstant(ArgSize, MVT::i32));
+ InstOps.push_back(DAG.getConstant(ArgMode, MVT::i8));
+ InstOps.push_back(DAG.getConstant(Align, MVT::i32));
+ SDVTList VTs = DAG.getVTList(getPointerTy(), MVT::Other);
+ SDValue VAARG = DAG.getMemIntrinsicNode(X86ISD::VAARG_64, dl,
+ VTs, &InstOps[0], InstOps.size(),
+ MVT::i64,
+ MachinePointerInfo(SV),
+ /*Align=*/0,
+ /*Volatile=*/false,
+ /*ReadMem=*/true,
+ /*WriteMem=*/true);
+ Chain = VAARG.getValue(1);
+
+ // Load the next argument and return it
+ return DAG.getLoad(ArgVT, dl,
+ Chain,
+ VAARG,
+ MachinePointerInfo(),
+ false, false, 0);
}
SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
@@ -7440,11 +8024,12 @@ SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
SDValue SrcPtr = Op.getOperand(2);
const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
- return DAG.getMemcpy(Chain, dl, DstPtr, SrcPtr,
+ return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr,
DAG.getIntPtrConstant(24), 8, /*isVolatile*/false,
- false, DstSV, 0, SrcSV, 0);
+ false,
+ MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
SDValue
@@ -7713,10 +8298,11 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 4);
} else {
ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2);
+// FIXME this must be lowered to get rid of the invalid type.
}
EVT VT = Op.getValueType();
- ShAmt = DAG.getNode(ISD::BIT_CONVERT, dl, VT, ShAmt);
+ ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(NewIntNo, MVT::i32),
Op.getOperand(1), ShAmt);
@@ -7740,13 +8326,13 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, getPointerTy(),
FrameAddr, Offset),
- NULL, 0, false, false, 0);
+ MachinePointerInfo(), false, false, 0);
}
// Just load the return address.
SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- RetAddrFI, NULL, 0, false, false, 0);
+ RetAddrFI, MachinePointerInfo(), false, false, 0);
}
SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
@@ -7759,7 +8345,8 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP;
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
while (Depth--)
- FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0,
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+ MachinePointerInfo(),
false, false, 0);
return FrameAddr;
}
@@ -7784,7 +8371,8 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame,
DAG.getIntPtrConstant(TD->getPointerSize()));
StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset);
- Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0, false, false, 0);
+ Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
+ false, false, 0);
Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);
MF.getRegInfo().addLiveOut(StoreAddrReg);
@@ -7819,11 +8407,13 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11
SDValue Addr = Trmp;
OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
- Addr, TrmpAddr, 0, false, false, 0);
+ Addr, MachinePointerInfo(TrmpAddr),
+ false, false, 0);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(2, MVT::i64));
- OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, TrmpAddr, 2,
+ OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr,
+ MachinePointerInfo(TrmpAddr, 2),
false, false, 2);
// Load the 'nest' parameter value into R10.
@@ -7832,11 +8422,13 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(10, MVT::i64));
OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
- Addr, TrmpAddr, 10, false, false, 0);
+ Addr, MachinePointerInfo(TrmpAddr, 10),
+ false, false, 0);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(12, MVT::i64));
- OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 12,
+ OutChains[3] = DAG.getStore(Root, dl, Nest, Addr,
+ MachinePointerInfo(TrmpAddr, 12),
false, false, 2);
// Jump to the nested function.
@@ -7844,13 +8436,15 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(20, MVT::i64));
OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
- Addr, TrmpAddr, 20, false, false, 0);
+ Addr, MachinePointerInfo(TrmpAddr, 20),
+ false, false, 0);
unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(22, MVT::i64));
OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, MVT::i8), Addr,
- TrmpAddr, 22, false, false, 0);
+ MachinePointerInfo(TrmpAddr, 22),
+ false, false, 0);
SDValue Ops[] =
{ Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6) };
@@ -7912,22 +8506,26 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
const unsigned char N86Reg = RegInfo->getX86RegNum(NestReg);
OutChains[0] = DAG.getStore(Root, dl,
DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
- Trmp, TrmpAddr, 0, false, false, 0);
+ Trmp, MachinePointerInfo(TrmpAddr),
+ false, false, 0);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(1, MVT::i32));
- OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 1,
+ OutChains[1] = DAG.getStore(Root, dl, Nest, Addr,
+ MachinePointerInfo(TrmpAddr, 1),
false, false, 1);
const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode.
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(5, MVT::i32));
OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, MVT::i8), Addr,
- TrmpAddr, 5, false, false, 1);
+ MachinePointerInfo(TrmpAddr, 5),
+ false, false, 1);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(6, MVT::i32));
- OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, TrmpAddr, 6,
+ OutChains[3] = DAG.getStore(Root, dl, Disp, Addr,
+ MachinePointerInfo(TrmpAddr, 6),
false, false, 1);
SDValue Ops[] =
@@ -7959,44 +8557,51 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
const TargetMachine &TM = MF.getTarget();
- const TargetFrameInfo &TFI = *TM.getFrameInfo();
+ const TargetFrameLowering &TFI = *TM.getFrameLowering();
unsigned StackAlignment = TFI.getStackAlignment();
EVT VT = Op.getValueType();
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
// Save FP Control Word to stack slot
int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
- SDValue Chain = DAG.getNode(X86ISD::FNSTCW16m, dl, MVT::Other,
- DAG.getEntryNode(), StackSlot);
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOStore, 2, 2);
+
+ SDValue Ops[] = { DAG.getEntryNode(), StackSlot };
+ SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL,
+ DAG.getVTList(MVT::Other),
+ Ops, 2, MVT::i16, MMO);
// Load FP Control Word from stack slot
- SDValue CWD = DAG.getLoad(MVT::i16, dl, Chain, StackSlot, NULL, 0,
- false, false, 0);
+ SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot,
+ MachinePointerInfo(), false, false, 0);
// Transform as necessary
SDValue CWD1 =
- DAG.getNode(ISD::SRL, dl, MVT::i16,
- DAG.getNode(ISD::AND, dl, MVT::i16,
+ DAG.getNode(ISD::SRL, DL, MVT::i16,
+ DAG.getNode(ISD::AND, DL, MVT::i16,
CWD, DAG.getConstant(0x800, MVT::i16)),
DAG.getConstant(11, MVT::i8));
SDValue CWD2 =
- DAG.getNode(ISD::SRL, dl, MVT::i16,
- DAG.getNode(ISD::AND, dl, MVT::i16,
+ DAG.getNode(ISD::SRL, DL, MVT::i16,
+ DAG.getNode(ISD::AND, DL, MVT::i16,
CWD, DAG.getConstant(0x400, MVT::i16)),
DAG.getConstant(9, MVT::i8));
SDValue RetVal =
- DAG.getNode(ISD::AND, dl, MVT::i16,
- DAG.getNode(ISD::ADD, dl, MVT::i16,
- DAG.getNode(ISD::OR, dl, MVT::i16, CWD1, CWD2),
+ DAG.getNode(ISD::AND, DL, MVT::i16,
+ DAG.getNode(ISD::ADD, DL, MVT::i16,
+ DAG.getNode(ISD::OR, DL, MVT::i16, CWD1, CWD2),
DAG.getConstant(1, MVT::i16)),
DAG.getConstant(3, MVT::i16));
return DAG.getNode((VT.getSizeInBits() < 16 ?
- ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
+ ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal);
}
SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const {
@@ -8122,16 +8727,16 @@ SDValue X86TargetLowering::LowerSHL(SDValue Op, SelectionDAG &DAG) const {
Op.getOperand(1), DAG.getConstant(23, MVT::i32));
ConstantInt *CI = ConstantInt::get(*Context, APInt(32, 0x3f800000U));
-
+
std::vector<Constant*> CV(4, CI);
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 16);
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend);
- Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, Op);
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op);
Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
return DAG.getNode(ISD::MUL, dl, VT, Op, R);
}
@@ -8149,7 +8754,7 @@ SDValue X86TargetLowering::LowerSHL(SDValue Op, SelectionDAG &DAG) const {
Constant *C = ConstantVector::get(CVM1);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 16);
// r = pblendv(r, psllw(r & (char16)15, 4), a);
@@ -8157,31 +8762,27 @@ SDValue X86TargetLowering::LowerSHL(SDValue Op, SelectionDAG &DAG) const {
M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
DAG.getConstant(4, MVT::i32));
- R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32),
- R, M, Op);
+ R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op);
// a += a
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
-
+
C = ConstantVector::get(CVM2);
CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0, false, false, 16);
-
+ MachinePointerInfo::getConstantPool(),
+ false, false, 16);
+
// r = pblendv(r, psllw(r & (char16)63, 2), a);
M = DAG.getNode(ISD::AND, dl, VT, R, M);
M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
DAG.getConstant(2, MVT::i32));
- R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32),
- R, M, Op);
+ R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op);
// a += a
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
-
+
// return pblendv(r, r+r, a);
- R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32),
+ R = DAG.getNode(X86ISD::PBLENDVB, dl, VT,
R, DAG.getNode(ISD::ADD, dl, VT, R, R), Op);
return R;
}
@@ -8198,8 +8799,7 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
SDValue RHS = N->getOperand(1);
unsigned BaseOp = 0;
unsigned Cond = 0;
- DebugLoc dl = Op.getDebugLoc();
-
+ DebugLoc DL = Op.getDebugLoc();
switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown ovf instruction!");
case ISD::SADDO:
@@ -8238,19 +8838,29 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
BaseOp = X86ISD::SMUL;
Cond = X86::COND_O;
break;
- case ISD::UMULO:
- BaseOp = X86ISD::UMUL;
- Cond = X86::COND_B;
- break;
+ case ISD::UMULO: { // i64, i8 = umulo lhs, rhs --> i64, i64, i32 umul lhs,rhs
+ SDVTList VTs = DAG.getVTList(N->getValueType(0), N->getValueType(0),
+ MVT::i32);
+ SDValue Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS);
+
+ SDValue SetCC =
+ DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+ DAG.getConstant(X86::COND_O, MVT::i32),
+ SDValue(Sum.getNode(), 2));
+
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
+ return Sum;
+ }
}
// Also sets EFLAGS.
SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
- SDValue Sum = DAG.getNode(BaseOp, dl, VTs, LHS, RHS);
+ SDValue Sum = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
SDValue SetCC =
- DAG.getNode(X86ISD::SETCC, dl, N->getValueType(1),
- DAG.getConstant(Cond, MVT::i32), SDValue(Sum.getNode(), 1));
+ DAG.getNode(X86ISD::SETCC, DL, N->getValueType(1),
+ DAG.getConstant(Cond, MVT::i32),
+ SDValue(Sum.getNode(), 1));
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
return Sum;
@@ -8258,10 +8868,10 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
DebugLoc dl = Op.getDebugLoc();
-
+
if (!Subtarget->hasSSE2()) {
SDValue Chain = Op.getOperand(0);
- SDValue Zero = DAG.getConstant(0,
+ SDValue Zero = DAG.getConstant(0,
Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
SDValue Ops[] = {
DAG.getRegister(X86::ESP, MVT::i32), // Base
@@ -8272,37 +8882,37 @@ SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
Zero,
Chain
};
- SDNode *Res =
+ SDNode *Res =
DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops,
array_lengthof(Ops));
return SDValue(Res, 0);
}
-
+
unsigned isDev = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
if (!isDev)
return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
-
+
unsigned Op1 = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
unsigned Op2 = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
unsigned Op3 = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
unsigned Op4 = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
-
+
// def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
if (!Op1 && !Op2 && !Op3 && Op4)
return DAG.getNode(X86ISD::SFENCE, dl, MVT::Other, Op.getOperand(0));
-
+
// def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
if (Op1 && !Op2 && !Op3 && !Op4)
return DAG.getNode(X86ISD::LFENCE, dl, MVT::Other, Op.getOperand(0));
-
- // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)),
+
+ // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)),
// (MFENCE)>;
return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
}
SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const {
EVT T = Op.getValueType();
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
unsigned Reg = 0;
unsigned size = 0;
switch(T.getSimpleVT().SimpleTy) {
@@ -8316,24 +8926,26 @@ SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const {
Reg = X86::RAX; size = 8;
break;
}
- SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), dl, Reg,
+ SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg,
Op.getOperand(2), SDValue());
SDValue Ops[] = { cpIn.getValue(0),
Op.getOperand(1),
Op.getOperand(3),
DAG.getTargetConstant(size, MVT::i8),
cpIn.getValue(1) };
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
- SDValue Result = DAG.getNode(X86ISD::LCMPXCHG_DAG, dl, Tys, Ops, 5);
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
+ SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys,
+ Ops, 5, T, MMO);
SDValue cpOut =
- DAG.getCopyFromReg(Result.getValue(0), dl, Reg, T, Result.getValue(1));
+ DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
return cpOut;
}
SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->is64Bit() && "Result not type legalized?");
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue TheChain = Op.getOperand(0);
DebugLoc dl = Op.getDebugLoc();
SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1);
@@ -8349,16 +8961,15 @@ SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
return DAG.getMergeValues(Ops, 2, dl);
}
-SDValue X86TargetLowering::LowerBIT_CONVERT(SDValue Op,
+SDValue X86TargetLowering::LowerBITCAST(SDValue Op,
SelectionDAG &DAG) const {
EVT SrcVT = Op.getOperand(0).getValueType();
EVT DstVT = Op.getValueType();
- assert((Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
- Subtarget->hasMMX() && !DisableMMX) &&
- "Unexpected custom BIT_CONVERT");
- assert((DstVT == MVT::i64 ||
+ assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
+ Subtarget->hasMMX() && "Unexpected custom BITCAST");
+ assert((DstVT == MVT::i64 ||
(DstVT.isVector() && DstVT.getSizeInBits()==64)) &&
- "Unexpected custom BIT_CONVERT");
+ "Unexpected custom BITCAST");
// i64 <=> MMX conversions are Legal.
if (SrcVT==MVT::i64 && DstVT.isVector())
return Op;
@@ -8370,6 +8981,7 @@ SDValue X86TargetLowering::LowerBIT_CONVERT(SDValue Op,
// All other conversions need to be expanded.
return SDValue();
}
+
SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
DebugLoc dl = Node->getDebugLoc();
@@ -8384,6 +8996,32 @@ SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const {
cast<AtomicSDNode>(Node)->getAlignment());
}
+static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getNode()->getValueType(0);
+
+ // Let legalize expand this if it isn't a legal type yet.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+
+ unsigned Opc;
+ bool ExtraOp = false;
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Invalid code");
+ case ISD::ADDC: Opc = X86ISD::ADD; break;
+ case ISD::ADDE: Opc = X86ISD::ADC; ExtraOp = true; break;
+ case ISD::SUBC: Opc = X86ISD::SUB; break;
+ case ISD::SUBE: Opc = X86ISD::SBB; ExtraOp = true; break;
+ }
+
+ if (!ExtraOp)
+ return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+ Op.getOperand(1));
+ return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+ Op.getOperand(1), Op.getOperand(2));
+}
+
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -8397,6 +9035,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
+ case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, DAG);
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
@@ -8441,7 +9081,11 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SMULO:
case ISD::UMULO: return LowerXALUO(Op, DAG);
case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG);
- case ISD::BIT_CONVERT: return LowerBIT_CONVERT(Op, DAG);
+ case ISD::BITCAST: return LowerBITCAST(Op, DAG);
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUBC:
+ case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
}
}
@@ -8478,6 +9122,12 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
default:
assert(false && "Do not know how to custom type legalize this operation!");
return;
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUBC:
+ case ISD::SUBE:
+ // We don't want to expand or promote these.
+ return;
case ISD::FP_TO_SINT: {
std::pair<SDValue,SDValue> Vals =
FP_TO_INTHelper(SDValue(N, 0), DAG, true);
@@ -8485,13 +9135,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
if (FIST.getNode() != 0) {
EVT VT = N->getValueType(0);
// Return a load from the stack slot.
- Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, NULL, 0,
- false, false, 0));
+ Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot,
+ MachinePointerInfo(), false, false, 0));
}
return;
}
case ISD::READCYCLECOUNTER: {
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue TheChain = N->getOperand(0);
SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1);
SDValue eax = DAG.getCopyFromReg(rd, dl, X86::EAX, MVT::i32,
@@ -8527,8 +9177,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue Ops[] = { swapInH.getValue(0),
N->getOperand(1),
swapInH.getValue(1) };
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
- SDValue Result = DAG.getNode(X86ISD::LCMPXCHG8_DAG, dl, Tys, Ops, 3);
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
+ SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG8_DAG, dl, Tys,
+ Ops, 3, T, MMO);
SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, X86::EAX,
MVT::i32, Result.getValue(1));
SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl, X86::EDX,
@@ -8601,15 +9253,18 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::INSERTPS: return "X86ISD::INSERTPS";
case X86ISD::PINSRB: return "X86ISD::PINSRB";
case X86ISD::PINSRW: return "X86ISD::PINSRW";
- case X86ISD::MMX_PINSRW: return "X86ISD::MMX_PINSRW";
case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
+ case X86ISD::PANDN: return "X86ISD::PANDN";
+ case X86ISD::PSIGNB: return "X86ISD::PSIGNB";
+ case X86ISD::PSIGNW: return "X86ISD::PSIGNW";
+ case X86ISD::PSIGND: return "X86ISD::PSIGND";
+ case X86ISD::PBLENDVB: return "X86ISD::PBLENDVB";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMIN: return "X86ISD::FMIN";
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
case X86ISD::FRCP: return "X86ISD::FRCP";
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
case X86ISD::TLSCALL: return "X86ISD::TLSCALL";
- case X86ISD::SegmentBaseAddress: return "X86ISD::SegmentBaseAddress";
case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m";
@@ -8637,6 +9292,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PCMPGTQ: return "X86ISD::PCMPGTQ";
case X86ISD::ADD: return "X86ISD::ADD";
case X86ISD::SUB: return "X86ISD::SUB";
+ case X86ISD::ADC: return "X86ISD::ADC";
+ case X86ISD::SBB: return "X86ISD::SBB";
case X86ISD::SMUL: return "X86ISD::SMUL";
case X86ISD::UMUL: return "X86ISD::UMUL";
case X86ISD::INC: return "X86ISD::INC";
@@ -8681,7 +9338,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ";
case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
- case X86ISD::MINGW_ALLOCA: return "X86ISD::MINGW_ALLOCA";
+ case X86ISD::VAARG_64: return "X86ISD::VAARG_64";
+ case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
}
}
@@ -9203,15 +9861,12 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
MachineBasicBlock *
X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
unsigned numArgs, bool memArg) const {
-
assert((Subtarget->hasSSE42() || Subtarget->hasAVX()) &&
"Target must have SSE4.2 or AVX features enabled");
DebugLoc dl = MI->getDebugLoc();
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-
unsigned Opc;
-
if (!Subtarget->hasAVX()) {
if (memArg)
Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm;
@@ -9224,24 +9879,318 @@ X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
Opc = numArgs == 3 ? X86::VPCMPISTRM128rr : X86::VPCMPESTRM128rr;
}
- MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(Opc));
-
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc));
for (unsigned i = 0; i < numArgs; ++i) {
MachineOperand &Op = MI->getOperand(i+1);
-
if (!(Op.isReg() && Op.isImplicit()))
MIB.addOperand(Op);
}
-
- BuildMI(BB, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg())
+ BuildMI(*BB, MI, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg())
.addReg(X86::XMM0);
MI->eraseFromParent();
+ return BB;
+}
+MachineBasicBlock *
+X86TargetLowering::EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB) const {
+ DebugLoc dl = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ // Address into RAX/EAX, other two args into ECX, EDX.
+ unsigned MemOpc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
+ unsigned MemReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg);
+ for (int i = 0; i < X86::AddrNumOperands; ++i)
+ MIB.addOperand(MI->getOperand(i));
+
+ unsigned ValOps = X86::AddrNumOperands;
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX)
+ .addReg(MI->getOperand(ValOps).getReg());
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EDX)
+ .addReg(MI->getOperand(ValOps+1).getReg());
+
+ // The instruction doesn't actually take any operands though.
+ BuildMI(*BB, MI, dl, TII->get(X86::MONITORrrr));
+
+ MI->eraseFromParent(); // The pseudo is gone now.
+ return BB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const {
+ DebugLoc dl = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ // First arg in ECX, the second in EAX.
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX)
+ .addReg(MI->getOperand(0).getReg());
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX)
+ .addReg(MI->getOperand(1).getReg());
+
+ // The instruction doesn't actually take any operands though.
+ BuildMI(*BB, MI, dl, TII->get(X86::MWAITrr));
+
+ MI->eraseFromParent(); // The pseudo is gone now.
return BB;
}
MachineBasicBlock *
+X86TargetLowering::EmitVAARG64WithCustomInserter(
+ MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ // Emit va_arg instruction on X86-64.
+
+ // Operands to this pseudo-instruction:
+ // 0 ) Output : destination address (reg)
+ // 1-5) Input : va_list address (addr, i64mem)
+ // 6 ) ArgSize : Size (in bytes) of vararg type
+ // 7 ) ArgMode : 0=overflow only, 1=use gp_offset, 2=use fp_offset
+ // 8 ) Align : Alignment of type
+ // 9 ) EFLAGS (implicit-def)
+
+ assert(MI->getNumOperands() == 10 && "VAARG_64 should have 10 operands!");
+ assert(X86::AddrNumOperands == 5 && "VAARG_64 assumes 5 address operands");
+
+ unsigned DestReg = MI->getOperand(0).getReg();
+ MachineOperand &Base = MI->getOperand(1);
+ MachineOperand &Scale = MI->getOperand(2);
+ MachineOperand &Index = MI->getOperand(3);
+ MachineOperand &Disp = MI->getOperand(4);
+ MachineOperand &Segment = MI->getOperand(5);
+ unsigned ArgSize = MI->getOperand(6).getImm();
+ unsigned ArgMode = MI->getOperand(7).getImm();
+ unsigned Align = MI->getOperand(8).getImm();
+
+ // Memory Reference
+ assert(MI->hasOneMemOperand() && "Expected VAARG_64 to have one memoperand");
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ // Machine Information
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
+ const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
+ DebugLoc DL = MI->getDebugLoc();
+
+ // struct va_list {
+ // i32 gp_offset
+ // i32 fp_offset
+ // i64 overflow_area (address)
+ // i64 reg_save_area (address)
+ // }
+ // sizeof(va_list) = 24
+ // alignment(va_list) = 8
+
+ unsigned TotalNumIntRegs = 6;
+ unsigned TotalNumXMMRegs = 8;
+ bool UseGPOffset = (ArgMode == 1);
+ bool UseFPOffset = (ArgMode == 2);
+ unsigned MaxOffset = TotalNumIntRegs * 8 +
+ (UseFPOffset ? TotalNumXMMRegs * 16 : 0);
+
+ /* Align ArgSize to a multiple of 8 */
+ unsigned ArgSizeA8 = (ArgSize + 7) & ~7;
+ bool NeedsAlign = (Align > 8);
+
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *overflowMBB;
+ MachineBasicBlock *offsetMBB;
+ MachineBasicBlock *endMBB;
+
+ unsigned OffsetDestReg = 0; // Argument address computed by offsetMBB
+ unsigned OverflowDestReg = 0; // Argument address computed by overflowMBB
+ unsigned OffsetReg = 0;
+
+ if (!UseGPOffset && !UseFPOffset) {
+ // If we only pull from the overflow region, we don't create a branch.
+ // We don't need to alter control flow.
+ OffsetDestReg = 0; // unused
+ OverflowDestReg = DestReg;
+
+ offsetMBB = NULL;
+ overflowMBB = thisMBB;
+ endMBB = thisMBB;
+ } else {
+ // First emit code to check if gp_offset (or fp_offset) is below the bound.
+ // If so, pull the argument from reg_save_area. (branch to offsetMBB)
+ // If not, pull from overflow_area. (branch to overflowMBB)
+ //
+ // thisMBB
+ // | .
+ // | .
+ // offsetMBB overflowMBB
+ // | .
+ // | .
+ // endMBB
+
+ // Registers for the PHI in endMBB
+ OffsetDestReg = MRI.createVirtualRegister(AddrRegClass);
+ OverflowDestReg = MRI.createVirtualRegister(AddrRegClass);
+
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction *MF = MBB->getParent();
+ overflowMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ endMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+
+ // Insert the new basic blocks
+ MF->insert(MBBIter, offsetMBB);
+ MF->insert(MBBIter, overflowMBB);
+ MF->insert(MBBIter, endMBB);
+
+ // Transfer the remainder of MBB and its successor edges to endMBB.
+ endMBB->splice(endMBB->begin(), thisMBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ thisMBB->end());
+ endMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
+
+ // Make offsetMBB and overflowMBB successors of thisMBB
+ thisMBB->addSuccessor(offsetMBB);
+ thisMBB->addSuccessor(overflowMBB);
+
+ // endMBB is a successor of both offsetMBB and overflowMBB
+ offsetMBB->addSuccessor(endMBB);
+ overflowMBB->addSuccessor(endMBB);
+
+ // Load the offset value into a register
+ OffsetReg = MRI.createVirtualRegister(OffsetRegClass);
+ BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg)
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, UseFPOffset ? 4 : 0)
+ .addOperand(Segment)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // Check if there is enough room left to pull this argument.
+ BuildMI(thisMBB, DL, TII->get(X86::CMP32ri))
+ .addReg(OffsetReg)
+ .addImm(MaxOffset + 8 - ArgSizeA8);
+
+ // Branch to "overflowMBB" if offset >= max
+ // Fall through to "offsetMBB" otherwise
+ BuildMI(thisMBB, DL, TII->get(X86::GetCondBranchFromCond(X86::COND_AE)))
+ .addMBB(overflowMBB);
+ }
+
+ // In offsetMBB, emit code to use the reg_save_area.
+ if (offsetMBB) {
+ assert(OffsetReg != 0);
+
+ // Read the reg_save_area address.
+ unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg)
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, 16)
+ .addOperand(Segment)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // Zero-extend the offset
+ unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64)
+ .addImm(0)
+ .addReg(OffsetReg)
+ .addImm(X86::sub_32bit);
+
+ // Add the offset to the reg_save_area to get the final address.
+ BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg)
+ .addReg(OffsetReg64)
+ .addReg(RegSaveReg);
+
+ // Compute the offset for the next argument
+ unsigned NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass);
+ BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg)
+ .addReg(OffsetReg)
+ .addImm(UseFPOffset ? 16 : 8);
+
+ // Store it back into the va_list.
+ BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr))
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, UseFPOffset ? 4 : 0)
+ .addOperand(Segment)
+ .addReg(NextOffsetReg)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // Jump to endMBB
+ BuildMI(offsetMBB, DL, TII->get(X86::JMP_4))
+ .addMBB(endMBB);
+ }
+
+ //
+ // Emit code to use overflow area
+ //
+
+ // Load the overflow_area address into a register.
+ unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg)
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, 8)
+ .addOperand(Segment)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // If we need to align it, do so. Otherwise, just copy the address
+ // to OverflowDestReg.
+ if (NeedsAlign) {
+ // Align the overflow address
+ assert((Align & (Align-1)) == 0 && "Alignment must be a power of 2");
+ unsigned TmpReg = MRI.createVirtualRegister(AddrRegClass);
+
+ // aligned_addr = (addr + (align-1)) & ~(align-1)
+ BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg)
+ .addReg(OverflowAddrReg)
+ .addImm(Align-1);
+
+ BuildMI(overflowMBB, DL, TII->get(X86::AND64ri32), OverflowDestReg)
+ .addReg(TmpReg)
+ .addImm(~(uint64_t)(Align-1));
+ } else {
+ BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg)
+ .addReg(OverflowAddrReg);
+ }
+
+ // Compute the next overflow address after this argument.
+ // (the overflow address should be kept 8-byte aligned)
+ unsigned NextAddrReg = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg)
+ .addReg(OverflowDestReg)
+ .addImm(ArgSizeA8);
+
+ // Store the new overflow address.
+ BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr))
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, 8)
+ .addOperand(Segment)
+ .addReg(NextAddrReg)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // If we branched, emit the PHI to the front of endMBB.
+ if (offsetMBB) {
+ BuildMI(*endMBB, endMBB->begin(), DL,
+ TII->get(X86::PHI), DestReg)
+ .addReg(OffsetDestReg).addMBB(offsetMBB)
+ .addReg(OverflowDestReg).addMBB(overflowMBB);
+ }
+
+ // Erase the pseudo instruction
+ MI->eraseFromParent();
+
+ return endMBB;
+}
+
+MachineBasicBlock *
X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
MachineInstr *MI,
MachineBasicBlock *MBB) const {
@@ -9296,8 +10245,8 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
MachineMemOperand *MMO =
F->getMachineMemOperand(
- PseudoSourceValue::getFixedStack(RegSaveFrameIndex),
- MachineMemOperand::MOStore, Offset,
+ MachinePointerInfo::getFixedStack(RegSaveFrameIndex, Offset),
+ MachineMemOperand::MOStore,
/*Size=*/16, /*Align=*/16);
BuildMI(XMMSaveMBB, DL, TII->get(X86::MOVAPSmr))
.addFrameIndex(RegSaveFrameIndex)
@@ -9389,7 +10338,7 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
}
MachineBasicBlock *
-X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI,
+X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
@@ -9399,8 +10348,11 @@ X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI,
// FIXME: The code should be tweaked as soon as we'll try to do codegen for
// mingw-w64.
+ const char *StackProbeSymbol =
+ Subtarget->isTargetWindows() ? "_chkstk" : "_alloca";
+
BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
- .addExternalSymbol("_alloca")
+ .addExternalSymbol(StackProbeSymbol)
.addReg(X86::EAX, RegState::Implicit)
.addReg(X86::ESP, RegState::Implicit)
.addReg(X86::EAX, RegState::Define | RegState::Implicit)
@@ -9418,30 +10370,30 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
// our load from the relocation, sticking it in either RDI (x86-64)
// or EAX and doing an indirect call. The return value will then
// be in the normal return register.
- const X86InstrInfo *TII
+ const X86InstrInfo *TII
= static_cast<const X86InstrInfo*>(getTargetMachine().getInstrInfo());
DebugLoc DL = MI->getDebugLoc();
MachineFunction *F = BB->getParent();
- bool IsWin64 = Subtarget->isTargetWin64();
-
+
+ assert(Subtarget->isTargetDarwin() && "Darwin only instr emitted?");
assert(MI->getOperand(3).isGlobal() && "This should be a global");
-
+
if (Subtarget->is64Bit()) {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
TII->get(X86::MOV64rm), X86::RDI)
.addReg(X86::RIP)
.addImm(0).addReg(0)
- .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
+ .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
MI->getOperand(3).getTargetFlags())
.addReg(0);
- MIB = BuildMI(*BB, MI, DL, TII->get(IsWin64 ? X86::WINCALL64m : X86::CALL64m));
+ MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m));
addDirectMem(MIB, X86::RDI);
} else if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
TII->get(X86::MOV32rm), X86::EAX)
.addReg(0)
.addImm(0).addReg(0)
- .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
+ .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
MI->getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
@@ -9451,13 +10403,13 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
TII->get(X86::MOV32rm), X86::EAX)
.addReg(TII->getGlobalBaseReg(F))
.addImm(0).addReg(0)
- .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
+ .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
MI->getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
addDirectMem(MIB, X86::EAX);
}
-
+
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
@@ -9467,13 +10419,36 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
switch (MI->getOpcode()) {
default: assert(false && "Unexpected instr type to insert");
- case X86::MINGW_ALLOCA:
- return EmitLoweredMingwAlloca(MI, BB);
+ case X86::TAILJMPd64:
+ case X86::TAILJMPr64:
+ case X86::TAILJMPm64:
+ assert(!"TAILJMP64 would not be touched here.");
+ case X86::TCRETURNdi64:
+ case X86::TCRETURNri64:
+ case X86::TCRETURNmi64:
+ // Defs of TCRETURNxx64 has Win64's callee-saved registers, as subset.
+ // On AMD64, additional defs should be added before register allocation.
+ if (!Subtarget->isTargetWin64()) {
+ MI->addRegisterDefined(X86::RSI);
+ MI->addRegisterDefined(X86::RDI);
+ MI->addRegisterDefined(X86::XMM6);
+ MI->addRegisterDefined(X86::XMM7);
+ MI->addRegisterDefined(X86::XMM8);
+ MI->addRegisterDefined(X86::XMM9);
+ MI->addRegisterDefined(X86::XMM10);
+ MI->addRegisterDefined(X86::XMM11);
+ MI->addRegisterDefined(X86::XMM12);
+ MI->addRegisterDefined(X86::XMM13);
+ MI->addRegisterDefined(X86::XMM14);
+ MI->addRegisterDefined(X86::XMM15);
+ }
+ return BB;
+ case X86::WIN_ALLOCA:
+ return EmitLoweredWinAlloca(MI, BB);
case X86::TLSCall_32:
case X86::TLSCall_64:
return EmitLoweredTLSCall(MI, BB);
case X86::CMOV_GR8:
- case X86::CMOV_V1I64:
case X86::CMOV_FR32:
case X86::CMOV_FR64:
case X86::CMOV_V4F32:
@@ -9583,6 +10558,12 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::VPCMPESTRM128MEM:
return EmitPCMP(MI, BB, 5, true /* in mem */);
+ // Thread synchronization.
+ case X86::MONITOR:
+ return EmitMonitor(MI, BB);
+ case X86::MWAIT:
+ return EmitMwait(MI, BB);
+
// Atomic Lowering.
case X86::ATOMAND32:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
@@ -9747,6 +10728,9 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
false);
case X86::VASTART_SAVE_XMM_REGS:
return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
+
+ case X86::VAARG_64:
+ return EmitVAARG64WithCustomInserter(MI, BB);
}
}
@@ -9773,6 +10757,8 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
default: break;
case X86ISD::ADD:
case X86ISD::SUB:
+ case X86ISD::ADC:
+ case X86ISD::SBB:
case X86ISD::SMUL:
case X86ISD::UMUL:
case X86ISD::INC:
@@ -9791,6 +10777,16 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
}
}
+unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth) const {
+ // SETCC_CARRY sets the dest to ~0 for true or 0 for false.
+ if (Op.getOpcode() == X86ISD::SETCC_CARRY)
+ return Op.getValueType().getScalarType().getSizeInBits();
+
+ // Fallback case.
+ return 1;
+}
+
/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + offset.
bool X86TargetLowering::isGAPlusOffset(SDNode *N,
@@ -9811,13 +10807,18 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N,
/// if the load addresses are consecutive, non-overlapping, and in the right
/// order.
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
- const TargetLowering &TLI) {
+ TargetLowering::DAGCombinerInfo &DCI) {
DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);
if (VT.getSizeInBits() != 128)
return SDValue();
+ // Don't create instructions with illegal types after legalize types has run.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType()))
+ return SDValue();
+
SmallVector<SDValue, 16> Elts;
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
@@ -9877,8 +10878,8 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
// Store the value to a temporary stack slot.
SDValue StackPtr = DAG.CreateStackTemporary(InputVector.getValueType());
- SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr, NULL,
- 0, false, false, 0);
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr,
+ MachinePointerInfo(), false, false, 0);
// Replace each use (extract) with a load of the appropriate element.
for (SmallVectorImpl<SDNode *>::iterator UI = Uses.begin(),
@@ -9893,11 +10894,12 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());
SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(),
- OffsetVal, StackPtr);
+ StackPtr, OffsetVal);
// Load the scalar.
SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch,
- ScalarAddr, NULL, 0, false, false, 0);
+ ScalarAddr, MachinePointerInfo(),
+ false, false, 0);
// Replace the exact with the load.
DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), LoadScalar);
@@ -10473,6 +11475,36 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
return SDValue();
}
+
+static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ // Want to form PANDN nodes, in the hopes of then easily combining them with
+ // OR and AND nodes to form PBLEND/PSIGN.
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::v2i64)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ // Check LHS for vnot
+ if (N0.getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode()))
+ return DAG.getNode(X86ISD::PANDN, DL, VT, N0.getOperand(0), N1);
+
+ // Check RHS for vnot
+ if (N1.getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode()))
+ return DAG.getNode(X86ISD::PANDN, DL, VT, N1.getOperand(0), N0);
+
+ return SDValue();
+}
+
static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
@@ -10480,12 +11512,99 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
EVT VT = N->getValueType(0);
- if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
+ if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64 && VT != MVT::v2i64)
return SDValue();
- // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
+
+ // look for psign/blend
+ if (Subtarget->hasSSSE3()) {
+ if (VT == MVT::v2i64) {
+ // Canonicalize pandn to RHS
+ if (N0.getOpcode() == X86ISD::PANDN)
+ std::swap(N0, N1);
+ // or (and (m, x), (pandn m, y))
+ if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::PANDN) {
+ SDValue Mask = N1.getOperand(0);
+ SDValue X = N1.getOperand(1);
+ SDValue Y;
+ if (N0.getOperand(0) == Mask)
+ Y = N0.getOperand(1);
+ if (N0.getOperand(1) == Mask)
+ Y = N0.getOperand(0);
+
+ // Check to see if the mask appeared in both the AND and PANDN and
+ if (!Y.getNode())
+ return SDValue();
+
+ // Validate that X, Y, and Mask are BIT_CONVERTS, and see through them.
+ if (Mask.getOpcode() != ISD::BITCAST ||
+ X.getOpcode() != ISD::BITCAST ||
+ Y.getOpcode() != ISD::BITCAST)
+ return SDValue();
+
+ // Look through mask bitcast.
+ Mask = Mask.getOperand(0);
+ EVT MaskVT = Mask.getValueType();
+
+ // Validate that the Mask operand is a vector sra node. The sra node
+ // will be an intrinsic.
+ if (Mask.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
+ return SDValue();
+
+ // FIXME: what to do for bytes, since there is a psignb/pblendvb, but
+ // there is no psrai.b
+ switch (cast<ConstantSDNode>(Mask.getOperand(0))->getZExtValue()) {
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_sse2_psrai_d:
+ break;
+ default: return SDValue();
+ }
+
+ // Check that the SRA is all signbits.
+ SDValue SraC = Mask.getOperand(2);
+ unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
+ unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
+ if ((SraAmt + 1) != EltBits)
+ return SDValue();
+
+ DebugLoc DL = N->getDebugLoc();
+
+ // Now we know we at least have a plendvb with the mask val. See if
+ // we can form a psignb/w/d.
+ // psign = x.type == y.type == mask.type && y = sub(0, x);
+ X = X.getOperand(0);
+ Y = Y.getOperand(0);
+ if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
+ ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
+ X.getValueType() == MaskVT && X.getValueType() == Y.getValueType()){
+ unsigned Opc = 0;
+ switch (EltBits) {
+ case 8: Opc = X86ISD::PSIGNB; break;
+ case 16: Opc = X86ISD::PSIGNW; break;
+ case 32: Opc = X86ISD::PSIGND; break;
+ default: break;
+ }
+ if (Opc) {
+ SDValue Sign = DAG.getNode(Opc, DL, MaskVT, X, Mask.getOperand(1));
+ return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Sign);
+ }
+ }
+ // PBLENDVB only available on SSE 4.1
+ if (!Subtarget->hasSSE41())
+ return SDValue();
+
+ X = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, X);
+ Y = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Y);
+ Mask = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Mask);
+ Mask = DAG.getNode(X86ISD::PBLENDVB, DL, MVT::v16i8, X, Y, Mask);
+ return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Mask);
+ }
+ }
+ }
+
+ // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
std::swap(N0, N1);
if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
@@ -10600,9 +11719,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
// pair instead.
if (Subtarget->is64Bit() || F64IsLegal) {
EVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64;
- SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(),
- Ld->getBasePtr(), Ld->getSrcValue(),
- Ld->getSrcValueOffset(), Ld->isVolatile(),
+ SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
+ Ld->getPointerInfo(), Ld->isVolatile(),
Ld->isNonTemporal(), Ld->getAlignment());
SDValue NewChain = NewLd.getValue(1);
if (TokenFactorIndex != -1) {
@@ -10611,7 +11729,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
Ops.size());
}
return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(),
- St->getSrcValue(), St->getSrcValueOffset(),
+ St->getPointerInfo(),
St->isVolatile(), St->isNonTemporal(),
St->getAlignment());
}
@@ -10622,11 +11740,11 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(4, MVT::i32));
SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr,
- Ld->getSrcValue(), Ld->getSrcValueOffset(),
+ Ld->getPointerInfo(),
Ld->isVolatile(), Ld->isNonTemporal(),
Ld->getAlignment());
SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr,
- Ld->getSrcValue(), Ld->getSrcValueOffset()+4,
+ Ld->getPointerInfo().getWithOffset(4),
Ld->isVolatile(), Ld->isNonTemporal(),
MinAlign(Ld->getAlignment(), 4));
@@ -10643,12 +11761,11 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(4, MVT::i32));
SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr,
- St->getSrcValue(), St->getSrcValueOffset(),
+ St->getPointerInfo(),
St->isVolatile(), St->isNonTemporal(),
St->getAlignment());
SDValue HiSt = DAG.getStore(NewChain, StDL, HiLd, HiAddr,
- St->getSrcValue(),
- St->getSrcValueOffset() + 4,
+ St->getPointerInfo().getWithOffset(4),
St->isVolatile(),
St->isNonTemporal(),
MinAlign(St->getAlignment(), 4));
@@ -10706,13 +11823,13 @@ static SDValue PerformBTCombine(SDNode *N,
static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
SDValue Op = N->getOperand(0);
- if (Op.getOpcode() == ISD::BIT_CONVERT)
+ if (Op.getOpcode() == ISD::BITCAST)
Op = Op.getOperand(0);
EVT VT = N->getValueType(0), OpVT = Op.getValueType();
if (Op.getOpcode() == X86ISD::VZEXT_LOAD &&
VT.getVectorElementType().getSizeInBits() ==
OpVT.getVectorElementType().getSizeInBits()) {
- return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op);
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op);
}
return SDValue();
}
@@ -10743,19 +11860,106 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+// Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT
+static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
+ unsigned X86CC = N->getConstantOperandVal(0);
+ SDValue EFLAG = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ // Materialize "setb reg" as "sbb reg,reg", since it can be extended without
+ // a zext and produces an all-ones bit which is more useful than 0/1 in some
+ // cases.
+ if (X86CC == X86::COND_B)
+ return DAG.getNode(ISD::AND, DL, MVT::i8,
+ DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), EFLAG),
+ DAG.getConstant(1, MVT::i8));
+
+ return SDValue();
+}
+
+// Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS
+static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG,
+ X86TargetLowering::DAGCombinerInfo &DCI) {
+ // If the LHS and RHS of the ADC node are zero, then it can't overflow and
+ // the result is either zero or one (depending on the input carry bit).
+ // Strength reduce this down to a "set on carry" aka SETCC_CARRY&1.
+ if (X86::isZeroNode(N->getOperand(0)) &&
+ X86::isZeroNode(N->getOperand(1)) &&
+ // We don't have a good way to replace an EFLAGS use, so only do this when
+ // dead right now.
+ SDValue(N, 1).use_empty()) {
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SDValue CarryOut = DAG.getConstant(0, N->getValueType(1));
+ SDValue Res1 = DAG.getNode(ISD::AND, DL, VT,
+ DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
+ DAG.getConstant(X86::COND_B,MVT::i8),
+ N->getOperand(2)),
+ DAG.getConstant(1, VT));
+ return DCI.CombineTo(N, Res1, CarryOut);
+ }
+
+ return SDValue();
+}
+
+// fold (add Y, (sete X, 0)) -> adc 0, Y
+// (add Y, (setne X, 0)) -> sbb -1, Y
+// (sub (sete X, 0), Y) -> sbb 0, Y
+// (sub (setne X, 0), Y) -> adc -1, Y
+static SDValue OptimizeConditonalInDecrement(SDNode *N, SelectionDAG &DAG) {
+ DebugLoc DL = N->getDebugLoc();
+
+ // Look through ZExts.
+ SDValue Ext = N->getOperand(N->getOpcode() == ISD::SUB ? 1 : 0);
+ if (Ext.getOpcode() != ISD::ZERO_EXTEND || !Ext.hasOneUse())
+ return SDValue();
+
+ SDValue SetCC = Ext.getOperand(0);
+ if (SetCC.getOpcode() != X86ISD::SETCC || !SetCC.hasOneUse())
+ return SDValue();
+
+ X86::CondCode CC = (X86::CondCode)SetCC.getConstantOperandVal(0);
+ if (CC != X86::COND_E && CC != X86::COND_NE)
+ return SDValue();
+
+ SDValue Cmp = SetCC.getOperand(1);
+ if (Cmp.getOpcode() != X86ISD::CMP || !Cmp.hasOneUse() ||
+ !X86::isZeroNode(Cmp.getOperand(1)) ||
+ !Cmp.getOperand(0).getValueType().isInteger())
+ return SDValue();
+
+ SDValue CmpOp0 = Cmp.getOperand(0);
+ SDValue NewCmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpOp0,
+ DAG.getConstant(1, CmpOp0.getValueType()));
+
+ SDValue OtherVal = N->getOperand(N->getOpcode() == ISD::SUB ? 0 : 1);
+ if (CC == X86::COND_NE)
+ return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::ADC : X86ISD::SBB,
+ DL, OtherVal.getValueType(), OtherVal,
+ DAG.getConstant(-1ULL, OtherVal.getValueType()), NewCmp);
+ return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::SBB : X86ISD::ADC,
+ DL, OtherVal.getValueType(), OtherVal,
+ DAG.getConstant(0, OtherVal.getValueType()), NewCmp);
+}
+
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
default: break;
case ISD::EXTRACT_VECTOR_ELT:
- return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
+ return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);
+ case ISD::ADD:
+ case ISD::SUB: return OptimizeConditonalInDecrement(N, DAG);
+ case X86ISD::ADC: return PerformADCCombine(N, DAG, DCI);
case ISD::MUL: return PerformMulCombine(N, DAG, DCI);
case ISD::SHL:
case ISD::SRA:
case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget);
+ case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget);
case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
case X86ISD::FXOR:
@@ -10764,8 +11968,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG);
+ case X86ISD::SETCC: return PerformSETCCCombine(N, DAG);
case X86ISD::SHUFPS: // Handle all target specific shuffles
case X86ISD::SHUFPD:
+ case X86ISD::PALIGN:
case X86ISD::PUNPCKHBW:
case X86ISD::PUNPCKHWD:
case X86ISD::PUNPCKHDQ:
@@ -10785,7 +11991,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::PSHUFLW:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
- case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
+ case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI);
}
return SDValue();
@@ -10892,44 +12098,14 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
// X86 Inline Assembly Support
//===----------------------------------------------------------------------===//
-static bool LowerToBSwap(CallInst *CI) {
- // FIXME: this should verify that we are targetting a 486 or better. If not,
- // we will turn this bswap into something that will be lowered to logical ops
- // instead of emitting the bswap asm. For now, we don't support 486 or lower
- // so don't worry about this.
-
- // Verify this is a simple bswap.
- if (CI->getNumArgOperands() != 1 ||
- CI->getType() != CI->getArgOperand(0)->getType() ||
- !CI->getType()->isIntegerTy())
- return false;
-
- const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
- if (!Ty || Ty->getBitWidth() % 16 != 0)
- return false;
-
- // Okay, we can do this xform, do so now.
- const Type *Tys[] = { Ty };
- Module *M = CI->getParent()->getParent()->getParent();
- Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
-
- Value *Op = CI->getArgOperand(0);
- Op = CallInst::Create(Int, Op, CI->getName(), CI);
-
- CI->replaceAllUsesWith(Op);
- CI->eraseFromParent();
- return true;
-}
-
bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
- std::vector<InlineAsm::ConstraintInfo> Constraints = IA->ParseConstraints();
std::string AsmStr = IA->getAsmString();
// TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a"
SmallVector<StringRef, 4> AsmPieces;
- SplitString(AsmStr, AsmPieces, "\n"); // ; as separator?
+ SplitString(AsmStr, AsmPieces, ";\n");
switch (AsmPieces.size()) {
default: return false;
@@ -10938,6 +12114,10 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
AsmPieces.clear();
SplitString(AsmStr, AsmPieces, " \t"); // Split with whitespace.
+ // FIXME: this should verify that we are targetting a 486 or better. If not,
+ // we will turn this bswap into something that will be lowered to logical ops
+ // instead of emitting the bswap asm. For now, we don't support 486 or lower
+ // so don't worry about this.
// bswap $0
if (AsmPieces.size() == 2 &&
(AsmPieces[0] == "bswap" ||
@@ -10947,7 +12127,10 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
AsmPieces[1] == "${0:q}")) {
// No need to check constraints, nothing other than the equivalent of
// "=r,0" would be valid here.
- return LowerToBSwap(CI);
+ const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+ return IntrinsicLowering::LowerToByteSwap(CI);
}
// rorw $$8, ${0:w} --> llvm.bswap.i16
if (CI->getType()->isIntegerTy(16) &&
@@ -10957,35 +12140,76 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
AsmPieces[2] == "${0:w}" &&
IA->getConstraintString().compare(0, 5, "=r,0,") == 0) {
AsmPieces.clear();
- const std::string &Constraints = IA->getConstraintString();
- SplitString(StringRef(Constraints).substr(5), AsmPieces, ",");
+ const std::string &ConstraintsStr = IA->getConstraintString();
+ SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
std::sort(AsmPieces.begin(), AsmPieces.end());
if (AsmPieces.size() == 4 &&
AsmPieces[0] == "~{cc}" &&
AsmPieces[1] == "~{dirflag}" &&
AsmPieces[2] == "~{flags}" &&
AsmPieces[3] == "~{fpsr}") {
- return LowerToBSwap(CI);
+ const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+ return IntrinsicLowering::LowerToByteSwap(CI);
}
}
break;
case 3:
- if (CI->getType()->isIntegerTy(64) &&
- Constraints.size() >= 2 &&
- Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
- Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
- // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64
+ if (CI->getType()->isIntegerTy(32) &&
+ IA->getConstraintString().compare(0, 5, "=r,0,") == 0) {
SmallVector<StringRef, 4> Words;
- SplitString(AsmPieces[0], Words, " \t");
- if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") {
+ SplitString(AsmPieces[0], Words, " \t,");
+ if (Words.size() == 3 && Words[0] == "rorw" && Words[1] == "$$8" &&
+ Words[2] == "${0:w}") {
Words.clear();
- SplitString(AsmPieces[1], Words, " \t");
- if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%edx") {
+ SplitString(AsmPieces[1], Words, " \t,");
+ if (Words.size() == 3 && Words[0] == "rorl" && Words[1] == "$$16" &&
+ Words[2] == "$0") {
Words.clear();
SplitString(AsmPieces[2], Words, " \t,");
- if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" &&
- Words[2] == "%edx") {
- return LowerToBSwap(CI);
+ if (Words.size() == 3 && Words[0] == "rorw" && Words[1] == "$$8" &&
+ Words[2] == "${0:w}") {
+ AsmPieces.clear();
+ const std::string &ConstraintsStr = IA->getConstraintString();
+ SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
+ std::sort(AsmPieces.begin(), AsmPieces.end());
+ if (AsmPieces.size() == 4 &&
+ AsmPieces[0] == "~{cc}" &&
+ AsmPieces[1] == "~{dirflag}" &&
+ AsmPieces[2] == "~{flags}" &&
+ AsmPieces[3] == "~{fpsr}") {
+ const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+ return IntrinsicLowering::LowerToByteSwap(CI);
+ }
+ }
+ }
+ }
+ }
+
+ if (CI->getType()->isIntegerTy(64)) {
+ InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
+ if (Constraints.size() >= 2 &&
+ Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
+ Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
+ // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64
+ SmallVector<StringRef, 4> Words;
+ SplitString(AsmPieces[0], Words, " \t");
+ if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") {
+ Words.clear();
+ SplitString(AsmPieces[1], Words, " \t");
+ if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%edx") {
+ Words.clear();
+ SplitString(AsmPieces[2], Words, " \t,");
+ if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" &&
+ Words[2] == "%edx") {
+ const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+ return IntrinsicLowering::LowerToByteSwap(CI);
+ }
}
}
}
@@ -11003,18 +12227,32 @@ X86TargetLowering::ConstraintType
X86TargetLowering::getConstraintType(const std::string &Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
- case 'A':
- return C_Register;
- case 'f':
- case 'r':
case 'R':
- case 'l':
case 'q':
case 'Q':
- case 'x':
+ case 'f':
+ case 't':
+ case 'u':
case 'y':
+ case 'x':
case 'Y':
return C_RegisterClass;
+ case 'a':
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'S':
+ case 'D':
+ case 'A':
+ return C_Register;
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'G':
+ case 'C':
case 'e':
case 'Z':
return C_Other;
@@ -11025,6 +12263,110 @@ X86TargetLowering::getConstraintType(const std::string &Constraint) const {
return TargetLowering::getConstraintType(Constraint);
}
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ X86TargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ const Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ case 'R':
+ case 'q':
+ case 'Q':
+ case 'a':
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'S':
+ case 'D':
+ case 'A':
+ if (CallOperandVal->getType()->isIntegerTy())
+ weight = CW_SpecificReg;
+ break;
+ case 'f':
+ case 't':
+ case 'u':
+ if (type->isFloatingPointTy())
+ weight = CW_SpecificReg;
+ break;
+ case 'y':
+ if (type->isX86_MMXTy() && Subtarget->hasMMX())
+ weight = CW_SpecificReg;
+ break;
+ case 'x':
+ case 'Y':
+ if ((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasXMM())
+ weight = CW_Register;
+ break;
+ case 'I':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(info.CallOperandVal)) {
+ if (C->getZExtValue() <= 31)
+ weight = CW_Constant;
+ }
+ break;
+ case 'J':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 63)
+ weight = CW_Constant;
+ }
+ break;
+ case 'K':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if ((C->getSExtValue() >= -0x80) && (C->getSExtValue() <= 0x7f))
+ weight = CW_Constant;
+ }
+ break;
+ case 'L':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if ((C->getZExtValue() == 0xff) || (C->getZExtValue() == 0xffff))
+ weight = CW_Constant;
+ }
+ break;
+ case 'M':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 3)
+ weight = CW_Constant;
+ }
+ break;
+ case 'N':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 0xff)
+ weight = CW_Constant;
+ }
+ break;
+ case 'G':
+ case 'C':
+ if (dyn_cast<ConstantFP>(CallOperandVal)) {
+ weight = CW_Constant;
+ }
+ break;
+ case 'e':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if ((C->getSExtValue() >= -0x80000000LL) &&
+ (C->getSExtValue() <= 0x7fffffffLL))
+ weight = CW_Constant;
+ }
+ break;
+ case 'Z':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 0xffffffff)
+ weight = CW_Constant;
+ }
+ break;
+ }
+ return weight;
+}
+
/// LowerXConstraint - try to replace an X constraint, which matches anything,
/// with another that has more specific requirements based on the type of the
/// corresponding operand.
@@ -11033,9 +12375,9 @@ LowerXConstraint(EVT ConstraintVT) const {
// FP X constraints get lowered to SSE1/2 registers if available, otherwise
// 'f' like normal targets.
if (ConstraintVT.isFloatingPoint()) {
- if (Subtarget->hasSSE2())
+ if (Subtarget->hasXMMInt())
return "Y";
- if (Subtarget->hasSSE1())
+ if (Subtarget->hasXMM())
return "x";
}
@@ -11265,10 +12607,10 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
if (!Subtarget->hasMMX()) break;
return std::make_pair(0U, X86::VR64RegisterClass);
case 'Y': // SSE_REGS if SSE2 allowed
- if (!Subtarget->hasSSE2()) break;
+ if (!Subtarget->hasXMMInt()) break;
// FALL THROUGH.
case 'x': // SSE_REGS if SSE1 allowed
- if (!Subtarget->hasSSE1()) break;
+ if (!Subtarget->hasXMM()) break;
switch (VT.getSimpleVT().SimpleTy) {
default: break;
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index d2d9b28..419da37 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -57,35 +57,6 @@ namespace llvm {
/// corresponds to X86::PSRLDQ.
FSRL,
- /// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the
- /// integer source in memory and FP reg result. This corresponds to the
- /// X86::FILD*m instructions. It has three inputs (token chain, address,
- /// and source type) and two outputs (FP value and token chain). FILD_FLAG
- /// also produces a flag).
- FILD,
- FILD_FLAG,
-
- /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the
- /// integer destination in memory and a FP reg source. This corresponds
- /// to the X86::FIST*m instructions and the rounding mode change stuff. It
- /// has two inputs (token chain and address) and two outputs (int value
- /// and token chain).
- FP_TO_INT16_IN_MEM,
- FP_TO_INT32_IN_MEM,
- FP_TO_INT64_IN_MEM,
-
- /// FLD - This instruction implements an extending load to FP stack slots.
- /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
- /// operand, ptr to load from, and a ValueType node indicating the type
- /// to load to.
- FLD,
-
- /// FST - This instruction implements a truncating store to FP stack
- /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
- /// chain operand, value to store, address, and a ValueType to store it
- /// as.
- FST,
-
/// CALL - These operations represent an abstract X86 call
/// instruction, which includes a bunch of information. In particular the
/// operands of these node are:
@@ -105,7 +76,7 @@ namespace llvm {
///
CALL,
- /// RDTSC_DAG - This operation implements the lowering for
+ /// RDTSC_DAG - This operation implements the lowering for
/// readcyclecounter
RDTSC_DAG,
@@ -115,13 +86,13 @@ namespace llvm {
/// X86 bit-test instructions.
BT,
- /// X86 SetCC. Operand 0 is condition code, and operand 1 is the flag
- /// operand produced by a CMP instruction.
+ /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
+ /// operand, usually produced by a CMP instruction.
SETCC,
// Same as SETCC except it's materialized with a sbb and the value is all
// one's or all zero's.
- SETCC_CARRY,
+ SETCC_CARRY, // R = carry_bit ? ~0 : 0
/// X86 conditional moves. Operand 0 and operand 1 are the two values
/// to select from. Operand 2 is the condition code, and operand 3 is the
@@ -157,11 +128,15 @@ namespace llvm {
/// relative displacements.
WrapperRIP,
- /// MOVQ2DQ - Copies a 64-bit value from a vector to another vector.
- /// Can be used to move a vector value from a MMX register to a XMM
- /// register.
+ /// MOVQ2DQ - Copies a 64-bit value from an MMX vector to the low word
+ /// of an XMM vector, with the high word zero filled.
MOVQ2DQ,
+ /// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector
+ /// to an MMX vector. If you think this is too close to the previous
+ /// mnemonic, so do I; blame Intel.
+ MOVDQ2Q,
+
/// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
/// i32, corresponds to X86::PEXTRB.
PEXTRB,
@@ -184,7 +159,16 @@ namespace llvm {
/// PSHUFB - Shuffle 16 8-bit values within a vector.
PSHUFB,
-
+
+ /// PANDN - and with not'd value.
+ PANDN,
+
+ /// PSIGNB/W/D - Copy integer sign.
+ PSIGNB, PSIGNW, PSIGND,
+
+ /// PBLENDVB - Variable blend
+ PBLENDVB,
+
/// FMAX, FMIN - Floating point max and min.
///
FMAX, FMIN,
@@ -196,17 +180,14 @@ namespace llvm {
// TLSADDR - Thread Local Storage.
TLSADDR,
-
+
// TLSCALL - Thread Local Storage. When calling to an OS provided
// thunk at the address from an earlier relocation.
TLSCALL,
- // SegmentBaseAddress - The address segment:0
- SegmentBaseAddress,
-
// EH_RETURN - Exception Handling helpers.
EH_RETURN,
-
+
/// TC_RETURN - Tail call return.
/// operand #0 chain
/// operand #1 callee (register or absolute)
@@ -214,37 +195,29 @@ namespace llvm {
/// operand #3 optional in flag
TC_RETURN,
- // LCMPXCHG_DAG, LCMPXCHG8_DAG - Compare and swap.
- LCMPXCHG_DAG,
- LCMPXCHG8_DAG,
-
- // FNSTCW16m - Store FP control world into i16 memory.
- FNSTCW16m,
-
// VZEXT_MOVL - Vector move low and zero extend.
VZEXT_MOVL,
- // VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
- VZEXT_LOAD,
-
// VSHL, VSRL - Vector logical left / right shift.
VSHL, VSRL,
// CMPPD, CMPPS - Vector double/float comparison.
// CMPPD, CMPPS - Vector double/float comparison.
CMPPD, CMPPS,
-
+
// PCMP* - Vector integer comparisons.
PCMPEQB, PCMPEQW, PCMPEQD, PCMPEQQ,
PCMPGTB, PCMPGTW, PCMPGTD, PCMPGTQ,
- // ADD, SUB, SMUL, UMUL, etc. - Arithmetic operations with FLAGS results.
- ADD, SUB, SMUL, UMUL,
+ // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results.
+ ADD, SUB, ADC, SBB, SMUL,
INC, DEC, OR, XOR, AND,
+
+ UMUL, // LOW, HI, FLAGS = umul LHS, RHS
// MUL_IMM - X86 specific multiply by immediate.
MUL_IMM,
-
+
// PTEST - Vector bitwise comparisons
PTEST,
@@ -291,11 +264,17 @@ namespace llvm {
// with control flow.
VASTART_SAVE_XMM_REGS,
- // MINGW_ALLOCA - MingW's __alloca call to do stack probing.
- MINGW_ALLOCA,
+ // WIN_ALLOCA - Windows's _chkstk call to do stack probing.
+ WIN_ALLOCA,
+
+ // Memory barrier
+ MEMBARRIER,
+ MFENCE,
+ SFENCE,
+ LFENCE,
- // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
- // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
+ // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
+ // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
// Atomic 64-bit binary operations.
ATOMADD64_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
ATOMSUB64_DAG,
@@ -304,12 +283,49 @@ namespace llvm {
ATOMAND64_DAG,
ATOMNAND64_DAG,
ATOMSWAP64_DAG,
-
- // Memory barrier
- MEMBARRIER,
- MFENCE,
- SFENCE,
- LFENCE
+
+ // LCMPXCHG_DAG, LCMPXCHG8_DAG - Compare and swap.
+ LCMPXCHG_DAG,
+ LCMPXCHG8_DAG,
+
+ // VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
+ VZEXT_LOAD,
+
+ // FNSTCW16m - Store FP control world into i16 memory.
+ FNSTCW16m,
+
+ /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the
+ /// integer destination in memory and a FP reg source. This corresponds
+ /// to the X86::FIST*m instructions and the rounding mode change stuff. It
+ /// has two inputs (token chain and address) and two outputs (int value
+ /// and token chain).
+ FP_TO_INT16_IN_MEM,
+ FP_TO_INT32_IN_MEM,
+ FP_TO_INT64_IN_MEM,
+
+ /// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the
+ /// integer source in memory and FP reg result. This corresponds to the
+ /// X86::FILD*m instructions. It has three inputs (token chain, address,
+ /// and source type) and two outputs (FP value and token chain). FILD_FLAG
+ /// also produces a flag).
+ FILD,
+ FILD_FLAG,
+
+ /// FLD - This instruction implements an extending load to FP stack slots.
+ /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
+ /// operand, ptr to load from, and a ValueType node indicating the type
+ /// to load to.
+ FLD,
+
+ /// FST - This instruction implements a truncating store to FP stack
+ /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
+ /// chain operand, value to store, address, and a ValueType to store it
+ /// as.
+ FST,
+
+ /// VAARG_64 - This instruction grabs the address of the next argument
+ /// from a va_list. (reads and modifies the va_list in memory)
+ VAARG_64
// WARNING: Do not add anything in the end unless you want the node to
// have memop! In fact, starting from ATOMADD64_DAG all opcodes will be
@@ -392,6 +408,16 @@ namespace llvm {
/// specifies a shuffle of elements that is suitable for input to PALIGNR.
bool isPALIGNRMask(ShuffleVectorSDNode *N);
+ /// isVEXTRACTF128Index - Return true if the specified
+ /// EXTRACT_SUBVECTOR operand specifies a vector extract that is
+ /// suitable for input to VEXTRACTF128.
+ bool isVEXTRACTF128Index(SDNode *N);
+
+ /// isVINSERTF128Index - Return true if the specified
+ /// INSERT_SUBVECTOR operand specifies a subvector insert that is
+ /// suitable for input to VINSERTF128.
+ bool isVINSERTF128Index(SDNode *N);
+
/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
/// instructions.
@@ -409,6 +435,16 @@ namespace llvm {
/// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
unsigned getShufflePALIGNRImmediate(SDNode *N);
+ /// getExtractVEXTRACTF128Immediate - Return the appropriate
+ /// immediate to extract the specified EXTRACT_SUBVECTOR index
+ /// with VEXTRACTF128 instructions.
+ unsigned getExtractVEXTRACTF128Immediate(SDNode *N);
+
+ /// getInsertVINSERTF128Immediate - Return the appropriate
+ /// immediate to insert at the specified INSERT_SUBVECTOR index
+ /// with VINSERTF128 instructions.
+ unsigned getInsertVINSERTF128Immediate(SDNode *N);
+
/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
bool isZeroNode(SDValue Elt);
@@ -425,16 +461,13 @@ namespace llvm {
public:
explicit X86TargetLowering(X86TargetMachine &TM);
- /// getPICBaseSymbol - Return the X86-32 PIC base.
- MCSymbol *getPICBaseSymbol(const MachineFunction *MF, MCContext &Ctx) const;
-
virtual unsigned getJumpTableEncoding() const;
virtual const MCExpr *
LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB, unsigned uid,
MCContext &Ctx) const;
-
+
/// getPICJumpTableRelocaBase - Returns relocation base for the given PIC
/// jumptable.
virtual SDValue getPICJumpTableRelocBase(SDValue Table,
@@ -442,7 +475,7 @@ namespace llvm {
virtual const MCExpr *
getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
unsigned JTI, MCContext &Ctx) const;
-
+
/// getStackPtrReg - Return the stack pointer register we are using: either
/// ESP or RSP.
unsigned getStackPtrReg() const { return X86StackPtr; }
@@ -486,7 +519,7 @@ namespace llvm {
virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const;
-
+
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
/// isTypeDesirableForOp - Return true if the target has native support for
@@ -505,7 +538,7 @@ namespace llvm {
EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const;
-
+
/// getTargetNodeName - This method returns the name of a target specific
/// DAG node.
virtual const char *getTargetNodeName(unsigned Opcode) const;
@@ -513,26 +546,36 @@ namespace llvm {
/// getSetCCResultType - Return the ISD::SETCC ValueType
virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
- /// computeMaskedBitsForTargetNode - Determine which of the bits specified
- /// in Mask are known to be either zero or one and return them in the
+ /// computeMaskedBitsForTargetNode - Determine which of the bits specified
+ /// in Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
virtual void computeMaskedBitsForTargetNode(const SDValue Op,
const APInt &Mask,
- APInt &KnownZero,
+ APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth = 0) const;
+ // ComputeNumSignBitsForTargetNode - Determine the number of bits in the
+ // operation that are sign bits.
+ virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth) const;
+
virtual bool
isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const;
-
+
SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
virtual bool ExpandInlineAsm(CallInst *CI) const;
-
+
ConstraintType getConstraintType(const std::string &Constraint) const;
-
- std::vector<unsigned>
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ virtual ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ std::vector<unsigned>
getRegClassForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const;
@@ -546,15 +589,15 @@ namespace llvm {
char ConstraintLetter,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const;
-
+
/// getRegForInlineAsmConstraint - Given a physical register constraint
/// (e.g. {edx}), return the register number and the register class for the
/// register. This should only be used for C_Register constraints. On
/// error, this returns a register number of 0.
- std::pair<unsigned, const TargetRegisterClass*>
+ std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const;
-
+
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
@@ -609,7 +652,7 @@ namespace llvm {
// shrink long double fp constant since fldt is very slow.
return !X86ScalarSSEf64 || VT == MVT::f80;
}
-
+
const X86Subtarget* getSubtarget() const {
return Subtarget;
}
@@ -650,8 +693,8 @@ namespace llvm {
/// X86StackPtr - X86 physical register used as stack ptr.
unsigned X86StackPtr;
-
- /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
+
+ /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
/// floating point ops.
/// When SSE is available, use it for f32 operations.
/// When SSE2 is available, use it for f64 operations.
@@ -702,7 +745,6 @@ namespace llvm {
SDValue Chain, bool IsTailCall, bool Is64Bit,
int FPDiff, DebugLoc dl) const;
- CCAssignFn *CCAssignFnForNode(CallingConv::ID CallConv) const;
unsigned GetAlignedArgumentStackSize(unsigned StackSize,
SelectionDAG &DAG) const;
@@ -719,6 +761,8 @@ namespace llvm {
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
@@ -729,7 +773,7 @@ namespace llvm {
SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
SelectionDAG &DAG) const;
- SDValue LowerBIT_CONVERT(SDValue op, SelectionDAG &DAG) const;
+ SDValue LowerBITCAST(SDValue op, SelectionDAG &DAG) const;
SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
@@ -794,6 +838,8 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
+ virtual bool isUsedByReturnOnly(SDNode *N) const;
+
virtual bool
CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
@@ -810,6 +856,13 @@ namespace llvm {
MachineBasicBlock *EmitPCMP(MachineInstr *BInstr, MachineBasicBlock *BB,
unsigned argNum, bool inMem) const;
+ /// Utility functions to emit monitor and mwait instructions. These
+ /// need to make sure that the arguments to the intrinsic are in the
+ /// correct registers.
+ MachineBasicBlock *EmitMonitor(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const;
+
/// Utility function to emit atomic bitwise operations (and, or, xor).
/// It takes the bitwise instruction to expand, the associated machine basic
/// block, and the associated X86 opcodes for reg/reg and reg/imm.
@@ -833,7 +886,7 @@ namespace llvm {
unsigned immOpcL,
unsigned immOpcH,
bool invSrc = false) const;
-
+
/// Utility function to emit atomic min and max. It takes the min/max
/// instruction to expand, the associated basic block, and the associated
/// cmov opcode for moving the min or max value.
@@ -841,6 +894,11 @@ namespace llvm {
MachineBasicBlock *BB,
unsigned cmovOpc) const;
+ // Utility function to emit the low-level va_arg code for X86-64.
+ MachineBasicBlock *EmitVAARG64WithCustomInserter(
+ MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
/// Utility function to emit the xmm reg save portion of va_start.
MachineBasicBlock *EmitVAStartSaveXMMRegsWithCustomInserter(
MachineInstr *BInstr,
@@ -849,12 +907,15 @@ namespace llvm {
MachineBasicBlock *EmitLoweredSelect(MachineInstr *I,
MachineBasicBlock *BB) const;
- MachineBasicBlock *EmitLoweredMingwAlloca(MachineInstr *MI,
+ MachineBasicBlock *EmitLoweredWinAlloca(MachineInstr *MI,
MachineBasicBlock *BB) const;
-
+
MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI,
MachineBasicBlock *BB) const;
+ MachineBasicBlock *emitLoweredTLSAddr(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent, for use with the given x86 condition code.
SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG) const;
diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td
new file mode 100644
index 0000000..45d1c6b
--- /dev/null
+++ b/lib/Target/X86/X86Instr3DNow.td
@@ -0,0 +1,77 @@
+//====- X86Instr3DNow.td - The 3DNow! Instruction Set ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the 3DNow! instruction set, which extends MMX to support
+// floating point and also adds a few more random instructions for good measure.
+//
+//===----------------------------------------------------------------------===//
+
+// FIXME: We don't support any intrinsics for these instructions yet.
+
+class I3DNow<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TB, Requires<[Has3DNow]> {
+}
+
+class I3DNow_binop<bits<8> o, Format F, dag ins, string Mnemonic>
+ : I<o, F, (outs VR64:$dst), ins,
+ !strconcat(Mnemonic, "\t{$src2, $dst|$dst, $src2}"), []>,
+ TB, Requires<[Has3DNow]>, Has3DNow0F0FOpcode {
+ // FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet.
+ let isAsmParserOnly = 1;
+}
+
+
+let Constraints = "$src1 = $dst" in {
+ // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
+ // When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp.
+ multiclass I3DNow_binop_rm<bits<8> opc, string Mn> {
+ def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn>;
+ def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn>;
+ }
+}
+
+defm PAVGUSB : I3DNow_binop_rm<0xBF, "pavgusb">;
+defm PF2ID : I3DNow_binop_rm<0x1D, "pf2id">;
+defm PFACC : I3DNow_binop_rm<0xAE, "pfacc">;
+defm PFADD : I3DNow_binop_rm<0x9E, "pfadd">;
+defm PFCMPEQ : I3DNow_binop_rm<0xB0, "pfcmpeq">;
+defm PFCMPGE : I3DNow_binop_rm<0x90, "pfcmpge">;
+defm PFCMPGT : I3DNow_binop_rm<0xA0, "pfcmpgt">;
+defm PFMAX : I3DNow_binop_rm<0xA4, "pfmax">;
+defm PFMIN : I3DNow_binop_rm<0x94, "pfmin">;
+defm PFMUL : I3DNow_binop_rm<0xB4, "pfmul">;
+defm PFRCP : I3DNow_binop_rm<0x96, "pfrcp">;
+defm PFRCPIT1 : I3DNow_binop_rm<0xA6, "pfrcpit1">;
+defm PFRCPIT2 : I3DNow_binop_rm<0xB6, "pfrcpit2">;
+defm PFRSQIT1 : I3DNow_binop_rm<0xA7, "pfrsqit1">;
+defm PFRSQRT : I3DNow_binop_rm<0x97, "pfrsqrt">;
+defm PFSUB : I3DNow_binop_rm<0x9A, "pfsub">;
+defm PFSUBR : I3DNow_binop_rm<0xAA, "pfsubr">;
+defm PI2FD : I3DNow_binop_rm<0x0D, "pi2fd">;
+defm PMULHRW : I3DNow_binop_rm<0xB7, "pmulhrw">;
+
+
+def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>;
+
+def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i32mem:$addr),
+ "prefetch $addr", []>;
+
+// FIXME: Diassembler gets a bogus decode conflict.
+let isAsmParserOnly = 1 in {
+def PREFETCHW : I3DNow<0x0D, MRM1m, (outs), (ins i16mem:$addr),
+ "prefetchw $addr", []>;
+}
+
+// "3DNowA" instructions
+defm PF2IW : I3DNow_binop_rm<0x1C, "pf2iw">;
+defm PI2FW : I3DNow_binop_rm<0x0C, "pi2fw">;
+defm PFNACC : I3DNow_binop_rm<0x8A, "pfnacc">;
+defm PFPNACC : I3DNow_binop_rm<0x8E, "pfpnacc">;
+defm PSWAPD : I3DNow_binop_rm<0xBB, "pswapd">;
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
deleted file mode 100644
index 0884b61..0000000
--- a/lib/Target/X86/X86Instr64bit.td
+++ /dev/null
@@ -1,2250 +0,0 @@
-//====- X86Instr64bit.td - Describe X86-64 Instructions ----*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the X86-64 instruction set, defining the instructions,
-// and properties of the instructions which are needed for code generation,
-// machine code emission, and analysis.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Operand Definitions.
-//
-
-// 64-bits but only 32 bits are significant.
-def i64i32imm : Operand<i64> {
- let ParserMatchClass = ImmSExti64i32AsmOperand;
-}
-
-// 64-bits but only 32 bits are significant, and those bits are treated as being
-// pc relative.
-def i64i32imm_pcrel : Operand<i64> {
- let PrintMethod = "print_pcrel_imm";
- let ParserMatchClass = X86AbsMemAsmOperand;
-}
-
-
-// 64-bits but only 8 bits are significant.
-def i64i8imm : Operand<i64> {
- let ParserMatchClass = ImmSExti64i8AsmOperand;
-}
-
-def lea64_32mem : Operand<i32> {
- let PrintMethod = "printi32mem";
- let AsmOperandLowerMethod = "lower_lea64_32mem";
- let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm, i8imm);
- let ParserMatchClass = X86MemAsmOperand;
-}
-
-
-// Special i64mem for addresses of load folding tail calls. These are not
-// allowed to use callee-saved registers since they must be scheduled
-// after callee-saved register are popped.
-def i64mem_TC : Operand<i64> {
- let PrintMethod = "printi64mem";
- let MIOperandInfo = (ops GR64_TC, i8imm, GR64_TC, i32imm, i8imm);
- let ParserMatchClass = X86MemAsmOperand;
-}
-
-//===----------------------------------------------------------------------===//
-// Complex Pattern Definitions.
-//
-def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr",
- [add, sub, mul, X86mul_imm, shl, or, frameindex,
- X86WrapperRIP], []>;
-
-def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
- [tglobaltlsaddr], []>;
-
-//===----------------------------------------------------------------------===//
-// Pattern fragments.
-//
-
-def i64immSExt8 : PatLeaf<(i64 immSext8)>;
-
-def GetLo32XForm : SDNodeXForm<imm, [{
- // Transformation function: get the low 32 bits.
- return getI32Imm((unsigned)N->getZExtValue());
-}]>;
-
-def i64immSExt32 : PatLeaf<(i64 imm), [{ return i64immSExt32(N); }]>;
-
-
-def i64immZExt32 : PatLeaf<(i64 imm), [{
- // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
- // unsignedsign extended field.
- return (uint64_t)N->getZExtValue() == (uint32_t)N->getZExtValue();
-}]>;
-
-def sextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>;
-def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>;
-def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>;
-
-def zextloadi64i1 : PatFrag<(ops node:$ptr), (i64 (zextloadi1 node:$ptr))>;
-def zextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (zextloadi8 node:$ptr))>;
-def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>;
-def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>;
-
-def extloadi64i1 : PatFrag<(ops node:$ptr), (i64 (extloadi1 node:$ptr))>;
-def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>;
-def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>;
-def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>;
-
-//===----------------------------------------------------------------------===//
-// Instruction list...
-//
-
-// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
-// a stack adjustment and the codegen must know that they may modify the stack
-// pointer before prolog-epilog rewriting occurs.
-// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
-// sub / add which can clobber EFLAGS.
-let Defs = [RSP, EFLAGS], Uses = [RSP] in {
-def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt),
- "#ADJCALLSTACKDOWN",
- [(X86callseq_start timm:$amt)]>,
- Requires<[In64BitMode]>;
-def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
- "#ADJCALLSTACKUP",
- [(X86callseq_end timm:$amt1, timm:$amt2)]>,
- Requires<[In64BitMode]>;
-}
-
-// Interrupt Instructions
-def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iret{q}", []>;
-
-//===----------------------------------------------------------------------===//
-// Call Instructions...
-//
-let isCall = 1 in
- // All calls clobber the non-callee saved registers. RSP is marked as
- // a use to prevent stack-pointer assignments that appear immediately
- // before calls from potentially appearing dead. Uses for argument
- // registers are added manually.
- let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
- FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
- MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
- Uses = [RSP] in {
-
- // NOTE: this pattern doesn't match "X86call imm", because we do not know
- // that the offset between an arbitrary immediate and the call will fit in
- // the 32-bit pcrel field that we have.
- def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
- (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
- "call{q}\t$dst", []>,
- Requires<[In64BitMode, NotWin64]>;
- def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
- "call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
- Requires<[NotWin64]>;
- def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
- "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
- Requires<[NotWin64]>;
-
- def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst),
- "lcall{q}\t{*}$dst", []>;
- }
-
- // FIXME: We need to teach codegen about single list of call-clobbered
- // registers.
-let isCall = 1, isCodeGenOnly = 1 in
- // All calls clobber the non-callee saved registers. RSP is marked as
- // a use to prevent stack-pointer assignments that appear immediately
- // before calls from potentially appearing dead. Uses for argument
- // registers are added manually.
- let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
- FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
- MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
- Uses = [RSP] in {
- def WINCALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
- (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
- "call\t$dst", []>,
- Requires<[IsWin64]>;
- def WINCALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
- "call\t{*}$dst",
- [(X86call GR64:$dst)]>, Requires<[IsWin64]>;
- def WINCALL64m : I<0xFF, MRM2m, (outs),
- (ins i64mem:$dst, variable_ops), "call\t{*}$dst",
- [(X86call (loadi64 addr:$dst))]>,
- Requires<[IsWin64]>;
- }
-
-
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
- isCodeGenOnly = 1 in
- let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
- FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
- MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
- Uses = [RSP] in {
- def TCRETURNdi64 : I<0, Pseudo, (outs),
- (ins i64i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
- "#TC_RETURN $dst $offset", []>;
- def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64_TC:$dst, i32imm:$offset,
- variable_ops),
- "#TC_RETURN $dst $offset", []>;
- let mayLoad = 1 in
- def TCRETURNmi64 : I<0, Pseudo, (outs),
- (ins i64mem_TC:$dst, i32imm:$offset, variable_ops),
- "#TC_RETURN $dst $offset", []>;
-
- def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs),
- (ins i64i32imm_pcrel:$dst, variable_ops),
- "jmp\t$dst # TAILCALL", []>;
- def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64_TC:$dst, variable_ops),
- "jmp{q}\t{*}$dst # TAILCALL", []>;
-
- let mayLoad = 1 in
- def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops),
- "jmp{q}\t{*}$dst # TAILCALL", []>;
-}
-
-// Branches
-let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
- def JMP64pcrel32 : I<0xE9, RawFrm, (outs), (ins brtarget:$dst),
- "jmp{q}\t$dst", []>;
- def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst",
- [(brind GR64:$dst)]>, Requires<[In64BitMode]>;
- def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
- [(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>;
- def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst),
- "ljmp{q}\t{*}$dst", []>;
-}
-
-//===----------------------------------------------------------------------===//
-// EH Pseudo Instructions
-//
-let isTerminator = 1, isReturn = 1, isBarrier = 1,
- hasCtrlDep = 1, isCodeGenOnly = 1 in {
-def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
- "ret\t#eh_return, addr: $addr",
- [(X86ehret GR64:$addr)]>;
-
-}
-
-//===----------------------------------------------------------------------===//
-// Miscellaneous Instructions...
-//
-
-def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS;
-let mayLoad = 1 in
-def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS;
-
-let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in
-def LEAVE64 : I<0xC9, RawFrm,
- (outs), (ins), "leave", []>, Requires<[In64BitMode]>;
-let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in {
-let mayLoad = 1 in {
-def POP64r : I<0x58, AddRegFrm,
- (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
-def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
-def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", []>;
-}
-let mayStore = 1 in {
-def PUSH64r : I<0x50, AddRegFrm,
- (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
-def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
-def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>;
-}
-}
-
-let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in {
-def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm),
- "push{q}\t$imm", []>;
-def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
- "push{q}\t$imm", []>;
-def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm),
- "push{q}\t$imm", []>;
-}
-
-let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in
-def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", []>,
- Requires<[In64BitMode]>;
-let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in
-def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", []>,
- Requires<[In64BitMode]>;
-
-def LEA64_32r : I<0x8D, MRMSrcMem,
- (outs GR32:$dst), (ins lea64_32mem:$src),
- "lea{l}\t{$src|$dst}, {$dst|$src}",
- [(set GR32:$dst, lea32addr:$src)]>, Requires<[In64BitMode]>;
-
-let isReMaterializable = 1 in
-def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "lea{q}\t{$src|$dst}, {$dst|$src}",
- [(set GR64:$dst, lea64addr:$src)]>;
-
-let Constraints = "$src = $dst" in
-def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
- "bswap{q}\t$dst",
- [(set GR64:$dst, (bswap GR64:$src))]>, TB;
-
-// Bit scan instructions.
-let Defs = [EFLAGS] in {
-def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "bsf{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))]>, TB;
-def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "bsf{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))]>, TB;
-
-def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "bsr{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))]>, TB;
-def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "bsr{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))]>, TB;
-} // Defs = [EFLAGS]
-
-// Repeat string ops
-let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in
-def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
- [(X86rep_movs i64)]>, REP;
-let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in
-def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
- [(X86rep_stos i64)]>, REP;
-
-let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in
-def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", []>;
-
-let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in
-def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", []>;
-
-def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", []>;
-
-def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>;
-
-// Fast system-call instructions
-def SYSEXIT64 : RI<0x35, RawFrm,
- (outs), (ins), "sysexit", []>, TB, Requires<[In64BitMode]>;
-
-//===----------------------------------------------------------------------===//
-// Move Instructions...
-//
-
-let neverHasSideEffects = 1 in
-def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>;
-
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
-def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
- "movabs{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, imm:$src)]>;
-def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
- "mov{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, i64immSExt32:$src)]>;
-}
-
-// The assembler accepts movq of a 64-bit immediate as an alternate spelling of
-// movabsq.
-let isAsmParserOnly = 1 in {
-def MOV64ri_alt : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>;
-}
-
-let isCodeGenOnly = 1 in {
-def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>;
-}
-
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "mov{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (load addr:$src))]>;
-
-def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
- "mov{q}\t{$src, $dst|$dst, $src}",
- [(store GR64:$src, addr:$dst)]>;
-def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
- "mov{q}\t{$src, $dst|$dst, $src}",
- [(store i64immSExt32:$src, addr:$dst)]>;
-
-/// Versions of MOV64rr, MOV64rm, and MOV64mr for i64mem_TC and GR64_TC.
-let isCodeGenOnly = 1 in {
-let neverHasSideEffects = 1 in
-def MOV64rr_TC : RI<0x89, MRMDestReg, (outs GR64_TC:$dst), (ins GR64_TC:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>;
-
-let mayLoad = 1,
- canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOV64rm_TC : RI<0x8B, MRMSrcMem, (outs GR64_TC:$dst), (ins i64mem_TC:$src),
- "mov{q}\t{$src, $dst|$dst, $src}",
- []>;
-
-let mayStore = 1 in
-def MOV64mr_TC : RI<0x89, MRMDestMem, (outs), (ins i64mem_TC:$dst, GR64_TC:$src),
- "mov{q}\t{$src, $dst|$dst, $src}",
- []>;
-}
-
-// FIXME: These definitions are utterly broken
-// Just leave them commented out for now because they're useless outside
-// of the large code model, and most compilers won't generate the instructions
-// in question.
-/*
-def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins offset8:$src),
- "mov{q}\t{$src, %rax|%rax, $src}", []>;
-def MOV64o64a : RIi32<0xA1, RawFrm, (outs), (ins offset64:$src),
- "mov{q}\t{$src, %rax|%rax, $src}", []>;
-def MOV64ao8 : RIi8<0xA2, RawFrm, (outs offset8:$dst), (ins),
- "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
-def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins),
- "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
-*/
-
-// Moves to and from segment registers
-def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>;
-def MOV64ms : RI<0x8C, MRMDestMem, (outs i64mem:$dst), (ins SEGMENT_REG:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>;
-def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>;
-def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>;
-
-// Moves to and from debug registers
-def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
-
-// Moves to and from control registers
-def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
-
-// Sign/Zero extenders
-
-// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
-// operand, which makes it a rare instruction with an 8-bit register
-// operand that can never access an h register. If support for h registers
-// were generalized, this would require a special register class.
-def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
- "movs{bq|x}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sext GR8:$src))]>, TB;
-def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
- "movs{bq|x}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sextloadi64i8 addr:$src))]>, TB;
-def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
- "movs{wq|x}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sext GR16:$src))]>, TB;
-def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
- "movs{wq|x}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sextloadi64i16 addr:$src))]>, TB;
-def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
- "movs{lq|xd}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sext GR32:$src))]>;
-def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
- "movs{lq|xd}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sextloadi64i32 addr:$src))]>;
-
-// movzbq and movzwq encodings for the disassembler
-def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
- "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOVZX64rm8_Q : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src),
- "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOVZX64rr16_Q : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
- "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
- "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB;
-
-// Use movzbl instead of movzbq when the destination is a register; it's
-// equivalent due to implicit zero-extending, and it has a smaller encoding.
-def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
- "", [(set GR64:$dst, (zext GR8:$src))]>, TB;
-def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
- "", [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB;
-// Use movzwl instead of movzwq when the destination is a register; it's
-// equivalent due to implicit zero-extending, and it has a smaller encoding.
-def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
- "", [(set GR64:$dst, (zext GR16:$src))]>, TB;
-def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
- "", [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB;
-
-// There's no movzlq instruction, but movl can be used for this purpose, using
-// implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero
-// extension on x86-64 is to use a SUBREG_TO_REG to utilize implicit
-// zero-extension, however this isn't possible when the 32-bit value is
-// defined by a truncate or is copied from something where the high bits aren't
-// necessarily all zero. In such cases, we fall back to these explicit zext
-// instructions.
-def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src),
- "", [(set GR64:$dst, (zext GR32:$src))]>;
-def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
- "", [(set GR64:$dst, (zextloadi64i32 addr:$src))]>;
-
-// Any instruction that defines a 32-bit result leaves the high half of the
-// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
-// be copying from a truncate. And x86's cmov doesn't do anything if the
-// condition is false. But any other 32-bit operation will zero-extend
-// up to 64 bits.
-def def32 : PatLeaf<(i32 GR32:$src), [{
- return N->getOpcode() != ISD::TRUNCATE &&
- N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
- N->getOpcode() != ISD::CopyFromReg &&
- N->getOpcode() != X86ISD::CMOV;
-}]>;
-
-// In the case of a 32-bit def that is known to implicitly zero-extend,
-// we can use a SUBREG_TO_REG.
-def : Pat<(i64 (zext def32:$src)),
- (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
-
-let neverHasSideEffects = 1 in {
- let Defs = [RAX], Uses = [EAX] in
- def CDQE : RI<0x98, RawFrm, (outs), (ins),
- "{cltq|cdqe}", []>; // RAX = signext(EAX)
-
- let Defs = [RAX,RDX], Uses = [RAX] in
- def CQO : RI<0x99, RawFrm, (outs), (ins),
- "{cqto|cqo}", []>; // RDX:RAX = signext(RAX)
-}
-
-//===----------------------------------------------------------------------===//
-// Arithmetic Instructions...
-//
-
-let Defs = [EFLAGS] in {
-
-def ADD64i32 : RIi32<0x05, RawFrm, (outs), (ins i64i32imm:$src),
- "add{q}\t{$src, %rax|%rax, $src}", []>;
-
-let Constraints = "$src1 = $dst" in {
-let isConvertibleToThreeAddress = 1 in {
-let isCommutable = 1 in
-// Register-Register Addition
-def ADD64rr : RI<0x01, MRMDestReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "add{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86add_flag GR64:$src1, GR64:$src2))]>;
-
-// These are alternate spellings for use by the disassembler, we mark them as
-// code gen only to ensure they aren't matched by the assembler.
-let isCodeGenOnly = 1 in {
- def ADD64rr_alt : RI<0x03, MRMSrcReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "add{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-// Register-Integer Addition
-def ADD64ri8 : RIi8<0x83, MRM0r, (outs GR64:$dst),
- (ins GR64:$src1, i64i8imm:$src2),
- "add{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86add_flag GR64:$src1, i64immSExt8:$src2))]>;
-def ADD64ri32 : RIi32<0x81, MRM0r, (outs GR64:$dst),
- (ins GR64:$src1, i64i32imm:$src2),
- "add{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86add_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // isConvertibleToThreeAddress
-
-// Register-Memory Addition
-def ADD64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst),
- (ins GR64:$src1, i64mem:$src2),
- "add{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86add_flag GR64:$src1, (load addr:$src2)))]>;
-
-} // Constraints = "$src1 = $dst"
-
-// Memory-Register Addition
-def ADD64mr : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
- "add{q}\t{$src2, $dst|$dst, $src2}",
- [(store (add (load addr:$dst), GR64:$src2), addr:$dst),
- (implicit EFLAGS)]>;
-def ADD64mi8 : RIi8<0x83, MRM0m, (outs), (ins i64mem:$dst, i64i8imm :$src2),
- "add{q}\t{$src2, $dst|$dst, $src2}",
- [(store (add (load addr:$dst), i64immSExt8:$src2), addr:$dst),
- (implicit EFLAGS)]>;
-def ADD64mi32 : RIi32<0x81, MRM0m, (outs), (ins i64mem:$dst, i64i32imm :$src2),
- "add{q}\t{$src2, $dst|$dst, $src2}",
- [(store (add (load addr:$dst), i64immSExt32:$src2), addr:$dst),
- (implicit EFLAGS)]>;
-
-let Uses = [EFLAGS] in {
-
-def ADC64i32 : RIi32<0x15, RawFrm, (outs), (ins i64i32imm:$src),
- "adc{q}\t{$src, %rax|%rax, $src}", []>;
-
-let Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in
-def ADC64rr : RI<0x11, MRMDestReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "adc{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (adde GR64:$src1, GR64:$src2))]>;
-
-let isCodeGenOnly = 1 in {
-def ADC64rr_REV : RI<0x13, MRMSrcReg , (outs GR32:$dst),
- (ins GR64:$src1, GR64:$src2),
- "adc{q}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-def ADC64rm : RI<0x13, MRMSrcMem , (outs GR64:$dst),
- (ins GR64:$src1, i64mem:$src2),
- "adc{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (adde GR64:$src1, (load addr:$src2)))]>;
-
-def ADC64ri8 : RIi8<0x83, MRM2r, (outs GR64:$dst),
- (ins GR64:$src1, i64i8imm:$src2),
- "adc{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (adde GR64:$src1, i64immSExt8:$src2))]>;
-def ADC64ri32 : RIi32<0x81, MRM2r, (outs GR64:$dst),
- (ins GR64:$src1, i64i32imm:$src2),
- "adc{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (adde GR64:$src1, i64immSExt32:$src2))]>;
-} // Constraints = "$src1 = $dst"
-
-def ADC64mr : RI<0x11, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
- "adc{q}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (load addr:$dst), GR64:$src2), addr:$dst)]>;
-def ADC64mi8 : RIi8<0x83, MRM2m, (outs), (ins i64mem:$dst, i64i8imm :$src2),
- "adc{q}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (load addr:$dst), i64immSExt8:$src2),
- addr:$dst)]>;
-def ADC64mi32 : RIi32<0x81, MRM2m, (outs), (ins i64mem:$dst, i64i32imm:$src2),
- "adc{q}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (load addr:$dst), i64immSExt32:$src2),
- addr:$dst)]>;
-} // Uses = [EFLAGS]
-
-let Constraints = "$src1 = $dst" in {
-// Register-Register Subtraction
-def SUB64rr : RI<0x29, MRMDestReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "sub{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86sub_flag GR64:$src1, GR64:$src2))]>;
-
-let isCodeGenOnly = 1 in {
-def SUB64rr_REV : RI<0x2B, MRMSrcReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "sub{q}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-// Register-Memory Subtraction
-def SUB64rm : RI<0x2B, MRMSrcMem, (outs GR64:$dst),
- (ins GR64:$src1, i64mem:$src2),
- "sub{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86sub_flag GR64:$src1, (load addr:$src2)))]>;
-
-// Register-Integer Subtraction
-def SUB64ri8 : RIi8<0x83, MRM5r, (outs GR64:$dst),
- (ins GR64:$src1, i64i8imm:$src2),
- "sub{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86sub_flag GR64:$src1, i64immSExt8:$src2))]>;
-def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst),
- (ins GR64:$src1, i64i32imm:$src2),
- "sub{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86sub_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // Constraints = "$src1 = $dst"
-
-def SUB64i32 : RIi32<0x2D, RawFrm, (outs), (ins i64i32imm:$src),
- "sub{q}\t{$src, %rax|%rax, $src}", []>;
-
-// Memory-Register Subtraction
-def SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
- "sub{q}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (load addr:$dst), GR64:$src2), addr:$dst),
- (implicit EFLAGS)]>;
-
-// Memory-Integer Subtraction
-def SUB64mi8 : RIi8<0x83, MRM5m, (outs), (ins i64mem:$dst, i64i8imm :$src2),
- "sub{q}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (load addr:$dst), i64immSExt8:$src2),
- addr:$dst),
- (implicit EFLAGS)]>;
-def SUB64mi32 : RIi32<0x81, MRM5m, (outs), (ins i64mem:$dst, i64i32imm:$src2),
- "sub{q}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (load addr:$dst), i64immSExt32:$src2),
- addr:$dst),
- (implicit EFLAGS)]>;
-
-let Uses = [EFLAGS] in {
-let Constraints = "$src1 = $dst" in {
-def SBB64rr : RI<0x19, MRMDestReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "sbb{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (sube GR64:$src1, GR64:$src2))]>;
-
-let isCodeGenOnly = 1 in {
-def SBB64rr_REV : RI<0x1B, MRMSrcReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "sbb{q}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-def SBB64rm : RI<0x1B, MRMSrcMem, (outs GR64:$dst),
- (ins GR64:$src1, i64mem:$src2),
- "sbb{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (sube GR64:$src1, (load addr:$src2)))]>;
-
-def SBB64ri8 : RIi8<0x83, MRM3r, (outs GR64:$dst),
- (ins GR64:$src1, i64i8imm:$src2),
- "sbb{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (sube GR64:$src1, i64immSExt8:$src2))]>;
-def SBB64ri32 : RIi32<0x81, MRM3r, (outs GR64:$dst),
- (ins GR64:$src1, i64i32imm:$src2),
- "sbb{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>;
-} // Constraints = "$src1 = $dst"
-
-def SBB64i32 : RIi32<0x1D, RawFrm, (outs), (ins i64i32imm:$src),
- "sbb{q}\t{$src, %rax|%rax, $src}", []>;
-
-def SBB64mr : RI<0x19, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
- "sbb{q}\t{$src2, $dst|$dst, $src2}",
- [(store (sube (load addr:$dst), GR64:$src2), addr:$dst)]>;
-def SBB64mi8 : RIi8<0x83, MRM3m, (outs), (ins i64mem:$dst, i64i8imm :$src2),
- "sbb{q}\t{$src2, $dst|$dst, $src2}",
- [(store (sube (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>;
-def SBB64mi32 : RIi32<0x81, MRM3m, (outs), (ins i64mem:$dst, i64i32imm:$src2),
- "sbb{q}\t{$src2, $dst|$dst, $src2}",
- [(store (sube (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>;
-} // Uses = [EFLAGS]
-} // Defs = [EFLAGS]
-
-// Unsigned multiplication
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in {
-def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
- "mul{q}\t$src", []>; // RAX,RDX = RAX*GR64
-let mayLoad = 1 in
-def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
- "mul{q}\t$src", []>; // RAX,RDX = RAX*[mem64]
-
-// Signed multiplication
-def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src),
- "imul{q}\t$src", []>; // RAX,RDX = RAX*GR64
-let mayLoad = 1 in
-def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
- "imul{q}\t$src", []>; // RAX,RDX = RAX*[mem64]
-}
-
-let Defs = [EFLAGS] in {
-let Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in
-// Register-Register Signed Integer Multiplication
-def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "imul{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86smul_flag GR64:$src1, GR64:$src2))]>, TB;
-
-// Register-Memory Signed Integer Multiplication
-def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
- (ins GR64:$src1, i64mem:$src2),
- "imul{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86smul_flag GR64:$src1, (load addr:$src2)))]>, TB;
-} // Constraints = "$src1 = $dst"
-
-// Suprisingly enough, these are not two address instructions!
-
-// Register-Integer Signed Integer Multiplication
-def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8
- (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
- "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86smul_flag GR64:$src1, i64immSExt8:$src2))]>;
-def IMUL64rri32 : RIi32<0x69, MRMSrcReg, // GR64 = GR64*I32
- (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
- "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86smul_flag GR64:$src1, i64immSExt32:$src2))]>;
-
-// Memory-Integer Signed Integer Multiplication
-def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
- (outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2),
- "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86smul_flag (load addr:$src1),
- i64immSExt8:$src2))]>;
-def IMUL64rmi32 : RIi32<0x69, MRMSrcMem, // GR64 = [mem64]*I32
- (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2),
- "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86smul_flag (load addr:$src1),
- i64immSExt32:$src2))]>;
-} // Defs = [EFLAGS]
-
-// Unsigned division / remainder
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in {
-// RDX:RAX/r64 = RAX,RDX
-def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src),
- "div{q}\t$src", []>;
-// Signed division / remainder
-// RDX:RAX/r64 = RAX,RDX
-def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src),
- "idiv{q}\t$src", []>;
-let mayLoad = 1 in {
-// RDX:RAX/[mem64] = RAX,RDX
-def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src),
- "div{q}\t$src", []>;
-// RDX:RAX/[mem64] = RAX,RDX
-def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
- "idiv{q}\t$src", []>;
-}
-}
-
-// Unary instructions
-let Defs = [EFLAGS], CodeSize = 2 in {
-let Constraints = "$src = $dst" in
-def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src), "neg{q}\t$dst",
- [(set GR64:$dst, (ineg GR64:$src)),
- (implicit EFLAGS)]>;
-def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst",
- [(store (ineg (loadi64 addr:$dst)), addr:$dst),
- (implicit EFLAGS)]>;
-
-let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in
-def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src), "inc{q}\t$dst",
- [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src))]>;
-def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst",
- [(store (add (loadi64 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>;
-
-let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in
-def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src), "dec{q}\t$dst",
- [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src))]>;
-def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
- [(store (add (loadi64 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>;
-
-// In 64-bit mode, single byte INC and DEC cannot be encoded.
-let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in {
-// Can transform into LEA.
-def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src),
- "inc{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src))]>,
- OpSize, Requires<[In64BitMode]>;
-def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src),
- "inc{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src))]>,
- Requires<[In64BitMode]>;
-def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src),
- "dec{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src))]>,
- OpSize, Requires<[In64BitMode]>;
-def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src),
- "dec{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src))]>,
- Requires<[In64BitMode]>;
-} // Constraints = "$src = $dst", isConvertibleToThreeAddress
-
-// These are duplicates of their 32-bit counterparts. Only needed so X86 knows
-// how to unfold them.
-def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
- [(store (add (loadi16 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize, Requires<[In64BitMode]>;
-def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
- [(store (add (loadi32 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>,
- Requires<[In64BitMode]>;
-def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
- [(store (add (loadi16 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize, Requires<[In64BitMode]>;
-def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
- [(store (add (loadi32 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>,
- Requires<[In64BitMode]>;
-} // Defs = [EFLAGS], CodeSize
-
-
-let Defs = [EFLAGS] in {
-// Shift instructions
-let Constraints = "$src1 = $dst" in {
-let Uses = [CL] in
-def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
- "shl{q}\t{%cl, $dst|$dst, %CL}",
- [(set GR64:$dst, (shl GR64:$src1, CL))]>;
-let isConvertibleToThreeAddress = 1 in // Can transform into LEA.
-def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst),
- (ins GR64:$src1, i8imm:$src2),
- "shl{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>;
-// NOTE: We don't include patterns for shifts of a register by one, because
-// 'add reg,reg' is cheaper.
-def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
- "shl{q}\t$dst", []>;
-} // Constraints = "$src1 = $dst"
-
-let Uses = [CL] in
-def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst),
- "shl{q}\t{%cl, $dst|$dst, %CL}",
- [(store (shl (loadi64 addr:$dst), CL), addr:$dst)]>;
-def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, i8imm:$src),
- "shl{q}\t{$src, $dst|$dst, $src}",
- [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst),
- "shl{q}\t$dst",
- [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
-
-let Constraints = "$src1 = $dst" in {
-let Uses = [CL] in
-def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
- "shr{q}\t{%cl, $dst|$dst, %CL}",
- [(set GR64:$dst, (srl GR64:$src1, CL))]>;
-def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
- "shr{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))]>;
-def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
- "shr{q}\t$dst",
- [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>;
-} // Constraints = "$src1 = $dst"
-
-let Uses = [CL] in
-def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst),
- "shr{q}\t{%cl, $dst|$dst, %CL}",
- [(store (srl (loadi64 addr:$dst), CL), addr:$dst)]>;
-def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, i8imm:$src),
- "shr{q}\t{$src, $dst|$dst, $src}",
- [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst),
- "shr{q}\t$dst",
- [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
-
-let Constraints = "$src1 = $dst" in {
-let Uses = [CL] in
-def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
- "sar{q}\t{%cl, $dst|$dst, %CL}",
- [(set GR64:$dst, (sra GR64:$src1, CL))]>;
-def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst),
- (ins GR64:$src1, i8imm:$src2),
- "sar{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))]>;
-def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
- "sar{q}\t$dst",
- [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>;
-} // Constraints = "$src = $dst"
-
-let Uses = [CL] in
-def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst),
- "sar{q}\t{%cl, $dst|$dst, %CL}",
- [(store (sra (loadi64 addr:$dst), CL), addr:$dst)]>;
-def SAR64mi : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, i8imm:$src),
- "sar{q}\t{$src, $dst|$dst, $src}",
- [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
- "sar{q}\t$dst",
- [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
-
-// Rotate instructions
-
-let Constraints = "$src = $dst" in {
-def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src),
- "rcl{q}\t{1, $dst|$dst, 1}", []>;
-def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt),
- "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src),
- "rcr{q}\t{1, $dst|$dst, 1}", []>;
-def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt),
- "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-let Uses = [CL] in {
-def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src),
- "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
-def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src),
- "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
-}
-} // Constraints = "$src = $dst"
-
-def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst),
- "rcl{q}\t{1, $dst|$dst, 1}", []>;
-def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt),
- "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst),
- "rcr{q}\t{1, $dst|$dst, 1}", []>;
-def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, i8imm:$cnt),
- "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-let Uses = [CL] in {
-def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst),
- "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
-def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst),
- "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
-}
-
-let Constraints = "$src1 = $dst" in {
-let Uses = [CL] in
-def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
- "rol{q}\t{%cl, $dst|$dst, %CL}",
- [(set GR64:$dst, (rotl GR64:$src1, CL))]>;
-def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst),
- (ins GR64:$src1, i8imm:$src2),
- "rol{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))]>;
-def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
- "rol{q}\t$dst",
- [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>;
-} // Constraints = "$src1 = $dst"
-
-let Uses = [CL] in
-def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst),
- "rol{q}\t{%cl, $dst|$dst, %CL}",
- [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>;
-def ROL64mi : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, i8imm:$src),
- "rol{q}\t{$src, $dst|$dst, $src}",
- [(store (rotl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst),
- "rol{q}\t$dst",
- [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
-
-let Constraints = "$src1 = $dst" in {
-let Uses = [CL] in
-def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
- "ror{q}\t{%cl, $dst|$dst, %CL}",
- [(set GR64:$dst, (rotr GR64:$src1, CL))]>;
-def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst),
- (ins GR64:$src1, i8imm:$src2),
- "ror{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))]>;
-def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
- "ror{q}\t$dst",
- [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>;
-} // Constraints = "$src1 = $dst"
-
-let Uses = [CL] in
-def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst),
- "ror{q}\t{%cl, $dst|$dst, %CL}",
- [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)]>;
-def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, i8imm:$src),
- "ror{q}\t{$src, $dst|$dst, $src}",
- [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
- "ror{q}\t$dst",
- [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
-
-// Double shift instructions (generalizations of rotate)
-let Constraints = "$src1 = $dst" in {
-let Uses = [CL] in {
-def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
- [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))]>,
- TB;
-def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
- [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))]>,
- TB;
-}
-
-let isCommutable = 1 in { // FIXME: Update X86InstrInfo::commuteInstruction
-def SHLD64rri8 : RIi8<0xA4, MRMDestReg,
- (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2, i8imm:$src3),
- "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2,
- (i8 imm:$src3)))]>,
- TB;
-def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
- (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2, i8imm:$src3),
- "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2,
- (i8 imm:$src3)))]>,
- TB;
-} // isCommutable
-} // Constraints = "$src1 = $dst"
-
-let Uses = [CL] in {
-def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
- "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
- [(store (X86shld (loadi64 addr:$dst), GR64:$src2, CL),
- addr:$dst)]>, TB;
-def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
- "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
- [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, CL),
- addr:$dst)]>, TB;
-}
-def SHLD64mri8 : RIi8<0xA4, MRMDestMem,
- (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
- "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shld (loadi64 addr:$dst), GR64:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
- TB;
-def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
- (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
- "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shrd (loadi64 addr:$dst), GR64:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
- TB;
-} // Defs = [EFLAGS]
-
-//===----------------------------------------------------------------------===//
-// Logical Instructions...
-//
-
-let Constraints = "$src = $dst" , AddedComplexity = 15 in
-def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src), "not{q}\t$dst",
- [(set GR64:$dst, (not GR64:$src))]>;
-def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
- [(store (not (loadi64 addr:$dst)), addr:$dst)]>;
-
-let Defs = [EFLAGS] in {
-def AND64i32 : RIi32<0x25, RawFrm, (outs), (ins i64i32imm:$src),
- "and{q}\t{$src, %rax|%rax, $src}", []>;
-
-let Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in
-def AND64rr : RI<0x21, MRMDestReg,
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "and{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86and_flag GR64:$src1, GR64:$src2))]>;
-let isCodeGenOnly = 1 in {
-def AND64rr_REV : RI<0x23, MRMSrcReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "and{q}\t{$src2, $dst|$dst, $src2}", []>;
-}
-def AND64rm : RI<0x23, MRMSrcMem,
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "and{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86and_flag GR64:$src1, (load addr:$src2)))]>;
-def AND64ri8 : RIi8<0x83, MRM4r,
- (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
- "and{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86and_flag GR64:$src1, i64immSExt8:$src2))]>;
-def AND64ri32 : RIi32<0x81, MRM4r,
- (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
- "and{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86and_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // Constraints = "$src1 = $dst"
-
-def AND64mr : RI<0x21, MRMDestMem,
- (outs), (ins i64mem:$dst, GR64:$src),
- "and{q}\t{$src, $dst|$dst, $src}",
- [(store (and (load addr:$dst), GR64:$src), addr:$dst),
- (implicit EFLAGS)]>;
-def AND64mi8 : RIi8<0x83, MRM4m,
- (outs), (ins i64mem:$dst, i64i8imm :$src),
- "and{q}\t{$src, $dst|$dst, $src}",
- [(store (and (load addr:$dst), i64immSExt8:$src), addr:$dst),
- (implicit EFLAGS)]>;
-def AND64mi32 : RIi32<0x81, MRM4m,
- (outs), (ins i64mem:$dst, i64i32imm:$src),
- "and{q}\t{$src, $dst|$dst, $src}",
- [(store (and (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
- (implicit EFLAGS)]>;
-
-let Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in
-def OR64rr : RI<0x09, MRMDestReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "or{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86or_flag GR64:$src1, GR64:$src2))]>;
-let isCodeGenOnly = 1 in {
-def OR64rr_REV : RI<0x0B, MRMSrcReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "or{q}\t{$src2, $dst|$dst, $src2}", []>;
-}
-def OR64rm : RI<0x0B, MRMSrcMem , (outs GR64:$dst),
- (ins GR64:$src1, i64mem:$src2),
- "or{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86or_flag GR64:$src1, (load addr:$src2)))]>;
-def OR64ri8 : RIi8<0x83, MRM1r, (outs GR64:$dst),
- (ins GR64:$src1, i64i8imm:$src2),
- "or{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86or_flag GR64:$src1, i64immSExt8:$src2))]>;
-def OR64ri32 : RIi32<0x81, MRM1r, (outs GR64:$dst),
- (ins GR64:$src1, i64i32imm:$src2),
- "or{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86or_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // Constraints = "$src1 = $dst"
-
-def OR64mr : RI<0x09, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
- "or{q}\t{$src, $dst|$dst, $src}",
- [(store (or (load addr:$dst), GR64:$src), addr:$dst),
- (implicit EFLAGS)]>;
-def OR64mi8 : RIi8<0x83, MRM1m, (outs), (ins i64mem:$dst, i64i8imm:$src),
- "or{q}\t{$src, $dst|$dst, $src}",
- [(store (or (load addr:$dst), i64immSExt8:$src), addr:$dst),
- (implicit EFLAGS)]>;
-def OR64mi32 : RIi32<0x81, MRM1m, (outs), (ins i64mem:$dst, i64i32imm:$src),
- "or{q}\t{$src, $dst|$dst, $src}",
- [(store (or (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
- (implicit EFLAGS)]>;
-
-def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i64i32imm:$src),
- "or{q}\t{$src, %rax|%rax, $src}", []>;
-
-let Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in
-def XOR64rr : RI<0x31, MRMDestReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "xor{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86xor_flag GR64:$src1, GR64:$src2))]>;
-let isCodeGenOnly = 1 in {
-def XOR64rr_REV : RI<0x33, MRMSrcReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "xor{q}\t{$src2, $dst|$dst, $src2}", []>;
-}
-def XOR64rm : RI<0x33, MRMSrcMem, (outs GR64:$dst),
- (ins GR64:$src1, i64mem:$src2),
- "xor{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86xor_flag GR64:$src1, (load addr:$src2)))]>;
-def XOR64ri8 : RIi8<0x83, MRM6r, (outs GR64:$dst),
- (ins GR64:$src1, i64i8imm:$src2),
- "xor{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86xor_flag GR64:$src1, i64immSExt8:$src2))]>;
-def XOR64ri32 : RIi32<0x81, MRM6r,
- (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
- "xor{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86xor_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // Constraints = "$src1 = $dst"
-
-def XOR64mr : RI<0x31, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
- "xor{q}\t{$src, $dst|$dst, $src}",
- [(store (xor (load addr:$dst), GR64:$src), addr:$dst),
- (implicit EFLAGS)]>;
-def XOR64mi8 : RIi8<0x83, MRM6m, (outs), (ins i64mem:$dst, i64i8imm :$src),
- "xor{q}\t{$src, $dst|$dst, $src}",
- [(store (xor (load addr:$dst), i64immSExt8:$src), addr:$dst),
- (implicit EFLAGS)]>;
-def XOR64mi32 : RIi32<0x81, MRM6m, (outs), (ins i64mem:$dst, i64i32imm:$src),
- "xor{q}\t{$src, $dst|$dst, $src}",
- [(store (xor (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
- (implicit EFLAGS)]>;
-
-def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i64i32imm:$src),
- "xor{q}\t{$src, %rax|%rax, $src}", []>;
-
-} // Defs = [EFLAGS]
-
-//===----------------------------------------------------------------------===//
-// Comparison Instructions...
-//
-
-// Integer comparison
-let Defs = [EFLAGS] in {
-def TEST64i32 : RIi32<0xa9, RawFrm, (outs), (ins i64i32imm:$src),
- "test{q}\t{$src, %rax|%rax, $src}", []>;
-let isCommutable = 1 in
-def TEST64rr : RI<0x85, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2),
- "test{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and GR64:$src1, GR64:$src2), 0))]>;
-def TEST64rm : RI<0x85, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2),
- "test{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and GR64:$src1, (loadi64 addr:$src2)),
- 0))]>;
-def TEST64ri32 : RIi32<0xF7, MRM0r, (outs),
- (ins GR64:$src1, i64i32imm:$src2),
- "test{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and GR64:$src1, i64immSExt32:$src2),
- 0))]>;
-def TEST64mi32 : RIi32<0xF7, MRM0m, (outs),
- (ins i64mem:$src1, i64i32imm:$src2),
- "test{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and (loadi64 addr:$src1),
- i64immSExt32:$src2), 0))]>;
-
-
-def CMP64i32 : RIi32<0x3D, RawFrm, (outs), (ins i64i32imm:$src),
- "cmp{q}\t{$src, %rax|%rax, $src}", []>;
-def CMP64rr : RI<0x39, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
- "cmp{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR64:$src1, GR64:$src2))]>;
-
-// These are alternate spellings for use by the disassembler, we mark them as
-// code gen only to ensure they aren't matched by the assembler.
-let isCodeGenOnly = 1 in {
- def CMP64mrmrr : RI<0x3B, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2),
- "cmp{q}\t{$src2, $src1|$src1, $src2}", []>;
-}
-
-def CMP64mr : RI<0x39, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
- "cmp{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (loadi64 addr:$src1), GR64:$src2))]>;
-def CMP64rm : RI<0x3B, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2),
- "cmp{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR64:$src1, (loadi64 addr:$src2)))]>;
-def CMP64ri8 : RIi8<0x83, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2),
- "cmp{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR64:$src1, i64immSExt8:$src2))]>;
-def CMP64ri32 : RIi32<0x81, MRM7r, (outs), (ins GR64:$src1, i64i32imm:$src2),
- "cmp{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR64:$src1, i64immSExt32:$src2))]>;
-def CMP64mi8 : RIi8<0x83, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
- "cmp{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (loadi64 addr:$src1),
- i64immSExt8:$src2))]>;
-def CMP64mi32 : RIi32<0x81, MRM7m, (outs),
- (ins i64mem:$src1, i64i32imm:$src2),
- "cmp{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (loadi64 addr:$src1),
- i64immSExt32:$src2))]>;
-} // Defs = [EFLAGS]
-
-// Bit tests.
-// TODO: BTC, BTR, and BTS
-let Defs = [EFLAGS] in {
-def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
- "bt{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))]>, TB;
-
-// Unlike with the register+register form, the memory+register form of the
-// bt instruction does not ignore the high bits of the index. From ISel's
-// perspective, this is pretty bizarre. Disable these instructions for now.
-def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
- "bt{q}\t{$src2, $src1|$src1, $src2}",
-// [(X86bt (loadi64 addr:$src1), GR64:$src2),
-// (implicit EFLAGS)]
- []
- >, TB;
-
-def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
- "bt{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB;
-// Note that these instructions don't need FastBTMem because that
-// only applies when the other operand is in a register. When it's
-// an immediate, bt is still fast.
-def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
- "bt{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86bt (loadi64 addr:$src1),
- i64immSExt8:$src2))]>, TB;
-
-def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
- "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
- "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2),
- "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
- "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-
-def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
- "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
- "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2),
- "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
- "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-
-def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
- "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
- "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2),
- "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
- "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-} // Defs = [EFLAGS]
-
-// Conditional moves
-let Uses = [EFLAGS], Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in {
-def CMOVB64rr : RI<0x42, MRMSrcReg, // if <u, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmovb{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_B, EFLAGS))]>, TB;
-def CMOVAE64rr: RI<0x43, MRMSrcReg, // if >=u, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmovae{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_AE, EFLAGS))]>, TB;
-def CMOVE64rr : RI<0x44, MRMSrcReg, // if ==, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmove{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_E, EFLAGS))]>, TB;
-def CMOVNE64rr: RI<0x45, MRMSrcReg, // if !=, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmovne{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_NE, EFLAGS))]>, TB;
-def CMOVBE64rr: RI<0x46, MRMSrcReg, // if <=u, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmovbe{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_BE, EFLAGS))]>, TB;
-def CMOVA64rr : RI<0x47, MRMSrcReg, // if >u, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmova{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_A, EFLAGS))]>, TB;
-def CMOVL64rr : RI<0x4C, MRMSrcReg, // if <s, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmovl{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_L, EFLAGS))]>, TB;
-def CMOVGE64rr: RI<0x4D, MRMSrcReg, // if >=s, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmovge{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_GE, EFLAGS))]>, TB;
-def CMOVLE64rr: RI<0x4E, MRMSrcReg, // if <=s, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmovle{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_LE, EFLAGS))]>, TB;
-def CMOVG64rr : RI<0x4F, MRMSrcReg, // if >s, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmovg{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_G, EFLAGS))]>, TB;
-def CMOVS64rr : RI<0x48, MRMSrcReg, // if signed, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmovs{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_S, EFLAGS))]>, TB;
-def CMOVNS64rr: RI<0x49, MRMSrcReg, // if !signed, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmovns{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_NS, EFLAGS))]>, TB;
-def CMOVP64rr : RI<0x4A, MRMSrcReg, // if parity, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmovp{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_P, EFLAGS))]>, TB;
-def CMOVNP64rr : RI<0x4B, MRMSrcReg, // if !parity, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmovnp{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_NP, EFLAGS))]>, TB;
-def CMOVO64rr : RI<0x40, MRMSrcReg, // if overflow, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmovo{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_O, EFLAGS))]>, TB;
-def CMOVNO64rr : RI<0x41, MRMSrcReg, // if !overflow, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmovno{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_NO, EFLAGS))]>, TB;
-} // isCommutable = 1
-
-def CMOVB64rm : RI<0x42, MRMSrcMem, // if <u, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmovb{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_B, EFLAGS))]>, TB;
-def CMOVAE64rm: RI<0x43, MRMSrcMem, // if >=u, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmovae{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_AE, EFLAGS))]>, TB;
-def CMOVE64rm : RI<0x44, MRMSrcMem, // if ==, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmove{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_E, EFLAGS))]>, TB;
-def CMOVNE64rm: RI<0x45, MRMSrcMem, // if !=, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmovne{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_NE, EFLAGS))]>, TB;
-def CMOVBE64rm: RI<0x46, MRMSrcMem, // if <=u, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmovbe{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_BE, EFLAGS))]>, TB;
-def CMOVA64rm : RI<0x47, MRMSrcMem, // if >u, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmova{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_A, EFLAGS))]>, TB;
-def CMOVL64rm : RI<0x4C, MRMSrcMem, // if <s, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmovl{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_L, EFLAGS))]>, TB;
-def CMOVGE64rm: RI<0x4D, MRMSrcMem, // if >=s, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmovge{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_GE, EFLAGS))]>, TB;
-def CMOVLE64rm: RI<0x4E, MRMSrcMem, // if <=s, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmovle{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_LE, EFLAGS))]>, TB;
-def CMOVG64rm : RI<0x4F, MRMSrcMem, // if >s, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmovg{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_G, EFLAGS))]>, TB;
-def CMOVS64rm : RI<0x48, MRMSrcMem, // if signed, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmovs{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_S, EFLAGS))]>, TB;
-def CMOVNS64rm: RI<0x49, MRMSrcMem, // if !signed, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmovns{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_NS, EFLAGS))]>, TB;
-def CMOVP64rm : RI<0x4A, MRMSrcMem, // if parity, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmovp{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_P, EFLAGS))]>, TB;
-def CMOVNP64rm : RI<0x4B, MRMSrcMem, // if !parity, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmovnp{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_NP, EFLAGS))]>, TB;
-def CMOVO64rm : RI<0x40, MRMSrcMem, // if overflow, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmovo{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_O, EFLAGS))]>, TB;
-def CMOVNO64rm : RI<0x41, MRMSrcMem, // if !overflow, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmovno{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_NO, EFLAGS))]>, TB;
-} // Constraints = "$src1 = $dst"
-
-// Use sbb to materialize carry flag into a GPR.
-// FIXME: This are pseudo ops that should be replaced with Pat<> patterns.
-// However, Pat<> can't replicate the destination reg into the inputs of the
-// result.
-// FIXME: Change this to have encoding Pseudo when X86MCCodeEmitter replaces
-// X86CodeEmitter.
-let Defs = [EFLAGS], Uses = [EFLAGS], isCodeGenOnly = 1 in
-def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "",
- [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
-
-def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
- (SETB_C64r)>;
-
-//===----------------------------------------------------------------------===//
-// Descriptor-table support instructions
-
-// LLDT is not interpreted specially in 64-bit mode because there is no sign
-// extension.
-def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins),
- "sldt{q}\t$dst", []>, TB;
-def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins),
- "sldt{q}\t$dst", []>, TB;
-
-//===----------------------------------------------------------------------===//
-// Alias Instructions
-//===----------------------------------------------------------------------===//
-
-// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a
-// smaller encoding, but doing so at isel time interferes with rematerialization
-// in the current register allocator. For now, this is rewritten when the
-// instruction is lowered to an MCInst.
-// FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove
-// when we have a better way to specify isel priority.
-let Defs = [EFLAGS],
- AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "",
- [(set GR64:$dst, 0)]>;
-
-// Materialize i64 constant where top 32-bits are zero. This could theoretically
-// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
-// that would make it more difficult to rematerialize.
-let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
- "", [(set GR64:$dst, i64immZExt32:$src)]>;
-
-//===----------------------------------------------------------------------===//
-// Thread Local Storage Instructions
-//===----------------------------------------------------------------------===//
-
-// ELF TLS Support
-// All calls clobber the non-callee saved registers. RSP is marked as
-// a use to prevent stack-pointer assignments that appear immediately
-// before calls from potentially appearing dead.
-let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
- FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
- MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
- Uses = [RSP] in
-def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
- ".byte\t0x66; "
- "leaq\t$sym(%rip), %rdi; "
- ".word\t0x6666; "
- "rex64; "
- "call\t__tls_get_addr@PLT",
- [(X86tlsaddr tls64addr:$sym)]>,
- Requires<[In64BitMode]>;
-
-// Darwin TLS Support
-// For x86_64, the address of the thunk is passed in %rdi, on return
-// the address of the variable is in %rax. All other registers are preserved.
-let Defs = [RAX],
- Uses = [RDI],
- usesCustomInserter = 1 in
-def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
- "# TLSCall_64",
- [(X86TLSCall addr:$sym)]>,
- Requires<[In64BitMode]>;
-
-let AddedComplexity = 5, isCodeGenOnly = 1 in
-def MOV64GSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "movq\t%gs:$src, $dst",
- [(set GR64:$dst, (gsload addr:$src))]>, SegGS;
-
-let AddedComplexity = 5, isCodeGenOnly = 1 in
-def MOV64FSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "movq\t%fs:$src, $dst",
- [(set GR64:$dst, (fsload addr:$src))]>, SegFS;
-
-//===----------------------------------------------------------------------===//
-// Atomic Instructions
-//===----------------------------------------------------------------------===//
-
-// TODO: Get this to fold the constant into the instruction.
-let hasSideEffects = 1, Defs = [ESP] in
-def Int_MemBarrierNoSSE64 : RI<0x09, MRM1r, (outs), (ins GR64:$zero),
- "lock\n\t"
- "or{q}\t{$zero, (%rsp)|(%rsp), $zero}",
- [(X86MemBarrierNoSSE GR64:$zero)]>,
- Requires<[In64BitMode]>, LOCK;
-
-let Defs = [RAX, EFLAGS], Uses = [RAX] in {
-def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap),
- "lock\n\t"
- "cmpxchgq\t$swap,$ptr",
- [(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK;
-}
-
-let Constraints = "$val = $dst" in {
-let Defs = [EFLAGS] in
-def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val,i64mem:$ptr),
- "lock\n\t"
- "xadd\t$val, $ptr",
- [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>,
- TB, LOCK;
-
-def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst),
- (ins GR64:$val,i64mem:$ptr),
- "xchg{q}\t{$val, $ptr|$ptr, $val}",
- [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))]>;
-
-def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src),
- "xchg{q}\t{$val, $src|$src, $val}", []>;
-}
-
-def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
- "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
-let mayLoad = 1, mayStore = 1 in
-def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
- "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
-
-def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
- "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
-let mayLoad = 1, mayStore = 1 in
-def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
- "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
-
-let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in
-def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
- "cmpxchg16b\t$dst", []>, TB;
-
-def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src),
- "xchg{q}\t{$src, %rax|%rax, $src}", []>;
-
-// Optimized codegen when the non-memory output is not used.
-let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in {
-// FIXME: Use normal add / sub instructions and add lock prefix dynamically.
-def LOCK_ADD64mr : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
- "lock\n\t"
- "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD64mi8 : RIi8<0x83, MRM0m, (outs),
- (ins i64mem:$dst, i64i8imm :$src2),
- "lock\n\t"
- "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD64mi32 : RIi32<0x81, MRM0m, (outs),
- (ins i64mem:$dst, i64i32imm :$src2),
- "lock\n\t"
- "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
- "lock\n\t"
- "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB64mi8 : RIi8<0x83, MRM5m, (outs),
- (ins i64mem:$dst, i64i8imm :$src2),
- "lock\n\t"
- "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB64mi32 : RIi32<0x81, MRM5m, (outs),
- (ins i64mem:$dst, i64i32imm:$src2),
- "lock\n\t"
- "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
- "lock\n\t"
- "inc{q}\t$dst", []>, LOCK;
-def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
- "lock\n\t"
- "dec{q}\t$dst", []>, LOCK;
-}
-// Atomic exchange, and, or, xor
-let Constraints = "$val = $dst", Defs = [EFLAGS],
- usesCustomInserter = 1 in {
-def ATOMAND64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
- "#ATOMAND64 PSEUDO!",
- [(set GR64:$dst, (atomic_load_and_64 addr:$ptr, GR64:$val))]>;
-def ATOMOR64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
- "#ATOMOR64 PSEUDO!",
- [(set GR64:$dst, (atomic_load_or_64 addr:$ptr, GR64:$val))]>;
-def ATOMXOR64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
- "#ATOMXOR64 PSEUDO!",
- [(set GR64:$dst, (atomic_load_xor_64 addr:$ptr, GR64:$val))]>;
-def ATOMNAND64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
- "#ATOMNAND64 PSEUDO!",
- [(set GR64:$dst, (atomic_load_nand_64 addr:$ptr, GR64:$val))]>;
-def ATOMMIN64: I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$ptr, GR64:$val),
- "#ATOMMIN64 PSEUDO!",
- [(set GR64:$dst, (atomic_load_min_64 addr:$ptr, GR64:$val))]>;
-def ATOMMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
- "#ATOMMAX64 PSEUDO!",
- [(set GR64:$dst, (atomic_load_max_64 addr:$ptr, GR64:$val))]>;
-def ATOMUMIN64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
- "#ATOMUMIN64 PSEUDO!",
- [(set GR64:$dst, (atomic_load_umin_64 addr:$ptr, GR64:$val))]>;
-def ATOMUMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
- "#ATOMUMAX64 PSEUDO!",
- [(set GR64:$dst, (atomic_load_umax_64 addr:$ptr, GR64:$val))]>;
-}
-
-// Segmentation support instructions
-
-// i16mem operand in LAR64rm and GR32 operand in LAR32rr is not a typo.
-def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
- "lar{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
- "lar{q}\t{$src, $dst|$dst, $src}", []>, TB;
-
-def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB;
-
-def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", []>, TB;
-
-def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins),
- "push{q}\t%fs", []>, TB;
-def PUSHGS64 : I<0xa8, RawFrm, (outs), (ins),
- "push{q}\t%gs", []>, TB;
-
-def POPFS64 : I<0xa1, RawFrm, (outs), (ins),
- "pop{q}\t%fs", []>, TB;
-def POPGS64 : I<0xa9, RawFrm, (outs), (ins),
- "pop{q}\t%gs", []>, TB;
-
-def LSS64rm : RI<0xb2, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
- "lss{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def LFS64rm : RI<0xb4, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
- "lfs{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
- "lgs{q}\t{$src, $dst|$dst, $src}", []>, TB;
-
-// Specialized register support
-
-// no m form encodable; use SMSW16m
-def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins),
- "smsw{q}\t$dst", []>, TB;
-
-// String manipulation instructions
-
-def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", []>;
-
-//===----------------------------------------------------------------------===//
-// Non-Instruction Patterns
-//===----------------------------------------------------------------------===//
-
-// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable when not in small
-// code model mode, should use 'movabs'. FIXME: This is really a hack, the
-// 'movabs' predicate should handle this sort of thing.
-def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
- (MOV64ri tconstpool :$dst)>, Requires<[FarData]>;
-def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
- (MOV64ri tjumptable :$dst)>, Requires<[FarData]>;
-def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
- (MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>;
-def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
- (MOV64ri texternalsym:$dst)>, Requires<[FarData]>;
-def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
- (MOV64ri tblockaddress:$dst)>, Requires<[FarData]>;
-
-// In static codegen with small code model, we can get the address of a label
-// into a register with 'movl'. FIXME: This is a hack, the 'imm' predicate of
-// the MOV64ri64i32 should accept these.
-def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
- (MOV64ri64i32 tconstpool :$dst)>, Requires<[SmallCode]>;
-def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
- (MOV64ri64i32 tjumptable :$dst)>, Requires<[SmallCode]>;
-def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
- (MOV64ri64i32 tglobaladdr :$dst)>, Requires<[SmallCode]>;
-def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
- (MOV64ri64i32 texternalsym:$dst)>, Requires<[SmallCode]>;
-def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
- (MOV64ri64i32 tblockaddress:$dst)>, Requires<[SmallCode]>;
-
-// In kernel code model, we can get the address of a label
-// into a register with 'movq'. FIXME: This is a hack, the 'imm' predicate of
-// the MOV64ri32 should accept these.
-def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
- (MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>;
-def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
- (MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>;
-def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
- (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
-def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
- (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
-def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
- (MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>;
-
-// If we have small model and -static mode, it is safe to store global addresses
-// directly as immediates. FIXME: This is really a hack, the 'imm' predicate
-// for MOV64mi32 should handle this sort of thing.
-def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),
- (MOV64mi32 addr:$dst, tconstpool:$src)>,
- Requires<[NearData, IsStatic]>;
-def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),
- (MOV64mi32 addr:$dst, tjumptable:$src)>,
- Requires<[NearData, IsStatic]>;
-def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
- (MOV64mi32 addr:$dst, tglobaladdr:$src)>,
- Requires<[NearData, IsStatic]>;
-def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
- (MOV64mi32 addr:$dst, texternalsym:$src)>,
- Requires<[NearData, IsStatic]>;
-def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst),
- (MOV64mi32 addr:$dst, tblockaddress:$src)>,
- Requires<[NearData, IsStatic]>;
-
-// Calls
-// Direct PC relative function call for small code model. 32-bit displacement
-// sign extended to 64-bit.
-def : Pat<(X86call (i64 tglobaladdr:$dst)),
- (CALL64pcrel32 tglobaladdr:$dst)>, Requires<[NotWin64]>;
-def : Pat<(X86call (i64 texternalsym:$dst)),
- (CALL64pcrel32 texternalsym:$dst)>, Requires<[NotWin64]>;
-
-def : Pat<(X86call (i64 tglobaladdr:$dst)),
- (WINCALL64pcrel32 tglobaladdr:$dst)>, Requires<[IsWin64]>;
-def : Pat<(X86call (i64 texternalsym:$dst)),
- (WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>;
-
-// tailcall stuff
-def : Pat<(X86tcret GR64_TC:$dst, imm:$off),
- (TCRETURNri64 GR64_TC:$dst, imm:$off)>,
- Requires<[In64BitMode]>;
-
-def : Pat<(X86tcret (load addr:$dst), imm:$off),
- (TCRETURNmi64 addr:$dst, imm:$off)>,
- Requires<[In64BitMode]>;
-
-def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
- (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
- Requires<[In64BitMode]>;
-
-def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
- (TCRETURNdi64 texternalsym:$dst, imm:$off)>,
- Requires<[In64BitMode]>;
-
-// tls has some funny stuff here...
-// This corresponds to movabs $foo@tpoff, %rax
-def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
- (MOV64ri tglobaltlsaddr :$dst)>;
-// This corresponds to add $foo@tpoff, %rax
-def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
- (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
-// This corresponds to mov foo@tpoff(%rbx), %eax
-def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))),
- (MOV64rm tglobaltlsaddr :$dst)>;
-
-// Comparisons.
-
-// TEST R,R is smaller than CMP R,0
-def : Pat<(X86cmp GR64:$src1, 0),
- (TEST64rr GR64:$src1, GR64:$src1)>;
-
-// Conditional moves with folded loads with operands swapped and conditions
-// inverted.
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_B, EFLAGS),
- (CMOVAE64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_AE, EFLAGS),
- (CMOVB64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_E, EFLAGS),
- (CMOVNE64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NE, EFLAGS),
- (CMOVE64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_BE, EFLAGS),
- (CMOVA64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_A, EFLAGS),
- (CMOVBE64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_L, EFLAGS),
- (CMOVGE64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_GE, EFLAGS),
- (CMOVL64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_LE, EFLAGS),
- (CMOVG64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_G, EFLAGS),
- (CMOVLE64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_P, EFLAGS),
- (CMOVNP64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NP, EFLAGS),
- (CMOVP64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_S, EFLAGS),
- (CMOVNS64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NS, EFLAGS),
- (CMOVS64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_O, EFLAGS),
- (CMOVNO64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NO, EFLAGS),
- (CMOVO64rm GR64:$src2, addr:$src1)>;
-
-// zextload bool -> zextload byte
-def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
-
-// extload
-// When extloading from 16-bit and smaller memory locations into 64-bit
-// registers, use zero-extending loads so that the entire 64-bit register is
-// defined, avoiding partial-register updates.
-def : Pat<(extloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
-def : Pat<(extloadi64i8 addr:$src), (MOVZX64rm8 addr:$src)>;
-def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>;
-// For other extloads, use subregs, since the high contents of the register are
-// defined after an extload.
-def : Pat<(extloadi64i32 addr:$src),
- (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src),
- sub_32bit)>;
-
-// anyext. Define these to do an explicit zero-extend to
-// avoid partial-register updates.
-def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8 GR8 :$src)>;
-def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>;
-def : Pat<(i64 (anyext GR32:$src)),
- (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
-
-//===----------------------------------------------------------------------===//
-// Some peepholes
-//===----------------------------------------------------------------------===//
-
-// Odd encoding trick: -128 fits into an 8-bit immediate field while
-// +128 doesn't, so in this special case use a sub instead of an add.
-def : Pat<(add GR64:$src1, 128),
- (SUB64ri8 GR64:$src1, -128)>;
-def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),
- (SUB64mi8 addr:$dst, -128)>;
-
-// The same trick applies for 32-bit immediate fields in 64-bit
-// instructions.
-def : Pat<(add GR64:$src1, 0x0000000080000000),
- (SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
-def : Pat<(store (add (loadi64 addr:$dst), 0x00000000800000000), addr:$dst),
- (SUB64mi32 addr:$dst, 0xffffffff80000000)>;
-
-// Use a 32-bit and with implicit zero-extension instead of a 64-bit and if it
-// has an immediate with at least 32 bits of leading zeros, to avoid needing to
-// materialize that immediate in a register first.
-def : Pat<(and GR64:$src, i64immZExt32:$imm),
- (SUBREG_TO_REG
- (i64 0),
- (AND32ri
- (EXTRACT_SUBREG GR64:$src, sub_32bit),
- (i32 (GetLo32XForm imm:$imm))),
- sub_32bit)>;
-
-// r & (2^32-1) ==> movz
-def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
- (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
-// r & (2^16-1) ==> movz
-def : Pat<(and GR64:$src, 0xffff),
- (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit)))>;
-// r & (2^8-1) ==> movz
-def : Pat<(and GR64:$src, 0xff),
- (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit)))>;
-// r & (2^8-1) ==> movz
-def : Pat<(and GR32:$src1, 0xff),
- (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>,
- Requires<[In64BitMode]>;
-// r & (2^8-1) ==> movz
-def : Pat<(and GR16:$src1, 0xff),
- (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, sub_8bit)))>,
- Requires<[In64BitMode]>;
-
-// sext_inreg patterns
-def : Pat<(sext_inreg GR64:$src, i32),
- (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
-def : Pat<(sext_inreg GR64:$src, i16),
- (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>;
-def : Pat<(sext_inreg GR64:$src, i8),
- (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>;
-def : Pat<(sext_inreg GR32:$src, i8),
- (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>,
- Requires<[In64BitMode]>;
-def : Pat<(sext_inreg GR16:$src, i8),
- (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, sub_8bit)))>,
- Requires<[In64BitMode]>;
-
-// trunc patterns
-def : Pat<(i32 (trunc GR64:$src)),
- (EXTRACT_SUBREG GR64:$src, sub_32bit)>;
-def : Pat<(i16 (trunc GR64:$src)),
- (EXTRACT_SUBREG GR64:$src, sub_16bit)>;
-def : Pat<(i8 (trunc GR64:$src)),
- (EXTRACT_SUBREG GR64:$src, sub_8bit)>;
-def : Pat<(i8 (trunc GR32:$src)),
- (EXTRACT_SUBREG GR32:$src, sub_8bit)>,
- Requires<[In64BitMode]>;
-def : Pat<(i8 (trunc GR16:$src)),
- (EXTRACT_SUBREG GR16:$src, sub_8bit)>,
- Requires<[In64BitMode]>;
-
-// h-register tricks.
-// For now, be conservative on x86-64 and use an h-register extract only if the
-// value is immediately zero-extended or stored, which are somewhat common
-// cases. This uses a bunch of code to prevent a register requiring a REX prefix
-// from being allocated in the same instruction as the h register, as there's
-// currently no way to describe this requirement to the register allocator.
-
-// h-register extract and zero-extend.
-def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),
- (SUBREG_TO_REG
- (i64 0),
- (MOVZX32_NOREXrr8
- (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
- sub_8bit_hi)),
- sub_32bit)>;
-def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
- (MOVZX32_NOREXrr8
- (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
- sub_8bit_hi))>,
- Requires<[In64BitMode]>;
-def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
- (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
- GR32_ABCD)),
- sub_8bit_hi))>,
- Requires<[In64BitMode]>;
-def : Pat<(srl GR16:$src, (i8 8)),
- (EXTRACT_SUBREG
- (MOVZX32_NOREXrr8
- (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- sub_8bit_hi)),
- sub_16bit)>,
- Requires<[In64BitMode]>;
-def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
- (MOVZX32_NOREXrr8
- (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- sub_8bit_hi))>,
- Requires<[In64BitMode]>;
-def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
- (MOVZX32_NOREXrr8
- (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- sub_8bit_hi))>,
- Requires<[In64BitMode]>;
-def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
- (SUBREG_TO_REG
- (i64 0),
- (MOVZX32_NOREXrr8
- (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- sub_8bit_hi)),
- sub_32bit)>;
-def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
- (SUBREG_TO_REG
- (i64 0),
- (MOVZX32_NOREXrr8
- (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- sub_8bit_hi)),
- sub_32bit)>;
-
-// h-register extract and store.
-def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
- (MOV8mr_NOREX
- addr:$dst,
- (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
- sub_8bit_hi))>;
-def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
- (MOV8mr_NOREX
- addr:$dst,
- (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
- sub_8bit_hi))>,
- Requires<[In64BitMode]>;
-def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
- (MOV8mr_NOREX
- addr:$dst,
- (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- sub_8bit_hi))>,
- Requires<[In64BitMode]>;
-
-// (shl x, 1) ==> (add x, x)
-def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
-
-// (shl x (and y, 63)) ==> (shl x, y)
-def : Pat<(shl GR64:$src1, (and CL, 63)),
- (SHL64rCL GR64:$src1)>;
-def : Pat<(store (shl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
- (SHL64mCL addr:$dst)>;
-
-def : Pat<(srl GR64:$src1, (and CL, 63)),
- (SHR64rCL GR64:$src1)>;
-def : Pat<(store (srl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
- (SHR64mCL addr:$dst)>;
-
-def : Pat<(sra GR64:$src1, (and CL, 63)),
- (SAR64rCL GR64:$src1)>;
-def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
- (SAR64mCL addr:$dst)>;
-
-// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
-let AddedComplexity = 5 in { // Try this before the selecting to OR
-def : Pat<(or_is_add GR64:$src1, i64immSExt8:$src2),
- (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(or_is_add GR64:$src1, i64immSExt32:$src2),
- (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
-def : Pat<(or_is_add GR64:$src1, GR64:$src2),
- (ADD64rr GR64:$src1, GR64:$src2)>;
-} // AddedComplexity
-
-// X86 specific add which produces a flag.
-def : Pat<(addc GR64:$src1, GR64:$src2),
- (ADD64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(addc GR64:$src1, (load addr:$src2)),
- (ADD64rm GR64:$src1, addr:$src2)>;
-def : Pat<(addc GR64:$src1, i64immSExt8:$src2),
- (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(addc GR64:$src1, i64immSExt32:$src2),
- (ADD64ri32 GR64:$src1, imm:$src2)>;
-
-def : Pat<(subc GR64:$src1, GR64:$src2),
- (SUB64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(subc GR64:$src1, (load addr:$src2)),
- (SUB64rm GR64:$src1, addr:$src2)>;
-def : Pat<(subc GR64:$src1, i64immSExt8:$src2),
- (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(subc GR64:$src1, imm:$src2),
- (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
-//===----------------------------------------------------------------------===//
-// EFLAGS-defining Patterns
-//===----------------------------------------------------------------------===//
-
-// addition
-def : Pat<(add GR64:$src1, GR64:$src2),
- (ADD64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(add GR64:$src1, i64immSExt8:$src2),
- (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(add GR64:$src1, i64immSExt32:$src2),
- (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
-def : Pat<(add GR64:$src1, (loadi64 addr:$src2)),
- (ADD64rm GR64:$src1, addr:$src2)>;
-
-// subtraction
-def : Pat<(sub GR64:$src1, GR64:$src2),
- (SUB64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)),
- (SUB64rm GR64:$src1, addr:$src2)>;
-def : Pat<(sub GR64:$src1, i64immSExt8:$src2),
- (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(sub GR64:$src1, i64immSExt32:$src2),
- (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
-// Multiply
-def : Pat<(mul GR64:$src1, GR64:$src2),
- (IMUL64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)),
- (IMUL64rm GR64:$src1, addr:$src2)>;
-def : Pat<(mul GR64:$src1, i64immSExt8:$src2),
- (IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(mul GR64:$src1, i64immSExt32:$src2),
- (IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>;
-def : Pat<(mul (loadi64 addr:$src1), i64immSExt8:$src2),
- (IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>;
-def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
- (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
-
-// inc/dec
-def : Pat<(add GR16:$src, 1), (INC64_16r GR16:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR16:$src, -1), (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR32:$src, 1), (INC64_32r GR32:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR32:$src, -1), (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>;
-def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
-
-// or
-def : Pat<(or GR64:$src1, GR64:$src2),
- (OR64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(or GR64:$src1, i64immSExt8:$src2),
- (OR64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(or GR64:$src1, i64immSExt32:$src2),
- (OR64ri32 GR64:$src1, i64immSExt32:$src2)>;
-def : Pat<(or GR64:$src1, (loadi64 addr:$src2)),
- (OR64rm GR64:$src1, addr:$src2)>;
-
-// xor
-def : Pat<(xor GR64:$src1, GR64:$src2),
- (XOR64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(xor GR64:$src1, i64immSExt8:$src2),
- (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(xor GR64:$src1, i64immSExt32:$src2),
- (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;
-def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)),
- (XOR64rm GR64:$src1, addr:$src2)>;
-
-// and
-def : Pat<(and GR64:$src1, GR64:$src2),
- (AND64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(and GR64:$src1, i64immSExt8:$src2),
- (AND64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(and GR64:$src1, i64immSExt32:$src2),
- (AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
-def : Pat<(and GR64:$src1, (loadi64 addr:$src2)),
- (AND64rm GR64:$src1, addr:$src2)>;
-
-//===----------------------------------------------------------------------===//
-// X86-64 SSE Instructions
-//===----------------------------------------------------------------------===//
-
-// Move instructions...
-
-def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v2i64 (scalar_to_vector GR64:$src)))]>;
-def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
- (iPTR 0)))]>;
-
-def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (bitconvert GR64:$src))]>;
-def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>;
-
-def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (bitconvert FR64:$src))]>;
-def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
-
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
new file mode 100644
index 0000000..f0ea068
--- /dev/null
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -0,0 +1,1125 @@
+//===- X86InstrArithmetic.td - Integer Arithmetic Instrs ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the integer arithmetic instructions in the X86
+// architecture.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// LEA - Load Effective Address
+
+let neverHasSideEffects = 1 in
+def LEA16r : I<0x8D, MRMSrcMem,
+ (outs GR16:$dst), (ins i32mem:$src),
+ "lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize;
+let isReMaterializable = 1 in
+def LEA32r : I<0x8D, MRMSrcMem,
+ (outs GR32:$dst), (ins i32mem:$src),
+ "lea{l}\t{$src|$dst}, {$dst|$src}",
+ [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>;
+
+def LEA64_32r : I<0x8D, MRMSrcMem,
+ (outs GR32:$dst), (ins lea64_32mem:$src),
+ "lea{l}\t{$src|$dst}, {$dst|$src}",
+ [(set GR32:$dst, lea32addr:$src)]>, Requires<[In64BitMode]>;
+
+let isReMaterializable = 1 in
+def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "lea{q}\t{$src|$dst}, {$dst|$src}",
+ [(set GR64:$dst, lea64addr:$src)]>;
+
+
+
+//===----------------------------------------------------------------------===//
+// Fixed-Register Multiplication and Division Instructions.
+//
+
+// Extra precision multiplication
+
+// AL is really implied by AX, but the registers in Defs must match the
+// SDNode results (i8, i32).
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src",
+ // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
+ // This probably ought to be moved to a def : Pat<> if the
+ // syntax can be accepted.
+ [(set AL, (mul AL, GR8:$src)),
+ (implicit EFLAGS)]>; // AL,AH = AL*GR8
+
+let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in
+def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
+ "mul{w}\t$src",
+ []>, OpSize; // AX,DX = AX*GR16
+
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
+def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
+ "mul{l}\t$src", // EAX,EDX = EAX*GR32
+ [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>;
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
+def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
+ "mul{q}\t$src", // RAX,RDX = RAX*GR64
+ [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/]>;
+
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
+ "mul{b}\t$src",
+ // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
+ // This probably ought to be moved to a def : Pat<> if the
+ // syntax can be accepted.
+ [(set AL, (mul AL, (loadi8 addr:$src))),
+ (implicit EFLAGS)]>; // AL,AH = AL*[mem8]
+
+let mayLoad = 1, neverHasSideEffects = 1 in {
+let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
+ "mul{w}\t$src",
+ []>, OpSize; // AX,DX = AX*[mem16]
+
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
+def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
+ "mul{l}\t$src",
+ []>; // EAX,EDX = EAX*[mem32]
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
+def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
+ "mul{q}\t$src", []>; // RAX,RDX = RAX*[mem64]
+}
+
+let neverHasSideEffects = 1 in {
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>;
+ // AL,AH = AL*GR8
+let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", []>,
+ OpSize; // AX,DX = AX*GR16
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
+def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>;
+ // EAX,EDX = EAX*GR32
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
+def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", []>;
+ // RAX,RDX = RAX*GR64
+
+let mayLoad = 1 in {
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
+ "imul{b}\t$src", []>; // AL,AH = AL*[mem8]
+let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
+ "imul{w}\t$src", []>, OpSize; // AX,DX = AX*[mem16]
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
+def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
+ "imul{l}\t$src", []>; // EAX,EDX = EAX*[mem32]
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
+def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
+ "imul{q}\t$src", []>; // RAX,RDX = RAX*[mem64]
+}
+} // neverHasSideEffects
+
+
+let Defs = [EFLAGS] in {
+let Constraints = "$src1 = $dst" in {
+
+let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y
+// Register-Register Signed Integer Multiply
+def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
+ "imul{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, GR16:$src2))]>, TB, OpSize;
+def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
+ "imul{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, GR32:$src2))]>, TB;
+def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2),
+ "imul{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, GR64:$src2))]>, TB;
+}
+
+// Register-Memory Signed Integer Multiply
+def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
+ (ins GR16:$src1, i16mem:$src2),
+ "imul{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, (load addr:$src2)))]>,
+ TB, OpSize;
+def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src1, i32mem:$src2),
+ "imul{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, (load addr:$src2)))]>, TB;
+def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$src1, i64mem:$src2),
+ "imul{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, (load addr:$src2)))]>, TB;
+} // Constraints = "$src1 = $dst"
+
+} // Defs = [EFLAGS]
+
+// Suprisingly enough, these are not two address instructions!
+let Defs = [EFLAGS] in {
+// Register-Integer Signed Integer Multiply
+def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
+ (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, imm:$src2))]>, OpSize;
+def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8
+ (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, i16immSExt8:$src2))]>,
+ OpSize;
+def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32
+ (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, imm:$src2))]>;
+def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8
+ (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, i32immSExt8:$src2))]>;
+def IMUL64rri32 : RIi32<0x69, MRMSrcReg, // GR64 = GR64*I32
+ (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, i64immSExt32:$src2))]>;
+def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8
+ (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, i64immSExt8:$src2))]>;
+
+
+// Memory-Integer Signed Integer Multiply
+def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
+ (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1), imm:$src2))]>,
+ OpSize;
+def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8
+ (outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i16immSExt8:$src2))]>, OpSize;
+def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32
+ (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1), imm:$src2))]>;
+def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8
+ (outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i32immSExt8:$src2))]>;
+def IMUL64rmi32 : RIi32<0x69, MRMSrcMem, // GR64 = [mem64]*I32
+ (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i64immSExt32:$src2))]>;
+def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
+ (outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i64immSExt8:$src2))]>;
+} // Defs = [EFLAGS]
+
+
+
+
+// unsigned division/remainder
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
+ "div{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def DIV16r : I<0xF7, MRM6r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
+ "div{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
+def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
+ "div{l}\t$src", []>;
+// RDX:RAX/r64 = RAX,RDX
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
+def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src),
+ "div{q}\t$src", []>;
+
+let mayLoad = 1 in {
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
+ "div{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
+ "div{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
+def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
+ "div{l}\t$src", []>;
+// RDX:RAX/[mem64] = RAX,RDX
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
+def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src),
+ "div{q}\t$src", []>;
+}
+
+// Signed division/remainder.
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
+ "idiv{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def IDIV16r: I<0xF7, MRM7r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
+ "idiv{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
+def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
+ "idiv{l}\t$src", []>;
+// RDX:RAX/r64 = RAX,RDX
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
+def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src),
+ "idiv{q}\t$src", []>;
+
+let mayLoad = 1, mayLoad = 1 in {
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
+ "idiv{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
+ "idiv{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
+def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
+ "idiv{l}\t$src", []>;
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX
+def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
+ "idiv{q}\t$src", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Two address Instructions.
+//
+
+// unary instructions
+let CodeSize = 2 in {
+let Defs = [EFLAGS] in {
+let Constraints = "$src1 = $dst" in {
+def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "neg{b}\t$dst",
+ [(set GR8:$dst, (ineg GR8:$src1)),
+ (implicit EFLAGS)]>;
+def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
+ "neg{w}\t$dst",
+ [(set GR16:$dst, (ineg GR16:$src1)),
+ (implicit EFLAGS)]>, OpSize;
+def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
+ "neg{l}\t$dst",
+ [(set GR32:$dst, (ineg GR32:$src1)),
+ (implicit EFLAGS)]>;
+def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "neg{q}\t$dst",
+ [(set GR64:$dst, (ineg GR64:$src1)),
+ (implicit EFLAGS)]>;
+} // Constraints = "$src1 = $dst"
+
+def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst),
+ "neg{b}\t$dst",
+ [(store (ineg (loadi8 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)]>;
+def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst),
+ "neg{w}\t$dst",
+ [(store (ineg (loadi16 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)]>, OpSize;
+def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst),
+ "neg{l}\t$dst",
+ [(store (ineg (loadi32 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)]>;
+def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst",
+ [(store (ineg (loadi64 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)]>;
+} // Defs = [EFLAGS]
+
+
+// Note: NOT does not set EFLAGS!
+
+let Constraints = "$src1 = $dst" in {
+// Match xor -1 to not. Favors these over a move imm + xor to save code size.
+let AddedComplexity = 15 in {
+def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "not{b}\t$dst",
+ [(set GR8:$dst, (not GR8:$src1))]>;
+def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
+ "not{w}\t$dst",
+ [(set GR16:$dst, (not GR16:$src1))]>, OpSize;
+def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
+ "not{l}\t$dst",
+ [(set GR32:$dst, (not GR32:$src1))]>;
+def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "not{q}\t$dst",
+ [(set GR64:$dst, (not GR64:$src1))]>;
+}
+} // Constraints = "$src1 = $dst"
+
+def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst),
+ "not{b}\t$dst",
+ [(store (not (loadi8 addr:$dst)), addr:$dst)]>;
+def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst),
+ "not{w}\t$dst",
+ [(store (not (loadi16 addr:$dst)), addr:$dst)]>, OpSize;
+def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst),
+ "not{l}\t$dst",
+ [(store (not (loadi32 addr:$dst)), addr:$dst)]>;
+def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
+ [(store (not (loadi64 addr:$dst)), addr:$dst)]>;
+} // CodeSize
+
+// TODO: inc/dec is slow for P4, but fast for Pentium-M.
+let Defs = [EFLAGS] in {
+let Constraints = "$src1 = $dst" in {
+let CodeSize = 2 in
+def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "inc{b}\t$dst",
+ [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>;
+
+let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
+def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
+ "inc{w}\t$dst",
+ [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>,
+ OpSize, Requires<[In32BitMode]>;
+def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
+ "inc{l}\t$dst",
+ [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>,
+ Requires<[In32BitMode]>;
+def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "inc{q}\t$dst",
+ [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src1))]>;
+} // isConvertibleToThreeAddress = 1, CodeSize = 1
+
+
+// In 64-bit mode, single byte INC and DEC cannot be encoded.
+let isConvertibleToThreeAddress = 1, CodeSize = 2 in {
+// Can transform into LEA.
+def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
+ "inc{w}\t$dst",
+ [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>,
+ OpSize, Requires<[In64BitMode]>;
+def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
+ "inc{l}\t$dst",
+ [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>,
+ Requires<[In64BitMode]>;
+def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
+ "dec{w}\t$dst",
+ [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>,
+ OpSize, Requires<[In64BitMode]>;
+def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
+ "dec{l}\t$dst",
+ [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>,
+ Requires<[In64BitMode]>;
+} // isConvertibleToThreeAddress = 1, CodeSize = 2
+
+} // Constraints = "$src1 = $dst"
+
+let CodeSize = 2 in {
+ def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
+ [(store (add (loadi8 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>;
+ def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In32BitMode]>;
+ def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In32BitMode]>;
+ def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst",
+ [(store (add (loadi64 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// These are duplicates of their 32-bit counterparts. Only needed so X86 knows
+// how to unfold them.
+// FIXME: What is this for??
+def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In64BitMode]>;
+def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In64BitMode]>;
+def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In64BitMode]>;
+def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In64BitMode]>;
+} // CodeSize = 2
+
+let Constraints = "$src1 = $dst" in {
+let CodeSize = 2 in
+def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "dec{b}\t$dst",
+ [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>;
+let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
+def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
+ "dec{w}\t$dst",
+ [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>,
+ OpSize, Requires<[In32BitMode]>;
+def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
+ "dec{l}\t$dst",
+ [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>,
+ Requires<[In32BitMode]>;
+def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst",
+ [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))]>;
+} // CodeSize = 2
+} // Constraints = "$src1 = $dst"
+
+
+let CodeSize = 2 in {
+ def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
+ [(store (add (loadi8 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>;
+ def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In32BitMode]>;
+ def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In32BitMode]>;
+ def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
+ [(store (add (loadi64 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>;
+} // CodeSize = 2
+} // Defs = [EFLAGS]
+
+
+/// X86TypeInfo - This is a bunch of information that describes relevant X86
+/// information about value types. For example, it can tell you what the
+/// register class and preferred load to use.
+class X86TypeInfo<ValueType vt, string instrsuffix, RegisterClass regclass,
+ PatFrag loadnode, X86MemOperand memoperand, ImmType immkind,
+ Operand immoperand, SDPatternOperator immoperator,
+ Operand imm8operand, SDPatternOperator imm8operator,
+ bit hasOddOpcode, bit hasOpSizePrefix, bit hasREX_WPrefix> {
+ /// VT - This is the value type itself.
+ ValueType VT = vt;
+
+ /// InstrSuffix - This is the suffix used on instructions with this type. For
+ /// example, i8 -> "b", i16 -> "w", i32 -> "l", i64 -> "q".
+ string InstrSuffix = instrsuffix;
+
+ /// RegClass - This is the register class associated with this type. For
+ /// example, i8 -> GR8, i16 -> GR16, i32 -> GR32, i64 -> GR64.
+ RegisterClass RegClass = regclass;
+
+ /// LoadNode - This is the load node associated with this type. For
+ /// example, i8 -> loadi8, i16 -> loadi16, i32 -> loadi32, i64 -> loadi64.
+ PatFrag LoadNode = loadnode;
+
+ /// MemOperand - This is the memory operand associated with this type. For
+ /// example, i8 -> i8mem, i16 -> i16mem, i32 -> i32mem, i64 -> i64mem.
+ X86MemOperand MemOperand = memoperand;
+
+ /// ImmEncoding - This is the encoding of an immediate of this type. For
+ /// example, i8 -> Imm8, i16 -> Imm16, i32 -> Imm32. Note that i64 -> Imm32
+ /// since the immediate fields of i64 instructions is a 32-bit sign extended
+ /// value.
+ ImmType ImmEncoding = immkind;
+
+ /// ImmOperand - This is the operand kind of an immediate of this type. For
+ /// example, i8 -> i8imm, i16 -> i16imm, i32 -> i32imm. Note that i64 ->
+ /// i64i32imm since the immediate fields of i64 instructions is a 32-bit sign
+ /// extended value.
+ Operand ImmOperand = immoperand;
+
+ /// ImmOperator - This is the operator that should be used to match an
+ /// immediate of this kind in a pattern (e.g. imm, or i64immSExt32).
+ SDPatternOperator ImmOperator = immoperator;
+
+ /// Imm8Operand - This is the operand kind to use for an imm8 of this type.
+ /// For example, i8 -> <invalid>, i16 -> i16i8imm, i32 -> i32i8imm. This is
+ /// only used for instructions that have a sign-extended imm8 field form.
+ Operand Imm8Operand = imm8operand;
+
+ /// Imm8Operator - This is the operator that should be used to match an 8-bit
+ /// sign extended immediate of this kind in a pattern (e.g. imm16immSExt8).
+ SDPatternOperator Imm8Operator = imm8operator;
+
+ /// HasOddOpcode - This bit is true if the instruction should have an odd (as
+ /// opposed to even) opcode. Operations on i8 are usually even, operations on
+ /// other datatypes are odd.
+ bit HasOddOpcode = hasOddOpcode;
+
+ /// HasOpSizePrefix - This bit is set to true if the instruction should have
+ /// the 0x66 operand size prefix. This is set for i16 types.
+ bit HasOpSizePrefix = hasOpSizePrefix;
+
+ /// HasREX_WPrefix - This bit is set to true if the instruction should have
+ /// the 0x40 REX prefix. This is set for i64 types.
+ bit HasREX_WPrefix = hasREX_WPrefix;
+}
+
+def invalid_node : SDNode<"<<invalid_node>>", SDTIntLeaf,[],"<<invalid_node>>">;
+
+
+def Xi8 : X86TypeInfo<i8 , "b", GR8 , loadi8 , i8mem ,
+ Imm8 , i8imm , imm, i8imm , invalid_node,
+ 0, 0, 0>;
+def Xi16 : X86TypeInfo<i16, "w", GR16, loadi16, i16mem,
+ Imm16, i16imm, imm, i16i8imm, i16immSExt8,
+ 1, 1, 0>;
+def Xi32 : X86TypeInfo<i32, "l", GR32, loadi32, i32mem,
+ Imm32, i32imm, imm, i32i8imm, i32immSExt8,
+ 1, 0, 0>;
+def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem,
+ Imm32, i64i32imm, i64immSExt32, i64i8imm, i64immSExt8,
+ 1, 0, 1>;
+
+/// ITy - This instruction base class takes the type info for the instruction.
+/// Using this, it:
+/// 1. Concatenates together the instruction mnemonic with the appropriate
+/// suffix letter, a tab, and the arguments.
+/// 2. Infers whether the instruction should have a 0x66 prefix byte.
+/// 3. Infers whether the instruction should have a 0x40 REX_W prefix.
+/// 4. Infers whether the low bit of the opcode should be 0 (for i8 operations)
+/// or 1 (for i16,i32,i64 operations).
+class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins,
+ string mnemonic, string args, list<dag> pattern>
+ : I<{opcode{7}, opcode{6}, opcode{5}, opcode{4},
+ opcode{3}, opcode{2}, opcode{1}, typeinfo.HasOddOpcode },
+ f, outs, ins,
+ !strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern> {
+
+ // Infer instruction prefixes from type info.
+ let hasOpSizePrefix = typeinfo.HasOpSizePrefix;
+ let hasREX_WPrefix = typeinfo.HasREX_WPrefix;
+}
+
+// BinOpRR - Instructions like "add reg, reg, reg".
+class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ dag outlist, list<dag> pattern, Format f = MRMDestReg>
+ : ITy<opcode, f, typeinfo, outlist,
+ (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern>;
+
+// BinOpRR_R - Instructions like "add reg, reg, reg", where the pattern has
+// just a regclass (no eflags) as a result.
+class BinOpRR_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst,
+ (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>;
+
+// BinOpRR_F - Instructions like "cmp reg, Reg", where the pattern has
+// just a EFLAGS as a result.
+class BinOpRR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDPatternOperator opnode, Format f = MRMDestReg>
+ : BinOpRR<opcode, mnemonic, typeinfo, (outs),
+ [(set EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))],
+ f>;
+
+// BinOpRR_RF - Instructions like "add reg, reg, reg", where the pattern has
+// both a regclass and EFLAGS as a result.
+class BinOpRR_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>;
+
+// BinOpRR_RFF - Instructions like "adc reg, reg, reg", where the pattern has
+// both a regclass and EFLAGS as a result, and has EFLAGS as input.
+class BinOpRR_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2,
+ EFLAGS))]>;
+
+// BinOpRR_Rev - Instructions like "add reg, reg, reg" (reversed encoding).
+class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
+ : ITy<opcode, MRMSrcReg, typeinfo,
+ (outs typeinfo.RegClass:$dst),
+ (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
+ mnemonic, "{$src2, $dst|$dst, $src2}", []> {
+ // The disassembler should know about this, but not the asmparser.
+ let isCodeGenOnly = 1;
+}
+
+// BinOpRM - Instructions like "add reg, reg, [mem]".
+class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ dag outlist, list<dag> pattern>
+ : ITy<opcode, MRMSrcMem, typeinfo, outlist,
+ (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern>;
+
+// BinOpRM_R - Instructions like "add reg, reg, [mem]".
+class BinOpRM_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst,
+ (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>;
+
+// BinOpRM_F - Instructions like "cmp reg, [mem]".
+class BinOpRM_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDPatternOperator opnode>
+ : BinOpRM<opcode, mnemonic, typeinfo, (outs),
+ [(set EFLAGS,
+ (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>;
+
+// BinOpRM_RF - Instructions like "add reg, reg, [mem]".
+class BinOpRM_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>;
+
+// BinOpRM_RFF - Instructions like "adc reg, reg, [mem]".
+class BinOpRM_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2),
+ EFLAGS))]>;
+
+// BinOpRI - Instructions like "add reg, reg, imm".
+class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ Format f, dag outlist, list<dag> pattern>
+ : ITy<opcode, f, typeinfo, outlist,
+ (ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern> {
+ let ImmT = typeinfo.ImmEncoding;
+}
+
+// BinOpRI_R - Instructions like "add reg, reg, imm".
+class BinOpRI_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst,
+ (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
+
+// BinOpRI_F - Instructions like "cmp reg, imm".
+class BinOpRI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDPatternOperator opnode, Format f>
+ : BinOpRI<opcode, mnemonic, typeinfo, f, (outs),
+ [(set EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
+
+// BinOpRI_RF - Instructions like "add reg, reg, imm".
+class BinOpRI_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
+
+// BinOpRI_RFF - Instructions like "adc reg, reg, imm".
+class BinOpRI_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2,
+ EFLAGS))]>;
+
+// BinOpRI8 - Instructions like "add reg, reg, imm8".
+class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ Format f, dag outlist, list<dag> pattern>
+ : ITy<opcode, f, typeinfo, outlist,
+ (ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern> {
+ let ImmT = Imm8; // Always 8-bit immediate.
+}
+
+// BinOpRI8_R - Instructions like "add reg, reg, imm8".
+class BinOpRI8_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst,
+ (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>;
+
+// BinOpRI8_F - Instructions like "cmp reg, imm8".
+class BinOpRI8_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs),
+ [(set EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>;
+
+// BinOpRI8_RF - Instructions like "add reg, reg, imm8".
+class BinOpRI8_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>;
+
+// BinOpRI8_RFF - Instructions like "adc reg, reg, imm8".
+class BinOpRI8_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2,
+ EFLAGS))]>;
+
+// BinOpMR - Instructions like "add [mem], reg".
+class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ list<dag> pattern>
+ : ITy<opcode, MRMDestMem, typeinfo,
+ (outs), (ins typeinfo.MemOperand:$dst, typeinfo.RegClass:$src),
+ mnemonic, "{$src, $dst|$dst, $src}", pattern>;
+
+// BinOpMR_RMW - Instructions like "add [mem], reg".
+class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpMR<opcode, mnemonic, typeinfo,
+ [(store (opnode (load addr:$dst), typeinfo.RegClass:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMR_RMW_FF - Instructions like "adc [mem], reg".
+class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpMR<opcode, mnemonic, typeinfo,
+ [(store (opnode (load addr:$dst), typeinfo.RegClass:$src, EFLAGS),
+ addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMR_F - Instructions like "cmp [mem], reg".
+class BinOpMR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpMR<opcode, mnemonic, typeinfo,
+ [(set EFLAGS, (opnode (load addr:$dst), typeinfo.RegClass:$src))]>;
+
+// BinOpMI - Instructions like "add [mem], imm".
+class BinOpMI<string mnemonic, X86TypeInfo typeinfo,
+ Format f, list<dag> pattern, bits<8> opcode = 0x80>
+ : ITy<opcode, f, typeinfo,
+ (outs), (ins typeinfo.MemOperand:$dst, typeinfo.ImmOperand:$src),
+ mnemonic, "{$src, $dst|$dst, $src}", pattern> {
+ let ImmT = typeinfo.ImmEncoding;
+}
+
+// BinOpMI_RMW - Instructions like "add [mem], imm".
+class BinOpMI_RMW<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI<mnemonic, typeinfo, f,
+ [(store (opnode (typeinfo.VT (load addr:$dst)),
+ typeinfo.ImmOperator:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMI_RMW_FF - Instructions like "adc [mem], imm".
+class BinOpMI_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI<mnemonic, typeinfo, f,
+ [(store (opnode (typeinfo.VT (load addr:$dst)),
+ typeinfo.ImmOperator:$src, EFLAGS), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMI_F - Instructions like "cmp [mem], imm".
+class BinOpMI_F<string mnemonic, X86TypeInfo typeinfo,
+ SDPatternOperator opnode, Format f, bits<8> opcode = 0x80>
+ : BinOpMI<mnemonic, typeinfo, f,
+ [(set EFLAGS, (opnode (typeinfo.VT (load addr:$dst)),
+ typeinfo.ImmOperator:$src))],
+ opcode>;
+
+// BinOpMI8 - Instructions like "add [mem], imm8".
+class BinOpMI8<string mnemonic, X86TypeInfo typeinfo,
+ Format f, list<dag> pattern>
+ : ITy<0x82, f, typeinfo,
+ (outs), (ins typeinfo.MemOperand:$dst, typeinfo.Imm8Operand:$src),
+ mnemonic, "{$src, $dst|$dst, $src}", pattern> {
+ let ImmT = Imm8; // Always 8-bit immediate.
+}
+
+// BinOpMI8_RMW - Instructions like "add [mem], imm8".
+class BinOpMI8_RMW<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI8<mnemonic, typeinfo, f,
+ [(store (opnode (load addr:$dst),
+ typeinfo.Imm8Operator:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMI8_RMW_FF - Instructions like "adc [mem], imm8".
+class BinOpMI8_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI8<mnemonic, typeinfo, f,
+ [(store (opnode (load addr:$dst),
+ typeinfo.Imm8Operator:$src, EFLAGS), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMI8_F - Instructions like "cmp [mem], imm8".
+class BinOpMI8_F<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI8<mnemonic, typeinfo, f,
+ [(set EFLAGS, (opnode (load addr:$dst),
+ typeinfo.Imm8Operator:$src))]>;
+
+// BinOpAI - Instructions like "add %eax, %eax, imm".
+class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ Register areg>
+ : ITy<opcode, RawFrm, typeinfo,
+ (outs), (ins typeinfo.ImmOperand:$src),
+ mnemonic, !strconcat("{$src, %", areg.AsmName, "|%",
+ areg.AsmName, ", $src}"), []> {
+ let ImmT = typeinfo.ImmEncoding;
+ let Uses = [areg];
+ let Defs = [areg];
+}
+
+/// ArithBinOp_RF - This is an arithmetic binary operator where the pattern is
+/// defined with "(set GPR:$dst, EFLAGS, (...".
+///
+/// It would be nice to get rid of the second and third argument here, but
+/// tblgen can't handle dependent type references aggressively enough: PR8330
+multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
+ string mnemonic, Format RegMRM, Format MemMRM,
+ SDNode opnodeflag, SDNode opnode,
+ bit CommutableRR, bit ConvertibleToThreeAddress> {
+ let Defs = [EFLAGS] in {
+ let Constraints = "$src1 = $dst" in {
+ let isCommutable = CommutableRR,
+ isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ def #NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
+ def #NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>;
+ def #NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>;
+ def #NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>;
+ } // isCommutable
+
+ def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
+ def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
+ def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
+ def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
+
+ def #NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>;
+ def #NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>;
+ def #NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>;
+ def #NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>;
+
+ let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ // NOTE: These are order specific, we want the ri8 forms to be listed
+ // first so that they are slightly preferred to the ri forms.
+ def #NAME#16ri8 : BinOpRI8_RF<0x82, mnemonic, Xi16, opnodeflag, RegMRM>;
+ def #NAME#32ri8 : BinOpRI8_RF<0x82, mnemonic, Xi32, opnodeflag, RegMRM>;
+ def #NAME#64ri8 : BinOpRI8_RF<0x82, mnemonic, Xi64, opnodeflag, RegMRM>;
+
+ def #NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
+ def #NAME#16ri : BinOpRI_RF<0x80, mnemonic, Xi16, opnodeflag, RegMRM>;
+ def #NAME#32ri : BinOpRI_RF<0x80, mnemonic, Xi32, opnodeflag, RegMRM>;
+ def #NAME#64ri32: BinOpRI_RF<0x80, mnemonic, Xi64, opnodeflag, RegMRM>;
+ }
+ } // Constraints = "$src1 = $dst"
+
+ def #NAME#8mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi8 , opnode>;
+ def #NAME#16mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi16, opnode>;
+ def #NAME#32mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi32, opnode>;
+ def #NAME#64mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi64, opnode>;
+
+ // NOTE: These are order specific, we want the mi8 forms to be listed
+ // first so that they are slightly preferred to the mi forms.
+ def #NAME#16mi8 : BinOpMI8_RMW<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi8 : BinOpMI8_RMW<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi8 : BinOpMI8_RMW<mnemonic, Xi64, opnode, MemMRM>;
+
+ def #NAME#8mi : BinOpMI_RMW<mnemonic, Xi8 , opnode, MemMRM>;
+ def #NAME#16mi : BinOpMI_RMW<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi : BinOpMI_RMW<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi32 : BinOpMI_RMW<mnemonic, Xi64, opnode, MemMRM>;
+
+ def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>;
+ def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>;
+ def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>;
+ def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>;
+ }
+}
+
+/// ArithBinOp_RFF - This is an arithmetic binary operator where the pattern is
+/// defined with "(set GPR:$dst, EFLAGS, (node LHS, RHS, EFLAGS))" like ADC and
+/// SBB.
+///
+/// It would be nice to get rid of the second and third argument here, but
+/// tblgen can't handle dependent type references aggressively enough: PR8330
+multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
+ string mnemonic, Format RegMRM, Format MemMRM,
+ SDNode opnode, bit CommutableRR,
+ bit ConvertibleToThreeAddress> {
+ let Defs = [EFLAGS] in {
+ let Constraints = "$src1 = $dst" in {
+ let isCommutable = CommutableRR,
+ isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ def #NAME#8rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi8 , opnode>;
+ def #NAME#16rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi16, opnode>;
+ def #NAME#32rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi32, opnode>;
+ def #NAME#64rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi64, opnode>;
+ } // isCommutable
+
+ def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
+ def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
+ def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
+ def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
+
+ def #NAME#8rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi8 , opnode>;
+ def #NAME#16rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi16, opnode>;
+ def #NAME#32rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi32, opnode>;
+ def #NAME#64rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi64, opnode>;
+
+ let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ // NOTE: These are order specific, we want the ri8 forms to be listed
+ // first so that they are slightly preferred to the ri forms.
+ def #NAME#16ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi16, opnode, RegMRM>;
+ def #NAME#32ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi32, opnode, RegMRM>;
+ def #NAME#64ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi64, opnode, RegMRM>;
+
+ def #NAME#8ri : BinOpRI_RFF<0x80, mnemonic, Xi8 , opnode, RegMRM>;
+ def #NAME#16ri : BinOpRI_RFF<0x80, mnemonic, Xi16, opnode, RegMRM>;
+ def #NAME#32ri : BinOpRI_RFF<0x80, mnemonic, Xi32, opnode, RegMRM>;
+ def #NAME#64ri32: BinOpRI_RFF<0x80, mnemonic, Xi64, opnode, RegMRM>;
+ }
+ } // Constraints = "$src1 = $dst"
+
+ def #NAME#8mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi8 , opnode>;
+ def #NAME#16mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi16, opnode>;
+ def #NAME#32mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi32, opnode>;
+ def #NAME#64mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi64, opnode>;
+
+ // NOTE: These are order specific, we want the mi8 forms to be listed
+ // first so that they are slightly preferred to the mi forms.
+ def #NAME#16mi8 : BinOpMI8_RMW_FF<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi8 : BinOpMI8_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi8 : BinOpMI8_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
+
+ def #NAME#8mi : BinOpMI_RMW_FF<mnemonic, Xi8 , opnode, MemMRM>;
+ def #NAME#16mi : BinOpMI_RMW_FF<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi : BinOpMI_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi32 : BinOpMI_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
+
+ def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>;
+ def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>;
+ def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>;
+ def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>;
+ }
+}
+
+/// ArithBinOp_F - This is an arithmetic binary operator where the pattern is
+/// defined with "(set EFLAGS, (...". It would be really nice to find a way
+/// to factor this with the other ArithBinOp_*.
+///
+multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
+ string mnemonic, Format RegMRM, Format MemMRM,
+ SDNode opnode,
+ bit CommutableRR, bit ConvertibleToThreeAddress> {
+ let Defs = [EFLAGS] in {
+ let isCommutable = CommutableRR,
+ isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ def #NAME#8rr : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>;
+ def #NAME#16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>;
+ def #NAME#32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>;
+ def #NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>;
+ } // isCommutable
+
+ def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
+ def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
+ def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
+ def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
+
+ def #NAME#8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>;
+ def #NAME#16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>;
+ def #NAME#32rm : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>;
+ def #NAME#64rm : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>;
+
+ let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ // NOTE: These are order specific, we want the ri8 forms to be listed
+ // first so that they are slightly preferred to the ri forms.
+ def #NAME#16ri8 : BinOpRI8_F<0x82, mnemonic, Xi16, opnode, RegMRM>;
+ def #NAME#32ri8 : BinOpRI8_F<0x82, mnemonic, Xi32, opnode, RegMRM>;
+ def #NAME#64ri8 : BinOpRI8_F<0x82, mnemonic, Xi64, opnode, RegMRM>;
+
+ def #NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>;
+ def #NAME#16ri : BinOpRI_F<0x80, mnemonic, Xi16, opnode, RegMRM>;
+ def #NAME#32ri : BinOpRI_F<0x80, mnemonic, Xi32, opnode, RegMRM>;
+ def #NAME#64ri32: BinOpRI_F<0x80, mnemonic, Xi64, opnode, RegMRM>;
+ }
+
+ def #NAME#8mr : BinOpMR_F<BaseOpc, mnemonic, Xi8 , opnode>;
+ def #NAME#16mr : BinOpMR_F<BaseOpc, mnemonic, Xi16, opnode>;
+ def #NAME#32mr : BinOpMR_F<BaseOpc, mnemonic, Xi32, opnode>;
+ def #NAME#64mr : BinOpMR_F<BaseOpc, mnemonic, Xi64, opnode>;
+
+ // NOTE: These are order specific, we want the mi8 forms to be listed
+ // first so that they are slightly preferred to the mi forms.
+ def #NAME#16mi8 : BinOpMI8_F<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi8 : BinOpMI8_F<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi8 : BinOpMI8_F<mnemonic, Xi64, opnode, MemMRM>;
+
+ def #NAME#8mi : BinOpMI_F<mnemonic, Xi8 , opnode, MemMRM>;
+ def #NAME#16mi : BinOpMI_F<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi : BinOpMI_F<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi32 : BinOpMI_F<mnemonic, Xi64, opnode, MemMRM>;
+
+ def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>;
+ def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>;
+ def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>;
+ def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>;
+ }
+}
+
+
+defm AND : ArithBinOp_RF<0x20, 0x22, 0x24, "and", MRM4r, MRM4m,
+ X86and_flag, and, 1, 0>;
+defm OR : ArithBinOp_RF<0x08, 0x0A, 0x0C, "or", MRM1r, MRM1m,
+ X86or_flag, or, 1, 0>;
+defm XOR : ArithBinOp_RF<0x30, 0x32, 0x34, "xor", MRM6r, MRM6m,
+ X86xor_flag, xor, 1, 0>;
+defm ADD : ArithBinOp_RF<0x00, 0x02, 0x04, "add", MRM0r, MRM0m,
+ X86add_flag, add, 1, 1>;
+defm SUB : ArithBinOp_RF<0x28, 0x2A, 0x2C, "sub", MRM5r, MRM5m,
+ X86sub_flag, sub, 0, 0>;
+
+// Arithmetic.
+let Uses = [EFLAGS] in {
+ defm ADC : ArithBinOp_RFF<0x10, 0x12, 0x14, "adc", MRM2r, MRM2m, X86adc_flag,
+ 1, 0>;
+ defm SBB : ArithBinOp_RFF<0x18, 0x1A, 0x1C, "sbb", MRM3r, MRM3m, X86sbb_flag,
+ 0, 0>;
+}
+
+defm CMP : ArithBinOp_F<0x38, 0x3A, 0x3C, "cmp", MRM7r, MRM7m, X86cmp, 0, 0>;
+
+
+//===----------------------------------------------------------------------===//
+// Semantically, test instructions are similar like AND, except they don't
+// generate a result. From an encoding perspective, they are very different:
+// they don't have all the usual imm8 and REV forms, and are encoded into a
+// different space.
+def X86testpat : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86cmp (and_su node:$lhs, node:$rhs), 0)>;
+
+let Defs = [EFLAGS] in {
+ let isCommutable = 1 in {
+ def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , X86testpat, MRMSrcReg>;
+ def TEST16rr : BinOpRR_F<0x84, "test", Xi16, X86testpat, MRMSrcReg>;
+ def TEST32rr : BinOpRR_F<0x84, "test", Xi32, X86testpat, MRMSrcReg>;
+ def TEST64rr : BinOpRR_F<0x84, "test", Xi64, X86testpat, MRMSrcReg>;
+ } // isCommutable
+
+ def TEST8rm : BinOpRM_F<0x84, "test", Xi8 , X86testpat>;
+ def TEST16rm : BinOpRM_F<0x84, "test", Xi16, X86testpat>;
+ def TEST32rm : BinOpRM_F<0x84, "test", Xi32, X86testpat>;
+ def TEST64rm : BinOpRM_F<0x84, "test", Xi64, X86testpat>;
+
+ def TEST8ri : BinOpRI_F<0xF6, "test", Xi8 , X86testpat, MRM0r>;
+ def TEST16ri : BinOpRI_F<0xF6, "test", Xi16, X86testpat, MRM0r>;
+ def TEST32ri : BinOpRI_F<0xF6, "test", Xi32, X86testpat, MRM0r>;
+ def TEST64ri32 : BinOpRI_F<0xF6, "test", Xi64, X86testpat, MRM0r>;
+
+ def TEST8mi : BinOpMI_F<"test", Xi8 , X86testpat, MRM0m, 0xF6>;
+ def TEST16mi : BinOpMI_F<"test", Xi16, X86testpat, MRM0m, 0xF6>;
+ def TEST32mi : BinOpMI_F<"test", Xi32, X86testpat, MRM0m, 0xF6>;
+ def TEST64mi32 : BinOpMI_F<"test", Xi64, X86testpat, MRM0m, 0xF6>;
+
+ def TEST8i8 : BinOpAI<0xA8, "test", Xi8 , AL>;
+ def TEST16i16 : BinOpAI<0xA8, "test", Xi16, AX>;
+ def TEST32i32 : BinOpAI<0xA8, "test", Xi32, EAX>;
+ def TEST64i32 : BinOpAI<0xA8, "test", Xi64, RAX>;
+}
+
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index 2a6a71d..1ea8071 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -56,6 +56,31 @@ struct X86AddressMode {
: BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0), GVOpFlags(0) {
Base.Reg = 0;
}
+
+
+ void getFullAddress(SmallVectorImpl<MachineOperand> &MO) {
+ assert(Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8);
+
+ if (BaseType == X86AddressMode::RegBase)
+ MO.push_back(MachineOperand::CreateReg(Base.Reg, false, false,
+ false, false, false, 0, false));
+ else {
+ assert(BaseType == X86AddressMode::FrameIndexBase);
+ MO.push_back(MachineOperand::CreateFI(Base.FrameIndex));
+ }
+
+ MO.push_back(MachineOperand::CreateImm(Scale));
+ MO.push_back(MachineOperand::CreateReg(IndexReg, false, false,
+ false, false, false, 0, false));
+
+ if (GV)
+ MO.push_back(MachineOperand::CreateGA(GV, Disp, GVOpFlags));
+ else
+ MO.push_back(MachineOperand::CreateImm(Disp));
+
+ MO.push_back(MachineOperand::CreateReg(0, false, false,
+ false, false, false, 0, false));
+ }
};
/// addDirectMem - This function is used to add a direct memory reference to the
@@ -101,10 +126,11 @@ addFullAddress(const MachineInstrBuilder &MIB,
if (AM.BaseType == X86AddressMode::RegBase)
MIB.addReg(AM.Base.Reg);
- else if (AM.BaseType == X86AddressMode::FrameIndexBase)
+ else {
+ assert(AM.BaseType == X86AddressMode::FrameIndexBase);
MIB.addFrameIndex(AM.Base.FrameIndex);
- else
- assert (0);
+ }
+
MIB.addImm(AM.Scale).addReg(AM.IndexReg);
if (AM.GV)
MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags);
@@ -131,9 +157,8 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
if (TID.mayStore())
Flags |= MachineMemOperand::MOStore;
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
- Flags, Offset,
- MFI.getObjectSize(FI),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI, Offset),
+ Flags, MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
return addOffset(MIB.addFrameIndex(FI), Offset)
.addMemOperand(MMO);
diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td
new file mode 100644
index 0000000..3a43b22
--- /dev/null
+++ b/lib/Target/X86/X86InstrCMovSetCC.td
@@ -0,0 +1,104 @@
+//===- X86InstrCMovSetCC.td - Conditional Move and SetCC ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 conditional move and set on condition
+// instructions.
+//
+//===----------------------------------------------------------------------===//
+
+
+// SetCC instructions.
+multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> {
+ let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
+ isCommutable = 1 in {
+ def #NAME#16rr
+ : I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR16:$dst,
+ (X86cmov GR16:$src1, GR16:$src2, CondNode, EFLAGS))]>,TB,OpSize;
+ def #NAME#32rr
+ : I<opc, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ !strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR32:$dst,
+ (X86cmov GR32:$src1, GR32:$src2, CondNode, EFLAGS))]>, TB;
+ def #NAME#64rr
+ :RI<opc, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ !strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR64:$dst,
+ (X86cmov GR64:$src1, GR64:$src2, CondNode, EFLAGS))]>, TB;
+ }
+
+ let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" in {
+ def #NAME#16rm
+ : I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ CondNode, EFLAGS))]>, TB, OpSize;
+ def #NAME#32rm
+ : I<opc, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ !strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ CondNode, EFLAGS))]>, TB;
+ def #NAME#64rm
+ :RI<opc, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ !strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ CondNode, EFLAGS))]>, TB;
+ } // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst"
+} // end multiclass
+
+
+// Conditional Moves.
+defm CMOVO : CMOV<0x40, "cmovo" , X86_COND_O>;
+defm CMOVNO : CMOV<0x41, "cmovno", X86_COND_NO>;
+defm CMOVB : CMOV<0x42, "cmovb" , X86_COND_B>;
+defm CMOVAE : CMOV<0x43, "cmovae", X86_COND_AE>;
+defm CMOVE : CMOV<0x44, "cmove" , X86_COND_E>;
+defm CMOVNE : CMOV<0x45, "cmovne", X86_COND_NE>;
+defm CMOVBE : CMOV<0x46, "cmovbe", X86_COND_BE>;
+defm CMOVA : CMOV<0x47, "cmova" , X86_COND_A>;
+defm CMOVS : CMOV<0x48, "cmovs" , X86_COND_S>;
+defm CMOVNS : CMOV<0x49, "cmovns", X86_COND_NS>;
+defm CMOVP : CMOV<0x4A, "cmovp" , X86_COND_P>;
+defm CMOVNP : CMOV<0x4B, "cmovnp", X86_COND_NP>;
+defm CMOVL : CMOV<0x4C, "cmovl" , X86_COND_L>;
+defm CMOVGE : CMOV<0x4D, "cmovge", X86_COND_GE>;
+defm CMOVLE : CMOV<0x4E, "cmovle", X86_COND_LE>;
+defm CMOVG : CMOV<0x4F, "cmovg" , X86_COND_G>;
+
+
+// SetCC instructions.
+multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> {
+ let Uses = [EFLAGS] in {
+ def r : I<opc, MRM0r, (outs GR8:$dst), (ins),
+ !strconcat(Mnemonic, "\t$dst"),
+ [(set GR8:$dst, (X86setcc OpNode, EFLAGS))]>, TB;
+ def m : I<opc, MRM0m, (outs), (ins i8mem:$dst),
+ !strconcat(Mnemonic, "\t$dst"),
+ [(store (X86setcc OpNode, EFLAGS), addr:$dst)]>, TB;
+ } // Uses = [EFLAGS]
+}
+
+defm SETO : SETCC<0x90, "seto", X86_COND_O>; // is overflow bit set
+defm SETNO : SETCC<0x91, "setno", X86_COND_NO>; // is overflow bit not set
+defm SETB : SETCC<0x92, "setb", X86_COND_B>; // unsigned less than
+defm SETAE : SETCC<0x93, "setae", X86_COND_AE>; // unsigned greater or equal
+defm SETE : SETCC<0x94, "sete", X86_COND_E>; // equal to
+defm SETNE : SETCC<0x95, "setne", X86_COND_NE>; // not equal to
+defm SETBE : SETCC<0x96, "setbe", X86_COND_BE>; // unsigned less than or equal
+defm SETA : SETCC<0x97, "seta", X86_COND_A>; // unsigned greater than
+defm SETS : SETCC<0x98, "sets", X86_COND_S>; // is signed bit set
+defm SETNS : SETCC<0x99, "setns", X86_COND_NS>; // is not signed
+defm SETP : SETCC<0x9A, "setp", X86_COND_P>; // is parity bit set
+defm SETNP : SETCC<0x9B, "setnp", X86_COND_NP>; // is parity bit not set
+defm SETL : SETCC<0x9C, "setl", X86_COND_L>; // signed less than
+defm SETGE : SETCC<0x9D, "setge", X86_COND_GE>; // signed greater or equal
+defm SETLE : SETCC<0x9E, "setle", X86_COND_LE>; // signed less than or equal
+defm SETG : SETCC<0x9F, "setg", X86_COND_G>; // signed greater than
+
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
new file mode 100644
index 0000000..4c915d9
--- /dev/null
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -0,0 +1,1626 @@
+//===- X86InstrCompiler.td - Compiler Pseudos and Patterns -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the various pseudo instructions used by the compiler,
+// as well as Pat patterns used during instruction selection.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Pattern Matching Support
+
+def GetLo32XForm : SDNodeXForm<imm, [{
+ // Transformation function: get the low 32 bits.
+ return getI32Imm((unsigned)N->getZExtValue());
+}]>;
+
+def GetLo8XForm : SDNodeXForm<imm, [{
+ // Transformation function: get the low 8 bits.
+ return getI8Imm((uint8_t)N->getZExtValue());
+}]>;
+
+
+//===----------------------------------------------------------------------===//
+// Random Pseudo Instructions.
+
+// PIC base construction. This expands to code that looks like this:
+// call $next_inst
+// popl %destreg"
+let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in
+ def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label),
+ "", []>;
+
+
+// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into
+// a stack adjustment and the codegen must know that they may modify the stack
+// pointer before prolog-epilog rewriting occurs.
+// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
+// sub / add which can clobber EFLAGS.
+let Defs = [ESP, EFLAGS], Uses = [ESP] in {
+def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt),
+ "#ADJCALLSTACKDOWN",
+ [(X86callseq_start timm:$amt)]>,
+ Requires<[In32BitMode]>;
+def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "#ADJCALLSTACKUP",
+ [(X86callseq_end timm:$amt1, timm:$amt2)]>,
+ Requires<[In32BitMode]>;
+}
+
+// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
+// a stack adjustment and the codegen must know that they may modify the stack
+// pointer before prolog-epilog rewriting occurs.
+// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
+// sub / add which can clobber EFLAGS.
+let Defs = [RSP, EFLAGS], Uses = [RSP] in {
+def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt),
+ "#ADJCALLSTACKDOWN",
+ [(X86callseq_start timm:$amt)]>,
+ Requires<[In64BitMode]>;
+def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "#ADJCALLSTACKUP",
+ [(X86callseq_end timm:$amt1, timm:$amt2)]>,
+ Requires<[In64BitMode]>;
+}
+
+
+
+// x86-64 va_start lowering magic.
+let usesCustomInserter = 1 in {
+def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
+ (outs),
+ (ins GR8:$al,
+ i64imm:$regsavefi, i64imm:$offset,
+ variable_ops),
+ "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
+ [(X86vastart_save_xmm_regs GR8:$al,
+ imm:$regsavefi,
+ imm:$offset)]>;
+
+// The VAARG_64 pseudo-instruction takes the address of the va_list,
+// and places the address of the next argument into a register.
+let Defs = [EFLAGS] in
+def VAARG_64 : I<0, Pseudo,
+ (outs GR64:$dst),
+ (ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align),
+ "#VAARG_64 $dst, $ap, $size, $mode, $align",
+ [(set GR64:$dst,
+ (X86vaarg64 addr:$ap, imm:$size, imm:$mode, imm:$align)),
+ (implicit EFLAGS)]>;
+
+// Dynamic stack allocation yields a _chkstk or _alloca call for all Windows
+// targets. These calls are needed to probe the stack when allocating more than
+// 4k bytes in one go. Touching the stack at 4K increments is necessary to
+// ensure that the guard pages used by the OS virtual memory manager are
+// allocated in correct sequence.
+// The main point of having separate instruction are extra unmodelled effects
+// (compared to ordinary calls) like stack pointer change.
+
+let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
+ def WIN_ALLOCA : I<0, Pseudo, (outs), (ins),
+ "# dynamic stack allocation",
+ [(X86WinAlloca)]>;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// EH Pseudo Instructions
+//
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasCtrlDep = 1, isCodeGenOnly = 1 in {
+def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
+ "ret\t#eh_return, addr: $addr",
+ [(X86ehret GR32:$addr)]>;
+
+}
+
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasCtrlDep = 1, isCodeGenOnly = 1 in {
+def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
+ "ret\t#eh_return, addr: $addr",
+ [(X86ehret GR64:$addr)]>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Alias Instructions
+//===----------------------------------------------------------------------===//
+
+// Alias instructions that map movr0 to xor.
+// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
+// FIXME: Set encoding to pseudo.
+let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
+ isCodeGenOnly = 1 in {
+def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
+ [(set GR8:$dst, 0)]>;
+
+// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
+// encoding and avoids a partial-register update sometimes, but doing so
+// at isel time interferes with rematerialization in the current register
+// allocator. For now, this is rewritten when the instruction is lowered
+// to an MCInst.
+def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
+ "",
+ [(set GR16:$dst, 0)]>, OpSize;
+
+// FIXME: Set encoding to pseudo.
+def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
+ [(set GR32:$dst, 0)]>;
+}
+
+// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a
+// smaller encoding, but doing so at isel time interferes with rematerialization
+// in the current register allocator. For now, this is rewritten when the
+// instruction is lowered to an MCInst.
+// FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove
+// when we have a better way to specify isel priority.
+let Defs = [EFLAGS], isCodeGenOnly=1,
+ AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "",
+ [(set GR64:$dst, 0)]>;
+
+// Materialize i64 constant where top 32-bits are zero. This could theoretically
+// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
+// that would make it more difficult to rematerialize.
+let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
+ isCodeGenOnly = 1 in
+def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
+ "", [(set GR64:$dst, i64immZExt32:$src)]>;
+
+// Use sbb to materialize carry bit.
+let Uses = [EFLAGS], Defs = [EFLAGS], isCodeGenOnly = 1 in {
+// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
+// However, Pat<> can't replicate the destination reg into the inputs of the
+// result.
+// FIXME: Change these to have encoding Pseudo when X86MCCodeEmitter replaces
+// X86CodeEmitter.
+def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), "",
+ [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), "",
+ [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>,
+ OpSize;
+def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), "",
+ [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "",
+ [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+} // isCodeGenOnly
+
+
+def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C16r)>;
+def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C32r)>;
+def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C64r)>;
+
+def : Pat<(i16 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C16r)>;
+def : Pat<(i32 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C32r)>;
+def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C64r)>;
+
+// We canonicalize 'setb' to "(and (sbb reg,reg), 1)" on the hope that the and
+// will be eliminated and that the sbb can be extended up to a wider type. When
+// this happens, it is great. However, if we are left with an 8-bit sbb and an
+// and, we might as well just match it as a setb.
+def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1),
+ (SETBr)>;
+
+//===----------------------------------------------------------------------===//
+// String Pseudo Instructions
+//
+let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
+def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
+ [(X86rep_movs i8)]>, REP;
+def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
+ [(X86rep_movs i16)]>, REP, OpSize;
+def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
+ [(X86rep_movs i32)]>, REP;
+}
+
+let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in
+def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
+ [(X86rep_movs i64)]>, REP;
+
+
+// FIXME: Should use "(X86rep_stos AL)" as the pattern.
+let Defs = [ECX,EDI], Uses = [AL,ECX,EDI], isCodeGenOnly = 1 in
+def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
+ [(X86rep_stos i8)]>, REP;
+let Defs = [ECX,EDI], Uses = [AX,ECX,EDI], isCodeGenOnly = 1 in
+def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
+ [(X86rep_stos i16)]>, REP, OpSize;
+let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI], isCodeGenOnly = 1 in
+def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
+ [(X86rep_stos i32)]>, REP;
+
+let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in
+def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
+ [(X86rep_stos i64)]>, REP;
+
+
+//===----------------------------------------------------------------------===//
+// Thread Local Storage Instructions
+//
+
+// ELF TLS Support
+// All calls clobber the non-callee saved registers. ESP is marked as
+// a use to prevent stack-pointer assignments that appear immediately
+// before calls from potentially appearing dead.
+let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [ESP] in
+def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
+ "# TLS_addr32",
+ [(X86tlsaddr tls32addr:$sym)]>,
+ Requires<[In32BitMode]>;
+
+// All calls clobber the non-callee saved registers. RSP is marked as
+// a use to prevent stack-pointer assignments that appear immediately
+// before calls from potentially appearing dead.
+let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [RSP] in
+def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
+ "# TLS_addr64",
+ [(X86tlsaddr tls64addr:$sym)]>,
+ Requires<[In64BitMode]>;
+
+// Darwin TLS Support
+// For i386, the address of the thunk is passed on the stack, on return the
+// address of the variable is in %eax. %ecx is trashed during the function
+// call. All other registers are preserved.
+let Defs = [EAX, ECX, EFLAGS],
+ Uses = [ESP],
+ usesCustomInserter = 1 in
+def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
+ "# TLSCall_32",
+ [(X86TLSCall addr:$sym)]>,
+ Requires<[In32BitMode]>;
+
+// For x86_64, the address of the thunk is passed in %rdi, on return
+// the address of the variable is in %rax. All other registers are preserved.
+let Defs = [RAX, EFLAGS],
+ Uses = [RSP, RDI],
+ usesCustomInserter = 1 in
+def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
+ "# TLSCall_64",
+ [(X86TLSCall addr:$sym)]>,
+ Requires<[In64BitMode]>;
+
+
+//===----------------------------------------------------------------------===//
+// Conditional Move Pseudo Instructions
+
+let Constraints = "$src1 = $dst" in {
+
+// Conditional moves
+let Uses = [EFLAGS] in {
+
+// X86 doesn't have 8-bit conditional moves. Use a customInserter to
+// emit control flow. An alternative to this is to mark i8 SELECT as Promote,
+// however that requires promoting the operands, and can induce additional
+// i8 register pressure. Note that CMOV_GR8 is conservatively considered to
+// clobber EFLAGS, because if one of the operands is zero, the expansion
+// could involve an xor.
+let usesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] in {
+def CMOV_GR8 : I<0, Pseudo,
+ (outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond),
+ "#CMOV_GR8 PSEUDO!",
+ [(set GR8:$dst, (X86cmov GR8:$src1, GR8:$src2,
+ imm:$cond, EFLAGS))]>;
+
+let Predicates = [NoCMov] in {
+def CMOV_GR32 : I<0, Pseudo,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$cond),
+ "#CMOV_GR32* PSEUDO!",
+ [(set GR32:$dst,
+ (X86cmov GR32:$src1, GR32:$src2, imm:$cond, EFLAGS))]>;
+def CMOV_GR16 : I<0, Pseudo,
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$cond),
+ "#CMOV_GR16* PSEUDO!",
+ [(set GR16:$dst,
+ (X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>;
+def CMOV_RFP32 : I<0, Pseudo,
+ (outs RFP32:$dst),
+ (ins RFP32:$src1, RFP32:$src2, i8imm:$cond),
+ "#CMOV_RFP32 PSEUDO!",
+ [(set RFP32:$dst,
+ (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond,
+ EFLAGS))]>;
+def CMOV_RFP64 : I<0, Pseudo,
+ (outs RFP64:$dst),
+ (ins RFP64:$src1, RFP64:$src2, i8imm:$cond),
+ "#CMOV_RFP64 PSEUDO!",
+ [(set RFP64:$dst,
+ (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond,
+ EFLAGS))]>;
+def CMOV_RFP80 : I<0, Pseudo,
+ (outs RFP80:$dst),
+ (ins RFP80:$src1, RFP80:$src2, i8imm:$cond),
+ "#CMOV_RFP80 PSEUDO!",
+ [(set RFP80:$dst,
+ (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond,
+ EFLAGS))]>;
+} // Predicates = [NoCMov]
+} // UsesCustomInserter = 1, Constraints = "", Defs = [EFLAGS]
+} // Uses = [EFLAGS]
+
+} // Constraints = "$src1 = $dst" in
+
+
+//===----------------------------------------------------------------------===//
+// Atomic Instruction Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+// Atomic exchange, and, or, xor
+let Constraints = "$val = $dst", Defs = [EFLAGS],
+ usesCustomInserter = 1 in {
+
+def ATOMAND8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMAND8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_and_8 addr:$ptr, GR8:$val))]>;
+def ATOMOR8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMOR8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_or_8 addr:$ptr, GR8:$val))]>;
+def ATOMXOR8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMXOR8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_xor_8 addr:$ptr, GR8:$val))]>;
+def ATOMNAND8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMNAND8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_nand_8 addr:$ptr, GR8:$val))]>;
+
+def ATOMAND16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMAND16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_and_16 addr:$ptr, GR16:$val))]>;
+def ATOMOR16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMOR16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_or_16 addr:$ptr, GR16:$val))]>;
+def ATOMXOR16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMXOR16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_xor_16 addr:$ptr, GR16:$val))]>;
+def ATOMNAND16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMNAND16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_nand_16 addr:$ptr, GR16:$val))]>;
+def ATOMMIN16: I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val),
+ "#ATOMMIN16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_min_16 addr:$ptr, GR16:$val))]>;
+def ATOMMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMMAX16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_max_16 addr:$ptr, GR16:$val))]>;
+def ATOMUMIN16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMUMIN16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_umin_16 addr:$ptr, GR16:$val))]>;
+def ATOMUMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMUMAX16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_umax_16 addr:$ptr, GR16:$val))]>;
+
+
+def ATOMAND32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMAND32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_and_32 addr:$ptr, GR32:$val))]>;
+def ATOMOR32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMOR32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_or_32 addr:$ptr, GR32:$val))]>;
+def ATOMXOR32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMXOR32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_xor_32 addr:$ptr, GR32:$val))]>;
+def ATOMNAND32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMNAND32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_nand_32 addr:$ptr, GR32:$val))]>;
+def ATOMMIN32: I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val),
+ "#ATOMMIN32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_min_32 addr:$ptr, GR32:$val))]>;
+def ATOMMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMMAX32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_max_32 addr:$ptr, GR32:$val))]>;
+def ATOMUMIN32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMUMIN32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_umin_32 addr:$ptr, GR32:$val))]>;
+def ATOMUMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMUMAX32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_umax_32 addr:$ptr, GR32:$val))]>;
+
+
+
+def ATOMAND64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMAND64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_and_64 addr:$ptr, GR64:$val))]>;
+def ATOMOR64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMOR64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_or_64 addr:$ptr, GR64:$val))]>;
+def ATOMXOR64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMXOR64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_xor_64 addr:$ptr, GR64:$val))]>;
+def ATOMNAND64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMNAND64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_nand_64 addr:$ptr, GR64:$val))]>;
+def ATOMMIN64: I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$ptr, GR64:$val),
+ "#ATOMMIN64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_min_64 addr:$ptr, GR64:$val))]>;
+def ATOMMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMMAX64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_max_64 addr:$ptr, GR64:$val))]>;
+def ATOMUMIN64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMUMIN64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_umin_64 addr:$ptr, GR64:$val))]>;
+def ATOMUMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMUMAX64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_umax_64 addr:$ptr, GR64:$val))]>;
+}
+
+let Constraints = "$val1 = $dst1, $val2 = $dst2",
+ Defs = [EFLAGS, EAX, EBX, ECX, EDX],
+ Uses = [EAX, EBX, ECX, EDX],
+ mayLoad = 1, mayStore = 1,
+ usesCustomInserter = 1 in {
+def ATOMAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMAND6432 PSEUDO!", []>;
+def ATOMOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMOR6432 PSEUDO!", []>;
+def ATOMXOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMXOR6432 PSEUDO!", []>;
+def ATOMNAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMNAND6432 PSEUDO!", []>;
+def ATOMADD6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMADD6432 PSEUDO!", []>;
+def ATOMSUB6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMSUB6432 PSEUDO!", []>;
+def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMSWAP6432 PSEUDO!", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Normal-Instructions-With-Lock-Prefix Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+// FIXME: Use normal instructions and add lock prefix dynamically.
+
+// Memory barriers
+
+// TODO: Get this to fold the constant into the instruction.
+let isCodeGenOnly = 1 in
+def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
+ "lock\n\t"
+ "or{l}\t{$zero, $dst|$dst, $zero}",
+ []>, Requires<[In32BitMode]>, LOCK;
+
+let hasSideEffects = 1 in
+def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
+ "#MEMBARRIER",
+ [(X86MemBarrier)]>, Requires<[HasSSE2]>;
+
+// TODO: Get this to fold the constant into the instruction.
+let hasSideEffects = 1, Defs = [ESP], isCodeGenOnly = 1 in
+def Int_MemBarrierNoSSE64 : RI<0x09, MRM1r, (outs), (ins GR64:$zero),
+ "lock\n\t"
+ "or{q}\t{$zero, (%rsp)|(%rsp), $zero}",
+ [(X86MemBarrierNoSSE GR64:$zero)]>,
+ Requires<[In64BitMode]>, LOCK;
+
+
+// Optimized codegen when the non-memory output is not used.
+let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
+def LOCK_ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
+ "lock\n\t"
+ "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD16mr : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "lock\n\t"
+ "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_ADD32mr : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "lock\n\t"
+ "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD64mr : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "lock\n\t"
+ "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+
+def LOCK_ADD8mi : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2),
+ "lock\n\t"
+ "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD16mi : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2),
+ "lock\n\t"
+ "add{w}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD32mi : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2),
+ "lock\n\t"
+ "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD64mi32 : RIi32<0x81, MRM0m, (outs),
+ (ins i64mem:$dst, i64i32imm :$src2),
+ "lock\n\t"
+ "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+
+def LOCK_ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
+ "lock\n\t"
+ "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
+ "lock\n\t"
+ "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD64mi8 : RIi8<0x83, MRM0m, (outs),
+ (ins i64mem:$dst, i64i8imm :$src2),
+ "lock\n\t"
+ "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+
+def LOCK_SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
+ "lock\n\t"
+ "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB16mr : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "lock\n\t"
+ "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_SUB32mr : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "lock\n\t"
+ "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "lock\n\t"
+ "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+
+
+def LOCK_SUB8mi : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2),
+ "lock\n\t"
+ "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB16mi : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2),
+ "lock\n\t"
+ "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_SUB32mi : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2),
+ "lock\n\t"
+ "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB64mi32 : RIi32<0x81, MRM5m, (outs),
+ (ins i64mem:$dst, i64i32imm:$src2),
+ "lock\n\t"
+ "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+
+
+def LOCK_SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
+ "lock\n\t"
+ "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
+ "lock\n\t"
+ "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB64mi8 : RIi8<0x83, MRM5m, (outs),
+ (ins i64mem:$dst, i64i8imm :$src2),
+ "lock\n\t"
+ "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+
+def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
+ "lock\n\t"
+ "inc{b}\t$dst", []>, LOCK;
+def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
+ "lock\n\t"
+ "inc{w}\t$dst", []>, OpSize, LOCK;
+def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
+ "lock\n\t"
+ "inc{l}\t$dst", []>, LOCK;
+def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
+ "lock\n\t"
+ "inc{q}\t$dst", []>, LOCK;
+
+def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst),
+ "lock\n\t"
+ "dec{b}\t$dst", []>, LOCK;
+def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
+ "lock\n\t"
+ "dec{w}\t$dst", []>, OpSize, LOCK;
+def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
+ "lock\n\t"
+ "dec{l}\t$dst", []>, LOCK;
+def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
+ "lock\n\t"
+ "dec{q}\t$dst", []>, LOCK;
+}
+
+// Atomic compare and swap.
+let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
+ isCodeGenOnly = 1 in {
+def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),
+ "lock\n\t"
+ "cmpxchg8b\t$ptr",
+ [(X86cas8 addr:$ptr)]>, TB, LOCK;
+}
+let Defs = [AL, EFLAGS], Uses = [AL], isCodeGenOnly = 1 in {
+def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap),
+ "lock\n\t"
+ "cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}",
+ [(X86cas addr:$ptr, GR8:$swap, 1)]>, TB, LOCK;
+}
+
+let Defs = [AX, EFLAGS], Uses = [AX], isCodeGenOnly = 1 in {
+def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap),
+ "lock\n\t"
+ "cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}",
+ [(X86cas addr:$ptr, GR16:$swap, 2)]>, TB, OpSize, LOCK;
+}
+
+let Defs = [EAX, EFLAGS], Uses = [EAX], isCodeGenOnly = 1 in {
+def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap),
+ "lock\n\t"
+ "cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}",
+ [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK;
+}
+
+let Defs = [RAX, EFLAGS], Uses = [RAX], isCodeGenOnly = 1 in {
+def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap),
+ "lock\n\t"
+ "cmpxchgq\t$swap,$ptr",
+ [(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK;
+}
+
+// Atomic exchange and add
+let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in {
+def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr),
+ "lock\n\t"
+ "xadd{b}\t{$val, $ptr|$ptr, $val}",
+ [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>,
+ TB, LOCK;
+def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins GR16:$val, i16mem:$ptr),
+ "lock\n\t"
+ "xadd{w}\t{$val, $ptr|$ptr, $val}",
+ [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>,
+ TB, OpSize, LOCK;
+def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr),
+ "lock\n\t"
+ "xadd{l}\t{$val, $ptr|$ptr, $val}",
+ [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>,
+ TB, LOCK;
+def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val,i64mem:$ptr),
+ "lock\n\t"
+ "xadd\t$val, $ptr",
+ [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>,
+ TB, LOCK;
+}
+
+//===----------------------------------------------------------------------===//
+// Conditional Move Pseudo Instructions.
+//===----------------------------------------------------------------------===//
+
+
+// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded after
+// instruction selection into a branch sequence.
+let Uses = [EFLAGS], usesCustomInserter = 1 in {
+ def CMOV_FR32 : I<0, Pseudo,
+ (outs FR32:$dst), (ins FR32:$t, FR32:$f, i8imm:$cond),
+ "#CMOV_FR32 PSEUDO!",
+ [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond,
+ EFLAGS))]>;
+ def CMOV_FR64 : I<0, Pseudo,
+ (outs FR64:$dst), (ins FR64:$t, FR64:$f, i8imm:$cond),
+ "#CMOV_FR64 PSEUDO!",
+ [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond,
+ EFLAGS))]>;
+ def CMOV_V4F32 : I<0, Pseudo,
+ (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
+ "#CMOV_V4F32 PSEUDO!",
+ [(set VR128:$dst,
+ (v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond,
+ EFLAGS)))]>;
+ def CMOV_V2F64 : I<0, Pseudo,
+ (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
+ "#CMOV_V2F64 PSEUDO!",
+ [(set VR128:$dst,
+ (v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
+ EFLAGS)))]>;
+ def CMOV_V2I64 : I<0, Pseudo,
+ (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
+ "#CMOV_V2I64 PSEUDO!",
+ [(set VR128:$dst,
+ (v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
+ EFLAGS)))]>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// DAG Pattern Matching Rules
+//===----------------------------------------------------------------------===//
+
+// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable
+def : Pat<(i32 (X86Wrapper tconstpool :$dst)), (MOV32ri tconstpool :$dst)>;
+def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>;
+def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>;
+def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
+def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
+def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>;
+
+def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),
+ (ADD32ri GR32:$src1, tconstpool:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)),
+ (ADD32ri GR32:$src1, tjumptable:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)),
+ (ADD32ri GR32:$src1, tglobaladdr:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),
+ (ADD32ri GR32:$src1, texternalsym:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)),
+ (ADD32ri GR32:$src1, tblockaddress:$src2)>;
+
+def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),
+ (MOV32mi addr:$dst, tglobaladdr:$src)>;
+def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
+ (MOV32mi addr:$dst, texternalsym:$src)>;
+def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),
+ (MOV32mi addr:$dst, tblockaddress:$src)>;
+
+
+
+// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable when not in small
+// code model mode, should use 'movabs'. FIXME: This is really a hack, the
+// 'movabs' predicate should handle this sort of thing.
+def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
+ (MOV64ri tconstpool :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
+ (MOV64ri tjumptable :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+ (MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+ (MOV64ri texternalsym:$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
+ (MOV64ri tblockaddress:$dst)>, Requires<[FarData]>;
+
+// In static codegen with small code model, we can get the address of a label
+// into a register with 'movl'. FIXME: This is a hack, the 'imm' predicate of
+// the MOV64ri64i32 should accept these.
+def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
+ (MOV64ri64i32 tconstpool :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
+ (MOV64ri64i32 tjumptable :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+ (MOV64ri64i32 tglobaladdr :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+ (MOV64ri64i32 texternalsym:$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
+ (MOV64ri64i32 tblockaddress:$dst)>, Requires<[SmallCode]>;
+
+// In kernel code model, we can get the address of a label
+// into a register with 'movq'. FIXME: This is a hack, the 'imm' predicate of
+// the MOV64ri32 should accept these.
+def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
+ (MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
+ (MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+ (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+ (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
+ (MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>;
+
+// If we have small model and -static mode, it is safe to store global addresses
+// directly as immediates. FIXME: This is really a hack, the 'imm' predicate
+// for MOV64mi32 should handle this sort of thing.
+def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tconstpool:$src)>,
+ Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tjumptable:$src)>,
+ Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tglobaladdr:$src)>,
+ Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, texternalsym:$src)>,
+ Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tblockaddress:$src)>,
+ Requires<[NearData, IsStatic]>;
+
+
+
+// Calls
+
+// tls has some funny stuff here...
+// This corresponds to movabs $foo@tpoff, %rax
+def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
+ (MOV64ri tglobaltlsaddr :$dst)>;
+// This corresponds to add $foo@tpoff, %rax
+def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
+ (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
+// This corresponds to mov foo@tpoff(%rbx), %eax
+def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))),
+ (MOV64rm tglobaltlsaddr :$dst)>;
+
+
+// Direct PC relative function call for small code model. 32-bit displacement
+// sign extended to 64-bit.
+def : Pat<(X86call (i64 tglobaladdr:$dst)),
+ (CALL64pcrel32 tglobaladdr:$dst)>, Requires<[NotWin64]>;
+def : Pat<(X86call (i64 texternalsym:$dst)),
+ (CALL64pcrel32 texternalsym:$dst)>, Requires<[NotWin64]>;
+
+def : Pat<(X86call (i64 tglobaladdr:$dst)),
+ (WINCALL64pcrel32 tglobaladdr:$dst)>, Requires<[IsWin64]>;
+def : Pat<(X86call (i64 texternalsym:$dst)),
+ (WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>;
+
+// tailcall stuff
+def : Pat<(X86tcret GR32_TC:$dst, imm:$off),
+ (TCRETURNri GR32_TC:$dst, imm:$off)>,
+ Requires<[In32BitMode]>;
+
+// FIXME: This is disabled for 32-bit PIC mode because the global base
+// register which is part of the address mode may be assigned a
+// callee-saved register.
+def : Pat<(X86tcret (load addr:$dst), imm:$off),
+ (TCRETURNmi addr:$dst, imm:$off)>,
+ Requires<[In32BitMode, IsNotPIC]>;
+
+def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
+ (TCRETURNdi texternalsym:$dst, imm:$off)>,
+ Requires<[In32BitMode]>;
+
+def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
+ (TCRETURNdi texternalsym:$dst, imm:$off)>,
+ Requires<[In32BitMode]>;
+
+def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
+ (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
+ Requires<[In64BitMode]>;
+
+def : Pat<(X86tcret (load addr:$dst), imm:$off),
+ (TCRETURNmi64 addr:$dst, imm:$off)>,
+ Requires<[In64BitMode]>;
+
+def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
+ (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
+ Requires<[In64BitMode]>;
+
+def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
+ (TCRETURNdi64 texternalsym:$dst, imm:$off)>,
+ Requires<[In64BitMode]>;
+
+// Normal calls, with various flavors of addresses.
+def : Pat<(X86call (i32 tglobaladdr:$dst)),
+ (CALLpcrel32 tglobaladdr:$dst)>;
+def : Pat<(X86call (i32 texternalsym:$dst)),
+ (CALLpcrel32 texternalsym:$dst)>;
+def : Pat<(X86call (i32 imm:$dst)),
+ (CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>;
+
+// Comparisons.
+
+// TEST R,R is smaller than CMP R,0
+def : Pat<(X86cmp GR8:$src1, 0),
+ (TEST8rr GR8:$src1, GR8:$src1)>;
+def : Pat<(X86cmp GR16:$src1, 0),
+ (TEST16rr GR16:$src1, GR16:$src1)>;
+def : Pat<(X86cmp GR32:$src1, 0),
+ (TEST32rr GR32:$src1, GR32:$src1)>;
+def : Pat<(X86cmp GR64:$src1, 0),
+ (TEST64rr GR64:$src1, GR64:$src1)>;
+
+// Conditional moves with folded loads with operands swapped and conditions
+// inverted.
+multiclass CMOVmr<PatLeaf InvertedCond, Instruction Inst16, Instruction Inst32,
+ Instruction Inst64> {
+ def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS),
+ (Inst16 GR16:$src2, addr:$src1)>;
+ def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS),
+ (Inst32 GR32:$src2, addr:$src1)>;
+ def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS),
+ (Inst64 GR64:$src2, addr:$src1)>;
+}
+
+defm : CMOVmr<X86_COND_B , CMOVAE16rm, CMOVAE32rm, CMOVAE64rm>;
+defm : CMOVmr<X86_COND_AE, CMOVB16rm , CMOVB32rm , CMOVB64rm>;
+defm : CMOVmr<X86_COND_E , CMOVNE16rm, CMOVNE32rm, CMOVNE64rm>;
+defm : CMOVmr<X86_COND_NE, CMOVE16rm , CMOVE32rm , CMOVE64rm>;
+defm : CMOVmr<X86_COND_BE, CMOVA16rm , CMOVA32rm , CMOVA64rm>;
+defm : CMOVmr<X86_COND_A , CMOVBE16rm, CMOVBE32rm, CMOVBE64rm>;
+defm : CMOVmr<X86_COND_L , CMOVGE16rm, CMOVGE32rm, CMOVGE64rm>;
+defm : CMOVmr<X86_COND_GE, CMOVL16rm , CMOVL32rm , CMOVL64rm>;
+defm : CMOVmr<X86_COND_LE, CMOVG16rm , CMOVG32rm , CMOVG64rm>;
+defm : CMOVmr<X86_COND_G , CMOVLE16rm, CMOVLE32rm, CMOVLE64rm>;
+defm : CMOVmr<X86_COND_P , CMOVNP16rm, CMOVNP32rm, CMOVNP64rm>;
+defm : CMOVmr<X86_COND_NP, CMOVP16rm , CMOVP32rm , CMOVP64rm>;
+defm : CMOVmr<X86_COND_S , CMOVNS16rm, CMOVNS32rm, CMOVNS64rm>;
+defm : CMOVmr<X86_COND_NS, CMOVS16rm , CMOVS32rm , CMOVS64rm>;
+defm : CMOVmr<X86_COND_O , CMOVNO16rm, CMOVNO32rm, CMOVNO64rm>;
+defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;
+
+// zextload bool -> zextload byte
+def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
+def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
+def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
+
+// extload bool -> extload byte
+// When extloading from 16-bit and smaller memory locations into 64-bit
+// registers, use zero-extending loads so that the entire 64-bit register is
+// defined, avoiding partial-register updates.
+
+def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;
+def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
+def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
+def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
+
+def : Pat<(extloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
+def : Pat<(extloadi64i8 addr:$src), (MOVZX64rm8 addr:$src)>;
+def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>;
+// For other extloads, use subregs, since the high contents of the register are
+// defined after an extload.
+def : Pat<(extloadi64i32 addr:$src),
+ (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src),
+ sub_32bit)>;
+
+// anyext. Define these to do an explicit zero-extend to
+// avoid partial-register updates.
+def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>;
+def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
+
+// Except for i16 -> i32 since isel expect i16 ops to be promoted to i32.
+def : Pat<(i32 (anyext GR16:$src)),
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;
+
+def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8 GR8 :$src)>;
+def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>;
+def : Pat<(i64 (anyext GR32:$src)),
+ (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
+
+
+// Any instruction that defines a 32-bit result leaves the high half of the
+// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
+// be copying from a truncate. And x86's cmov doesn't do anything if the
+// condition is false. But any other 32-bit operation will zero-extend
+// up to 64 bits.
+def def32 : PatLeaf<(i32 GR32:$src), [{
+ return N->getOpcode() != ISD::TRUNCATE &&
+ N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
+ N->getOpcode() != ISD::CopyFromReg &&
+ N->getOpcode() != X86ISD::CMOV;
+}]>;
+
+// In the case of a 32-bit def that is known to implicitly zero-extend,
+// we can use a SUBREG_TO_REG.
+def : Pat<(i64 (zext def32:$src)),
+ (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
+
+//===----------------------------------------------------------------------===//
+// Pattern match OR as ADD
+//===----------------------------------------------------------------------===//
+
+// If safe, we prefer to pattern match OR as ADD at isel time. ADD can be
+// 3-addressified into an LEA instruction to avoid copies. However, we also
+// want to finally emit these instructions as an or at the end of the code
+// generator to make the generated code easier to read. To do this, we select
+// into "disjoint bits" pseudo ops.
+
+// Treat an 'or' node is as an 'add' if the or'ed bits are known to be zero.
+def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
+
+ unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero0, KnownOne0;
+ CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
+ APInt KnownZero1, KnownOne1;
+ CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
+ return (~KnownZero0 & ~KnownZero1) == 0;
+}]>;
+
+
+// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
+let AddedComplexity = 5 in { // Try this before the selecting to OR
+
+let isConvertibleToThreeAddress = 1,
+ Constraints = "$src1 = $dst", Defs = [EFLAGS] in {
+let isCommutable = 1 in {
+def ADD16rr_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "", // orw/addw REG, REG
+ [(set GR16:$dst, (or_is_add GR16:$src1, GR16:$src2))]>;
+def ADD32rr_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "", // orl/addl REG, REG
+ [(set GR32:$dst, (or_is_add GR32:$src1, GR32:$src2))]>;
+def ADD64rr_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "", // orq/addq REG, REG
+ [(set GR64:$dst, (or_is_add GR64:$src1, GR64:$src2))]>;
+} // isCommutable
+
+// NOTE: These are order specific, we want the ri8 forms to be listed
+// first so that they are slightly preferred to the ri forms.
+
+def ADD16ri8_DB : I<0, Pseudo,
+ (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+ "", // orw/addw REG, imm8
+ [(set GR16:$dst,(or_is_add GR16:$src1,i16immSExt8:$src2))]>;
+def ADD16ri_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "", // orw/addw REG, imm
+ [(set GR16:$dst, (or_is_add GR16:$src1, imm:$src2))]>;
+
+def ADD32ri8_DB : I<0, Pseudo,
+ (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+ "", // orl/addl REG, imm8
+ [(set GR32:$dst,(or_is_add GR32:$src1,i32immSExt8:$src2))]>;
+def ADD32ri_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ "", // orl/addl REG, imm
+ [(set GR32:$dst, (or_is_add GR32:$src1, imm:$src2))]>;
+
+
+def ADD64ri8_DB : I<0, Pseudo,
+ (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+ "", // orq/addq REG, imm8
+ [(set GR64:$dst, (or_is_add GR64:$src1,
+ i64immSExt8:$src2))]>;
+def ADD64ri32_DB : I<0, Pseudo,
+ (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+ "", // orq/addq REG, imm
+ [(set GR64:$dst, (or_is_add GR64:$src1,
+ i64immSExt32:$src2))]>;
+}
+} // AddedComplexity
+
+
+//===----------------------------------------------------------------------===//
+// Some peepholes
+//===----------------------------------------------------------------------===//
+
+// Odd encoding trick: -128 fits into an 8-bit immediate field while
+// +128 doesn't, so in this special case use a sub instead of an add.
+def : Pat<(add GR16:$src1, 128),
+ (SUB16ri8 GR16:$src1, -128)>;
+def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),
+ (SUB16mi8 addr:$dst, -128)>;
+
+def : Pat<(add GR32:$src1, 128),
+ (SUB32ri8 GR32:$src1, -128)>;
+def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
+ (SUB32mi8 addr:$dst, -128)>;
+
+def : Pat<(add GR64:$src1, 128),
+ (SUB64ri8 GR64:$src1, -128)>;
+def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),
+ (SUB64mi8 addr:$dst, -128)>;
+
+// The same trick applies for 32-bit immediate fields in 64-bit
+// instructions.
+def : Pat<(add GR64:$src1, 0x0000000080000000),
+ (SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
+def : Pat<(store (add (loadi64 addr:$dst), 0x00000000800000000), addr:$dst),
+ (SUB64mi32 addr:$dst, 0xffffffff80000000)>;
+
+// To avoid needing to materialize an immediate in a register, use a 32-bit and
+// with implicit zero-extension instead of a 64-bit and if the immediate has at
+// least 32 bits of leading zeros. If in addition the last 32 bits can be
+// represented with a sign extension of a 8 bit constant, use that.
+
+def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm),
+ (SUBREG_TO_REG
+ (i64 0),
+ (AND32ri8
+ (EXTRACT_SUBREG GR64:$src, sub_32bit),
+ (i32 (GetLo8XForm imm:$imm))),
+ sub_32bit)>;
+
+def : Pat<(and GR64:$src, i64immZExt32:$imm),
+ (SUBREG_TO_REG
+ (i64 0),
+ (AND32ri
+ (EXTRACT_SUBREG GR64:$src, sub_32bit),
+ (i32 (GetLo32XForm imm:$imm))),
+ sub_32bit)>;
+
+
+// r & (2^16-1) ==> movz
+def : Pat<(and GR32:$src1, 0xffff),
+ (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR32:$src1, 0xff),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1,
+ GR32_ABCD)),
+ sub_8bit))>,
+ Requires<[In32BitMode]>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR16:$src1, 0xff),
+ (MOVZX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src1,
+ GR16_ABCD)),
+ sub_8bit))>,
+ Requires<[In32BitMode]>;
+
+// r & (2^32-1) ==> movz
+def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
+ (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
+// r & (2^16-1) ==> movz
+def : Pat<(and GR64:$src, 0xffff),
+ (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit)))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR64:$src, 0xff),
+ (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit)))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR32:$src1, 0xff),
+ (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>,
+ Requires<[In64BitMode]>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR16:$src1, 0xff),
+ (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, sub_8bit)))>,
+ Requires<[In64BitMode]>;
+
+
+// sext_inreg patterns
+def : Pat<(sext_inreg GR32:$src, i16),
+ (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>;
+def : Pat<(sext_inreg GR32:$src, i8),
+ (MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+ GR32_ABCD)),
+ sub_8bit))>,
+ Requires<[In32BitMode]>;
+def : Pat<(sext_inreg GR16:$src, i8),
+ (MOVSX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
+ GR16_ABCD)),
+ sub_8bit))>,
+ Requires<[In32BitMode]>;
+
+def : Pat<(sext_inreg GR64:$src, i32),
+ (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
+def : Pat<(sext_inreg GR64:$src, i16),
+ (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>;
+def : Pat<(sext_inreg GR64:$src, i8),
+ (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>;
+def : Pat<(sext_inreg GR32:$src, i8),
+ (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>,
+ Requires<[In64BitMode]>;
+def : Pat<(sext_inreg GR16:$src, i8),
+ (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, sub_8bit)))>,
+ Requires<[In64BitMode]>;
+
+
+// trunc patterns
+def : Pat<(i16 (trunc GR32:$src)),
+ (EXTRACT_SUBREG GR32:$src, sub_16bit)>;
+def : Pat<(i8 (trunc GR32:$src)),
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
+ sub_8bit)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i8 (trunc GR16:$src)),
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i32 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, sub_32bit)>;
+def : Pat<(i16 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, sub_16bit)>;
+def : Pat<(i8 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, sub_8bit)>;
+def : Pat<(i8 (trunc GR32:$src)),
+ (EXTRACT_SUBREG GR32:$src, sub_8bit)>,
+ Requires<[In64BitMode]>;
+def : Pat<(i8 (trunc GR16:$src)),
+ (EXTRACT_SUBREG GR16:$src, sub_8bit)>,
+ Requires<[In64BitMode]>;
+
+// h-register tricks
+def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
+ sub_8bit_hi)>,
+ Requires<[In32BitMode]>;
+def : Pat<(srl GR16:$src, (i8 8)),
+ (EXTRACT_SUBREG
+ (MOVZX32rr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi)),
+ sub_16bit)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
+ GR16_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In32BitMode]>;
+def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
+ GR16_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In32BitMode]>;
+def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+ GR32_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In32BitMode]>;
+def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+ GR32_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In32BitMode]>;
+
+// h-register tricks.
+// For now, be conservative on x86-64 and use an h-register extract only if the
+// value is immediately zero-extended or stored, which are somewhat common
+// cases. This uses a bunch of code to prevent a register requiring a REX prefix
+// from being allocated in the same instruction as the h register, as there's
+// currently no way to describe this requirement to the register allocator.
+
+// h-register extract and zero-extend.
+def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),
+ (SUBREG_TO_REG
+ (i64 0),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
+ sub_8bit_hi)),
+ sub_32bit)>;
+def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
+ (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+ GR32_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(srl GR16:$src, (i8 8)),
+ (EXTRACT_SUBREG
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi)),
+ sub_16bit)>,
+ Requires<[In64BitMode]>;
+def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
+ (SUBREG_TO_REG
+ (i64 0),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi)),
+ sub_32bit)>;
+def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
+ (SUBREG_TO_REG
+ (i64 0),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi)),
+ sub_32bit)>;
+
+// h-register extract and store.
+def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
+ (MOV8mr_NOREX
+ addr:$dst,
+ (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
+ sub_8bit_hi))>;
+def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
+ (MOV8mr_NOREX
+ addr:$dst,
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
+ (MOV8mr_NOREX
+ addr:$dst,
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+
+
+// (shl x, 1) ==> (add x, x)
+def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
+def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
+def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
+def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
+
+// (shl x (and y, 31)) ==> (shl x, y)
+def : Pat<(shl GR8:$src1, (and CL, 31)),
+ (SHL8rCL GR8:$src1)>;
+def : Pat<(shl GR16:$src1, (and CL, 31)),
+ (SHL16rCL GR16:$src1)>;
+def : Pat<(shl GR32:$src1, (and CL, 31)),
+ (SHL32rCL GR32:$src1)>;
+def : Pat<(store (shl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
+ (SHL8mCL addr:$dst)>;
+def : Pat<(store (shl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
+ (SHL16mCL addr:$dst)>;
+def : Pat<(store (shl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
+ (SHL32mCL addr:$dst)>;
+
+def : Pat<(srl GR8:$src1, (and CL, 31)),
+ (SHR8rCL GR8:$src1)>;
+def : Pat<(srl GR16:$src1, (and CL, 31)),
+ (SHR16rCL GR16:$src1)>;
+def : Pat<(srl GR32:$src1, (and CL, 31)),
+ (SHR32rCL GR32:$src1)>;
+def : Pat<(store (srl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
+ (SHR8mCL addr:$dst)>;
+def : Pat<(store (srl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
+ (SHR16mCL addr:$dst)>;
+def : Pat<(store (srl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
+ (SHR32mCL addr:$dst)>;
+
+def : Pat<(sra GR8:$src1, (and CL, 31)),
+ (SAR8rCL GR8:$src1)>;
+def : Pat<(sra GR16:$src1, (and CL, 31)),
+ (SAR16rCL GR16:$src1)>;
+def : Pat<(sra GR32:$src1, (and CL, 31)),
+ (SAR32rCL GR32:$src1)>;
+def : Pat<(store (sra (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
+ (SAR8mCL addr:$dst)>;
+def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
+ (SAR16mCL addr:$dst)>;
+def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
+ (SAR32mCL addr:$dst)>;
+
+// (shl x (and y, 63)) ==> (shl x, y)
+def : Pat<(shl GR64:$src1, (and CL, 63)),
+ (SHL64rCL GR64:$src1)>;
+def : Pat<(store (shl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
+ (SHL64mCL addr:$dst)>;
+
+def : Pat<(srl GR64:$src1, (and CL, 63)),
+ (SHR64rCL GR64:$src1)>;
+def : Pat<(store (srl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
+ (SHR64mCL addr:$dst)>;
+
+def : Pat<(sra GR64:$src1, (and CL, 63)),
+ (SAR64rCL GR64:$src1)>;
+def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
+ (SAR64mCL addr:$dst)>;
+
+
+// (anyext (setcc_carry)) -> (setcc_carry)
+def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C16r)>;
+def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C32r)>;
+def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C32r)>;
+
+
+
+
+//===----------------------------------------------------------------------===//
+// EFLAGS-defining Patterns
+//===----------------------------------------------------------------------===//
+
+// add reg, reg
+def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>;
+
+// add reg, mem
+def : Pat<(add GR8:$src1, (loadi8 addr:$src2)),
+ (ADD8rm GR8:$src1, addr:$src2)>;
+def : Pat<(add GR16:$src1, (loadi16 addr:$src2)),
+ (ADD16rm GR16:$src1, addr:$src2)>;
+def : Pat<(add GR32:$src1, (loadi32 addr:$src2)),
+ (ADD32rm GR32:$src1, addr:$src2)>;
+
+// add reg, imm
+def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri GR8:$src1 , imm:$src2)>;
+def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>;
+def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>;
+def : Pat<(add GR16:$src1, i16immSExt8:$src2),
+ (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(add GR32:$src1, i32immSExt8:$src2),
+ (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// sub reg, reg
+def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>;
+
+// sub reg, mem
+def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)),
+ (SUB8rm GR8:$src1, addr:$src2)>;
+def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)),
+ (SUB16rm GR16:$src1, addr:$src2)>;
+def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)),
+ (SUB32rm GR32:$src1, addr:$src2)>;
+
+// sub reg, imm
+def : Pat<(sub GR8:$src1, imm:$src2),
+ (SUB8ri GR8:$src1, imm:$src2)>;
+def : Pat<(sub GR16:$src1, imm:$src2),
+ (SUB16ri GR16:$src1, imm:$src2)>;
+def : Pat<(sub GR32:$src1, imm:$src2),
+ (SUB32ri GR32:$src1, imm:$src2)>;
+def : Pat<(sub GR16:$src1, i16immSExt8:$src2),
+ (SUB16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(sub GR32:$src1, i32immSExt8:$src2),
+ (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// mul reg, reg
+def : Pat<(mul GR16:$src1, GR16:$src2),
+ (IMUL16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(mul GR32:$src1, GR32:$src2),
+ (IMUL32rr GR32:$src1, GR32:$src2)>;
+
+// mul reg, mem
+def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)),
+ (IMUL16rm GR16:$src1, addr:$src2)>;
+def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)),
+ (IMUL32rm GR32:$src1, addr:$src2)>;
+
+// mul reg, imm
+def : Pat<(mul GR16:$src1, imm:$src2),
+ (IMUL16rri GR16:$src1, imm:$src2)>;
+def : Pat<(mul GR32:$src1, imm:$src2),
+ (IMUL32rri GR32:$src1, imm:$src2)>;
+def : Pat<(mul GR16:$src1, i16immSExt8:$src2),
+ (IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(mul GR32:$src1, i32immSExt8:$src2),
+ (IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// reg = mul mem, imm
+def : Pat<(mul (loadi16 addr:$src1), imm:$src2),
+ (IMUL16rmi addr:$src1, imm:$src2)>;
+def : Pat<(mul (loadi32 addr:$src1), imm:$src2),
+ (IMUL32rmi addr:$src1, imm:$src2)>;
+def : Pat<(mul (loadi16 addr:$src1), i16immSExt8:$src2),
+ (IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>;
+def : Pat<(mul (loadi32 addr:$src1), i32immSExt8:$src2),
+ (IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>;
+
+// Optimize multiply by 2 with EFLAGS result.
+let AddedComplexity = 2 in {
+def : Pat<(X86smul_flag GR16:$src1, 2), (ADD16rr GR16:$src1, GR16:$src1)>;
+def : Pat<(X86smul_flag GR32:$src1, 2), (ADD32rr GR32:$src1, GR32:$src1)>;
+}
+
+// Patterns for nodes that do not produce flags, for instructions that do.
+
+// addition
+def : Pat<(add GR64:$src1, GR64:$src2),
+ (ADD64rr GR64:$src1, GR64:$src2)>;
+def : Pat<(add GR64:$src1, i64immSExt8:$src2),
+ (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(add GR64:$src1, i64immSExt32:$src2),
+ (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
+def : Pat<(add GR64:$src1, (loadi64 addr:$src2)),
+ (ADD64rm GR64:$src1, addr:$src2)>;
+
+// subtraction
+def : Pat<(sub GR64:$src1, GR64:$src2),
+ (SUB64rr GR64:$src1, GR64:$src2)>;
+def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)),
+ (SUB64rm GR64:$src1, addr:$src2)>;
+def : Pat<(sub GR64:$src1, i64immSExt8:$src2),
+ (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(sub GR64:$src1, i64immSExt32:$src2),
+ (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// Multiply
+def : Pat<(mul GR64:$src1, GR64:$src2),
+ (IMUL64rr GR64:$src1, GR64:$src2)>;
+def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)),
+ (IMUL64rm GR64:$src1, addr:$src2)>;
+def : Pat<(mul GR64:$src1, i64immSExt8:$src2),
+ (IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(mul GR64:$src1, i64immSExt32:$src2),
+ (IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>;
+def : Pat<(mul (loadi64 addr:$src1), i64immSExt8:$src2),
+ (IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>;
+def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
+ (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
+
+// Increment reg.
+def : Pat<(add GR8 :$src, 1), (INC8r GR8 :$src)>;
+def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR16:$src, 1), (INC64_16r GR16:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR32:$src, 1), (INC64_32r GR32:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>;
+
+// Decrement reg.
+def : Pat<(add GR8 :$src, -1), (DEC8r GR8 :$src)>;
+def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR16:$src, -1), (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR32:$src, -1), (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
+
+// or reg/reg.
+def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(or GR64:$src1, GR64:$src2), (OR64rr GR64:$src1, GR64:$src2)>;
+
+// or reg/mem
+def : Pat<(or GR8:$src1, (loadi8 addr:$src2)),
+ (OR8rm GR8:$src1, addr:$src2)>;
+def : Pat<(or GR16:$src1, (loadi16 addr:$src2)),
+ (OR16rm GR16:$src1, addr:$src2)>;
+def : Pat<(or GR32:$src1, (loadi32 addr:$src2)),
+ (OR32rm GR32:$src1, addr:$src2)>;
+def : Pat<(or GR64:$src1, (loadi64 addr:$src2)),
+ (OR64rm GR64:$src1, addr:$src2)>;
+
+// or reg/imm
+def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri GR8 :$src1, imm:$src2)>;
+def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>;
+def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>;
+def : Pat<(or GR16:$src1, i16immSExt8:$src2),
+ (OR16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(or GR32:$src1, i32immSExt8:$src2),
+ (OR32ri8 GR32:$src1, i32immSExt8:$src2)>;
+def : Pat<(or GR64:$src1, i64immSExt8:$src2),
+ (OR64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(or GR64:$src1, i64immSExt32:$src2),
+ (OR64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// xor reg/reg
+def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(xor GR64:$src1, GR64:$src2), (XOR64rr GR64:$src1, GR64:$src2)>;
+
+// xor reg/mem
+def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)),
+ (XOR8rm GR8:$src1, addr:$src2)>;
+def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)),
+ (XOR16rm GR16:$src1, addr:$src2)>;
+def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)),
+ (XOR32rm GR32:$src1, addr:$src2)>;
+def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)),
+ (XOR64rm GR64:$src1, addr:$src2)>;
+
+// xor reg/imm
+def : Pat<(xor GR8:$src1, imm:$src2),
+ (XOR8ri GR8:$src1, imm:$src2)>;
+def : Pat<(xor GR16:$src1, imm:$src2),
+ (XOR16ri GR16:$src1, imm:$src2)>;
+def : Pat<(xor GR32:$src1, imm:$src2),
+ (XOR32ri GR32:$src1, imm:$src2)>;
+def : Pat<(xor GR16:$src1, i16immSExt8:$src2),
+ (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(xor GR32:$src1, i32immSExt8:$src2),
+ (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>;
+def : Pat<(xor GR64:$src1, i64immSExt8:$src2),
+ (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(xor GR64:$src1, i64immSExt32:$src2),
+ (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// and reg/reg
+def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(and GR64:$src1, GR64:$src2), (AND64rr GR64:$src1, GR64:$src2)>;
+
+// and reg/mem
+def : Pat<(and GR8:$src1, (loadi8 addr:$src2)),
+ (AND8rm GR8:$src1, addr:$src2)>;
+def : Pat<(and GR16:$src1, (loadi16 addr:$src2)),
+ (AND16rm GR16:$src1, addr:$src2)>;
+def : Pat<(and GR32:$src1, (loadi32 addr:$src2)),
+ (AND32rm GR32:$src1, addr:$src2)>;
+def : Pat<(and GR64:$src1, (loadi64 addr:$src2)),
+ (AND64rm GR64:$src1, addr:$src2)>;
+
+// and reg/imm
+def : Pat<(and GR8:$src1, imm:$src2),
+ (AND8ri GR8:$src1, imm:$src2)>;
+def : Pat<(and GR16:$src1, imm:$src2),
+ (AND16ri GR16:$src1, imm:$src2)>;
+def : Pat<(and GR32:$src1, imm:$src2),
+ (AND32ri GR32:$src1, imm:$src2)>;
+def : Pat<(and GR16:$src1, i16immSExt8:$src2),
+ (AND16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(and GR32:$src1, i32immSExt8:$src2),
+ (AND32ri8 GR32:$src1, i32immSExt8:$src2)>;
+def : Pat<(and GR64:$src1, i64immSExt8:$src2),
+ (AND64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(and GR64:$src1, i64immSExt32:$src2),
+ (AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
new file mode 100644
index 0000000..77f4725
--- /dev/null
+++ b/lib/Target/X86/X86InstrControl.td
@@ -0,0 +1,294 @@
+//===- X86InstrControl.td - Control Flow Instructions ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 jump, return, call, and related instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions.
+//
+
+// Return instructions.
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasCtrlDep = 1, FPForm = SpecialFP in {
+ def RET : I <0xC3, RawFrm, (outs), (ins variable_ops),
+ "ret",
+ [(X86retflag 0)]>;
+ def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
+ "ret\t$amt",
+ [(X86retflag timm:$amt)]>;
+ def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
+ "retw\t$amt",
+ []>, OpSize;
+ def LRETL : I <0xCB, RawFrm, (outs), (ins),
+ "lretl", []>;
+ def LRETQ : RI <0xCB, RawFrm, (outs), (ins),
+ "lretq", []>;
+ def LRETI : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
+ "lret\t$amt", []>;
+ def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
+ "lretw\t$amt", []>, OpSize;
+}
+
+// Unconditional branches.
+let isBarrier = 1, isBranch = 1, isTerminator = 1 in {
+ def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst),
+ "jmp\t$dst", [(br bb:$dst)]>;
+ def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
+ "jmp\t$dst", []>;
+ def JMP64pcrel32 : I<0xE9, RawFrm, (outs), (ins brtarget:$dst),
+ "jmp{q}\t$dst", []>;
+}
+
+// Conditional Branches.
+let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in {
+ multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
+ def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, []>;
+ def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm,
+ [(X86brcond bb:$dst, Cond, EFLAGS)]>, TB;
+ }
+}
+
+defm JO : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>;
+defm JNO : ICBr<0x71, 0x81, "jno\t$dst" , X86_COND_NO>;
+defm JB : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>;
+defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>;
+defm JE : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>;
+defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>;
+defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>;
+defm JA : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>;
+defm JS : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>;
+defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>;
+defm JP : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>;
+defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>;
+defm JL : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>;
+defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>;
+defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>;
+defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>;
+
+// jcx/jecx/jrcx instructions.
+let isAsmParserOnly = 1, isBranch = 1, isTerminator = 1 in {
+ // These are the 32-bit versions of this instruction for the asmparser. In
+ // 32-bit mode, the address size prefix is jcxz and the unprefixed version is
+ // jecxz.
+ let Uses = [CX] in
+ def JCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+ "jcxz\t$dst", []>, AdSize, Requires<[In32BitMode]>;
+ let Uses = [ECX] in
+ def JECXZ_32 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+ "jecxz\t$dst", []>, Requires<[In32BitMode]>;
+
+ // J*CXZ instruction: 64-bit versions of this instruction for the asmparser.
+ // In 64-bit mode, the address size prefix is jecxz and the unprefixed version
+ // is jrcxz.
+ let Uses = [ECX] in
+ def JECXZ_64 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+ "jecxz\t$dst", []>, AdSize, Requires<[In64BitMode]>;
+ let Uses = [RCX] in
+ def JRCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+ "jrcxz\t$dst", []>, Requires<[In64BitMode]>;
+}
+
+// Indirect branches
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
+ [(brind GR32:$dst)]>, Requires<[In32BitMode]>;
+ def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
+ [(brind (loadi32 addr:$dst))]>, Requires<[In32BitMode]>;
+
+ def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst",
+ [(brind GR64:$dst)]>, Requires<[In64BitMode]>;
+ def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
+ [(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>;
+
+ def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs),
+ (ins i16imm:$off, i16imm:$seg),
+ "ljmp{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
+ def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs),
+ (ins i32imm:$off, i16imm:$seg),
+ "ljmp{l}\t{$seg, $off|$off, $seg}", []>;
+ def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst),
+ "ljmp{q}\t{*}$dst", []>;
+
+ def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst),
+ "ljmp{w}\t{*}$dst", []>, OpSize;
+ def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst),
+ "ljmp{l}\t{*}$dst", []>;
+}
+
+
+// Loop instructions
+
+def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>;
+def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>;
+def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>;
+
+//===----------------------------------------------------------------------===//
+// Call Instructions...
+//
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. ESP is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [ESP] in {
+ def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm,
+ (outs), (ins i32imm_pcrel:$dst,variable_ops),
+ "call{l}\t$dst", []>, Requires<[In32BitMode]>;
+ def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops),
+ "call{l}\t{*}$dst", [(X86call GR32:$dst)]>,
+ Requires<[In32BitMode]>;
+ def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
+ "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))]>,
+ Requires<[In32BitMode]>;
+
+ def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs),
+ (ins i16imm:$off, i16imm:$seg),
+ "lcall{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
+ def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs),
+ (ins i32imm:$off, i16imm:$seg),
+ "lcall{l}\t{$seg, $off|$off, $seg}", []>;
+
+ def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst),
+ "lcall{w}\t{*}$dst", []>, OpSize;
+ def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst),
+ "lcall{l}\t{*}$dst", []>;
+
+ // callw for 16 bit code for the assembler.
+ let isAsmParserOnly = 1 in
+ def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm,
+ (outs), (ins i16imm_pcrel:$dst, variable_ops),
+ "callw\t$dst", []>, OpSize;
+ }
+
+
+// Tail call stuff.
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+ isCodeGenOnly = 1 in
+ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [ESP] in {
+ def TCRETURNdi : PseudoI<(outs),
+ (ins i32imm_pcrel:$dst, i32imm:$offset, variable_ops), []>;
+ def TCRETURNri : PseudoI<(outs),
+ (ins GR32_TC:$dst, i32imm:$offset, variable_ops), []>;
+ let mayLoad = 1 in
+ def TCRETURNmi : PseudoI<(outs),
+ (ins i32mem_TC:$dst, i32imm:$offset, variable_ops), []>;
+
+ // FIXME: The should be pseudo instructions that are lowered when going to
+ // mcinst.
+ def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs),
+ (ins i32imm_pcrel:$dst, variable_ops),
+ "jmp\t$dst # TAILCALL",
+ []>;
+ def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops),
+ "", []>; // FIXME: Remove encoding when JIT is dead.
+ let mayLoad = 1 in
+ def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops),
+ "jmp{l}\t{*}$dst # TAILCALL", []>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Call Instructions...
+//
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. RSP is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [RSP] in {
+
+ // NOTE: this pattern doesn't match "X86call imm", because we do not know
+ // that the offset between an arbitrary immediate and the call will fit in
+ // the 32-bit pcrel field that we have.
+ def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
+ (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
+ "call{q}\t$dst", []>,
+ Requires<[In64BitMode, NotWin64]>;
+ def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
+ "call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
+ Requires<[In64BitMode, NotWin64]>;
+ def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
+ "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
+ Requires<[In64BitMode, NotWin64]>;
+
+ def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst),
+ "lcall{q}\t{*}$dst", []>;
+ }
+
+ // FIXME: We need to teach codegen about single list of call-clobbered
+ // registers.
+let isCall = 1, isCodeGenOnly = 1 in
+ // All calls clobber the non-callee saved registers. RSP is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
+ Uses = [RSP] in {
+ def WINCALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
+ (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
+ "call{q}\t$dst", []>,
+ Requires<[IsWin64]>;
+ def WINCALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
+ "call{q}\t{*}$dst",
+ [(X86call GR64:$dst)]>, Requires<[IsWin64]>;
+ def WINCALL64m : I<0xFF, MRM2m, (outs),
+ (ins i64mem:$dst,variable_ops),
+ "call{q}\t{*}$dst",
+ [(X86call (loadi64 addr:$dst))]>,
+ Requires<[IsWin64]>;
+ }
+
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+ isCodeGenOnly = 1 in
+ // AMD64 cc clobbers RSI, RDI, XMM6-XMM15.
+ let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
+ Uses = [RSP],
+ usesCustomInserter = 1 in {
+ def TCRETURNdi64 : PseudoI<(outs),
+ (ins i64i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
+ []>;
+ def TCRETURNri64 : PseudoI<(outs),
+ (ins ptr_rc_tailcall:$dst, i32imm:$offset, variable_ops), []>;
+ let mayLoad = 1 in
+ def TCRETURNmi64 : PseudoI<(outs),
+ (ins i64mem_TC:$dst, i32imm:$offset, variable_ops), []>;
+
+ def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs),
+ (ins i64i32imm_pcrel:$dst, variable_ops),
+ "jmp\t$dst # TAILCALL", []>;
+ def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst, variable_ops),
+ "jmp{q}\t{*}$dst # TAILCALL", []>;
+
+ let mayLoad = 1 in
+ def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops),
+ "jmp{q}\t{*}$dst # TAILCALL", []>;
+}
diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td
new file mode 100644
index 0000000..867c0f8
--- /dev/null
+++ b/lib/Target/X86/X86InstrExtension.td
@@ -0,0 +1,172 @@
+//===- X86InstrExtension.td - Sign and Zero Extensions -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the sign and zero extension operations.
+//
+//===----------------------------------------------------------------------===//
+
+let neverHasSideEffects = 1 in {
+ let Defs = [AX], Uses = [AL] in
+ def CBW : I<0x98, RawFrm, (outs), (ins),
+ "{cbtw|cbw}", []>, OpSize; // AX = signext(AL)
+ let Defs = [EAX], Uses = [AX] in
+ def CWDE : I<0x98, RawFrm, (outs), (ins),
+ "{cwtl|cwde}", []>; // EAX = signext(AX)
+
+ let Defs = [AX,DX], Uses = [AX] in
+ def CWD : I<0x99, RawFrm, (outs), (ins),
+ "{cwtd|cwd}", []>, OpSize; // DX:AX = signext(AX)
+ let Defs = [EAX,EDX], Uses = [EAX] in
+ def CDQ : I<0x99, RawFrm, (outs), (ins),
+ "{cltd|cdq}", []>; // EDX:EAX = signext(EAX)
+
+
+ let Defs = [RAX], Uses = [EAX] in
+ def CDQE : RI<0x98, RawFrm, (outs), (ins),
+ "{cltq|cdqe}", []>; // RAX = signext(EAX)
+
+ let Defs = [RAX,RDX], Uses = [RAX] in
+ def CQO : RI<0x99, RawFrm, (outs), (ins),
+ "{cqto|cqo}", []>; // RDX:RAX = signext(RAX)
+}
+
+
+// Sign/Zero extenders
+// Use movsbl intead of movsbw; we don't care about the high 16 bits
+// of the register here. This has a smaller encoding and avoids a
+// partial-register update. Actual movsbw included for the disassembler.
+def MOVSX16rr8W : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
+ "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def MOVSX16rm8W : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
+ "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+
+// FIXME: Use a pat pattern or define a syntax here.
+let isCodeGenOnly=1 in {
+def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
+ "", [(set GR16:$dst, (sext GR8:$src))]>, TB;
+def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
+ "", [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB;
+}
+def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
+ "movs{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sext GR8:$src))]>, TB;
+def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
+ "movs{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sextloadi32i8 addr:$src))]>, TB;
+def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
+ "movs{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sext GR16:$src))]>, TB;
+def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
+ "movs{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB;
+
+// Use movzbl intead of movzbw; we don't care about the high 16 bits
+// of the register here. This has a smaller encoding and avoids a
+// partial-register update. Actual movzbw included for the disassembler.
+def MOVZX16rr8W : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
+ "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def MOVZX16rm8W : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
+ "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+// FIXME: Use a pat pattern or define a syntax here.
+let isCodeGenOnly=1 in {
+def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
+ "", [(set GR16:$dst, (zext GR8:$src))]>, TB;
+def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
+ "", [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB;
+}
+def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zext GR8:$src))]>, TB;
+def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zextloadi32i8 addr:$src))]>, TB;
+def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
+ "movz{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zext GR16:$src))]>, TB;
+def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
+ "movz{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB;
+
+// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
+// except that they use GR32_NOREX for the output operand register class
+// instead of GR32. This allows them to operate on h registers on x86-64.
+def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
+ (outs GR32_NOREX:$dst), (ins GR8:$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ []>, TB;
+let mayLoad = 1 in
+def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
+ (outs GR32_NOREX:$dst), (ins i8mem:$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ []>, TB;
+
+// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
+// operand, which makes it a rare instruction with an 8-bit register
+// operand that can never access an h register. If support for h registers
+// were generalized, this would require a special register class.
+def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
+ "movs{bq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sext GR8:$src))]>, TB;
+def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
+ "movs{bq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i8 addr:$src))]>, TB;
+def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
+ "movs{wq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sext GR16:$src))]>, TB;
+def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "movs{wq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i16 addr:$src))]>, TB;
+def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
+ "movs{lq|xd}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sext GR32:$src))]>;
+def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
+ "movs{lq|xd}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i32 addr:$src))]>;
+
+// movzbq and movzwq encodings for the disassembler
+def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
+ "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOVZX64rm8_Q : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src),
+ "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOVZX64rr16_Q : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
+ "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+
+// FIXME: These should be Pat patterns.
+let isCodeGenOnly = 1 in {
+
+// Use movzbl instead of movzbq when the destination is a register; it's
+// equivalent due to implicit zero-extending, and it has a smaller encoding.
+def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
+ "", [(set GR64:$dst, (zext GR8:$src))]>, TB;
+def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
+ "", [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB;
+// Use movzwl instead of movzwq when the destination is a register; it's
+// equivalent due to implicit zero-extending, and it has a smaller encoding.
+def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
+ "", [(set GR64:$dst, (zext GR16:$src))]>, TB;
+def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "", [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB;
+
+// There's no movzlq instruction, but movl can be used for this purpose, using
+// implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero
+// extension on x86-64 is to use a SUBREG_TO_REG to utilize implicit
+// zero-extension, however this isn't possible when the 32-bit value is
+// defined by a truncate or is copied from something where the high bits aren't
+// necessarily all zero. In such cases, we fall back to these explicit zext
+// instructions.
+def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src),
+ "", [(set GR64:$dst, (zext GR32:$src))]>;
+def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
+ "", [(set GR64:$dst, (zextloadi64i32 addr:$src))]>;
+
+
+}
+
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index 9c9bcc7..b506f5e 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -32,21 +32,24 @@ def SDTX86FpToIMem : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld,
- [SDNPHasChain, SDNPMayLoad]>;
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def X86fst : SDNode<"X86ISD::FST", SDTX86Fst,
- [SDNPHasChain, SDNPInFlag, SDNPMayStore]>;
+ [SDNPHasChain, SDNPInGlue, SDNPMayStore,
+ SDNPMemOperand]>;
def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild,
- [SDNPHasChain, SDNPMayLoad]>;
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def X86fildflag : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild,
- [SDNPHasChain, SDNPOutFlag, SDNPMayLoad]>;
+ [SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
+ SDNPMemOperand]>;
def X86fp_to_i16mem : SDNode<"X86ISD::FP_TO_INT16_IN_MEM", SDTX86FpToIMem,
- [SDNPHasChain, SDNPMayStore]>;
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86fp_to_i32mem : SDNode<"X86ISD::FP_TO_INT32_IN_MEM", SDTX86FpToIMem,
- [SDNPHasChain, SDNPMayStore]>;
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86fp_to_i64mem : SDNode<"X86ISD::FP_TO_INT64_IN_MEM", SDTX86FpToIMem,
- [SDNPHasChain, SDNPMayStore]>;
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore,
- [SDNPHasChain, SDNPMayStore, SDNPSideEffect]>;
+ [SDNPHasChain, SDNPMayStore, SDNPSideEffect,
+ SDNPMemOperand]>;
//===----------------------------------------------------------------------===//
// FPStack pattern fragments
@@ -70,41 +73,23 @@ def fpimmneg1 : PatLeaf<(fpimm), [{
// Some 'special' instructions
let usesCustomInserter = 1 in { // Expanded after instruction selection.
- def FP32_TO_INT16_IN_MEM : I<0, Pseudo,
- (outs), (ins i16mem:$dst, RFP32:$src),
- "##FP32_TO_INT16_IN_MEM PSEUDO!",
+ def FP32_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP32:$src),
[(X86fp_to_i16mem RFP32:$src, addr:$dst)]>;
- def FP32_TO_INT32_IN_MEM : I<0, Pseudo,
- (outs), (ins i32mem:$dst, RFP32:$src),
- "##FP32_TO_INT32_IN_MEM PSEUDO!",
+ def FP32_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP32:$src),
[(X86fp_to_i32mem RFP32:$src, addr:$dst)]>;
- def FP32_TO_INT64_IN_MEM : I<0, Pseudo,
- (outs), (ins i64mem:$dst, RFP32:$src),
- "##FP32_TO_INT64_IN_MEM PSEUDO!",
+ def FP32_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP32:$src),
[(X86fp_to_i64mem RFP32:$src, addr:$dst)]>;
- def FP64_TO_INT16_IN_MEM : I<0, Pseudo,
- (outs), (ins i16mem:$dst, RFP64:$src),
- "##FP64_TO_INT16_IN_MEM PSEUDO!",
+ def FP64_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP64:$src),
[(X86fp_to_i16mem RFP64:$src, addr:$dst)]>;
- def FP64_TO_INT32_IN_MEM : I<0, Pseudo,
- (outs), (ins i32mem:$dst, RFP64:$src),
- "##FP64_TO_INT32_IN_MEM PSEUDO!",
+ def FP64_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP64:$src),
[(X86fp_to_i32mem RFP64:$src, addr:$dst)]>;
- def FP64_TO_INT64_IN_MEM : I<0, Pseudo,
- (outs), (ins i64mem:$dst, RFP64:$src),
- "##FP64_TO_INT64_IN_MEM PSEUDO!",
+ def FP64_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP64:$src),
[(X86fp_to_i64mem RFP64:$src, addr:$dst)]>;
- def FP80_TO_INT16_IN_MEM : I<0, Pseudo,
- (outs), (ins i16mem:$dst, RFP80:$src),
- "##FP80_TO_INT16_IN_MEM PSEUDO!",
+ def FP80_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP80:$src),
[(X86fp_to_i16mem RFP80:$src, addr:$dst)]>;
- def FP80_TO_INT32_IN_MEM : I<0, Pseudo,
- (outs), (ins i32mem:$dst, RFP80:$src),
- "##FP80_TO_INT32_IN_MEM PSEUDO!",
+ def FP80_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP80:$src),
[(X86fp_to_i32mem RFP80:$src, addr:$dst)]>;
- def FP80_TO_INT64_IN_MEM : I<0, Pseudo,
- (outs), (ins i64mem:$dst, RFP80:$src),
- "##FP80_TO_INT64_IN_MEM PSEUDO!",
+ def FP80_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP80:$src),
[(X86fp_to_i64mem RFP80:$src, addr:$dst)]>;
}
@@ -212,11 +197,11 @@ def _Fp80m64: FpI_<(outs RFP80:$dst),
[(set RFP80:$dst,
(OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2))))]>;
def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src),
- !strconcat("f", !strconcat(asmstring, "{s}\t$src"))> {
+ !strconcat("f", asmstring, "{s}\t$src")> {
let mayLoad = 1;
}
def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src),
- !strconcat("f", !strconcat(asmstring, "{l}\t$src"))> {
+ !strconcat("f", asmstring, "{l}\t$src")> {
let mayLoad = 1;
}
// ST(0) = ST(0) + [memint]
@@ -245,11 +230,11 @@ def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2),
[(set RFP80:$dst, (OpNode RFP80:$src1,
(X86fild addr:$src2, i32)))]>;
def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src),
- !strconcat("fi", !strconcat(asmstring, "{s}\t$src"))> {
+ !strconcat("fi", asmstring, "{s}\t$src")> {
let mayLoad = 1;
}
def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src),
- !strconcat("fi", !strconcat(asmstring, "{l}\t$src"))> {
+ !strconcat("fi", asmstring, "{l}\t$src")> {
let mayLoad = 1;
}
}
@@ -580,16 +565,16 @@ def UCOM_FPPr : FPI<0xE9, RawFrm, // cmp ST(0) with ST(1), pop, pop
def UCOM_FIr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i)
(outs), (ins RST:$reg),
- "fucomi\t{$reg, %st(0)|%ST(0), $reg}">, DB;
+ "fucomi\t$reg">, DB;
def UCOM_FIPr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i), pop
(outs), (ins RST:$reg),
- "fucomip\t{$reg, %st(0)|%ST(0), $reg}">, DF;
+ "fucompi\t$reg">, DF;
}
def COM_FIr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg),
- "fcomi\t{$reg, %st(0)|%ST(0), $reg}">, DB;
+ "fcomi\t$reg">, DB;
def COM_FIPr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg),
- "fcomip\t{$reg, %st(0)|%ST(0), $reg}">, DF;
+ "fcompi\t$reg">, DF;
// Floating point flag ops.
let Defs = [AX] in
@@ -604,8 +589,8 @@ let mayLoad = 1 in
def FLDCW16m : I<0xD9, MRM5m, // X87 control world = [mem16]
(outs), (ins i16mem:$dst), "fldcw\t$dst", []>;
-// Register free
-
+// FPU control instructions
+def FNINIT : I<0xE3, RawFrm, (outs), (ins), "fninit", []>, DB;
def FFREE : FPI<0xC0, AddRegFrm, (outs), (ins RST:$reg),
"ffree\t$reg">, DD;
@@ -613,7 +598,8 @@ def FFREE : FPI<0xC0, AddRegFrm, (outs), (ins RST:$reg),
def FNCLEX : I<0xE2, RawFrm, (outs), (ins), "fnclex", []>, DB;
-// Operandless floating-point instructions for the disassembler
+// Operandless floating-point instructions for the disassembler.
+def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>;
def FNOP : I<0xD0, RawFrm, (outs), (ins), "fnop", []>, D9;
def FXAM : I<0xE5, RawFrm, (outs), (ins), "fxam", []>, D9;
@@ -639,8 +625,12 @@ def FCOMPP : I<0xD9, RawFrm, (outs), (ins), "fcompp", []>, DE;
def FXSAVE : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
"fxsave\t$dst", []>, TB;
+def FXSAVE64 : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
+ "fxsaveq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
"fxrstor\t$src", []>, TB;
+def FXRSTOR64 : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
+ "fxrstorq\t$src", []>, TB, REX_W, Requires<[In64BitMode]>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 79187e9..344c14c 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -39,7 +39,8 @@ def MRM_E8 : Format<39>;
def MRM_F0 : Format<40>;
def MRM_F8 : Format<41>;
def MRM_F9 : Format<42>;
-def RawFrmImm16 : Format<43>;
+def RawFrmImm8 : Format<43>;
+def RawFrmImm16 : Format<44>;
// ImmType - This specifies the immediate type used by an instruction. This is
// part of the ad-hoc solution used to emit machine instruction encodings by our
@@ -108,6 +109,7 @@ class VEX_W { bit hasVEX_WPrefix = 1; }
class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; }
class VEX_L { bit hasVEX_L = 1; }
+class Has3DNow0F0FOpcode { bit has3DNow0F0FOpcode = 1; }
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
string AsmStr, Domain d = GenericDomain>
@@ -123,6 +125,9 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
dag InOperandList = ins;
string AsmString = AsmStr;
+ // If this is a pseudo instruction, mark it isCodeGenOnly.
+ let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");
+
//
// Attributes specific to X86 instructions...
//
@@ -130,17 +135,18 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bit hasAdSizePrefix = 0; // Does this inst have a 0x67 prefix?
bits<4> Prefix = 0; // Which prefix byte does this inst have?
- bit hasREX_WPrefix = 0; // Does this inst requires the REX.W prefix?
+ bit hasREX_WPrefix = 0; // Does this inst require the REX.W prefix?
FPFormat FPForm = NotFP; // What flavor of FP instruction is this?
bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix?
bits<2> SegOvrBits = 0; // Segment override prefix.
Domain ExeDomain = d;
- bit hasVEXPrefix = 0; // Does this inst requires a VEX prefix?
+ bit hasVEXPrefix = 0; // Does this inst require a VEX prefix?
bit hasVEX_WPrefix = 0; // Does this inst set the VEX_W field?
- bit hasVEX_4VPrefix = 0; // Does this inst requires the VEX.VVVV field?
- bit hasVEX_i8ImmReg = 0; // Does this inst requires the last source register
+ bit hasVEX_4VPrefix = 0; // Does this inst require the VEX.VVVV field?
+ bit hasVEX_i8ImmReg = 0; // Does this inst require the last source register
// to be encoded in a immediate field?
- bit hasVEX_L = 0; // Does this inst uses large (256-bit) registers?
+ bit hasVEX_L = 0; // Does this inst use large (256-bit) registers?
+ bit has3DNow0F0FOpcode =0;// Wacky 3dNow! encoding?
// TSFlags layout should be kept in sync with X86InstrInfo.h.
let TSFlags{5-0} = FormBits;
@@ -159,6 +165,12 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{34} = hasVEX_4VPrefix;
let TSFlags{35} = hasVEX_i8ImmReg;
let TSFlags{36} = hasVEX_L;
+ let TSFlags{37} = has3DNow0F0FOpcode;
+}
+
+class PseudoI<dag oops, dag iops, list<dag> pattern>
+ : X86Inst<0, Pseudo, NoImm, oops, iops, ""> {
+ let Pattern = pattern;
}
class I<bits<8> o, Format f, dag outs, dag ins, string asm,
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 01149b6..5016c0f 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -15,51 +15,8 @@
// MMX Pattern Fragments
//===----------------------------------------------------------------------===//
-def load_mmx : PatFrag<(ops node:$ptr), (v1i64 (load node:$ptr))>;
-
-def bc_v8i8 : PatFrag<(ops node:$in), (v8i8 (bitconvert node:$in))>;
-def bc_v4i16 : PatFrag<(ops node:$in), (v4i16 (bitconvert node:$in))>;
-def bc_v2i32 : PatFrag<(ops node:$in), (v2i32 (bitconvert node:$in))>;
-def bc_v1i64 : PatFrag<(ops node:$in), (v1i64 (bitconvert node:$in))>;
-
-//===----------------------------------------------------------------------===//
-// MMX Masks
-//===----------------------------------------------------------------------===//
-
-// MMX_SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to
-// PSHUFW imm.
-def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
- return getI8Imm(X86::getShuffleSHUFImmediate(N));
-}]>;
-
-// Patterns for: vector_shuffle v1, v2, <2, 6, 3, 7, ...>
-def mmx_unpckh : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-// Patterns for: vector_shuffle v1, v2, <0, 4, 2, 5, ...>
-def mmx_unpckl : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-// Patterns for: vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
-def mmx_unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-// Patterns for: vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
-def mmx_unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
-}], MMX_SHUFFLE_get_shuf_imm>;
+def load_mmx : PatFrag<(ops node:$ptr), (x86mmx (load node:$ptr))>;
+def bc_mmx : PatFrag<(ops node:$in), (x86mmx (bitconvert node:$in))>;
//===----------------------------------------------------------------------===//
// SSE specific DAG Nodes.
@@ -86,6 +43,21 @@ def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
def X86pshufb : SDNode<"X86ISD::PSHUFB",
SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
+def X86pandn : SDNode<"X86ISD::PANDN",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86psignb : SDNode<"X86ISD::PSIGNB",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86psignw : SDNode<"X86ISD::PSIGNW",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86psignd : SDNode<"X86ISD::PSIGND",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86pblendv : SDNode<"X86ISD::PBLENDVB",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;
def X86pextrb : SDNode<"X86ISD::PEXTRB",
SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
def X86pextrw : SDNode<"X86ISD::PEXTRW",
@@ -102,7 +74,7 @@ def X86insrtps : SDNode<"X86ISD::INSERTPS",
def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
- [SDNPHasChain, SDNPMayLoad]>;
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def X86vshl : SDNode<"X86ISD::VSHL", SDTIntShiftOp>;
def X86vshr : SDNode<"X86ISD::VSRL", SDTIntShiftOp>;
def X86cmpps : SDNode<"X86ISD::CMPPS", SDTX86VFCMP>;
@@ -134,18 +106,12 @@ def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>,
def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisInt<3>]>;
-def SDTShuff2OpLdI : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisPtrTy<1>,
- SDTCisInt<2>]>;
-
def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>;
def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
def X86PShuflw : SDNode<"X86ISD::PSHUFLW", SDTShuff2OpI>;
-def X86PShufhwLd : SDNode<"X86ISD::PSHUFHW_LD", SDTShuff2OpLdI>;
-def X86PShuflwLd : SDNode<"X86ISD::PSHUFLW_LD", SDTShuff2OpLdI>;
-
def X86Shufpd : SDNode<"X86ISD::SHUFPD", SDTShuff3OpI>;
def X86Shufps : SDNode<"X86ISD::SHUFPS", SDTShuff3OpI>;
@@ -187,9 +153,11 @@ def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>;
// the top elements. These are used for the SSE 'ss' and 'sd' instruction
// forms.
def sse_load_f32 : ComplexPattern<v4f32, 5, "SelectScalarSSELoad", [],
- [SDNPHasChain, SDNPMayLoad]>;
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
+ SDNPWantRoot]>;
def sse_load_f64 : ComplexPattern<v2f64, 5, "SelectScalarSSELoad", [],
- [SDNPHasChain, SDNPMayLoad]>;
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
+ SDNPWantRoot]>;
def ssmem : Operand<v4f32> {
let PrintMethod = "printf32mem";
@@ -273,6 +241,7 @@ def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
+def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop node:$ptr))>;
def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
// 256-bit memop pattern fragments
@@ -289,10 +258,7 @@ def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() >= 8;
}]>;
-def memopv8i8 : PatFrag<(ops node:$ptr), (v8i8 (memop64 node:$ptr))>;
-def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>;
-def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop64 node:$ptr))>;
-def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>;
+def memopmmx : PatFrag<(ops node:$ptr), (x86mmx (memop64 node:$ptr))>;
// MOVNT Support
// Like 'store', but requires the non-temporal bit to be set
@@ -376,6 +342,18 @@ def SHUFFLE_get_palign_imm : SDNodeXForm<vector_shuffle, [{
return getI8Imm(X86::getShufflePALIGNRImmediate(N));
}]>;
+// EXTRACT_get_vextractf128_imm xform function: convert extract_subvector index
+// to VEXTRACTF128 imm.
+def EXTRACT_get_vextractf128_imm : SDNodeXForm<extract_subvector, [{
+ return getI8Imm(X86::getExtractVEXTRACTF128Immediate(N));
+}]>;
+
+// INSERT_get_vinsertf128_imm xform function: convert insert_subvector index to
+// VINSERTF128 imm.
+def INSERT_get_vinsertf128_imm : SDNodeXForm<insert_subvector, [{
+ return getI8Imm(X86::getInsertVINSERTF128Immediate(N));
+}]>;
+
def splat_lo : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
@@ -466,3 +444,16 @@ def palign : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isPALIGNRMask(cast<ShuffleVectorSDNode>(N));
}], SHUFFLE_get_palign_imm>;
+
+def vextractf128_extract : PatFrag<(ops node:$bigvec, node:$index),
+ (extract_subvector node:$bigvec,
+ node:$index), [{
+ return X86::isVEXTRACTF128Index(N);
+}], EXTRACT_get_vextractf128_imm>;
+
+def vinsertf128_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
+ node:$index),
+ (insert_subvector node:$bigvec, node:$smallvec,
+ node:$index), [{
+ return X86::isVINSERTF128Index(N);
+}], INSERT_get_vinsertf128_imm>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 5280940..ceb1b65 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -34,7 +34,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/MC/MCAsmInfo.h"
-
#include <limits>
using namespace llvm;
@@ -55,7 +54,11 @@ ReMatPICStubLoad("remat-pic-stub-load",
X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
: TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)),
TM(tm), RI(tm, *this) {
- SmallVector<unsigned,16> AmbEntries;
+ enum {
+ TB_NOT_REVERSABLE = 1U << 31,
+ TB_FLAGS = TB_NOT_REVERSABLE
+ };
+
static const unsigned OpTbl2Addr[][2] = {
{ X86::ADC32ri, X86::ADC32mi },
{ X86::ADC32ri8, X86::ADC32mi8 },
@@ -65,13 +68,22 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::ADC64rr, X86::ADC64mr },
{ X86::ADD16ri, X86::ADD16mi },
{ X86::ADD16ri8, X86::ADD16mi8 },
+ { X86::ADD16ri_DB, X86::ADD16mi | TB_NOT_REVERSABLE },
+ { X86::ADD16ri8_DB, X86::ADD16mi8 | TB_NOT_REVERSABLE },
{ X86::ADD16rr, X86::ADD16mr },
+ { X86::ADD16rr_DB, X86::ADD16mr | TB_NOT_REVERSABLE },
{ X86::ADD32ri, X86::ADD32mi },
{ X86::ADD32ri8, X86::ADD32mi8 },
+ { X86::ADD32ri_DB, X86::ADD32mi | TB_NOT_REVERSABLE },
+ { X86::ADD32ri8_DB, X86::ADD32mi8 | TB_NOT_REVERSABLE },
{ X86::ADD32rr, X86::ADD32mr },
+ { X86::ADD32rr_DB, X86::ADD32mr | TB_NOT_REVERSABLE },
{ X86::ADD64ri32, X86::ADD64mi32 },
{ X86::ADD64ri8, X86::ADD64mi8 },
+ { X86::ADD64ri32_DB,X86::ADD64mi32 | TB_NOT_REVERSABLE },
+ { X86::ADD64ri8_DB, X86::ADD64mi8 | TB_NOT_REVERSABLE },
{ X86::ADD64rr, X86::ADD64mr },
+ { X86::ADD64rr_DB, X86::ADD64mr | TB_NOT_REVERSABLE },
{ X86::ADD8ri, X86::ADD8mi },
{ X86::ADD8rr, X86::ADD8mr },
{ X86::AND16ri, X86::AND16mi },
@@ -216,16 +228,21 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
unsigned RegOp = OpTbl2Addr[i][0];
- unsigned MemOp = OpTbl2Addr[i][1];
- if (!RegOp2MemOpTable2Addr.insert(std::make_pair((unsigned*)RegOp,
- std::make_pair(MemOp,0))).second)
- assert(false && "Duplicated entries?");
+ unsigned MemOp = OpTbl2Addr[i][1] & ~TB_FLAGS;
+ assert(!RegOp2MemOpTable2Addr.count(RegOp) && "Duplicated entries?");
+ RegOp2MemOpTable2Addr[RegOp] = std::make_pair(MemOp, 0U);
+
+ // If this is not a reversable operation (because there is a many->one)
+ // mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
+ if (OpTbl2Addr[i][1] & TB_NOT_REVERSABLE)
+ continue;
+
// Index 0, folded load and store, no alignment requirement.
unsigned AuxInfo = 0 | (1 << 4) | (1 << 5);
- if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
- std::make_pair(RegOp,
- AuxInfo))).second)
- AmbEntries.push_back(MemOp);
+
+ assert(!MemOp2RegOpTable.count(MemOp) &&
+ "Duplicated entries in unfolding maps?");
+ MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
}
// If the third value is 1, then it's folding either a load or a store.
@@ -252,8 +269,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::DIV64r, X86::DIV64m, 1, 0 },
{ X86::DIV8r, X86::DIV8m, 1, 0 },
{ X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0, 16 },
- { X86::FsMOVAPDrr, X86::MOVSDmr, 0, 0 },
- { X86::FsMOVAPSrr, X86::MOVSSmr, 0, 0 },
+ { X86::FsMOVAPDrr, X86::MOVSDmr | TB_NOT_REVERSABLE , 0, 0 },
+ { X86::FsMOVAPSrr, X86::MOVSSmr | TB_NOT_REVERSABLE , 0, 0 },
{ X86::IDIV16r, X86::IDIV16m, 1, 0 },
{ X86::IDIV32r, X86::IDIV32m, 1, 0 },
{ X86::IDIV64r, X86::IDIV64m, 1, 0 },
@@ -268,7 +285,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::MOV16rr, X86::MOV16mr, 0, 0 },
{ X86::MOV32ri, X86::MOV32mi, 0, 0 },
{ X86::MOV32rr, X86::MOV32mr, 0, 0 },
- { X86::MOV32rr_TC, X86::MOV32mr_TC, 0, 0 },
{ X86::MOV64ri32, X86::MOV64mi32, 0, 0 },
{ X86::MOV64rr, X86::MOV64mr, 0, 0 },
{ X86::MOV8ri, X86::MOV8mi, 0, 0 },
@@ -312,19 +328,22 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
};
for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
- unsigned RegOp = OpTbl0[i][0];
- unsigned MemOp = OpTbl0[i][1];
- unsigned Align = OpTbl0[i][3];
- if (!RegOp2MemOpTable0.insert(std::make_pair((unsigned*)RegOp,
- std::make_pair(MemOp,Align))).second)
- assert(false && "Duplicated entries?");
+ unsigned RegOp = OpTbl0[i][0];
+ unsigned MemOp = OpTbl0[i][1] & ~TB_FLAGS;
unsigned FoldedLoad = OpTbl0[i][2];
+ unsigned Align = OpTbl0[i][3];
+ assert(!RegOp2MemOpTable0.count(RegOp) && "Duplicated entries?");
+ RegOp2MemOpTable0[RegOp] = std::make_pair(MemOp, Align);
+
+ // If this is not a reversable operation (because there is a many->one)
+ // mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
+ if (OpTbl0[i][1] & TB_NOT_REVERSABLE)
+ continue;
+
// Index 0, folded load or store.
unsigned AuxInfo = 0 | (FoldedLoad << 4) | ((FoldedLoad^1) << 5);
- if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr)
- if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
- std::make_pair(RegOp, AuxInfo))).second)
- AmbEntries.push_back(MemOp);
+ assert(!MemOp2RegOpTable.count(MemOp) && "Duplicated entries?");
+ MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
}
static const unsigned OpTbl1[][3] = {
@@ -342,8 +361,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 },
{ X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 },
{ X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 },
- { X86::FsMOVAPDrr, X86::MOVSDrm, 0 },
- { X86::FsMOVAPSrr, X86::MOVSSrm, 0 },
+ { X86::FsMOVAPDrr, X86::MOVSDrm | TB_NOT_REVERSABLE , 0 },
+ { X86::FsMOVAPSrr, X86::MOVSSrm | TB_NOT_REVERSABLE , 0 },
{ X86::IMUL16rri, X86::IMUL16rmi, 0 },
{ X86::IMUL16rri8, X86::IMUL16rmi8, 0 },
{ X86::IMUL32rri, X86::IMUL32rmi, 0 },
@@ -360,8 +379,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, 16 },
{ X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, 16 },
{ X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 },
- { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm, 0 },
- { X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm, 0 },
+ { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 },
+ { X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
{ X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
{ X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
{ X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
@@ -370,8 +389,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
{ X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm, 0 },
{ X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm, 0 },
- { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm, 16 },
- { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm, 16 },
+ { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, 16 },
+ { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, 16 },
{ X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
{ X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 },
{ X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 },
@@ -380,7 +399,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, 0 },
{ X86::MOV16rr, X86::MOV16rm, 0 },
{ X86::MOV32rr, X86::MOV32rm, 0 },
- { X86::MOV32rr_TC, X86::MOV32rm_TC, 0 },
{ X86::MOV64rr, X86::MOV64rm, 0 },
{ X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 },
{ X86::MOV64toSDrr, X86::MOV64toSDrm, 0 },
@@ -439,25 +457,31 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
unsigned RegOp = OpTbl1[i][0];
- unsigned MemOp = OpTbl1[i][1];
+ unsigned MemOp = OpTbl1[i][1] & ~TB_FLAGS;
unsigned Align = OpTbl1[i][2];
- if (!RegOp2MemOpTable1.insert(std::make_pair((unsigned*)RegOp,
- std::make_pair(MemOp,Align))).second)
- assert(false && "Duplicated entries?");
+ assert(!RegOp2MemOpTable1.count(RegOp) && "Duplicate entries");
+ RegOp2MemOpTable1[RegOp] = std::make_pair(MemOp, Align);
+
+ // If this is not a reversable operation (because there is a many->one)
+ // mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
+ if (OpTbl1[i][1] & TB_NOT_REVERSABLE)
+ continue;
+
// Index 1, folded load
unsigned AuxInfo = 1 | (1 << 4);
- if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr)
- if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
- std::make_pair(RegOp, AuxInfo))).second)
- AmbEntries.push_back(MemOp);
+ assert(!MemOp2RegOpTable.count(MemOp) && "Duplicate entries");
+ MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
}
static const unsigned OpTbl2[][3] = {
{ X86::ADC32rr, X86::ADC32rm, 0 },
{ X86::ADC64rr, X86::ADC64rm, 0 },
{ X86::ADD16rr, X86::ADD16rm, 0 },
+ { X86::ADD16rr_DB, X86::ADD16rm | TB_NOT_REVERSABLE, 0 },
{ X86::ADD32rr, X86::ADD32rm, 0 },
+ { X86::ADD32rr_DB, X86::ADD32rm | TB_NOT_REVERSABLE, 0 },
{ X86::ADD64rr, X86::ADD64rm, 0 },
+ { X86::ADD64rr_DB, X86::ADD64rm | TB_NOT_REVERSABLE, 0 },
{ X86::ADD8rr, X86::ADD8rm, 0 },
{ X86::ADDPDrr, X86::ADDPDrm, 16 },
{ X86::ADDPSrr, X86::ADDPSrm, 16 },
@@ -652,20 +676,23 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
unsigned RegOp = OpTbl2[i][0];
- unsigned MemOp = OpTbl2[i][1];
+ unsigned MemOp = OpTbl2[i][1] & ~TB_FLAGS;
unsigned Align = OpTbl2[i][2];
- if (!RegOp2MemOpTable2.insert(std::make_pair((unsigned*)RegOp,
- std::make_pair(MemOp,Align))).second)
- assert(false && "Duplicated entries?");
+
+ assert(!RegOp2MemOpTable2.count(RegOp) && "Duplicate entry!");
+ RegOp2MemOpTable2[RegOp] = std::make_pair(MemOp, Align);
+
+ // If this is not a reversable operation (because there is a many->one)
+ // mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
+ if (OpTbl2[i][1] & TB_NOT_REVERSABLE)
+ continue;
+
// Index 2, folded load
unsigned AuxInfo = 2 | (1 << 4);
- if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
- std::make_pair(RegOp, AuxInfo))).second)
- AmbEntries.push_back(MemOp);
+ assert(!MemOp2RegOpTable.count(MemOp) &&
+ "Duplicated entries in unfolding maps?");
+ MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
}
-
- // Remove ambiguous entries.
- assert(AmbEntries.empty() && "Duplicated entries in unfolding maps?");
}
bool
@@ -745,9 +772,7 @@ static bool isFrameLoadOpcode(int Opcode) {
case X86::MOV8rm:
case X86::MOV16rm:
case X86::MOV32rm:
- case X86::MOV32rm_TC:
case X86::MOV64rm:
- case X86::MOV64rm_TC:
case X86::LD_Fp64m:
case X86::MOVSSrm:
case X86::MOVSDrm:
@@ -768,9 +793,7 @@ static bool isFrameStoreOpcode(int Opcode) {
case X86::MOV8mr:
case X86::MOV16mr:
case X86::MOV32mr:
- case X86::MOV32mr_TC:
case X86::MOV64mr:
- case X86::MOV64mr_TC:
case X86::ST_FpP64m:
case X86::MOVSSmr:
case X86::MOVSDmr:
@@ -785,7 +808,7 @@ static bool isFrameStoreOpcode(int Opcode) {
return false;
}
-unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
if (isFrameLoadOpcode(MI->getOpcode()))
if (MI->getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
@@ -793,7 +816,7 @@ unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
return 0;
}
-unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
+unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const {
if (isFrameLoadOpcode(MI->getOpcode())) {
unsigned Reg;
@@ -923,10 +946,10 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
isPICBase = true;
}
return isPICBase;
- }
+ }
return false;
}
-
+
case X86::LEA32r:
case X86::LEA64r: {
if (MI->getOperand(2).isImm() &&
@@ -1099,11 +1122,11 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
? X86::LEA64_32r : X86::LEA32r;
MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
- unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+ unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
-
+
// Build and insert into an implicit UNDEF value. This is OK because
- // well be shifting and then extracting the lower 16-bits.
+ // well be shifting and then extracting the lower 16-bits.
// This has the potential to cause partial register stall. e.g.
// movw (%rbp,%rcx,2), %dx
// leal -65(%rdx), %esi
@@ -1137,9 +1160,12 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
break;
case X86::ADD16ri:
case X86::ADD16ri8:
- addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
+ case X86::ADD16ri_DB:
+ case X86::ADD16ri8_DB:
+ addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
break;
- case X86::ADD16rr: {
+ case X86::ADD16rr:
+ case X86::ADD16rr_DB: {
unsigned Src2 = MI->getOperand(2).getReg();
bool isKill2 = MI->getOperand(2).isKill();
unsigned leaInReg2 = 0;
@@ -1149,9 +1175,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
// just a single insert_subreg.
addRegReg(MIB, leaInReg, true, leaInReg, false);
} else {
- leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+ leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
// Build and insert into an implicit UNDEF value. This is OK because
- // well be shifting and then extracting the lower 16-bits.
+ // well be shifting and then extracting the lower 16-bits.
BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
InsMI2 =
BuildMI(*MFI, MIB, MI->getDebugLoc(), get(TargetOpcode::COPY))
@@ -1218,7 +1244,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::SHUFPSrri: {
assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!");
if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0;
-
+
unsigned B = MI->getOperand(1).getReg();
unsigned C = MI->getOperand(2).getReg();
if (B != C) return 0;
@@ -1236,6 +1262,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned ShAmt = MI->getOperand(2).getImm();
if (ShAmt == 0 || ShAmt >= 4) return 0;
+ // LEA can't handle RSP.
+ if (TargetRegisterInfo::isVirtualRegister(Src) &&
+ !MF.getRegInfo().constrainRegClass(Src, &X86::GR64_NOSPRegClass))
+ return 0;
+
NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
.addReg(Dest, RegState::Define | getDeadRegState(isDead))
.addReg(0).addImm(1 << ShAmt)
@@ -1250,6 +1281,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned ShAmt = MI->getOperand(2).getImm();
if (ShAmt == 0 || ShAmt >= 4) return 0;
+ // LEA can't handle ESP.
+ if (TargetRegisterInfo::isVirtualRegister(Src) &&
+ !MF.getRegInfo().constrainRegClass(Src, &X86::GR32_NOSPRegClass))
+ return 0;
+
unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addReg(Dest, RegState::Define | getDeadRegState(isDead))
@@ -1288,6 +1324,14 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
: (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+
+ // LEA can't handle RSP.
+ if (TargetRegisterInfo::isVirtualRegister(Src) &&
+ !MF.getRegInfo().constrainRegClass(Src,
+ MIOpc == X86::INC64r ? X86::GR64_NOSPRegisterClass :
+ X86::GR32_NOSPRegisterClass))
+ return 0;
+
NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addReg(Dest, RegState::Define |
getDeadRegState(isDead)),
@@ -1310,6 +1354,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
: (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+ // LEA can't handle RSP.
+ if (TargetRegisterInfo::isVirtualRegister(Src) &&
+ !MF.getRegInfo().constrainRegClass(Src,
+ MIOpc == X86::DEC64r ? X86::GR64_NOSPRegisterClass :
+ X86::GR32_NOSPRegisterClass))
+ return 0;
+
NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addReg(Dest, RegState::Define |
getDeadRegState(isDead)),
@@ -1327,12 +1378,29 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
Src, isKill, -1);
break;
case X86::ADD64rr:
- case X86::ADD32rr: {
+ case X86::ADD64rr_DB:
+ case X86::ADD32rr:
+ case X86::ADD32rr_DB: {
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
- unsigned Opc = MIOpc == X86::ADD64rr ? X86::LEA64r
- : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+ unsigned Opc;
+ TargetRegisterClass *RC;
+ if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB) {
+ Opc = X86::LEA64r;
+ RC = X86::GR64_NOSPRegisterClass;
+ } else {
+ Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ RC = X86::GR32_NOSPRegisterClass;
+ }
+
+
unsigned Src2 = MI->getOperand(2).getReg();
bool isKill2 = MI->getOperand(2).isKill();
+
+ // LEA can't handle RSP.
+ if (TargetRegisterInfo::isVirtualRegister(Src2) &&
+ !MF.getRegInfo().constrainRegClass(Src2, RC))
+ return 0;
+
NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addReg(Dest, RegState::Define |
getDeadRegState(isDead)),
@@ -1341,7 +1409,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
LV->replaceKillInstruction(Src2, MI, NewMI);
break;
}
- case X86::ADD16rr: {
+ case X86::ADD16rr:
+ case X86::ADD16rr_DB: {
if (DisableLEA16)
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
@@ -1357,6 +1426,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
}
case X86::ADD64ri32:
case X86::ADD64ri8:
+ case X86::ADD64ri32_DB:
+ case X86::ADD64ri8_DB:
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
.addReg(Dest, RegState::Define |
@@ -1364,7 +1435,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
Src, isKill, MI->getOperand(2).getImm());
break;
case X86::ADD32ri:
- case X86::ADD32ri8: {
+ case X86::ADD32ri8:
+ case X86::ADD32ri_DB:
+ case X86::ADD32ri8_DB: {
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
@@ -1375,6 +1448,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
}
case X86::ADD16ri:
case X86::ADD16ri8:
+ case X86::ADD16ri_DB:
+ case X86::ADD16ri8_DB:
if (DisableLEA16)
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
@@ -1396,7 +1471,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
LV->replaceKillInstruction(Dest, MI, NewMI);
}
- MFI->insert(MBBI, NewMI); // Insert the new inst
+ MFI->insert(MBBI, NewMI); // Insert the new inst
return NewMI;
}
@@ -1617,7 +1692,7 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
const TargetInstrDesc &TID = MI->getDesc();
if (!TID.isTerminator()) return false;
-
+
// Conditional branch is a special case.
if (TID.isBranch() && !TID.isBarrier())
return true;
@@ -1626,7 +1701,7 @@ bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
return !isPredicated(MI);
}
-bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
@@ -1787,7 +1862,7 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
I = MBB.end();
++Count;
}
-
+
return Count;
}
@@ -1945,13 +2020,23 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
default:
llvm_unreachable("Unknown regclass");
case X86::GR64RegClassID:
+ case X86::GR64_ABCDRegClassID:
+ case X86::GR64_NOREXRegClassID:
+ case X86::GR64_NOREX_NOSPRegClassID:
case X86::GR64_NOSPRegClassID:
+ case X86::GR64_TCRegClassID:
+ case X86::GR64_TCW64RegClassID:
return load ? X86::MOV64rm : X86::MOV64mr;
case X86::GR32RegClassID:
- case X86::GR32_NOSPRegClassID:
+ case X86::GR32_ABCDRegClassID:
case X86::GR32_ADRegClassID:
+ case X86::GR32_NOREXRegClassID:
+ case X86::GR32_NOSPRegClassID:
+ case X86::GR32_TCRegClassID:
return load ? X86::MOV32rm : X86::MOV32mr;
case X86::GR16RegClassID:
+ case X86::GR16_ABCDRegClassID:
+ case X86::GR16_NOREXRegClassID:
return load ? X86::MOV16rm : X86::MOV16mr;
case X86::GR8RegClassID:
// Copying to or from a physical H register on x86-64 requires a NOREX
@@ -1961,32 +2046,14 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
else
return load ? X86::MOV8rm : X86::MOV8mr;
- case X86::GR64_ABCDRegClassID:
- return load ? X86::MOV64rm : X86::MOV64mr;
- case X86::GR32_ABCDRegClassID:
- return load ? X86::MOV32rm : X86::MOV32mr;
- case X86::GR16_ABCDRegClassID:
- return load ? X86::MOV16rm : X86::MOV16mr;
case X86::GR8_ABCD_LRegClassID:
+ case X86::GR8_NOREXRegClassID:
return load ? X86::MOV8rm :X86::MOV8mr;
case X86::GR8_ABCD_HRegClassID:
if (TM.getSubtarget<X86Subtarget>().is64Bit())
return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
else
return load ? X86::MOV8rm : X86::MOV8mr;
- case X86::GR64_NOREXRegClassID:
- case X86::GR64_NOREX_NOSPRegClassID:
- return load ? X86::MOV64rm : X86::MOV64mr;
- case X86::GR32_NOREXRegClassID:
- return load ? X86::MOV32rm : X86::MOV32mr;
- case X86::GR16_NOREXRegClassID:
- return load ? X86::MOV16rm : X86::MOV16mr;
- case X86::GR8_NOREXRegClassID:
- return load ? X86::MOV8rm : X86::MOV8mr;
- case X86::GR64_TCRegClassID:
- return load ? X86::MOV64rm_TC : X86::MOV64mr_TC;
- case X86::GR32_TCRegClassID:
- return load ? X86::MOV32rm_TC : X86::MOV32mr_TC;
case X86::RFP80RegClassID:
return load ? X86::LD_Fp80m : X86::ST_FpP80m;
case X86::RFP64RegClassID:
@@ -2085,76 +2152,6 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
NewMIs.push_back(MIB);
}
-bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- if (CSI.empty())
- return false;
-
- DebugLoc DL = MBB.findDebugLoc(MI);
-
- bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
- bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64();
- unsigned SlotSize = is64Bit ? 8 : 4;
-
- MachineFunction &MF = *MBB.getParent();
- unsigned FPReg = RI.getFrameRegister(MF);
- X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- unsigned CalleeFrameSize = 0;
-
- unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r;
- for (unsigned i = CSI.size(); i != 0; --i) {
- unsigned Reg = CSI[i-1].getReg();
- // Add the callee-saved register as live-in. It's killed at the spill.
- MBB.addLiveIn(Reg);
- if (Reg == FPReg)
- // X86RegisterInfo::emitPrologue will handle spilling of frame register.
- continue;
- if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
- CalleeFrameSize += SlotSize;
- BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill);
- } else {
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
- RC, &RI);
- }
- }
-
- X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
- return true;
-}
-
-bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- if (CSI.empty())
- return false;
-
- DebugLoc DL = MBB.findDebugLoc(MI);
-
- MachineFunction &MF = *MBB.getParent();
- unsigned FPReg = RI.getFrameRegister(MF);
- bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
- bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64();
- unsigned Opc = is64Bit ? X86::POP64r : X86::POP32r;
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- if (Reg == FPReg)
- // X86RegisterInfo::emitEpilogue will handle restoring of frame register.
- continue;
- if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
- BuildMI(MBB, MI, DL, get(Opc), Reg);
- } else {
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
- RC, &RI);
- }
- }
- return true;
-}
-
MachineInstr*
X86InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
int FrameIx, uint64_t Offset,
@@ -2181,7 +2178,7 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
MIB.addOperand(MOs[i]);
if (NumAddrOps < 4) // FrameIndex only
addOffset(MIB, 0);
-
+
// Loop over the rest of the ri operands, converting them over.
unsigned NumOps = MI->getDesc().getNumOperands()-2;
for (unsigned i = 0; i != NumOps; ++i) {
@@ -2202,7 +2199,7 @@ static MachineInstr *FuseInst(MachineFunction &MF,
MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
MI->getDebugLoc(), true);
MachineInstrBuilder MIB(NewMI);
-
+
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (i == OpNo) {
@@ -2238,7 +2235,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI, unsigned i,
const SmallVectorImpl<MachineOperand> &MOs,
unsigned Size, unsigned Align) const {
- const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL;
+ const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0;
bool isTwoAddrFold = false;
unsigned NumOps = MI->getDesc().getNumOperands();
bool isTwoAddr = NumOps > 1 &&
@@ -2251,7 +2248,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
if (isTwoAddr && NumOps >= 2 && i < 2 &&
MI->getOperand(0).isReg() &&
MI->getOperand(1).isReg() &&
- MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) {
+ MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) {
OpcodeTablePtr = &RegOp2MemOpTable2Addr;
isTwoAddrFold = true;
} else if (i == 0) { // If operand 0
@@ -2265,19 +2262,19 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI);
if (NewMI)
return NewMI;
-
+
OpcodeTablePtr = &RegOp2MemOpTable0;
} else if (i == 1) {
OpcodeTablePtr = &RegOp2MemOpTable1;
} else if (i == 2) {
OpcodeTablePtr = &RegOp2MemOpTable2;
}
-
+
// If table selected...
if (OpcodeTablePtr) {
// Find the Opcode to fuse
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
- OpcodeTablePtr->find((unsigned*)MI->getOpcode());
+ DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+ OpcodeTablePtr->find(MI->getOpcode());
if (I != OpcodeTablePtr->end()) {
unsigned Opcode = I->second.first;
unsigned MinAlign = I->second.second;
@@ -2320,8 +2317,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
return NewMI;
}
}
-
- // No fusion
+
+ // No fusion
if (PrintFailedFusing && !MI->isCopy())
dbgs() << "We failed to fuse operand " << i << " in " << *MI;
return NULL;
@@ -2332,7 +2329,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
int FrameIndex) const {
- // Check switch flag
+ // Check switch flag
if (NoFusing) return NULL;
if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
@@ -2343,8 +2340,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
case X86::Int_CVTSS2SDrr:
case X86::RCPSSr:
case X86::RCPSSr_Int:
- case X86::ROUNDSDr_Int:
- case X86::ROUNDSSr_Int:
+ case X86::ROUNDSDr:
+ case X86::ROUNDSSr:
case X86::RSQRTSSr:
case X86::RSQRTSSr_Int:
case X86::SQRTSSr:
@@ -2384,7 +2381,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
MachineInstr *LoadMI) const {
- // Check switch flag
+ // Check switch flag
if (NoFusing) return NULL;
if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
@@ -2395,8 +2392,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
case X86::Int_CVTSS2SDrr:
case X86::RCPSSr:
case X86::RCPSSr_Int:
- case X86::ROUNDSDr_Int:
- case X86::ROUNDSSr_Int:
+ case X86::ROUNDSDr:
+ case X86::ROUNDSSr:
case X86::RSQRTSSr:
case X86::RSQRTSSr_Int:
case X86::SQRTSSr:
@@ -2424,9 +2421,11 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
Alignment = 16;
break;
case X86::FsFLD0SD:
+ case X86::VFsFLD0SD:
Alignment = 8;
break;
case X86::FsFLD0SS:
+ case X86::VFsFLD0SS:
Alignment = 4;
break;
default:
@@ -2490,9 +2489,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineConstantPool &MCP = *MF.getConstantPool();
const Type *Ty;
unsigned Opc = LoadMI->getOpcode();
- if (Opc == X86::FsFLD0SS)
+ if (Opc == X86::FsFLD0SS || Opc == X86::VFsFLD0SS)
Ty = Type::getFloatTy(MF.getFunction()->getContext());
- else if (Opc == X86::FsFLD0SD)
+ else if (Opc == X86::FsFLD0SD || Opc == X86::VFsFLD0SD)
Ty = Type::getDoubleTy(MF.getFunction()->getContext());
else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY)
Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
@@ -2525,13 +2524,13 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops) const {
- // Check switch flag
+ // Check switch flag
if (NoFusing) return 0;
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
switch (MI->getOpcode()) {
default: return false;
- case X86::TEST8rr:
+ case X86::TEST8rr:
case X86::TEST16rr:
case X86::TEST32rr:
case X86::TEST64rr:
@@ -2551,16 +2550,15 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
// Folding a memory location into the two-address part of a two-address
// instruction is different than folding it other places. It requires
// replacing the *two* registers with the memory location.
- const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL;
- if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
+ const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0;
+ if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
OpcodeTablePtr = &RegOp2MemOpTable2Addr;
} else if (OpNum == 0) { // If operand 0
switch (Opc) {
case X86::MOV8r0:
case X86::MOV16r0:
case X86::MOV32r0:
- case X86::MOV64r0:
- return true;
+ case X86::MOV64r0: return true;
default: break;
}
OpcodeTablePtr = &RegOp2MemOpTable0;
@@ -2569,22 +2567,17 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
} else if (OpNum == 2) {
OpcodeTablePtr = &RegOp2MemOpTable2;
}
-
- if (OpcodeTablePtr) {
- // Find the Opcode to fuse
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
- OpcodeTablePtr->find((unsigned*)Opc);
- if (I != OpcodeTablePtr->end())
- return true;
- }
+
+ if (OpcodeTablePtr && OpcodeTablePtr->count(Opc))
+ return true;
return TargetInstrInfoImpl::canFoldMemoryOperand(MI, Ops);
}
bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
- MemOp2RegOpTable.find((unsigned*)MI->getOpcode());
+ DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+ MemOp2RegOpTable.find(MI->getOpcode());
if (I == MemOp2RegOpTable.end())
return false;
unsigned Opc = I->second.first;
@@ -2644,7 +2637,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
// Emit the data processing instruction.
MachineInstr *DataMI = MF.CreateMachineInstr(TID, MI->getDebugLoc(), true);
MachineInstrBuilder MIB(DataMI);
-
+
if (FoldedStore)
MIB.addReg(Reg, RegState::Define);
for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i)
@@ -2712,8 +2705,8 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
if (!N->isMachineOpcode())
return false;
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
- MemOp2RegOpTable.find((unsigned*)N->getMachineOpcode());
+ DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+ MemOp2RegOpTable.find(N->getMachineOpcode());
if (I == MemOp2RegOpTable.end())
return false;
unsigned Opc = I->second.first;
@@ -2813,8 +2806,8 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
bool UnfoldLoad, bool UnfoldStore,
unsigned *LoadRegIndex) const {
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
- MemOp2RegOpTable.find((unsigned*)Opc);
+ DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+ MemOp2RegOpTable.find(Opc);
if (I == MemOp2RegOpTable.end())
return 0;
bool FoldedLoad = I->second.second & (1 << 4);
@@ -2993,6 +2986,8 @@ bool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) {
case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11:
case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15:
+ case X86::CR8: case X86::CR9: case X86::CR10: case X86::CR11:
+ case X86::CR12: case X86::CR13: case X86::CR14: case X86::CR15:
return true;
}
return false;
@@ -3090,6 +3085,41 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
NopInst.setOpcode(X86::NOOP);
}
+bool X86InstrInfo::
+hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ switch (DefMI->getOpcode()) {
+ default: return false;
+ case X86::DIVSDrm:
+ case X86::DIVSDrm_Int:
+ case X86::DIVSDrr:
+ case X86::DIVSDrr_Int:
+ case X86::DIVSSrm:
+ case X86::DIVSSrm_Int:
+ case X86::DIVSSrr:
+ case X86::DIVSSrr_Int:
+ case X86::SQRTPDm:
+ case X86::SQRTPDm_Int:
+ case X86::SQRTPDr:
+ case X86::SQRTPDr_Int:
+ case X86::SQRTPSm:
+ case X86::SQRTPSm_Int:
+ case X86::SQRTPSr:
+ case X86::SQRTPSr_Int:
+ case X86::SQRTSDm:
+ case X86::SQRTSDm_Int:
+ case X86::SQRTSDr:
+ case X86::SQRTSDr_Int:
+ case X86::SQRTSSm:
+ case X86::SQRTSSm_Int:
+ case X86::SQRTSSr:
+ case X86::SQRTSSr_Int:
+ return true;
+ }
+}
+
namespace {
/// CGBR - Create Global Base Reg pass. This initializes the PIC
/// global base register for x86-32.
@@ -3108,6 +3138,13 @@ namespace {
if (TM->getRelocationModel() != Reloc::PIC_)
return false;
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
+
+ // If we didn't need a GlobalBaseReg, don't insert code.
+ if (GlobalBaseReg == 0)
+ return false;
+
// Insert the set of GlobalBaseReg into the first MBB of the function
MachineBasicBlock &FirstMBB = MF.front();
MachineBasicBlock::iterator MBBI = FirstMBB.begin();
@@ -3119,16 +3156,15 @@ namespace {
if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT())
PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
else
- PC = TII->getGlobalBaseReg(&MF);
-
+ PC = GlobalBaseReg;
+
// Operand of MovePCtoStack is completely ignored by asm printer. It's
// only used in JIT code emission as displacement to pc.
BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
-
+
// If we're using vanilla 'GOT' PIC style, we should use relative addressing
// not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) {
- unsigned GlobalBaseReg = TII->getGlobalBaseReg(&MF);
// Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
.addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index f336206..1d44207 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -174,7 +174,7 @@ namespace X86II {
/// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the
/// reference is actually to the "FOO$stub" symbol. This is used for calls
- /// and jumps to external functions on Tiger and before.
+ /// and jumps to external functions on Tiger and earlier.
MO_DARWIN_STUB,
/// MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the
@@ -311,12 +311,17 @@ namespace X86II {
MRM_F0 = 40,
MRM_F8 = 41,
MRM_F9 = 42,
+
+ /// RawFrmImm8 - This is used for the ENTER instruction, which has two
+ /// immediates, the first of which is a 16-bit immediate (specified by
+ /// the imm encoding) and the second is a 8-bit fixed value.
+ RawFrmImm8 = 43,
/// RawFrmImm16 - This is used for CALL FAR instructions, which have two
/// immediates, the first of which is a 16 or 32-bit immediate (specified by
/// the imm encoding) and the second is a 16-bit fixed value. In the AMD
/// manual, this operand is described as pntr16:32 and pntr16:16
- RawFrmImm16 = 43,
+ RawFrmImm16 = 44,
FormMask = 63,
@@ -444,28 +449,36 @@ namespace X86II {
OpcodeMask = 0xFF << OpcodeShift,
//===------------------------------------------------------------------===//
- // VEX - The opcode prefix used by AVX instructions
+ /// VEX - The opcode prefix used by AVX instructions
VEX = 1U << 0,
- // VEX_W - Has a opcode specific functionality, but is used in the same
- // way as REX_W is for regular SSE instructions.
+ /// VEX_W - Has a opcode specific functionality, but is used in the same
+ /// way as REX_W is for regular SSE instructions.
VEX_W = 1U << 1,
- // VEX_4V - Used to specify an additional AVX/SSE register. Several 2
- // address instructions in SSE are represented as 3 address ones in AVX
- // and the additional register is encoded in VEX_VVVV prefix.
+ /// VEX_4V - Used to specify an additional AVX/SSE register. Several 2
+ /// address instructions in SSE are represented as 3 address ones in AVX
+ /// and the additional register is encoded in VEX_VVVV prefix.
VEX_4V = 1U << 2,
- // VEX_I8IMM - Specifies that the last register used in a AVX instruction,
- // must be encoded in the i8 immediate field. This usually happens in
- // instructions with 4 operands.
+ /// VEX_I8IMM - Specifies that the last register used in a AVX instruction,
+ /// must be encoded in the i8 immediate field. This usually happens in
+ /// instructions with 4 operands.
VEX_I8IMM = 1U << 3,
- // VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
- // instruction uses 256-bit wide registers. This is usually auto detected if
- // a VR256 register is used, but some AVX instructions also have this field
- // marked when using a f256 memory references.
- VEX_L = 1U << 4
+ /// VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
+ /// instruction uses 256-bit wide registers. This is usually auto detected
+ /// if a VR256 register is used, but some AVX instructions also have this
+ /// field marked when using a f256 memory references.
+ VEX_L = 1U << 4,
+
+ /// Has3DNow0F0FOpcode - This flag indicates that the instruction uses the
+ /// wacky 0x0F 0x0F prefix for 3DNow! instructions. The manual documents
+ /// this as having a 0x0F prefix with a 0x0F opcode, and each instruction
+ /// storing a classifier in the imm8 field. To simplify our implementation,
+ /// we handle this by storeing the classifier in the opcode field and using
+ /// this flag to indicate that the encoder should do the wacky 3DNow! thing.
+ Has3DNow0F0FOpcode = 1U << 5
};
// getBaseOpcodeFor - This function returns the "base" X86 opcode for the
@@ -528,6 +541,7 @@ namespace X86II {
case X86II::AddRegFrm:
case X86II::MRMDestReg:
case X86II::MRMSrcReg:
+ case X86II::RawFrmImm8:
case X86II::RawFrmImm16:
return -1;
case X86II::MRMDestMem:
@@ -599,14 +613,14 @@ class X86InstrInfo : public TargetInstrInfoImpl {
/// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1,
/// RegOp2MemOpTable2 - Load / store folding opcode maps.
///
- DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable2Addr;
- DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable0;
- DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable1;
- DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable2;
+ DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable2Addr;
+ DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable0;
+ DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable1;
+ DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable2;
/// MemOp2RegOpTable - Load / store unfolding opcode map.
///
- DenseMap<unsigned*, std::pair<unsigned, unsigned> > MemOp2RegOpTable;
+ DenseMap<unsigned, std::pair<unsigned, unsigned> > MemOp2RegOpTable;
public:
explicit X86InstrInfo(X86TargetMachine &tm);
@@ -728,17 +742,6 @@ public:
MachineInstr::mmo_iterator MMOBegin,
MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const;
-
- virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
-
- virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
-
virtual
MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
int FrameIx, uint64_t Offset,
@@ -845,18 +848,23 @@ public:
/// SetSSEDomain - Set the SSEDomain of MI.
void SetSSEDomain(MachineInstr *MI, unsigned Domain) const;
+ MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ unsigned OpNum,
+ const SmallVectorImpl<MachineOperand> &MOs,
+ unsigned Size, unsigned Alignment) const;
+
+ bool hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const;
+
private:
MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
MachineFunction::iterator &MFI,
MachineBasicBlock::iterator &MBBI,
LiveVariables *LV) const;
- MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- unsigned OpNum,
- const SmallVectorImpl<MachineOperand> &MOs,
- unsigned Size, unsigned Alignment) const;
-
/// isFrameOperand - Return true and the FrameIndex if the specified
/// operand and follow operands form a reference to the stack frame.
bool isFrameOperand(const MachineInstr *MI, unsigned int Op,
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 09b7721..87dc4be 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -1,10 +1,10 @@
-//===----------------------------------------------------------------------===//
-//
+//===- X86InstrInfo.td - Main X86 Instruction Definition ---*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 instruction set, defining the instructions, and
@@ -35,6 +35,20 @@ def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
[SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>,
SDTCisInt<0>, SDTCisVT<1, i32>]>;
+
+// SDTBinaryArithWithFlagsInOut - RES1, EFLAGS = op LHS, RHS, EFLAGS
+def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
+ [SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>,
+ SDTCisVT<1, i32>,
+ SDTCisVT<4, i32>]>;
+// RES1, RES2, FLAGS = op LHS, RHS
+def SDT2ResultBinaryArithWithFlags : SDTypeProfile<3, 2,
+ [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>, SDTCisVT<1, i32>]>;
def SDTX86BrCond : SDTypeProfile<0, 3,
[SDTCisVT<0, OtherVT>,
SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
@@ -46,7 +60,7 @@ def SDTX86SetCC_C : SDTypeProfile<1, 2,
[SDTCisInt<0>,
SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
-def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>,
+def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>,
SDTCisVT<2, i8>]>;
def SDTX86cas8 : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
@@ -64,6 +78,12 @@ def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>,
SDTCisVT<1, iPTR>,
SDTCisVT<2, iPTR>]>;
+def SDT_X86VAARG_64 : SDTypeProfile<1, -1, [SDTCisPtrTy<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, i32>,
+ SDTCisVT<3, i8>,
+ SDTCisVT<4, i32>]>;
+
def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
def SDTX86Void : SDTypeProfile<0, 0, []>;
@@ -72,9 +92,7 @@ def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
def SDT_X86TLSADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
-def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
-
-def SDT_X86SegmentBaseAddress : SDTypeProfile<1, 1, [SDTCisPtrTy<0>]>;
+def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
@@ -110,82 +128,85 @@ def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>;
def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC_C>;
def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
- [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
- SDNPMayLoad]>;
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86cas8,
- [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
- SDNPMayLoad]>;
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
def X86AtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
+ [SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86AtomSub64 : SDNode<"X86ISD::ATOMSUB64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
+ [SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86AtomOr64 : SDNode<"X86ISD::ATOMOR64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
+ [SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86AtomXor64 : SDNode<"X86ISD::ATOMXOR64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
+ [SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86AtomAnd64 : SDNode<"X86ISD::ATOMAND64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
+ [SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86AtomNand64 : SDNode<"X86ISD::ATOMNAND64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
+ [SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86AtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
+ [SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
- [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def X86vastart_save_xmm_regs :
SDNode<"X86ISD::VASTART_SAVE_XMM_REGS",
SDT_X86VASTART_SAVE_XMM_REGS,
[SDNPHasChain, SDNPVariadic]>;
-
+def X86vaarg64 :
+ SDNode<"X86ISD::VAARG_64", SDT_X86VAARG_64,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore,
+ SDNPMemOperand]>;
def X86callseq_start :
SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart,
- [SDNPHasChain, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOutGlue]>;
def X86callseq_end :
SDNode<"ISD::CALLSEQ_END", SDT_X86CallSeqEnd,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def X86call : SDNode<"X86ISD::CALL", SDT_X86Call,
- [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
SDNPVariadic]>;
def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr,
- [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore]>;
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore]>;
def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr,
- [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
SDNPMayLoad]>;
def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG", SDTX86Void,
- [SDNPHasChain, SDNPOutFlag, SDNPSideEffect]>;
+ [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>;
def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>;
def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
-def X86SegmentBaseAddress : SDNode<"X86ISD::SegmentBaseAddress",
- SDT_X86SegmentBaseAddress, []>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
[SDNPHasChain]>;
-def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET,
- [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>;
+def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>;
def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
-def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags,
+def X86umul_flag : SDNode<"X86ISD::UMUL", SDT2ResultBinaryArithWithFlags,
[SDNPCommutative]>;
-
+def X86adc_flag : SDNode<"X86ISD::ADC", SDTBinaryArithWithFlagsInOut>;
+def X86sbb_flag : SDNode<"X86ISD::SBB", SDTBinaryArithWithFlagsInOut>;
+
def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>;
def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>;
def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags,
@@ -197,11 +218,11 @@ def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags,
def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
-def X86MingwAlloca : SDNode<"X86ISD::MINGW_ALLOCA", SDTX86Void,
- [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
-
+def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDTX86Void,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+
def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL,
- []>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
//===----------------------------------------------------------------------===//
// X86 Operand Definitions.
@@ -252,6 +273,10 @@ def i8mem_NOREX : Operand<i64> {
let ParserMatchClass = X86MemAsmOperand;
}
+// GPRs available for tailcall.
+// It represents GR64_TC or GR64_TCW64.
+def ptr_rc_tailcall : PointerLikeRegClass<2>;
+
// Special i32mem for addresses of load folding tail calls. These are not
// allowed to use callee-saved registers since they must be scheduled
// after callee-saved register are popped.
@@ -261,6 +286,15 @@ def i32mem_TC : Operand<i32> {
let ParserMatchClass = X86MemAsmOperand;
}
+// Special i64mem for addresses of load folding tail calls. These are not
+// allowed to use callee-saved registers since they must be scheduled
+// after callee-saved register are popped.
+def i64mem_TC : Operand<i64> {
+ let PrintMethod = "printi64mem";
+ let MIOperandInfo = (ops ptr_rc_tailcall, i8imm,
+ ptr_rc_tailcall, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+}
let ParserMatchClass = X86AbsMemAsmOperand,
PrintMethod = "print_pcrel_imm" in {
@@ -332,43 +366,77 @@ def i32i8imm : Operand<i32> {
let ParserMatchClass = ImmSExti32i8AsmOperand;
}
+// 64-bits but only 32 bits are significant.
+def i64i32imm : Operand<i64> {
+ let ParserMatchClass = ImmSExti64i32AsmOperand;
+}
+
+// 64-bits but only 32 bits are significant, and those bits are treated as being
+// pc relative.
+def i64i32imm_pcrel : Operand<i64> {
+ let PrintMethod = "print_pcrel_imm";
+ let ParserMatchClass = X86AbsMemAsmOperand;
+}
+
+// 64-bits but only 8 bits are significant.
+def i64i8imm : Operand<i64> {
+ let ParserMatchClass = ImmSExti64i8AsmOperand;
+}
+
+def lea64_32mem : Operand<i32> {
+ let PrintMethod = "printi32mem";
+ let AsmOperandLowerMethod = "lower_lea64_32mem";
+ let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+}
+
+
//===----------------------------------------------------------------------===//
// X86 Complex Pattern Definitions.
//
// Define X86 specific addressing mode.
-def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], []>;
+def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], [SDNPWantParent]>;
def lea32addr : ComplexPattern<i32, 5, "SelectLEAAddr",
[add, sub, mul, X86mul_imm, shl, or, frameindex],
[]>;
def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
[tglobaltlsaddr], []>;
+def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr",
+ [add, sub, mul, X86mul_imm, shl, or, frameindex,
+ X86WrapperRIP], []>;
+
+def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
+ [tglobaltlsaddr], []>;
+
//===----------------------------------------------------------------------===//
// X86 Instruction Predicate Definitions.
def HasCMov : Predicate<"Subtarget->hasCMov()">;
def NoCMov : Predicate<"!Subtarget->hasCMov()">;
-// FIXME: temporary hack to let codegen assert or generate poor code in case
-// no AVX version of the desired intructions is present, this is better for
-// incremental dev (without fallbacks it's easier to spot what's missing)
-def HasMMX : Predicate<"Subtarget->hasMMX() && !Subtarget->hasAVX()">;
-def HasSSE1 : Predicate<"Subtarget->hasSSE1() && !Subtarget->hasAVX()">;
-def HasSSE2 : Predicate<"Subtarget->hasSSE2() && !Subtarget->hasAVX()">;
-def HasSSE3 : Predicate<"Subtarget->hasSSE3() && !Subtarget->hasAVX()">;
-def HasSSSE3 : Predicate<"Subtarget->hasSSSE3() && !Subtarget->hasAVX()">;
-def HasSSE41 : Predicate<"Subtarget->hasSSE41() && !Subtarget->hasAVX()">;
-def HasSSE42 : Predicate<"Subtarget->hasSSE42() && !Subtarget->hasAVX()">;
-def HasSSE4A : Predicate<"Subtarget->hasSSE4A() && !Subtarget->hasAVX()">;
+def HasMMX : Predicate<"Subtarget->hasMMX()">;
+def Has3DNow : Predicate<"Subtarget->has3DNow()">;
+def Has3DNowA : Predicate<"Subtarget->has3DNowA()">;
+def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
+def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
+def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
+def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
+def HasSSE41 : Predicate<"Subtarget->hasSSE41()">;
+def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
+def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
def HasAVX : Predicate<"Subtarget->hasAVX()">;
+def HasXMMInt : Predicate<"Subtarget->hasXMMInt()">;
+
+def HasAES : Predicate<"Subtarget->hasAES()">;
def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">;
def HasFMA3 : Predicate<"Subtarget->hasFMA3()">;
def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
-def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
-def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
-def In32BitMode : Predicate<"!Subtarget->is64Bit()">;
-def In64BitMode : Predicate<"Subtarget->is64Bit()">;
+def FPStackf32 : Predicate<"!Subtarget->hasXMM()">;
+def FPStackf64 : Predicate<"!Subtarget->hasXMMInt()">;
+def In32BitMode : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate;
+def In64BitMode : Predicate<"Subtarget->is64Bit()">, AssemblerPredicate;
def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;
def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
@@ -383,7 +451,6 @@ def OptForSize : Predicate<"OptForSize">;
def OptForSpeed : Predicate<"!OptForSize">;
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
-def HasAES : Predicate<"Subtarget->hasAES()">;
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
@@ -418,40 +485,24 @@ def immSext8 : PatLeaf<(imm), [{ return immSext8(N); }]>;
def i16immSExt8 : PatLeaf<(i16 immSext8)>;
def i32immSExt8 : PatLeaf<(i32 immSext8)>;
-
-/// Load patterns: these constraint the match to the right address space.
-def dsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- if (PT->getAddressSpace() > 255)
- return false;
- return true;
-}]>;
-
-def gsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- return PT->getAddressSpace() == 256;
- return false;
+def i64immSExt8 : PatLeaf<(i64 immSext8)>;
+def i64immSExt32 : PatLeaf<(i64 imm), [{ return i64immSExt32(N); }]>;
+def i64immZExt32 : PatLeaf<(i64 imm), [{
+ // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+ // unsignedsign extended field.
+ return (uint64_t)N->getZExtValue() == (uint32_t)N->getZExtValue();
}]>;
-def fsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- return PT->getAddressSpace() == 257;
- return false;
+def i64immZExt32SExt8 : PatLeaf<(i64 imm), [{
+ uint64_t v = N->getZExtValue();
+ return v == (uint32_t)v && (int32_t)v == (int8_t)v;
}]>;
-
// Helper fragments for loads.
// It's always safe to treat a anyext i16 load as a i32 load if the i16 is
// known to be 32-bit aligned or better. Ditto for i8 to i16.
def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{
LoadSDNode *LD = cast<LoadSDNode>(N);
- if (const Value *Src = LD->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- if (PT->getAddressSpace() > 255)
- return false;
ISD::LoadExtType ExtType = LD->getExtensionType();
if (ExtType == ISD::NON_EXTLOAD)
return true;
@@ -462,10 +513,6 @@ def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{
def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)),[{
LoadSDNode *LD = cast<LoadSDNode>(N);
- if (const Value *Src = LD->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- if (PT->getAddressSpace() > 255)
- return false;
ISD::LoadExtType ExtType = LD->getExtensionType();
if (ExtType == ISD::EXTLOAD)
return LD->getAlignment() >= 2 && !LD->isVolatile();
@@ -474,10 +521,6 @@ def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)),[{
def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
LoadSDNode *LD = cast<LoadSDNode>(N);
- if (const Value *Src = LD->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- if (PT->getAddressSpace() > 255)
- return false;
ISD::LoadExtType ExtType = LD->getExtensionType();
if (ExtType == ISD::NON_EXTLOAD)
return true;
@@ -486,15 +529,18 @@ def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
return false;
}]>;
-def loadi8 : PatFrag<(ops node:$ptr), (i8 (dsload node:$ptr))>;
-def loadi64 : PatFrag<(ops node:$ptr), (i64 (dsload node:$ptr))>;
-def loadf32 : PatFrag<(ops node:$ptr), (f32 (dsload node:$ptr))>;
-def loadf64 : PatFrag<(ops node:$ptr), (f64 (dsload node:$ptr))>;
-def loadf80 : PatFrag<(ops node:$ptr), (f80 (dsload node:$ptr))>;
+def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>;
+def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
+def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
+def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
+def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>;
def sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>;
def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
def sextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (sextloadi16 node:$ptr))>;
+def sextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>;
+def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>;
+def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>;
def zextloadi8i1 : PatFrag<(ops node:$ptr), (i8 (zextloadi1 node:$ptr))>;
def zextloadi16i1 : PatFrag<(ops node:$ptr), (i16 (zextloadi1 node:$ptr))>;
@@ -502,6 +548,10 @@ def zextloadi32i1 : PatFrag<(ops node:$ptr), (i32 (zextloadi1 node:$ptr))>;
def zextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>;
def zextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (zextloadi8 node:$ptr))>;
def zextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (zextloadi16 node:$ptr))>;
+def zextloadi64i1 : PatFrag<(ops node:$ptr), (i64 (zextloadi1 node:$ptr))>;
+def zextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (zextloadi8 node:$ptr))>;
+def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>;
+def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>;
def extloadi8i1 : PatFrag<(ops node:$ptr), (i8 (extloadi1 node:$ptr))>;
def extloadi16i1 : PatFrag<(ops node:$ptr), (i16 (extloadi1 node:$ptr))>;
@@ -509,6 +559,10 @@ def extloadi32i1 : PatFrag<(ops node:$ptr), (i32 (extloadi1 node:$ptr))>;
def extloadi16i8 : PatFrag<(ops node:$ptr), (i16 (extloadi8 node:$ptr))>;
def extloadi32i8 : PatFrag<(ops node:$ptr), (i32 (extloadi8 node:$ptr))>;
def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>;
+def extloadi64i1 : PatFrag<(ops node:$ptr), (i64 (extloadi1 node:$ptr))>;
+def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>;
+def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>;
+def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>;
// An 'and' node with a single use.
@@ -524,66 +578,10 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{
return N->hasOneUse();
}]>;
-// Treat an 'or' node is as an 'add' if the or'ed bits are known to be zero.
-def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
- return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
-
- unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
- APInt Mask = APInt::getAllOnesValue(BitWidth);
- APInt KnownZero0, KnownOne0;
- CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
- APInt KnownZero1, KnownOne1;
- CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
- return (~KnownZero0 & ~KnownZero1) == 0;
-}]>;
-
//===----------------------------------------------------------------------===//
-// Instruction list...
+// Instruction list.
//
-// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into
-// a stack adjustment and the codegen must know that they may modify the stack
-// pointer before prolog-epilog rewriting occurs.
-// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
-// sub / add which can clobber EFLAGS.
-let Defs = [ESP, EFLAGS], Uses = [ESP] in {
-def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt),
- "#ADJCALLSTACKDOWN",
- [(X86callseq_start timm:$amt)]>,
- Requires<[In32BitMode]>;
-def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
- "#ADJCALLSTACKUP",
- [(X86callseq_end timm:$amt1, timm:$amt2)]>,
- Requires<[In32BitMode]>;
-}
-
-// x86-64 va_start lowering magic.
-let usesCustomInserter = 1 in {
-def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
- (outs),
- (ins GR8:$al,
- i64imm:$regsavefi, i64imm:$offset,
- variable_ops),
- "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
- [(X86vastart_save_xmm_regs GR8:$al,
- imm:$regsavefi,
- imm:$offset)]>;
-
-// Dynamic stack allocation yields _alloca call for Cygwin/Mingw targets. Calls
-// to _alloca is needed to probe the stack when allocating more than 4k bytes in
-// one go. Touching the stack at 4K increments is necessary to ensure that the
-// guard pages used by the OS virtual memory manager are allocated in correct
-// sequence.
-// The main point of having separate instruction are extra unmodelled effects
-// (compared to ordinary calls) like stack pointer change.
-
-let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
- def MINGW_ALLOCA : I<0, Pseudo, (outs), (ins),
- "# dynamic stack allocation",
- [(X86MingwAlloca)]>;
-}
-
// Nop
let neverHasSideEffects = 1 in {
def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>;
@@ -593,206 +591,22 @@ let neverHasSideEffects = 1 in {
"nop{l}\t$zero", []>, TB;
}
-// Trap
-let Uses = [EFLAGS] in {
- def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
-}
-def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3",
- [(int_x86_int (i8 3))]>;
-// FIXME: need to make sure that "int $3" matches int3
-def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap",
- [(int_x86_int imm:$trap)]>;
-def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", []>, OpSize;
-def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l}", []>;
-
-// PIC base construction. This expands to code that looks like this:
-// call $next_inst
-// popl %destreg"
-let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in
- def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label),
- "", []>;
-
-//===----------------------------------------------------------------------===//
-// Control Flow Instructions.
-//
-
-// Return instructions.
-let isTerminator = 1, isReturn = 1, isBarrier = 1,
- hasCtrlDep = 1, FPForm = SpecialFP in {
- def RET : I <0xC3, RawFrm, (outs), (ins variable_ops),
- "ret",
- [(X86retflag 0)]>;
- def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
- "ret\t$amt",
- [(X86retflag timm:$amt)]>;
- def LRET : I <0xCB, RawFrm, (outs), (ins),
- "lret", []>;
- def LRETI : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
- "lret\t$amt", []>;
-}
-
-// Unconditional branches.
-let isBarrier = 1, isBranch = 1, isTerminator = 1 in {
- def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst),
- "jmp\t$dst", [(br bb:$dst)]>;
- def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
- "jmp\t$dst", []>;
-}
-
-// Conditional Branches.
-let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in {
- multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
- def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, []>;
- def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm,
- [(X86brcond bb:$dst, Cond, EFLAGS)]>, TB;
- }
-}
-
-defm JO : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>;
-defm JNO : ICBr<0x71, 0x81, "jno\t$dst" , X86_COND_NO>;
-defm JB : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>;
-defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>;
-defm JE : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>;
-defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>;
-defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>;
-defm JA : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>;
-defm JS : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>;
-defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>;
-defm JP : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>;
-defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>;
-defm JL : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>;
-defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>;
-defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>;
-defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>;
-
-// FIXME: What about the CX/RCX versions of this instruction?
-let Uses = [ECX], isBranch = 1, isTerminator = 1 in
- def JCXZ8 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
- "jcxz\t$dst", []>;
-
-
-// Indirect branches
-let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
- def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
- [(brind GR32:$dst)]>, Requires<[In32BitMode]>;
- def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
- [(brind (loadi32 addr:$dst))]>, Requires<[In32BitMode]>;
-
- def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs),
- (ins i16imm:$off, i16imm:$seg),
- "ljmp{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
- def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs),
- (ins i32imm:$off, i16imm:$seg),
- "ljmp{l}\t{$seg, $off|$off, $seg}", []>;
-
- def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst),
- "ljmp{w}\t{*}$dst", []>, OpSize;
- def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst),
- "ljmp{l}\t{*}$dst", []>;
-}
-
-
-// Loop instructions
-
-def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>;
-def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>;
-def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>;
-
-//===----------------------------------------------------------------------===//
-// Call Instructions...
-//
-let isCall = 1 in
- // All calls clobber the non-callee saved registers. ESP is marked as
- // a use to prevent stack-pointer assignments that appear immediately
- // before calls from potentially appearing dead. Uses for argument
- // registers are added manually.
- let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
- MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
- Uses = [ESP] in {
- def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm,
- (outs), (ins i32imm_pcrel:$dst,variable_ops),
- "call\t$dst", []>;
- def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops),
- "call\t{*}$dst", [(X86call GR32:$dst)]>;
- def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
- "call\t{*}$dst", [(X86call (loadi32 addr:$dst))]>;
-
- def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs),
- (ins i16imm:$off, i16imm:$seg),
- "lcall{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
- def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs),
- (ins i32imm:$off, i16imm:$seg),
- "lcall{l}\t{$seg, $off|$off, $seg}", []>;
-
- def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst),
- "lcall{w}\t{*}$dst", []>, OpSize;
- def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst),
- "lcall{l}\t{*}$dst", []>;
-
- // callw for 16 bit code for the assembler.
- let isAsmParserOnly = 1 in
- def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm,
- (outs), (ins i16imm_pcrel:$dst, variable_ops),
- "callw\t$dst", []>, OpSize;
- }
// Constructing a stack frame.
+def ENTER : Ii16<0xC8, RawFrmImm8, (outs), (ins i16imm:$len, i8imm:$lvl),
+ "enter\t$len, $lvl", []>;
-def ENTER : I<0xC8, RawFrm, (outs), (ins i16imm:$len, i8imm:$lvl),
- "enter\t$len, $lvl", []>;
-
-// Tail call stuff.
-
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
- isCodeGenOnly = 1 in
- let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
- MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
- Uses = [ESP] in {
- def TCRETURNdi : I<0, Pseudo, (outs),
- (ins i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
- "#TC_RETURN $dst $offset", []>;
- def TCRETURNri : I<0, Pseudo, (outs),
- (ins GR32_TC:$dst, i32imm:$offset, variable_ops),
- "#TC_RETURN $dst $offset", []>;
- let mayLoad = 1 in
- def TCRETURNmi : I<0, Pseudo, (outs),
- (ins i32mem_TC:$dst, i32imm:$offset, variable_ops),
- "#TC_RETURN $dst $offset", []>;
-
- // FIXME: The should be pseudo instructions that are lowered when going to
- // mcinst.
- def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs),
- (ins i32imm_pcrel:$dst, variable_ops),
- "jmp\t$dst # TAILCALL",
- []>;
- def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops),
- "", []>; // FIXME: Remove encoding when JIT is dead.
- let mayLoad = 1 in
- def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops),
- "jmp{l}\t{*}$dst # TAILCALL", []>;
-}
-
-//===----------------------------------------------------------------------===//
-// Miscellaneous Instructions...
-//
let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, neverHasSideEffects=1 in
def LEAVE : I<0xC9, RawFrm,
(outs), (ins), "leave", []>, Requires<[In32BitMode]>;
-def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
- "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS;
-let mayLoad = 1 in
-def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
- "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS;
-def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS;
-let mayLoad = 1 in
-def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS;
+let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in
+def LEAVE64 : I<0xC9, RawFrm,
+ (outs), (ins), "leave", []>, Requires<[In64BitMode]>;
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in {
let mayLoad = 1 in {
@@ -805,6 +619,10 @@ def POP16rmm: I<0x8F, MRM0m, (outs i16mem:$dst), (ins), "pop{w}\t$dst", []>,
OpSize;
def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>;
def POP32rmm: I<0x8F, MRM0m, (outs i32mem:$dst), (ins), "pop{l}\t$dst", []>;
+
+def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize;
+def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", []>,
+ Requires<[In32BitMode]>;
}
let mayStore = 1 in {
@@ -817,29 +635,54 @@ def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[]>,
OpSize;
def PUSH32rmr: I<0xFF, MRM6r, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>;
def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[]>;
-}
-}
-let Defs = [ESP], Uses = [ESP], neverHasSideEffects = 1, mayStore = 1 in {
-def PUSHi8 : Ii8<0x6a, RawFrm, (outs), (ins i32i8imm:$imm),
+def PUSHi8 : Ii8<0x6a, RawFrm, (outs), (ins i32i8imm:$imm),
"push{l}\t$imm", []>;
-def PUSHi16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
+def PUSHi16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
"push{w}\t$imm", []>, OpSize;
-def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
+def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
"push{l}\t$imm", []>;
-}
-let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, neverHasSideEffects=1 in {
-def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize;
-def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", []>,
- Requires<[In32BitMode]>;
-}
-let Defs = [ESP], Uses = [ESP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in {
def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", []>, OpSize;
def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", []>,
Requires<[In32BitMode]>;
+
+}
+}
+
+let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in {
+let mayLoad = 1 in {
+def POP64r : I<0x58, AddRegFrm,
+ (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
+def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
+def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", []>;
+}
+let mayStore = 1 in {
+def PUSH64r : I<0x50, AddRegFrm,
+ (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
+def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
+def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>;
+}
}
+let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in {
+def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm),
+ "push{q}\t$imm", []>;
+def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
+ "push{q}\t$imm", []>;
+def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm),
+ "push{q}\t$imm", []>;
+}
+
+let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in
+def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", []>,
+ Requires<[In64BitMode]>;
+let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in
+def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", []>,
+ Requires<[In64BitMode]>;
+
+
+
let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP],
mayLoad=1, neverHasSideEffects=1 in {
def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l}", []>,
@@ -851,12 +694,16 @@ def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l}", []>,
Requires<[In32BitMode]>;
}
-let Uses = [EFLAGS], Constraints = "$src = $dst" in // GR32 = bswap GR32
- def BSWAP32r : I<0xC8, AddRegFrm,
- (outs GR32:$dst), (ins GR32:$src),
- "bswap{l}\t$dst",
- [(set GR32:$dst, (bswap GR32:$src))]>, TB;
+let Constraints = "$src = $dst" in { // GR32 = bswap GR32
+def BSWAP32r : I<0xC8, AddRegFrm,
+ (outs GR32:$dst), (ins GR32:$src),
+ "bswap{l}\t$dst",
+ [(set GR32:$dst, (bswap GR32:$src))]>, TB;
+def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
+ "bswap{q}\t$dst",
+ [(set GR64:$dst, (bswap GR64:$src))]>, TB;
+} // Constraints = "$src = $dst"
// Bit scan instructions.
let Defs = [EFLAGS] in {
@@ -873,6 +720,12 @@ def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"bsf{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))]>, TB;
+def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "bsf{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))]>, TB;
+def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "bsf{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))]>, TB;
def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"bsr{w}\t{$src, $dst|$dst, $src}",
@@ -887,44 +740,23 @@ def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"bsr{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))]>, TB;
+def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "bsr{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))]>, TB;
+def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "bsr{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))]>, TB;
} // Defs = [EFLAGS]
-let neverHasSideEffects = 1 in
-def LEA16r : I<0x8D, MRMSrcMem,
- (outs GR16:$dst), (ins i32mem:$src),
- "lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize;
-let isReMaterializable = 1 in
-def LEA32r : I<0x8D, MRMSrcMem,
- (outs GR32:$dst), (ins i32mem:$src),
- "lea{l}\t{$src|$dst}, {$dst|$src}",
- [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>;
-
-let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
-def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
- [(X86rep_movs i8)]>, REP;
-def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
- [(X86rep_movs i16)]>, REP, OpSize;
-def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
- [(X86rep_movs i32)]>, REP;
-}
// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI
let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in {
def MOVSB : I<0xA4, RawFrm, (outs), (ins), "{movsb}", []>;
def MOVSW : I<0xA5, RawFrm, (outs), (ins), "{movsw}", []>, OpSize;
def MOVSD : I<0xA5, RawFrm, (outs), (ins), "{movsl|movsd}", []>;
+def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", []>;
}
-let Defs = [ECX,EDI], Uses = [AL,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
- [(X86rep_stos i8)]>, REP;
-let Defs = [ECX,EDI], Uses = [AX,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
- [(X86rep_stos i16)]>, REP, OpSize;
-let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
- [(X86rep_stos i32)]>, REP;
-
// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI
let Defs = [EDI], Uses = [AL,EDI,EFLAGS] in
def STOSB : I<0xAA, RawFrm, (outs), (ins), "{stosb}", []>;
@@ -932,91 +764,24 @@ let Defs = [EDI], Uses = [AX,EDI,EFLAGS] in
def STOSW : I<0xAB, RawFrm, (outs), (ins), "{stosw}", []>, OpSize;
let Defs = [EDI], Uses = [EAX,EDI,EFLAGS] in
def STOSD : I<0xAB, RawFrm, (outs), (ins), "{stosl|stosd}", []>;
+let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in
+def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", []>;
def SCAS8 : I<0xAE, RawFrm, (outs), (ins), "scas{b}", []>;
def SCAS16 : I<0xAF, RawFrm, (outs), (ins), "scas{w}", []>, OpSize;
def SCAS32 : I<0xAF, RawFrm, (outs), (ins), "scas{l}", []>;
+def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", []>;
def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmps{b}", []>;
def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmps{w}", []>, OpSize;
def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l}", []>;
-
-let Defs = [RAX, RDX] in
-def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>,
- TB;
-
-let Defs = [RAX, RCX, RDX] in
-def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB;
-
-let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in {
-def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB;
-}
-
-def SYSCALL : I<0x05, RawFrm,
- (outs), (ins), "syscall", []>, TB;
-def SYSRET : I<0x07, RawFrm,
- (outs), (ins), "sysret", []>, TB;
-def SYSENTER : I<0x34, RawFrm,
- (outs), (ins), "sysenter", []>, TB;
-def SYSEXIT : I<0x35, RawFrm,
- (outs), (ins), "sysexit", []>, TB, Requires<[In32BitMode]>;
-
-def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>;
+def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>;
//===----------------------------------------------------------------------===//
-// Input/Output Instructions...
+// Move Instructions.
//
-let Defs = [AL], Uses = [DX] in
-def IN8rr : I<0xEC, RawFrm, (outs), (ins),
- "in{b}\t{%dx, %al|%AL, %DX}", []>;
-let Defs = [AX], Uses = [DX] in
-def IN16rr : I<0xED, RawFrm, (outs), (ins),
- "in{w}\t{%dx, %ax|%AX, %DX}", []>, OpSize;
-let Defs = [EAX], Uses = [DX] in
-def IN32rr : I<0xED, RawFrm, (outs), (ins),
- "in{l}\t{%dx, %eax|%EAX, %DX}", []>;
-
-let Defs = [AL] in
-def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins i16i8imm:$port),
- "in{b}\t{$port, %al|%AL, $port}", []>;
-let Defs = [AX] in
-def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins i16i8imm:$port),
- "in{w}\t{$port, %ax|%AX, $port}", []>, OpSize;
-let Defs = [EAX] in
-def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins i16i8imm:$port),
- "in{l}\t{$port, %eax|%EAX, $port}", []>;
-
-let Uses = [DX, AL] in
-def OUT8rr : I<0xEE, RawFrm, (outs), (ins),
- "out{b}\t{%al, %dx|%DX, %AL}", []>;
-let Uses = [DX, AX] in
-def OUT16rr : I<0xEF, RawFrm, (outs), (ins),
- "out{w}\t{%ax, %dx|%DX, %AX}", []>, OpSize;
-let Uses = [DX, EAX] in
-def OUT32rr : I<0xEF, RawFrm, (outs), (ins),
- "out{l}\t{%eax, %dx|%DX, %EAX}", []>;
-
-let Uses = [AL] in
-def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins i16i8imm:$port),
- "out{b}\t{%al, $port|$port, %AL}", []>;
-let Uses = [AX] in
-def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins i16i8imm:$port),
- "out{w}\t{%ax, $port|$port, %AX}", []>, OpSize;
-let Uses = [EAX] in
-def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i16i8imm:$port),
- "out{l}\t{%eax, $port|$port, %EAX}", []>;
-
-def IN8 : I<0x6C, RawFrm, (outs), (ins),
- "ins{b}", []>;
-def IN16 : I<0x6D, RawFrm, (outs), (ins),
- "ins{w}", []>, OpSize;
-def IN32 : I<0x6D, RawFrm, (outs), (ins),
- "ins{l}", []>;
-//===----------------------------------------------------------------------===//
-// Move Instructions...
-//
let neverHasSideEffects = 1 in {
def MOV8rr : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src),
"mov{b}\t{$src, $dst|$dst, $src}", []>;
@@ -1024,6 +789,8 @@ def MOV16rr : I<0x89, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
"mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src),
@@ -1035,6 +802,12 @@ def MOV16ri : Ii16<0xB8, AddRegFrm, (outs GR16:$dst), (ins i16imm:$src),
def MOV32ri : Ii32<0xB8, AddRegFrm, (outs GR32:$dst), (ins i32imm:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, imm:$src)]>;
+def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
+ "movabs{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, imm:$src)]>;
+def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, i64immSExt32:$src)]>;
}
def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src),
@@ -1046,6 +819,9 @@ def MOV16mi : Ii16<0xC7, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src),
def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
[(store (i32 imm:$src), addr:$dst)]>;
+def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(store i64immSExt32:$src, addr:$dst)]>;
/// moffs8, moffs16 and moffs32 versions of moves. The immediate is a
/// 32-bit offset from the PC. These are only valid in x86-32 mode.
@@ -1067,24 +843,22 @@ def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins),
def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins),
"mov{l}\t{%eax, $dst|$dst, %eax}", []>,
Requires<[In32BitMode]>;
-
-// Moves to and from segment registers
-def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>;
-def MOV16ms : I<0x8C, MRMDestMem, (outs i16mem:$dst), (ins SEGMENT_REG:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def MOV32ms : I<0x8C, MRMDestMem, (outs i32mem:$dst), (ins SEGMENT_REG:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>;
-def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>;
-def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>;
+
+// FIXME: These definitions are utterly broken
+// Just leave them commented out for now because they're useless outside
+// of the large code model, and most compilers won't generate the instructions
+// in question.
+/*
+def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins offset8:$src),
+ "mov{q}\t{$src, %rax|%rax, $src}", []>;
+def MOV64o64a : RIi32<0xA1, RawFrm, (outs), (ins offset64:$src),
+ "mov{q}\t{$src, %rax|%rax, $src}", []>;
+def MOV64ao8 : RIi8<0xA2, RawFrm, (outs offset8:$dst), (ins),
+ "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
+def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins),
+ "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
+*/
+
let isCodeGenOnly = 1 in {
def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src),
@@ -1093,6 +867,8 @@ def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
}
let canFoldAsLoad = 1, isReMaterializable = 1 in {
@@ -1105,6 +881,9 @@ def MOV16rm : I<0x8B, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
def MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (loadi32 addr:$src))]>;
+def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (load addr:$src))]>;
}
def MOV8mr : I<0x88, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src),
@@ -1116,24 +895,9 @@ def MOV16mr : I<0x89, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
[(store GR32:$src, addr:$dst)]>;
-
-/// Versions of MOV32rr, MOV32rm, and MOV32mr for i32mem_TC and GR32_TC.
-let isCodeGenOnly = 1 in {
-let neverHasSideEffects = 1 in
-def MOV32rr_TC : I<0x89, MRMDestReg, (outs GR32_TC:$dst), (ins GR32_TC:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>;
-
-let mayLoad = 1,
- canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOV32rm_TC : I<0x8B, MRMSrcMem, (outs GR32_TC:$dst), (ins i32mem_TC:$src),
- "mov{l}\t{$src, $dst|$dst, $src}",
- []>;
-
-let mayStore = 1 in
-def MOV32mr_TC : I<0x89, MRMDestMem, (outs), (ins i32mem_TC:$dst, GR32_TC:$src),
- "mov{l}\t{$src, $dst|$dst, $src}",
- []>;
-}
+def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(store GR64:$src, addr:$dst)]>;
// Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so
// that they can be used for copying and storing h registers, which can't be
@@ -1154,2219 +918,6 @@ def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
"mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
}
-// Moves to and from debug registers
-def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
-
-// Moves to and from control registers
-def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
-
-//===----------------------------------------------------------------------===//
-// Fixed-Register Multiplication and Division Instructions...
-//
-
-// Extra precision multiplication
-
-// AL is really implied by AX, but the registers in Defs must match the
-// SDNode results (i8, i32).
-let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src",
- // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
- // This probably ought to be moved to a def : Pat<> if the
- // syntax can be accepted.
- [(set AL, (mul AL, GR8:$src)),
- (implicit EFLAGS)]>; // AL,AH = AL*GR8
-
-let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in
-def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
- "mul{w}\t$src",
- []>, OpSize; // AX,DX = AX*GR16
-
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
-def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
- "mul{l}\t$src",
- []>; // EAX,EDX = EAX*GR32
-
-let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
- "mul{b}\t$src",
- // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
- // This probably ought to be moved to a def : Pat<> if the
- // syntax can be accepted.
- [(set AL, (mul AL, (loadi8 addr:$src))),
- (implicit EFLAGS)]>; // AL,AH = AL*[mem8]
-
-let mayLoad = 1, neverHasSideEffects = 1 in {
-let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
- "mul{w}\t$src",
- []>, OpSize; // AX,DX = AX*[mem16]
-
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
- "mul{l}\t$src",
- []>; // EAX,EDX = EAX*[mem32]
-}
-
-let neverHasSideEffects = 1 in {
-let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>;
- // AL,AH = AL*GR8
-let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", []>,
- OpSize; // AX,DX = AX*GR16
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>;
- // EAX,EDX = EAX*GR32
-let mayLoad = 1 in {
-let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
- "imul{b}\t$src", []>; // AL,AH = AL*[mem8]
-let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
- "imul{w}\t$src", []>, OpSize; // AX,DX = AX*[mem16]
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
- "imul{l}\t$src", []>; // EAX,EDX = EAX*[mem32]
-}
-} // neverHasSideEffects
-
-// unsigned division/remainder
-let Defs = [AL,EFLAGS,AX], Uses = [AX] in
-def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
- "div{b}\t$src", []>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-def DIV16r : I<0xF7, MRM6r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
- "div{w}\t$src", []>, OpSize;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
-def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
- "div{l}\t$src", []>;
-let mayLoad = 1 in {
-let Defs = [AL,EFLAGS,AX], Uses = [AX] in
-def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "div{b}\t$src", []>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "div{w}\t$src", []>, OpSize;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
- // EDX:EAX/[mem32] = EAX,EDX
-def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
- "div{l}\t$src", []>;
-}
-
-// Signed division/remainder.
-let Defs = [AL,EFLAGS,AX], Uses = [AX] in
-def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
- "idiv{b}\t$src", []>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-def IDIV16r: I<0xF7, MRM7r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
- "idiv{w}\t$src", []>, OpSize;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
-def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
- "idiv{l}\t$src", []>;
-let mayLoad = 1, mayLoad = 1 in {
-let Defs = [AL,EFLAGS,AX], Uses = [AX] in
-def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "idiv{b}\t$src", []>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "idiv{w}\t$src", []>, OpSize;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
-def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
- // EDX:EAX/[mem32] = EAX,EDX
- "idiv{l}\t$src", []>;
-}
-
-//===----------------------------------------------------------------------===//
-// Two address Instructions.
-//
-let Constraints = "$src1 = $dst" in {
-
-// Conditional moves
-let Uses = [EFLAGS] in {
-
-let Predicates = [HasCMov] in {
-let isCommutable = 1 in {
-def CMOVB16rr : I<0x42, MRMSrcReg, // if <u, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovb{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_B, EFLAGS))]>,
- TB, OpSize;
-def CMOVB32rr : I<0x42, MRMSrcReg, // if <u, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovb{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_B, EFLAGS))]>,
- TB;
-def CMOVAE16rr: I<0x43, MRMSrcReg, // if >=u, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovae{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_AE, EFLAGS))]>,
- TB, OpSize;
-def CMOVAE32rr: I<0x43, MRMSrcReg, // if >=u, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovae{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_AE, EFLAGS))]>,
- TB;
-def CMOVE16rr : I<0x44, MRMSrcReg, // if ==, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmove{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_E, EFLAGS))]>,
- TB, OpSize;
-def CMOVE32rr : I<0x44, MRMSrcReg, // if ==, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmove{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_E, EFLAGS))]>,
- TB;
-def CMOVNE16rr: I<0x45, MRMSrcReg, // if !=, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovne{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_NE, EFLAGS))]>,
- TB, OpSize;
-def CMOVNE32rr: I<0x45, MRMSrcReg, // if !=, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovne{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_NE, EFLAGS))]>,
- TB;
-def CMOVBE16rr: I<0x46, MRMSrcReg, // if <=u, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovbe{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_BE, EFLAGS))]>,
- TB, OpSize;
-def CMOVBE32rr: I<0x46, MRMSrcReg, // if <=u, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovbe{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_BE, EFLAGS))]>,
- TB;
-def CMOVA16rr : I<0x47, MRMSrcReg, // if >u, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmova{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_A, EFLAGS))]>,
- TB, OpSize;
-def CMOVA32rr : I<0x47, MRMSrcReg, // if >u, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmova{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_A, EFLAGS))]>,
- TB;
-def CMOVL16rr : I<0x4C, MRMSrcReg, // if <s, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovl{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_L, EFLAGS))]>,
- TB, OpSize;
-def CMOVL32rr : I<0x4C, MRMSrcReg, // if <s, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovl{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_L, EFLAGS))]>,
- TB;
-def CMOVGE16rr: I<0x4D, MRMSrcReg, // if >=s, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovge{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_GE, EFLAGS))]>,
- TB, OpSize;
-def CMOVGE32rr: I<0x4D, MRMSrcReg, // if >=s, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovge{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_GE, EFLAGS))]>,
- TB;
-def CMOVLE16rr: I<0x4E, MRMSrcReg, // if <=s, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovle{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_LE, EFLAGS))]>,
- TB, OpSize;
-def CMOVLE32rr: I<0x4E, MRMSrcReg, // if <=s, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovle{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_LE, EFLAGS))]>,
- TB;
-def CMOVG16rr : I<0x4F, MRMSrcReg, // if >s, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovg{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_G, EFLAGS))]>,
- TB, OpSize;
-def CMOVG32rr : I<0x4F, MRMSrcReg, // if >s, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovg{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_G, EFLAGS))]>,
- TB;
-def CMOVS16rr : I<0x48, MRMSrcReg, // if signed, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovs{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_S, EFLAGS))]>,
- TB, OpSize;
-def CMOVS32rr : I<0x48, MRMSrcReg, // if signed, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovs{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_S, EFLAGS))]>,
- TB;
-def CMOVNS16rr: I<0x49, MRMSrcReg, // if !signed, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovns{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_NS, EFLAGS))]>,
- TB, OpSize;
-def CMOVNS32rr: I<0x49, MRMSrcReg, // if !signed, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovns{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_NS, EFLAGS))]>,
- TB;
-def CMOVP16rr : I<0x4A, MRMSrcReg, // if parity, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovp{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_P, EFLAGS))]>,
- TB, OpSize;
-def CMOVP32rr : I<0x4A, MRMSrcReg, // if parity, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovp{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_P, EFLAGS))]>,
- TB;
-def CMOVNP16rr : I<0x4B, MRMSrcReg, // if !parity, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovnp{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_NP, EFLAGS))]>,
- TB, OpSize;
-def CMOVNP32rr : I<0x4B, MRMSrcReg, // if !parity, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovnp{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_NP, EFLAGS))]>,
- TB;
-def CMOVO16rr : I<0x40, MRMSrcReg, // if overflow, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovo{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_O, EFLAGS))]>,
- TB, OpSize;
-def CMOVO32rr : I<0x40, MRMSrcReg, // if overflow, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovo{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_O, EFLAGS))]>,
- TB;
-def CMOVNO16rr : I<0x41, MRMSrcReg, // if !overflow, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovno{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_NO, EFLAGS))]>,
- TB, OpSize;
-def CMOVNO32rr : I<0x41, MRMSrcReg, // if !overflow, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovno{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_NO, EFLAGS))]>,
- TB;
-} // isCommutable = 1
-
-def CMOVB16rm : I<0x42, MRMSrcMem, // if <u, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovb{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_B, EFLAGS))]>,
- TB, OpSize;
-def CMOVB32rm : I<0x42, MRMSrcMem, // if <u, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovb{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_B, EFLAGS))]>,
- TB;
-def CMOVAE16rm: I<0x43, MRMSrcMem, // if >=u, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovae{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_AE, EFLAGS))]>,
- TB, OpSize;
-def CMOVAE32rm: I<0x43, MRMSrcMem, // if >=u, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovae{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_AE, EFLAGS))]>,
- TB;
-def CMOVE16rm : I<0x44, MRMSrcMem, // if ==, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmove{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_E, EFLAGS))]>,
- TB, OpSize;
-def CMOVE32rm : I<0x44, MRMSrcMem, // if ==, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmove{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_E, EFLAGS))]>,
- TB;
-def CMOVNE16rm: I<0x45, MRMSrcMem, // if !=, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovne{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_NE, EFLAGS))]>,
- TB, OpSize;
-def CMOVNE32rm: I<0x45, MRMSrcMem, // if !=, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovne{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_NE, EFLAGS))]>,
- TB;
-def CMOVBE16rm: I<0x46, MRMSrcMem, // if <=u, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovbe{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_BE, EFLAGS))]>,
- TB, OpSize;
-def CMOVBE32rm: I<0x46, MRMSrcMem, // if <=u, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovbe{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_BE, EFLAGS))]>,
- TB;
-def CMOVA16rm : I<0x47, MRMSrcMem, // if >u, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmova{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_A, EFLAGS))]>,
- TB, OpSize;
-def CMOVA32rm : I<0x47, MRMSrcMem, // if >u, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmova{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_A, EFLAGS))]>,
- TB;
-def CMOVL16rm : I<0x4C, MRMSrcMem, // if <s, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovl{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_L, EFLAGS))]>,
- TB, OpSize;
-def CMOVL32rm : I<0x4C, MRMSrcMem, // if <s, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovl{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_L, EFLAGS))]>,
- TB;
-def CMOVGE16rm: I<0x4D, MRMSrcMem, // if >=s, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovge{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_GE, EFLAGS))]>,
- TB, OpSize;
-def CMOVGE32rm: I<0x4D, MRMSrcMem, // if >=s, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovge{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_GE, EFLAGS))]>,
- TB;
-def CMOVLE16rm: I<0x4E, MRMSrcMem, // if <=s, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovle{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_LE, EFLAGS))]>,
- TB, OpSize;
-def CMOVLE32rm: I<0x4E, MRMSrcMem, // if <=s, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovle{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_LE, EFLAGS))]>,
- TB;
-def CMOVG16rm : I<0x4F, MRMSrcMem, // if >s, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovg{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_G, EFLAGS))]>,
- TB, OpSize;
-def CMOVG32rm : I<0x4F, MRMSrcMem, // if >s, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovg{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_G, EFLAGS))]>,
- TB;
-def CMOVS16rm : I<0x48, MRMSrcMem, // if signed, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovs{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_S, EFLAGS))]>,
- TB, OpSize;
-def CMOVS32rm : I<0x48, MRMSrcMem, // if signed, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovs{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_S, EFLAGS))]>,
- TB;
-def CMOVNS16rm: I<0x49, MRMSrcMem, // if !signed, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovns{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_NS, EFLAGS))]>,
- TB, OpSize;
-def CMOVNS32rm: I<0x49, MRMSrcMem, // if !signed, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovns{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_NS, EFLAGS))]>,
- TB;
-def CMOVP16rm : I<0x4A, MRMSrcMem, // if parity, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovp{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_P, EFLAGS))]>,
- TB, OpSize;
-def CMOVP32rm : I<0x4A, MRMSrcMem, // if parity, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovp{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_P, EFLAGS))]>,
- TB;
-def CMOVNP16rm : I<0x4B, MRMSrcMem, // if !parity, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovnp{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_NP, EFLAGS))]>,
- TB, OpSize;
-def CMOVNP32rm : I<0x4B, MRMSrcMem, // if !parity, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovnp{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_NP, EFLAGS))]>,
- TB;
-def CMOVO16rm : I<0x40, MRMSrcMem, // if overflow, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovo{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_O, EFLAGS))]>,
- TB, OpSize;
-def CMOVO32rm : I<0x40, MRMSrcMem, // if overflow, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovo{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_O, EFLAGS))]>,
- TB;
-def CMOVNO16rm : I<0x41, MRMSrcMem, // if !overflow, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovno{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_NO, EFLAGS))]>,
- TB, OpSize;
-def CMOVNO32rm : I<0x41, MRMSrcMem, // if !overflow, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovno{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_NO, EFLAGS))]>,
- TB;
-} // Predicates = [HasCMov]
-
-// X86 doesn't have 8-bit conditional moves. Use a customInserter to
-// emit control flow. An alternative to this is to mark i8 SELECT as Promote,
-// however that requires promoting the operands, and can induce additional
-// i8 register pressure. Note that CMOV_GR8 is conservatively considered to
-// clobber EFLAGS, because if one of the operands is zero, the expansion
-// could involve an xor.
-let usesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] in {
-def CMOV_GR8 : I<0, Pseudo,
- (outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond),
- "#CMOV_GR8 PSEUDO!",
- [(set GR8:$dst, (X86cmov GR8:$src1, GR8:$src2,
- imm:$cond, EFLAGS))]>;
-
-let Predicates = [NoCMov] in {
-def CMOV_GR32 : I<0, Pseudo,
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$cond),
- "#CMOV_GR32* PSEUDO!",
- [(set GR32:$dst,
- (X86cmov GR32:$src1, GR32:$src2, imm:$cond, EFLAGS))]>;
-def CMOV_GR16 : I<0, Pseudo,
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$cond),
- "#CMOV_GR16* PSEUDO!",
- [(set GR16:$dst,
- (X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>;
-def CMOV_RFP32 : I<0, Pseudo,
- (outs RFP32:$dst),
- (ins RFP32:$src1, RFP32:$src2, i8imm:$cond),
- "#CMOV_RFP32 PSEUDO!",
- [(set RFP32:$dst,
- (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond,
- EFLAGS))]>;
-def CMOV_RFP64 : I<0, Pseudo,
- (outs RFP64:$dst),
- (ins RFP64:$src1, RFP64:$src2, i8imm:$cond),
- "#CMOV_RFP64 PSEUDO!",
- [(set RFP64:$dst,
- (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond,
- EFLAGS))]>;
-def CMOV_RFP80 : I<0, Pseudo,
- (outs RFP80:$dst),
- (ins RFP80:$src1, RFP80:$src2, i8imm:$cond),
- "#CMOV_RFP80 PSEUDO!",
- [(set RFP80:$dst,
- (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond,
- EFLAGS))]>;
-} // Predicates = [NoCMov]
-} // UsesCustomInserter = 1, Constraints = "", Defs = [EFLAGS]
-} // Uses = [EFLAGS]
-
-
-// unary instructions
-let CodeSize = 2 in {
-let Defs = [EFLAGS] in {
-def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1),
- "neg{b}\t$dst",
- [(set GR8:$dst, (ineg GR8:$src1)),
- (implicit EFLAGS)]>;
-def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
- "neg{w}\t$dst",
- [(set GR16:$dst, (ineg GR16:$src1)),
- (implicit EFLAGS)]>, OpSize;
-def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
- "neg{l}\t$dst",
- [(set GR32:$dst, (ineg GR32:$src1)),
- (implicit EFLAGS)]>;
-
-let Constraints = "" in {
- def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst),
- "neg{b}\t$dst",
- [(store (ineg (loadi8 addr:$dst)), addr:$dst),
- (implicit EFLAGS)]>;
- def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst),
- "neg{w}\t$dst",
- [(store (ineg (loadi16 addr:$dst)), addr:$dst),
- (implicit EFLAGS)]>, OpSize;
- def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst),
- "neg{l}\t$dst",
- [(store (ineg (loadi32 addr:$dst)), addr:$dst),
- (implicit EFLAGS)]>;
-} // Constraints = ""
-} // Defs = [EFLAGS]
-
-// Match xor -1 to not. Favors these over a move imm + xor to save code size.
-let AddedComplexity = 15 in {
-def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1),
- "not{b}\t$dst",
- [(set GR8:$dst, (not GR8:$src1))]>;
-def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
- "not{w}\t$dst",
- [(set GR16:$dst, (not GR16:$src1))]>, OpSize;
-def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
- "not{l}\t$dst",
- [(set GR32:$dst, (not GR32:$src1))]>;
-}
-let Constraints = "" in {
- def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst),
- "not{b}\t$dst",
- [(store (not (loadi8 addr:$dst)), addr:$dst)]>;
- def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst),
- "not{w}\t$dst",
- [(store (not (loadi16 addr:$dst)), addr:$dst)]>, OpSize;
- def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst),
- "not{l}\t$dst",
- [(store (not (loadi32 addr:$dst)), addr:$dst)]>;
-} // Constraints = ""
-} // CodeSize
-
-// TODO: inc/dec is slow for P4, but fast for Pentium-M.
-let Defs = [EFLAGS] in {
-let CodeSize = 2 in
-def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
- "inc{b}\t$dst",
- [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>;
-
-let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
-def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
- "inc{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>,
- OpSize, Requires<[In32BitMode]>;
-def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
- "inc{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>,
- Requires<[In32BitMode]>;
-}
-let Constraints = "", CodeSize = 2 in {
- def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
- [(store (add (loadi8 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>;
- def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
- [(store (add (loadi16 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize, Requires<[In32BitMode]>;
- def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
- [(store (add (loadi32 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>,
- Requires<[In32BitMode]>;
-} // Constraints = "", CodeSize = 2
-
-let CodeSize = 2 in
-def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
- "dec{b}\t$dst",
- [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>;
-let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
-def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
- "dec{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>,
- OpSize, Requires<[In32BitMode]>;
-def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
- "dec{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>,
- Requires<[In32BitMode]>;
-} // CodeSize = 2
-
-let Constraints = "", CodeSize = 2 in {
- def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
- [(store (add (loadi8 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>;
- def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
- [(store (add (loadi16 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize, Requires<[In32BitMode]>;
- def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
- [(store (add (loadi32 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>,
- Requires<[In32BitMode]>;
-} // Constraints = "", CodeSize = 2
-} // Defs = [EFLAGS]
-
-// Logical operators...
-let Defs = [EFLAGS] in {
-let isCommutable = 1 in { // X = AND Y, Z --> X = AND Z, Y
-def AND8rr : I<0x20, MRMDestReg,
- (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
- "and{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS, (X86and_flag GR8:$src1, GR8:$src2))]>;
-def AND16rr : I<0x21, MRMDestReg,
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "and{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1,
- GR16:$src2))]>, OpSize;
-def AND32rr : I<0x21, MRMDestReg,
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "and{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
- GR32:$src2))]>;
-}
-
-// AND instructions with the destination register in REG and the source register
-// in R/M. Included for the disassembler.
-let isCodeGenOnly = 1 in {
-def AND8rr_REV : I<0x22, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
- "and{b}\t{$src2, $dst|$dst, $src2}", []>;
-def AND16rr_REV : I<0x23, MRMSrcReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "and{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def AND32rr_REV : I<0x23, MRMSrcReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "and{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-def AND8rm : I<0x22, MRMSrcMem,
- (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2),
- "and{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS, (X86and_flag GR8:$src1,
- (loadi8 addr:$src2)))]>;
-def AND16rm : I<0x23, MRMSrcMem,
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "and{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1,
- (loadi16 addr:$src2)))]>,
- OpSize;
-def AND32rm : I<0x23, MRMSrcMem,
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "and{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
- (loadi32 addr:$src2)))]>;
-
-def AND8ri : Ii8<0x80, MRM4r,
- (outs GR8 :$dst), (ins GR8 :$src1, i8imm :$src2),
- "and{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS, (X86and_flag GR8:$src1,
- imm:$src2))]>;
-def AND16ri : Ii16<0x81, MRM4r,
- (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
- "and{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1,
- imm:$src2))]>, OpSize;
-def AND32ri : Ii32<0x81, MRM4r,
- (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
- "and{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
- imm:$src2))]>;
-def AND16ri8 : Ii8<0x83, MRM4r,
- (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
- "and{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1,
- i16immSExt8:$src2))]>,
- OpSize;
-def AND32ri8 : Ii8<0x83, MRM4r,
- (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
- "and{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
- i32immSExt8:$src2))]>;
-
-let Constraints = "" in {
- def AND8mr : I<0x20, MRMDestMem,
- (outs), (ins i8mem :$dst, GR8 :$src),
- "and{b}\t{$src, $dst|$dst, $src}",
- [(store (and (load addr:$dst), GR8:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def AND16mr : I<0x21, MRMDestMem,
- (outs), (ins i16mem:$dst, GR16:$src),
- "and{w}\t{$src, $dst|$dst, $src}",
- [(store (and (load addr:$dst), GR16:$src), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize;
- def AND32mr : I<0x21, MRMDestMem,
- (outs), (ins i32mem:$dst, GR32:$src),
- "and{l}\t{$src, $dst|$dst, $src}",
- [(store (and (load addr:$dst), GR32:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def AND8mi : Ii8<0x80, MRM4m,
- (outs), (ins i8mem :$dst, i8imm :$src),
- "and{b}\t{$src, $dst|$dst, $src}",
- [(store (and (loadi8 addr:$dst), imm:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def AND16mi : Ii16<0x81, MRM4m,
- (outs), (ins i16mem:$dst, i16imm:$src),
- "and{w}\t{$src, $dst|$dst, $src}",
- [(store (and (loadi16 addr:$dst), imm:$src), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize;
- def AND32mi : Ii32<0x81, MRM4m,
- (outs), (ins i32mem:$dst, i32imm:$src),
- "and{l}\t{$src, $dst|$dst, $src}",
- [(store (and (loadi32 addr:$dst), imm:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def AND16mi8 : Ii8<0x83, MRM4m,
- (outs), (ins i16mem:$dst, i16i8imm :$src),
- "and{w}\t{$src, $dst|$dst, $src}",
- [(store (and (load addr:$dst), i16immSExt8:$src), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize;
- def AND32mi8 : Ii8<0x83, MRM4m,
- (outs), (ins i32mem:$dst, i32i8imm :$src),
- "and{l}\t{$src, $dst|$dst, $src}",
- [(store (and (load addr:$dst), i32immSExt8:$src), addr:$dst),
- (implicit EFLAGS)]>;
-
- def AND8i8 : Ii8<0x24, RawFrm, (outs), (ins i8imm:$src),
- "and{b}\t{$src, %al|%al, $src}", []>;
- def AND16i16 : Ii16<0x25, RawFrm, (outs), (ins i16imm:$src),
- "and{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
- def AND32i32 : Ii32<0x25, RawFrm, (outs), (ins i32imm:$src),
- "and{l}\t{$src, %eax|%eax, $src}", []>;
-
-} // Constraints = ""
-
-
-let isCommutable = 1 in { // X = OR Y, Z --> X = OR Z, Y
-def OR8rr : I<0x08, MRMDestReg, (outs GR8 :$dst),
- (ins GR8 :$src1, GR8 :$src2),
- "or{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS, (X86or_flag GR8:$src1, GR8:$src2))]>;
-def OR16rr : I<0x09, MRMDestReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "or{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1,GR16:$src2))]>,
- OpSize;
-def OR32rr : I<0x09, MRMDestReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "or{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,GR32:$src2))]>;
-}
-
-// OR instructions with the destination register in REG and the source register
-// in R/M. Included for the disassembler.
-let isCodeGenOnly = 1 in {
-def OR8rr_REV : I<0x0A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
- "or{b}\t{$src2, $dst|$dst, $src2}", []>;
-def OR16rr_REV : I<0x0B, MRMSrcReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "or{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def OR32rr_REV : I<0x0B, MRMSrcReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "or{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-def OR8rm : I<0x0A, MRMSrcMem, (outs GR8 :$dst),
- (ins GR8 :$src1, i8mem :$src2),
- "or{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS, (X86or_flag GR8:$src1,
- (load addr:$src2)))]>;
-def OR16rm : I<0x0B, MRMSrcMem, (outs GR16:$dst),
- (ins GR16:$src1, i16mem:$src2),
- "or{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1,
- (load addr:$src2)))]>,
- OpSize;
-def OR32rm : I<0x0B, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$src1, i32mem:$src2),
- "or{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,
- (load addr:$src2)))]>;
-
-def OR8ri : Ii8 <0x80, MRM1r, (outs GR8 :$dst),
- (ins GR8 :$src1, i8imm:$src2),
- "or{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst,EFLAGS, (X86or_flag GR8:$src1, imm:$src2))]>;
-def OR16ri : Ii16<0x81, MRM1r, (outs GR16:$dst),
- (ins GR16:$src1, i16imm:$src2),
- "or{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1,
- imm:$src2))]>, OpSize;
-def OR32ri : Ii32<0x81, MRM1r, (outs GR32:$dst),
- (ins GR32:$src1, i32imm:$src2),
- "or{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,
- imm:$src2))]>;
-
-def OR16ri8 : Ii8<0x83, MRM1r, (outs GR16:$dst),
- (ins GR16:$src1, i16i8imm:$src2),
- "or{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1,
- i16immSExt8:$src2))]>, OpSize;
-def OR32ri8 : Ii8<0x83, MRM1r, (outs GR32:$dst),
- (ins GR32:$src1, i32i8imm:$src2),
- "or{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,
- i32immSExt8:$src2))]>;
-let Constraints = "" in {
- def OR8mr : I<0x08, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
- "or{b}\t{$src, $dst|$dst, $src}",
- [(store (or (load addr:$dst), GR8:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def OR16mr : I<0x09, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
- "or{w}\t{$src, $dst|$dst, $src}",
- [(store (or (load addr:$dst), GR16:$src), addr:$dst),
- (implicit EFLAGS)]>, OpSize;
- def OR32mr : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
- "or{l}\t{$src, $dst|$dst, $src}",
- [(store (or (load addr:$dst), GR32:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def OR8mi : Ii8<0x80, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src),
- "or{b}\t{$src, $dst|$dst, $src}",
- [(store (or (loadi8 addr:$dst), imm:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def OR16mi : Ii16<0x81, MRM1m, (outs), (ins i16mem:$dst, i16imm:$src),
- "or{w}\t{$src, $dst|$dst, $src}",
- [(store (or (loadi16 addr:$dst), imm:$src), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize;
- def OR32mi : Ii32<0x81, MRM1m, (outs), (ins i32mem:$dst, i32imm:$src),
- "or{l}\t{$src, $dst|$dst, $src}",
- [(store (or (loadi32 addr:$dst), imm:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def OR16mi8 : Ii8<0x83, MRM1m, (outs), (ins i16mem:$dst, i16i8imm:$src),
- "or{w}\t{$src, $dst|$dst, $src}",
- [(store (or (load addr:$dst), i16immSExt8:$src), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize;
- def OR32mi8 : Ii8<0x83, MRM1m, (outs), (ins i32mem:$dst, i32i8imm:$src),
- "or{l}\t{$src, $dst|$dst, $src}",
- [(store (or (load addr:$dst), i32immSExt8:$src), addr:$dst),
- (implicit EFLAGS)]>;
-
- def OR8i8 : Ii8 <0x0C, RawFrm, (outs), (ins i8imm:$src),
- "or{b}\t{$src, %al|%al, $src}", []>;
- def OR16i16 : Ii16 <0x0D, RawFrm, (outs), (ins i16imm:$src),
- "or{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
- def OR32i32 : Ii32 <0x0D, RawFrm, (outs), (ins i32imm:$src),
- "or{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-
-
-let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y
- def XOR8rr : I<0x30, MRMDestReg,
- (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
- "xor{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS, (X86xor_flag GR8:$src1,
- GR8:$src2))]>;
- def XOR16rr : I<0x31, MRMDestReg,
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "xor{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1,
- GR16:$src2))]>, OpSize;
- def XOR32rr : I<0x31, MRMDestReg,
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "xor{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
- GR32:$src2))]>;
-} // isCommutable = 1
-
-// XOR instructions with the destination register in REG and the source register
-// in R/M. Included for the disassembler.
-let isCodeGenOnly = 1 in {
-def XOR8rr_REV : I<0x32, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
- "xor{b}\t{$src2, $dst|$dst, $src2}", []>;
-def XOR16rr_REV : I<0x33, MRMSrcReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "xor{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def XOR32rr_REV : I<0x33, MRMSrcReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "xor{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-def XOR8rm : I<0x32, MRMSrcMem,
- (outs GR8 :$dst), (ins GR8:$src1, i8mem :$src2),
- "xor{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS, (X86xor_flag GR8:$src1,
- (load addr:$src2)))]>;
-def XOR16rm : I<0x33, MRMSrcMem,
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "xor{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1,
- (load addr:$src2)))]>,
- OpSize;
-def XOR32rm : I<0x33, MRMSrcMem,
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "xor{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
- (load addr:$src2)))]>;
-
-def XOR8ri : Ii8<0x80, MRM6r,
- (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
- "xor{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS, (X86xor_flag GR8:$src1, imm:$src2))]>;
-def XOR16ri : Ii16<0x81, MRM6r,
- (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
- "xor{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1,
- imm:$src2))]>, OpSize;
-def XOR32ri : Ii32<0x81, MRM6r,
- (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
- "xor{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
- imm:$src2))]>;
-def XOR16ri8 : Ii8<0x83, MRM6r,
- (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
- "xor{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1,
- i16immSExt8:$src2))]>,
- OpSize;
-def XOR32ri8 : Ii8<0x83, MRM6r,
- (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
- "xor{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
- i32immSExt8:$src2))]>;
-
-let Constraints = "" in {
- def XOR8mr : I<0x30, MRMDestMem,
- (outs), (ins i8mem :$dst, GR8 :$src),
- "xor{b}\t{$src, $dst|$dst, $src}",
- [(store (xor (load addr:$dst), GR8:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def XOR16mr : I<0x31, MRMDestMem,
- (outs), (ins i16mem:$dst, GR16:$src),
- "xor{w}\t{$src, $dst|$dst, $src}",
- [(store (xor (load addr:$dst), GR16:$src), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize;
- def XOR32mr : I<0x31, MRMDestMem,
- (outs), (ins i32mem:$dst, GR32:$src),
- "xor{l}\t{$src, $dst|$dst, $src}",
- [(store (xor (load addr:$dst), GR32:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def XOR8mi : Ii8<0x80, MRM6m,
- (outs), (ins i8mem :$dst, i8imm :$src),
- "xor{b}\t{$src, $dst|$dst, $src}",
- [(store (xor (loadi8 addr:$dst), imm:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def XOR16mi : Ii16<0x81, MRM6m,
- (outs), (ins i16mem:$dst, i16imm:$src),
- "xor{w}\t{$src, $dst|$dst, $src}",
- [(store (xor (loadi16 addr:$dst), imm:$src), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize;
- def XOR32mi : Ii32<0x81, MRM6m,
- (outs), (ins i32mem:$dst, i32imm:$src),
- "xor{l}\t{$src, $dst|$dst, $src}",
- [(store (xor (loadi32 addr:$dst), imm:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def XOR16mi8 : Ii8<0x83, MRM6m,
- (outs), (ins i16mem:$dst, i16i8imm :$src),
- "xor{w}\t{$src, $dst|$dst, $src}",
- [(store (xor (load addr:$dst), i16immSExt8:$src), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize;
- def XOR32mi8 : Ii8<0x83, MRM6m,
- (outs), (ins i32mem:$dst, i32i8imm :$src),
- "xor{l}\t{$src, $dst|$dst, $src}",
- [(store (xor (load addr:$dst), i32immSExt8:$src), addr:$dst),
- (implicit EFLAGS)]>;
-
- def XOR8i8 : Ii8 <0x34, RawFrm, (outs), (ins i8imm:$src),
- "xor{b}\t{$src, %al|%al, $src}", []>;
- def XOR16i16 : Ii16<0x35, RawFrm, (outs), (ins i16imm:$src),
- "xor{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
- def XOR32i32 : Ii32<0x35, RawFrm, (outs), (ins i32imm:$src),
- "xor{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-} // Defs = [EFLAGS]
-
-// Shift instructions
-let Defs = [EFLAGS] in {
-let Uses = [CL] in {
-def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1),
- "shl{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (shl GR8:$src1, CL))]>;
-def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
- "shl{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (shl GR16:$src1, CL))]>, OpSize;
-def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
- "shl{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (shl GR32:$src1, CL))]>;
-} // Uses = [CL]
-
-def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
- "shl{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>;
-
-let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
-def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
- "shl{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
-def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
- "shl{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))]>;
-
-// NOTE: We don't include patterns for shifts of a register by one, because
-// 'add reg,reg' is cheaper.
-
-def SHL8r1 : I<0xD0, MRM4r, (outs GR8:$dst), (ins GR8:$src1),
- "shl{b}\t$dst", []>;
-def SHL16r1 : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
- "shl{w}\t$dst", []>, OpSize;
-def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
- "shl{l}\t$dst", []>;
-
-} // isConvertibleToThreeAddress = 1
-
-let Constraints = "" in {
- let Uses = [CL] in {
- def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst),
- "shl{b}\t{%cl, $dst|$dst, CL}",
- [(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>;
- def SHL16mCL : I<0xD3, MRM4m, (outs), (ins i16mem:$dst),
- "shl{w}\t{%cl, $dst|$dst, CL}",
- [(store (shl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
- def SHL32mCL : I<0xD3, MRM4m, (outs), (ins i32mem:$dst),
- "shl{l}\t{%cl, $dst|$dst, CL}",
- [(store (shl (loadi32 addr:$dst), CL), addr:$dst)]>;
- }
- def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src),
- "shl{b}\t{$src, $dst|$dst, $src}",
- [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
- def SHL16mi : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, i8imm:$src),
- "shl{w}\t{$src, $dst|$dst, $src}",
- [(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
- OpSize;
- def SHL32mi : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, i8imm:$src),
- "shl{l}\t{$src, $dst|$dst, $src}",
- [(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-
- // Shift by 1
- def SHL8m1 : I<0xD0, MRM4m, (outs), (ins i8mem :$dst),
- "shl{b}\t$dst",
- [(store (shl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
- def SHL16m1 : I<0xD1, MRM4m, (outs), (ins i16mem:$dst),
- "shl{w}\t$dst",
- [(store (shl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
- OpSize;
- def SHL32m1 : I<0xD1, MRM4m, (outs), (ins i32mem:$dst),
- "shl{l}\t$dst",
- [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-} // Constraints = ""
-
-let Uses = [CL] in {
-def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1),
- "shr{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (srl GR8:$src1, CL))]>;
-def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
- "shr{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (srl GR16:$src1, CL))]>, OpSize;
-def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
- "shr{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (srl GR32:$src1, CL))]>;
-}
-
-def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
- "shr{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))]>;
-def SHR16ri : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
- "shr{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
-def SHR32ri : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
- "shr{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))]>;
-
-// Shift by 1
-def SHR8r1 : I<0xD0, MRM5r, (outs GR8:$dst), (ins GR8:$src1),
- "shr{b}\t$dst",
- [(set GR8:$dst, (srl GR8:$src1, (i8 1)))]>;
-def SHR16r1 : I<0xD1, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
- "shr{w}\t$dst",
- [(set GR16:$dst, (srl GR16:$src1, (i8 1)))]>, OpSize;
-def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
- "shr{l}\t$dst",
- [(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>;
-
-let Constraints = "" in {
- let Uses = [CL] in {
- def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
- "shr{b}\t{%cl, $dst|$dst, CL}",
- [(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>;
- def SHR16mCL : I<0xD3, MRM5m, (outs), (ins i16mem:$dst),
- "shr{w}\t{%cl, $dst|$dst, CL}",
- [(store (srl (loadi16 addr:$dst), CL), addr:$dst)]>,
- OpSize;
- def SHR32mCL : I<0xD3, MRM5m, (outs), (ins i32mem:$dst),
- "shr{l}\t{%cl, $dst|$dst, CL}",
- [(store (srl (loadi32 addr:$dst), CL), addr:$dst)]>;
- }
- def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src),
- "shr{b}\t{$src, $dst|$dst, $src}",
- [(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
- def SHR16mi : Ii8<0xC1, MRM5m, (outs), (ins i16mem:$dst, i8imm:$src),
- "shr{w}\t{$src, $dst|$dst, $src}",
- [(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
- OpSize;
- def SHR32mi : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, i8imm:$src),
- "shr{l}\t{$src, $dst|$dst, $src}",
- [(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-
- // Shift by 1
- def SHR8m1 : I<0xD0, MRM5m, (outs), (ins i8mem :$dst),
- "shr{b}\t$dst",
- [(store (srl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
- def SHR16m1 : I<0xD1, MRM5m, (outs), (ins i16mem:$dst),
- "shr{w}\t$dst",
- [(store (srl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,OpSize;
- def SHR32m1 : I<0xD1, MRM5m, (outs), (ins i32mem:$dst),
- "shr{l}\t$dst",
- [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-} // Constraints = ""
-
-let Uses = [CL] in {
-def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
- "sar{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (sra GR8:$src1, CL))]>;
-def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
- "sar{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (sra GR16:$src1, CL))]>, OpSize;
-def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
- "sar{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (sra GR32:$src1, CL))]>;
-}
-
-def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
- "sar{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))]>;
-def SAR16ri : Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
- "sar{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))]>,
- OpSize;
-def SAR32ri : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
- "sar{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))]>;
-
-// Shift by 1
-def SAR8r1 : I<0xD0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
- "sar{b}\t$dst",
- [(set GR8:$dst, (sra GR8:$src1, (i8 1)))]>;
-def SAR16r1 : I<0xD1, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
- "sar{w}\t$dst",
- [(set GR16:$dst, (sra GR16:$src1, (i8 1)))]>, OpSize;
-def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
- "sar{l}\t$dst",
- [(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>;
-
-let Constraints = "" in {
- let Uses = [CL] in {
- def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
- "sar{b}\t{%cl, $dst|$dst, CL}",
- [(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>;
- def SAR16mCL : I<0xD3, MRM7m, (outs), (ins i16mem:$dst),
- "sar{w}\t{%cl, $dst|$dst, CL}",
- [(store (sra (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
- def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst),
- "sar{l}\t{%cl, $dst|$dst, CL}",
- [(store (sra (loadi32 addr:$dst), CL), addr:$dst)]>;
- }
- def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src),
- "sar{b}\t{$src, $dst|$dst, $src}",
- [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
- def SAR16mi : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, i8imm:$src),
- "sar{w}\t{$src, $dst|$dst, $src}",
- [(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
- OpSize;
- def SAR32mi : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, i8imm:$src),
- "sar{l}\t{$src, $dst|$dst, $src}",
- [(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-
- // Shift by 1
- def SAR8m1 : I<0xD0, MRM7m, (outs), (ins i8mem :$dst),
- "sar{b}\t$dst",
- [(store (sra (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
- def SAR16m1 : I<0xD1, MRM7m, (outs), (ins i16mem:$dst),
- "sar{w}\t$dst",
- [(store (sra (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
- OpSize;
- def SAR32m1 : I<0xD1, MRM7m, (outs), (ins i32mem:$dst),
- "sar{l}\t$dst",
- [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-} // Constraints = ""
-
-// Rotate instructions
-
-def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
- "rcl{b}\t{1, $dst|$dst, 1}", []>;
-let Uses = [CL] in {
-def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
- "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
-}
-def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
- "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
- "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize;
-let Uses = [CL] in {
-def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
- "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
-}
-def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
- "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-
-def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
- "rcl{l}\t{1, $dst|$dst, 1}", []>;
-let Uses = [CL] in {
-def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
- "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
-}
-def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
- "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
- "rcr{b}\t{1, $dst|$dst, 1}", []>;
-let Uses = [CL] in {
-def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
- "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
-}
-def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
- "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
- "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize;
-let Uses = [CL] in {
-def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
- "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
-}
-def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
- "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-
-def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
- "rcr{l}\t{1, $dst|$dst, 1}", []>;
-let Uses = [CL] in {
-def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
- "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
-}
-def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
- "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-let Constraints = "" in {
-def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
- "rcl{b}\t{1, $dst|$dst, 1}", []>;
-def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt),
- "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst),
- "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize;
-def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt),
- "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst),
- "rcl{l}\t{1, $dst|$dst, 1}", []>;
-def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt),
- "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst),
- "rcr{b}\t{1, $dst|$dst, 1}", []>;
-def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt),
- "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst),
- "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize;
-def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt),
- "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst),
- "rcr{l}\t{1, $dst|$dst, 1}", []>;
-def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt),
- "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-let Uses = [CL] in {
-def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst),
- "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
-def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst),
- "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
-def RCL32mCL : I<0xD3, MRM2m, (outs), (ins i32mem:$dst),
- "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
-def RCR8mCL : I<0xD2, MRM3m, (outs), (ins i8mem:$dst),
- "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
-def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst),
- "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
-def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst),
- "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
-}
-} // Constraints = ""
-
-// FIXME: provide shorter instructions when imm8 == 1
-let Uses = [CL] in {
-def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
- "rol{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (rotl GR8:$src1, CL))]>;
-def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
- "rol{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (rotl GR16:$src1, CL))]>, OpSize;
-def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
- "rol{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (rotl GR32:$src1, CL))]>;
-}
-
-def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
- "rol{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))]>;
-def ROL16ri : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
- "rol{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))]>,
- OpSize;
-def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
- "rol{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))]>;
-
-// Rotate by 1
-def ROL8r1 : I<0xD0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
- "rol{b}\t$dst",
- [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))]>;
-def ROL16r1 : I<0xD1, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
- "rol{w}\t$dst",
- [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))]>, OpSize;
-def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
- "rol{l}\t$dst",
- [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>;
-
-let Constraints = "" in {
- let Uses = [CL] in {
- def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
- "rol{b}\t{%cl, $dst|$dst, CL}",
- [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>;
- def ROL16mCL : I<0xD3, MRM0m, (outs), (ins i16mem:$dst),
- "rol{w}\t{%cl, $dst|$dst, CL}",
- [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
- def ROL32mCL : I<0xD3, MRM0m, (outs), (ins i32mem:$dst),
- "rol{l}\t{%cl, $dst|$dst, CL}",
- [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)]>;
- }
- def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src),
- "rol{b}\t{$src, $dst|$dst, $src}",
- [(store (rotl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
- def ROL16mi : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, i8imm:$src),
- "rol{w}\t{$src, $dst|$dst, $src}",
- [(store (rotl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
- OpSize;
- def ROL32mi : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, i8imm:$src),
- "rol{l}\t{$src, $dst|$dst, $src}",
- [(store (rotl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-
- // Rotate by 1
- def ROL8m1 : I<0xD0, MRM0m, (outs), (ins i8mem :$dst),
- "rol{b}\t$dst",
- [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
- def ROL16m1 : I<0xD1, MRM0m, (outs), (ins i16mem:$dst),
- "rol{w}\t$dst",
- [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
- OpSize;
- def ROL32m1 : I<0xD1, MRM0m, (outs), (ins i32mem:$dst),
- "rol{l}\t$dst",
- [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-} // Constraints = ""
-
-let Uses = [CL] in {
-def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
- "ror{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (rotr GR8:$src1, CL))]>;
-def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
- "ror{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (rotr GR16:$src1, CL))]>, OpSize;
-def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
- "ror{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (rotr GR32:$src1, CL))]>;
-}
-
-def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
- "ror{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))]>;
-def ROR16ri : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
- "ror{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))]>,
- OpSize;
-def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
- "ror{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))]>;
-
-// Rotate by 1
-def ROR8r1 : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
- "ror{b}\t$dst",
- [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))]>;
-def ROR16r1 : I<0xD1, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
- "ror{w}\t$dst",
- [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))]>, OpSize;
-def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
- "ror{l}\t$dst",
- [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>;
-
-let Constraints = "" in {
- let Uses = [CL] in {
- def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
- "ror{b}\t{%cl, $dst|$dst, CL}",
- [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>;
- def ROR16mCL : I<0xD3, MRM1m, (outs), (ins i16mem:$dst),
- "ror{w}\t{%cl, $dst|$dst, CL}",
- [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
- def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst),
- "ror{l}\t{%cl, $dst|$dst, CL}",
- [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)]>;
- }
- def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src),
- "ror{b}\t{$src, $dst|$dst, $src}",
- [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
- def ROR16mi : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, i8imm:$src),
- "ror{w}\t{$src, $dst|$dst, $src}",
- [(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
- OpSize;
- def ROR32mi : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, i8imm:$src),
- "ror{l}\t{$src, $dst|$dst, $src}",
- [(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-
- // Rotate by 1
- def ROR8m1 : I<0xD0, MRM1m, (outs), (ins i8mem :$dst),
- "ror{b}\t$dst",
- [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
- def ROR16m1 : I<0xD1, MRM1m, (outs), (ins i16mem:$dst),
- "ror{w}\t$dst",
- [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
- OpSize;
- def ROR32m1 : I<0xD1, MRM1m, (outs), (ins i32mem:$dst),
- "ror{l}\t$dst",
- [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-} // Constraints = ""
-
-
-// Double shift instructions (generalizations of rotate)
-let Uses = [CL] in {
-def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>, TB;
-def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>, TB;
-def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>,
- TB, OpSize;
-def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>,
- TB, OpSize;
-}
-
-let isCommutable = 1 in { // These instructions commute to each other.
-def SHLD32rri8 : Ii8<0xA4, MRMDestReg,
- (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2, i8imm:$src3),
- "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2,
- (i8 imm:$src3)))]>,
- TB;
-def SHRD32rri8 : Ii8<0xAC, MRMDestReg,
- (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2, i8imm:$src3),
- "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2,
- (i8 imm:$src3)))]>,
- TB;
-def SHLD16rri8 : Ii8<0xA4, MRMDestReg,
- (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2, i8imm:$src3),
- "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2,
- (i8 imm:$src3)))]>,
- TB, OpSize;
-def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
- (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2, i8imm:$src3),
- "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2,
- (i8 imm:$src3)))]>,
- TB, OpSize;
-}
-
-let Constraints = "" in {
- let Uses = [CL] in {
- def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL),
- addr:$dst)]>, TB;
- def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL),
- addr:$dst)]>, TB;
- }
- def SHLD32mri8 : Ii8<0xA4, MRMDestMem,
- (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
- "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shld (loadi32 addr:$dst), GR32:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
- TB;
- def SHRD32mri8 : Ii8<0xAC, MRMDestMem,
- (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
- "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shrd (loadi32 addr:$dst), GR32:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
- TB;
-
- let Uses = [CL] in {
- def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL),
- addr:$dst)]>, TB, OpSize;
- def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL),
- addr:$dst)]>, TB, OpSize;
- }
- def SHLD16mri8 : Ii8<0xA4, MRMDestMem,
- (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
- "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shld (loadi16 addr:$dst), GR16:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
- TB, OpSize;
- def SHRD16mri8 : Ii8<0xAC, MRMDestMem,
- (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
- "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
- TB, OpSize;
-} // Constraints = ""
-} // Defs = [EFLAGS]
-
-
-// Arithmetic.
-let Defs = [EFLAGS] in {
-let isCommutable = 1 in { // X = ADD Y, Z --> X = ADD Z, Y
-// Register-Register Addition
-def ADD8rr : I<0x00, MRMDestReg, (outs GR8 :$dst),
- (ins GR8 :$src1, GR8 :$src2),
- "add{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS, (X86add_flag GR8:$src1, GR8:$src2))]>;
-
-let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
-// Register-Register Addition
-def ADD16rr : I<0x01, MRMDestReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "add{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86add_flag GR16:$src1,
- GR16:$src2))]>, OpSize;
-def ADD32rr : I<0x01, MRMDestReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "add{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86add_flag GR32:$src1,
- GR32:$src2))]>;
-} // end isConvertibleToThreeAddress
-} // end isCommutable
-
-// These are alternate spellings for use by the disassembler, we mark them as
-// code gen only to ensure they aren't matched by the assembler.
-let isCodeGenOnly = 1 in {
- def ADD8rr_alt: I<0x02, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
- "add{b}\t{$src2, $dst|$dst, $src2}", []>;
- def ADD16rr_alt: I<0x03, MRMSrcReg,(outs GR16:$dst),(ins GR16:$src1, GR16:$src2),
- "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
- def ADD32rr_alt: I<0x03, MRMSrcReg,(outs GR32:$dst),(ins GR32:$src1, GR32:$src2),
- "add{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-// Register-Memory Addition
-def ADD8rm : I<0x02, MRMSrcMem, (outs GR8 :$dst),
- (ins GR8 :$src1, i8mem :$src2),
- "add{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS, (X86add_flag GR8:$src1,
- (load addr:$src2)))]>;
-def ADD16rm : I<0x03, MRMSrcMem, (outs GR16:$dst),
- (ins GR16:$src1, i16mem:$src2),
- "add{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86add_flag GR16:$src1,
- (load addr:$src2)))]>, OpSize;
-def ADD32rm : I<0x03, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$src1, i32mem:$src2),
- "add{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86add_flag GR32:$src1,
- (load addr:$src2)))]>;
-
-// Register-Integer Addition
-def ADD8ri : Ii8<0x80, MRM0r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
- "add{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS,
- (X86add_flag GR8:$src1, imm:$src2))]>;
-
-let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
-// Register-Integer Addition
-def ADD16ri : Ii16<0x81, MRM0r, (outs GR16:$dst),
- (ins GR16:$src1, i16imm:$src2),
- "add{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86add_flag GR16:$src1, imm:$src2))]>, OpSize;
-def ADD32ri : Ii32<0x81, MRM0r, (outs GR32:$dst),
- (ins GR32:$src1, i32imm:$src2),
- "add{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86add_flag GR32:$src1, imm:$src2))]>;
-def ADD16ri8 : Ii8<0x83, MRM0r, (outs GR16:$dst),
- (ins GR16:$src1, i16i8imm:$src2),
- "add{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86add_flag GR16:$src1, i16immSExt8:$src2))]>, OpSize;
-def ADD32ri8 : Ii8<0x83, MRM0r, (outs GR32:$dst),
- (ins GR32:$src1, i32i8imm:$src2),
- "add{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86add_flag GR32:$src1, i32immSExt8:$src2))]>;
-}
-
-let Constraints = "" in {
- // Memory-Register Addition
- def ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
- "add{b}\t{$src2, $dst|$dst, $src2}",
- [(store (add (load addr:$dst), GR8:$src2), addr:$dst),
- (implicit EFLAGS)]>;
- def ADD16mr : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "add{w}\t{$src2, $dst|$dst, $src2}",
- [(store (add (load addr:$dst), GR16:$src2), addr:$dst),
- (implicit EFLAGS)]>, OpSize;
- def ADD32mr : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "add{l}\t{$src2, $dst|$dst, $src2}",
- [(store (add (load addr:$dst), GR32:$src2), addr:$dst),
- (implicit EFLAGS)]>;
- def ADD8mi : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2),
- "add{b}\t{$src2, $dst|$dst, $src2}",
- [(store (add (loadi8 addr:$dst), imm:$src2), addr:$dst),
- (implicit EFLAGS)]>;
- def ADD16mi : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2),
- "add{w}\t{$src2, $dst|$dst, $src2}",
- [(store (add (loadi16 addr:$dst), imm:$src2), addr:$dst),
- (implicit EFLAGS)]>, OpSize;
- def ADD32mi : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2),
- "add{l}\t{$src2, $dst|$dst, $src2}",
- [(store (add (loadi32 addr:$dst), imm:$src2), addr:$dst),
- (implicit EFLAGS)]>;
- def ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
- "add{w}\t{$src2, $dst|$dst, $src2}",
- [(store (add (load addr:$dst), i16immSExt8:$src2),
- addr:$dst),
- (implicit EFLAGS)]>, OpSize;
- def ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
- "add{l}\t{$src2, $dst|$dst, $src2}",
- [(store (add (load addr:$dst), i32immSExt8:$src2),
- addr:$dst),
- (implicit EFLAGS)]>;
-
- // addition to rAX
- def ADD8i8 : Ii8<0x04, RawFrm, (outs), (ins i8imm:$src),
- "add{b}\t{$src, %al|%al, $src}", []>;
- def ADD16i16 : Ii16<0x05, RawFrm, (outs), (ins i16imm:$src),
- "add{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
- def ADD32i32 : Ii32<0x05, RawFrm, (outs), (ins i32imm:$src),
- "add{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-
-let Uses = [EFLAGS] in {
-let isCommutable = 1 in { // X = ADC Y, Z --> X = ADC Z, Y
-def ADC8rr : I<0x10, MRMDestReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
- "adc{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (adde GR8:$src1, GR8:$src2))]>;
-def ADC16rr : I<0x11, MRMDestReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "adc{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (adde GR16:$src1, GR16:$src2))]>, OpSize;
-def ADC32rr : I<0x11, MRMDestReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "adc{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (adde GR32:$src1, GR32:$src2))]>;
-}
-
-let isCodeGenOnly = 1 in {
-def ADC8rr_REV : I<0x12, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
- "adc{b}\t{$src2, $dst|$dst, $src2}", []>;
-def ADC16rr_REV : I<0x13, MRMSrcReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "adc{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def ADC32rr_REV : I<0x13, MRMSrcReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "adc{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-def ADC8rm : I<0x12, MRMSrcMem , (outs GR8:$dst),
- (ins GR8:$src1, i8mem:$src2),
- "adc{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (adde GR8:$src1, (load addr:$src2)))]>;
-def ADC16rm : I<0x13, MRMSrcMem , (outs GR16:$dst),
- (ins GR16:$src1, i16mem:$src2),
- "adc{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (adde GR16:$src1, (load addr:$src2)))]>,
- OpSize;
-def ADC32rm : I<0x13, MRMSrcMem , (outs GR32:$dst),
- (ins GR32:$src1, i32mem:$src2),
- "adc{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (adde GR32:$src1, (load addr:$src2)))]>;
-def ADC8ri : Ii8<0x80, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
- "adc{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (adde GR8:$src1, imm:$src2))]>;
-def ADC16ri : Ii16<0x81, MRM2r, (outs GR16:$dst),
- (ins GR16:$src1, i16imm:$src2),
- "adc{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (adde GR16:$src1, imm:$src2))]>, OpSize;
-def ADC16ri8 : Ii8<0x83, MRM2r, (outs GR16:$dst),
- (ins GR16:$src1, i16i8imm:$src2),
- "adc{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (adde GR16:$src1, i16immSExt8:$src2))]>,
- OpSize;
-def ADC32ri : Ii32<0x81, MRM2r, (outs GR32:$dst),
- (ins GR32:$src1, i32imm:$src2),
- "adc{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (adde GR32:$src1, imm:$src2))]>;
-def ADC32ri8 : Ii8<0x83, MRM2r, (outs GR32:$dst),
- (ins GR32:$src1, i32i8imm:$src2),
- "adc{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (adde GR32:$src1, i32immSExt8:$src2))]>;
-
-let Constraints = "" in {
- def ADC8mr : I<0x10, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
- "adc{b}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (load addr:$dst), GR8:$src2), addr:$dst)]>;
- def ADC16mr : I<0x11, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "adc{w}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (load addr:$dst), GR16:$src2), addr:$dst)]>,
- OpSize;
- def ADC32mr : I<0x11, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "adc{l}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (load addr:$dst), GR32:$src2), addr:$dst)]>;
- def ADC8mi : Ii8<0x80, MRM2m, (outs), (ins i8mem:$dst, i8imm:$src2),
- "adc{b}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (loadi8 addr:$dst), imm:$src2), addr:$dst)]>;
- def ADC16mi : Ii16<0x81, MRM2m, (outs), (ins i16mem:$dst, i16imm:$src2),
- "adc{w}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (loadi16 addr:$dst), imm:$src2), addr:$dst)]>,
- OpSize;
- def ADC16mi8 : Ii8<0x83, MRM2m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
- "adc{w}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (load addr:$dst), i16immSExt8:$src2), addr:$dst)]>,
- OpSize;
- def ADC32mi : Ii32<0x81, MRM2m, (outs), (ins i32mem:$dst, i32imm:$src2),
- "adc{l}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (loadi32 addr:$dst), imm:$src2), addr:$dst)]>;
- def ADC32mi8 : Ii8<0x83, MRM2m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
- "adc{l}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
-
- def ADC8i8 : Ii8<0x14, RawFrm, (outs), (ins i8imm:$src),
- "adc{b}\t{$src, %al|%al, $src}", []>;
- def ADC16i16 : Ii16<0x15, RawFrm, (outs), (ins i16imm:$src),
- "adc{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
- def ADC32i32 : Ii32<0x15, RawFrm, (outs), (ins i32imm:$src),
- "adc{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-} // Uses = [EFLAGS]
-
-// Register-Register Subtraction
-def SUB8rr : I<0x28, MRMDestReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
- "sub{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS,
- (X86sub_flag GR8:$src1, GR8:$src2))]>;
-def SUB16rr : I<0x29, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
- "sub{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86sub_flag GR16:$src1, GR16:$src2))]>, OpSize;
-def SUB32rr : I<0x29, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
- "sub{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86sub_flag GR32:$src1, GR32:$src2))]>;
-
-let isCodeGenOnly = 1 in {
-def SUB8rr_REV : I<0x2A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
- "sub{b}\t{$src2, $dst|$dst, $src2}", []>;
-def SUB16rr_REV : I<0x2B, MRMSrcReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def SUB32rr_REV : I<0x2B, MRMSrcReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "sub{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-// Register-Memory Subtraction
-def SUB8rm : I<0x2A, MRMSrcMem, (outs GR8 :$dst),
- (ins GR8 :$src1, i8mem :$src2),
- "sub{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS,
- (X86sub_flag GR8:$src1, (load addr:$src2)))]>;
-def SUB16rm : I<0x2B, MRMSrcMem, (outs GR16:$dst),
- (ins GR16:$src1, i16mem:$src2),
- "sub{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86sub_flag GR16:$src1, (load addr:$src2)))]>, OpSize;
-def SUB32rm : I<0x2B, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$src1, i32mem:$src2),
- "sub{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86sub_flag GR32:$src1, (load addr:$src2)))]>;
-
-// Register-Integer Subtraction
-def SUB8ri : Ii8 <0x80, MRM5r, (outs GR8:$dst),
- (ins GR8:$src1, i8imm:$src2),
- "sub{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS,
- (X86sub_flag GR8:$src1, imm:$src2))]>;
-def SUB16ri : Ii16<0x81, MRM5r, (outs GR16:$dst),
- (ins GR16:$src1, i16imm:$src2),
- "sub{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86sub_flag GR16:$src1, imm:$src2))]>, OpSize;
-def SUB32ri : Ii32<0x81, MRM5r, (outs GR32:$dst),
- (ins GR32:$src1, i32imm:$src2),
- "sub{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86sub_flag GR32:$src1, imm:$src2))]>;
-def SUB16ri8 : Ii8<0x83, MRM5r, (outs GR16:$dst),
- (ins GR16:$src1, i16i8imm:$src2),
- "sub{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86sub_flag GR16:$src1, i16immSExt8:$src2))]>, OpSize;
-def SUB32ri8 : Ii8<0x83, MRM5r, (outs GR32:$dst),
- (ins GR32:$src1, i32i8imm:$src2),
- "sub{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86sub_flag GR32:$src1, i32immSExt8:$src2))]>;
-
-let Constraints = "" in {
- // Memory-Register Subtraction
- def SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
- "sub{b}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (load addr:$dst), GR8:$src2), addr:$dst),
- (implicit EFLAGS)]>;
- def SUB16mr : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "sub{w}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (load addr:$dst), GR16:$src2), addr:$dst),
- (implicit EFLAGS)]>, OpSize;
- def SUB32mr : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "sub{l}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (load addr:$dst), GR32:$src2), addr:$dst),
- (implicit EFLAGS)]>;
-
- // Memory-Integer Subtraction
- def SUB8mi : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2),
- "sub{b}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (loadi8 addr:$dst), imm:$src2), addr:$dst),
- (implicit EFLAGS)]>;
- def SUB16mi : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2),
- "sub{w}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (loadi16 addr:$dst), imm:$src2),addr:$dst),
- (implicit EFLAGS)]>, OpSize;
- def SUB32mi : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2),
- "sub{l}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (loadi32 addr:$dst), imm:$src2),addr:$dst),
- (implicit EFLAGS)]>;
- def SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
- "sub{w}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (load addr:$dst), i16immSExt8:$src2),
- addr:$dst),
- (implicit EFLAGS)]>, OpSize;
- def SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
- "sub{l}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (load addr:$dst), i32immSExt8:$src2),
- addr:$dst),
- (implicit EFLAGS)]>;
-
- def SUB8i8 : Ii8<0x2C, RawFrm, (outs), (ins i8imm:$src),
- "sub{b}\t{$src, %al|%al, $src}", []>;
- def SUB16i16 : Ii16<0x2D, RawFrm, (outs), (ins i16imm:$src),
- "sub{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
- def SUB32i32 : Ii32<0x2D, RawFrm, (outs), (ins i32imm:$src),
- "sub{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-
-let Uses = [EFLAGS] in {
-def SBB8rr : I<0x18, MRMDestReg, (outs GR8:$dst),
- (ins GR8:$src1, GR8:$src2),
- "sbb{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (sube GR8:$src1, GR8:$src2))]>;
-def SBB16rr : I<0x19, MRMDestReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "sbb{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (sube GR16:$src1, GR16:$src2))]>, OpSize;
-def SBB32rr : I<0x19, MRMDestReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "sbb{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (sube GR32:$src1, GR32:$src2))]>;
-
-let Constraints = "" in {
- def SBB8mr : I<0x18, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
- "sbb{b}\t{$src2, $dst|$dst, $src2}",
- [(store (sube (load addr:$dst), GR8:$src2), addr:$dst)]>;
- def SBB16mr : I<0x19, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "sbb{w}\t{$src2, $dst|$dst, $src2}",
- [(store (sube (load addr:$dst), GR16:$src2), addr:$dst)]>,
- OpSize;
- def SBB32mr : I<0x19, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "sbb{l}\t{$src2, $dst|$dst, $src2}",
- [(store (sube (load addr:$dst), GR32:$src2), addr:$dst)]>;
- def SBB8mi : Ii8<0x80, MRM3m, (outs), (ins i8mem:$dst, i8imm:$src2),
- "sbb{b}\t{$src2, $dst|$dst, $src2}",
- [(store (sube (loadi8 addr:$dst), imm:$src2), addr:$dst)]>;
- def SBB16mi : Ii16<0x81, MRM3m, (outs), (ins i16mem:$dst, i16imm:$src2),
- "sbb{w}\t{$src2, $dst|$dst, $src2}",
- [(store (sube (loadi16 addr:$dst), imm:$src2), addr:$dst)]>,
- OpSize;
- def SBB16mi8 : Ii8<0x83, MRM3m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
- "sbb{w}\t{$src2, $dst|$dst, $src2}",
- [(store (sube (load addr:$dst), i16immSExt8:$src2), addr:$dst)]>,
- OpSize;
- def SBB32mi : Ii32<0x81, MRM3m, (outs), (ins i32mem:$dst, i32imm:$src2),
- "sbb{l}\t{$src2, $dst|$dst, $src2}",
- [(store (sube (loadi32 addr:$dst), imm:$src2), addr:$dst)]>;
- def SBB32mi8 : Ii8<0x83, MRM3m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
- "sbb{l}\t{$src2, $dst|$dst, $src2}",
- [(store (sube (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
-
- def SBB8i8 : Ii8<0x1C, RawFrm, (outs), (ins i8imm:$src),
- "sbb{b}\t{$src, %al|%al, $src}", []>;
- def SBB16i16 : Ii16<0x1D, RawFrm, (outs), (ins i16imm:$src),
- "sbb{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
- def SBB32i32 : Ii32<0x1D, RawFrm, (outs), (ins i32imm:$src),
- "sbb{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-
-let isCodeGenOnly = 1 in {
-def SBB8rr_REV : I<0x1A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
- "sbb{b}\t{$src2, $dst|$dst, $src2}", []>;
-def SBB16rr_REV : I<0x1B, MRMSrcReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "sbb{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def SBB32rr_REV : I<0x1B, MRMSrcReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "sbb{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-def SBB8rm : I<0x1A, MRMSrcMem, (outs GR8:$dst), (ins GR8:$src1, i8mem:$src2),
- "sbb{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (sube GR8:$src1, (load addr:$src2)))]>;
-def SBB16rm : I<0x1B, MRMSrcMem, (outs GR16:$dst),
- (ins GR16:$src1, i16mem:$src2),
- "sbb{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (sube GR16:$src1, (load addr:$src2)))]>,
- OpSize;
-def SBB32rm : I<0x1B, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$src1, i32mem:$src2),
- "sbb{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (sube GR32:$src1, (load addr:$src2)))]>;
-def SBB8ri : Ii8<0x80, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
- "sbb{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (sube GR8:$src1, imm:$src2))]>;
-def SBB16ri : Ii16<0x81, MRM3r, (outs GR16:$dst),
- (ins GR16:$src1, i16imm:$src2),
- "sbb{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (sube GR16:$src1, imm:$src2))]>, OpSize;
-def SBB16ri8 : Ii8<0x83, MRM3r, (outs GR16:$dst),
- (ins GR16:$src1, i16i8imm:$src2),
- "sbb{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (sube GR16:$src1, i16immSExt8:$src2))]>,
- OpSize;
-def SBB32ri : Ii32<0x81, MRM3r, (outs GR32:$dst),
- (ins GR32:$src1, i32imm:$src2),
- "sbb{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (sube GR32:$src1, imm:$src2))]>;
-def SBB32ri8 : Ii8<0x83, MRM3r, (outs GR32:$dst),
- (ins GR32:$src1, i32i8imm:$src2),
- "sbb{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (sube GR32:$src1, i32immSExt8:$src2))]>;
-} // Uses = [EFLAGS]
-} // Defs = [EFLAGS]
-
-let Defs = [EFLAGS] in {
-let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y
-// Register-Register Signed Integer Multiply
-def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
- "imul{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86smul_flag GR16:$src1, GR16:$src2))]>, TB, OpSize;
-def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
- "imul{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86smul_flag GR32:$src1, GR32:$src2))]>, TB;
-}
-
-// Register-Memory Signed Integer Multiply
-def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
- (ins GR16:$src1, i16mem:$src2),
- "imul{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86smul_flag GR16:$src1, (load addr:$src2)))]>,
- TB, OpSize;
-def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$src1, i32mem:$src2),
- "imul{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86smul_flag GR32:$src1, (load addr:$src2)))]>, TB;
-} // Defs = [EFLAGS]
-} // end Two Address instructions
-
-// Suprisingly enough, these are not two address instructions!
-let Defs = [EFLAGS] in {
-// Register-Integer Signed Integer Multiply
-def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
- (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
- "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86smul_flag GR16:$src1, imm:$src2))]>, OpSize;
-def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32
- (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
- "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86smul_flag GR32:$src1, imm:$src2))]>;
-def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8
- (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
- "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86smul_flag GR16:$src1, i16immSExt8:$src2))]>,
- OpSize;
-def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8
- (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
- "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86smul_flag GR32:$src1, i32immSExt8:$src2))]>;
-
-// Memory-Integer Signed Integer Multiply
-def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
- (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
- "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86smul_flag (load addr:$src1), imm:$src2))]>,
- OpSize;
-def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32
- (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
- "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86smul_flag (load addr:$src1), imm:$src2))]>;
-def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8
- (outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
- "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86smul_flag (load addr:$src1),
- i16immSExt8:$src2))]>, OpSize;
-def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8
- (outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
- "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86smul_flag (load addr:$src1),
- i32immSExt8:$src2))]>;
-} // Defs = [EFLAGS]
-
-//===----------------------------------------------------------------------===//
-// Test instructions are just like AND, except they don't generate a result.
-//
-let Defs = [EFLAGS] in {
-let isCommutable = 1 in { // TEST X, Y --> TEST Y, X
-def TEST8rr : I<0x84, MRMSrcReg, (outs), (ins GR8:$src1, GR8:$src2),
- "test{b}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and_su GR8:$src1, GR8:$src2), 0))]>;
-def TEST16rr : I<0x85, MRMSrcReg, (outs), (ins GR16:$src1, GR16:$src2),
- "test{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and_su GR16:$src1, GR16:$src2),
- 0))]>,
- OpSize;
-def TEST32rr : I<0x85, MRMSrcReg, (outs), (ins GR32:$src1, GR32:$src2),
- "test{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and_su GR32:$src1, GR32:$src2),
- 0))]>;
-}
-
-def TEST8i8 : Ii8<0xA8, RawFrm, (outs), (ins i8imm:$src),
- "test{b}\t{$src, %al|%al, $src}", []>;
-def TEST16i16 : Ii16<0xA9, RawFrm, (outs), (ins i16imm:$src),
- "test{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
-def TEST32i32 : Ii32<0xA9, RawFrm, (outs), (ins i32imm:$src),
- "test{l}\t{$src, %eax|%eax, $src}", []>;
-
-def TEST8rm : I<0x84, MRMSrcMem, (outs), (ins GR8 :$src1, i8mem :$src2),
- "test{b}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and GR8:$src1, (loadi8 addr:$src2)),
- 0))]>;
-def TEST16rm : I<0x85, MRMSrcMem, (outs), (ins GR16:$src1, i16mem:$src2),
- "test{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and GR16:$src1,
- (loadi16 addr:$src2)), 0))]>, OpSize;
-def TEST32rm : I<0x85, MRMSrcMem, (outs), (ins GR32:$src1, i32mem:$src2),
- "test{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and GR32:$src1,
- (loadi32 addr:$src2)), 0))]>;
-
-def TEST8ri : Ii8 <0xF6, MRM0r, // flags = GR8 & imm8
- (outs), (ins GR8:$src1, i8imm:$src2),
- "test{b}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and_su GR8:$src1, imm:$src2), 0))]>;
-def TEST16ri : Ii16<0xF7, MRM0r, // flags = GR16 & imm16
- (outs), (ins GR16:$src1, i16imm:$src2),
- "test{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and_su GR16:$src1, imm:$src2), 0))]>,
- OpSize;
-def TEST32ri : Ii32<0xF7, MRM0r, // flags = GR32 & imm32
- (outs), (ins GR32:$src1, i32imm:$src2),
- "test{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and_su GR32:$src1, imm:$src2), 0))]>;
-
-def TEST8mi : Ii8 <0xF6, MRM0m, // flags = [mem8] & imm8
- (outs), (ins i8mem:$src1, i8imm:$src2),
- "test{b}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and (loadi8 addr:$src1), imm:$src2),
- 0))]>;
-def TEST16mi : Ii16<0xF7, MRM0m, // flags = [mem16] & imm16
- (outs), (ins i16mem:$src1, i16imm:$src2),
- "test{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and (loadi16 addr:$src1), imm:$src2),
- 0))]>, OpSize;
-def TEST32mi : Ii32<0xF7, MRM0m, // flags = [mem32] & imm32
- (outs), (ins i32mem:$src1, i32imm:$src2),
- "test{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and (loadi32 addr:$src1), imm:$src2),
- 0))]>;
-} // Defs = [EFLAGS]
-
// Condition code ops, incl. set if equal/not equal/...
let Defs = [EFLAGS], Uses = [AH], neverHasSideEffects = 1 in
@@ -3374,305 +925,10 @@ def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", []>; // flags = AH
let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in
def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>; // AH = flags
-let Uses = [EFLAGS] in {
-// Use sbb to materialize carry bit.
-let Defs = [EFLAGS], isCodeGenOnly = 1 in {
-// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
-// However, Pat<> can't replicate the destination reg into the inputs of the
-// result.
-// FIXME: Change these to have encoding Pseudo when X86MCCodeEmitter replaces
-// X86CodeEmitter.
-def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), "",
- [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
-def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), "",
- [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>,
- OpSize;
-def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), "",
- [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
-} // isCodeGenOnly
-
-def SETEr : I<0x94, MRM0r,
- (outs GR8 :$dst), (ins),
- "sete\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_E, EFLAGS))]>,
- TB; // GR8 = ==
-def SETEm : I<0x94, MRM0m,
- (outs), (ins i8mem:$dst),
- "sete\t$dst",
- [(store (X86setcc X86_COND_E, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = ==
-
-def SETNEr : I<0x95, MRM0r,
- (outs GR8 :$dst), (ins),
- "setne\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_NE, EFLAGS))]>,
- TB; // GR8 = !=
-def SETNEm : I<0x95, MRM0m,
- (outs), (ins i8mem:$dst),
- "setne\t$dst",
- [(store (X86setcc X86_COND_NE, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = !=
-
-def SETLr : I<0x9C, MRM0r,
- (outs GR8 :$dst), (ins),
- "setl\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_L, EFLAGS))]>,
- TB; // GR8 = < signed
-def SETLm : I<0x9C, MRM0m,
- (outs), (ins i8mem:$dst),
- "setl\t$dst",
- [(store (X86setcc X86_COND_L, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = < signed
-
-def SETGEr : I<0x9D, MRM0r,
- (outs GR8 :$dst), (ins),
- "setge\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_GE, EFLAGS))]>,
- TB; // GR8 = >= signed
-def SETGEm : I<0x9D, MRM0m,
- (outs), (ins i8mem:$dst),
- "setge\t$dst",
- [(store (X86setcc X86_COND_GE, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = >= signed
-
-def SETLEr : I<0x9E, MRM0r,
- (outs GR8 :$dst), (ins),
- "setle\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_LE, EFLAGS))]>,
- TB; // GR8 = <= signed
-def SETLEm : I<0x9E, MRM0m,
- (outs), (ins i8mem:$dst),
- "setle\t$dst",
- [(store (X86setcc X86_COND_LE, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = <= signed
-
-def SETGr : I<0x9F, MRM0r,
- (outs GR8 :$dst), (ins),
- "setg\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_G, EFLAGS))]>,
- TB; // GR8 = > signed
-def SETGm : I<0x9F, MRM0m,
- (outs), (ins i8mem:$dst),
- "setg\t$dst",
- [(store (X86setcc X86_COND_G, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = > signed
-
-def SETBr : I<0x92, MRM0r,
- (outs GR8 :$dst), (ins),
- "setb\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_B, EFLAGS))]>,
- TB; // GR8 = < unsign
-def SETBm : I<0x92, MRM0m,
- (outs), (ins i8mem:$dst),
- "setb\t$dst",
- [(store (X86setcc X86_COND_B, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = < unsign
-
-def SETAEr : I<0x93, MRM0r,
- (outs GR8 :$dst), (ins),
- "setae\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_AE, EFLAGS))]>,
- TB; // GR8 = >= unsign
-def SETAEm : I<0x93, MRM0m,
- (outs), (ins i8mem:$dst),
- "setae\t$dst",
- [(store (X86setcc X86_COND_AE, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = >= unsign
-
-def SETBEr : I<0x96, MRM0r,
- (outs GR8 :$dst), (ins),
- "setbe\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_BE, EFLAGS))]>,
- TB; // GR8 = <= unsign
-def SETBEm : I<0x96, MRM0m,
- (outs), (ins i8mem:$dst),
- "setbe\t$dst",
- [(store (X86setcc X86_COND_BE, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = <= unsign
-
-def SETAr : I<0x97, MRM0r,
- (outs GR8 :$dst), (ins),
- "seta\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_A, EFLAGS))]>,
- TB; // GR8 = > signed
-def SETAm : I<0x97, MRM0m,
- (outs), (ins i8mem:$dst),
- "seta\t$dst",
- [(store (X86setcc X86_COND_A, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = > signed
-
-def SETSr : I<0x98, MRM0r,
- (outs GR8 :$dst), (ins),
- "sets\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_S, EFLAGS))]>,
- TB; // GR8 = <sign bit>
-def SETSm : I<0x98, MRM0m,
- (outs), (ins i8mem:$dst),
- "sets\t$dst",
- [(store (X86setcc X86_COND_S, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = <sign bit>
-def SETNSr : I<0x99, MRM0r,
- (outs GR8 :$dst), (ins),
- "setns\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_NS, EFLAGS))]>,
- TB; // GR8 = !<sign bit>
-def SETNSm : I<0x99, MRM0m,
- (outs), (ins i8mem:$dst),
- "setns\t$dst",
- [(store (X86setcc X86_COND_NS, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = !<sign bit>
-
-def SETPr : I<0x9A, MRM0r,
- (outs GR8 :$dst), (ins),
- "setp\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_P, EFLAGS))]>,
- TB; // GR8 = parity
-def SETPm : I<0x9A, MRM0m,
- (outs), (ins i8mem:$dst),
- "setp\t$dst",
- [(store (X86setcc X86_COND_P, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = parity
-def SETNPr : I<0x9B, MRM0r,
- (outs GR8 :$dst), (ins),
- "setnp\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_NP, EFLAGS))]>,
- TB; // GR8 = not parity
-def SETNPm : I<0x9B, MRM0m,
- (outs), (ins i8mem:$dst),
- "setnp\t$dst",
- [(store (X86setcc X86_COND_NP, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = not parity
-
-def SETOr : I<0x90, MRM0r,
- (outs GR8 :$dst), (ins),
- "seto\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_O, EFLAGS))]>,
- TB; // GR8 = overflow
-def SETOm : I<0x90, MRM0m,
- (outs), (ins i8mem:$dst),
- "seto\t$dst",
- [(store (X86setcc X86_COND_O, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = overflow
-def SETNOr : I<0x91, MRM0r,
- (outs GR8 :$dst), (ins),
- "setno\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_NO, EFLAGS))]>,
- TB; // GR8 = not overflow
-def SETNOm : I<0x91, MRM0m,
- (outs), (ins i8mem:$dst),
- "setno\t$dst",
- [(store (X86setcc X86_COND_NO, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = not overflow
-} // Uses = [EFLAGS]
-
-
-// Integer comparisons
-let Defs = [EFLAGS] in {
-def CMP8i8 : Ii8<0x3C, RawFrm, (outs), (ins i8imm:$src),
- "cmp{b}\t{$src, %al|%al, $src}", []>;
-def CMP16i16 : Ii16<0x3D, RawFrm, (outs), (ins i16imm:$src),
- "cmp{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
-def CMP32i32 : Ii32<0x3D, RawFrm, (outs), (ins i32imm:$src),
- "cmp{l}\t{$src, %eax|%eax, $src}", []>;
-
-def CMP8rr : I<0x38, MRMDestReg,
- (outs), (ins GR8 :$src1, GR8 :$src2),
- "cmp{b}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR8:$src1, GR8:$src2))]>;
-def CMP16rr : I<0x39, MRMDestReg,
- (outs), (ins GR16:$src1, GR16:$src2),
- "cmp{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR16:$src1, GR16:$src2))]>, OpSize;
-def CMP32rr : I<0x39, MRMDestReg,
- (outs), (ins GR32:$src1, GR32:$src2),
- "cmp{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR32:$src1, GR32:$src2))]>;
-def CMP8mr : I<0x38, MRMDestMem,
- (outs), (ins i8mem :$src1, GR8 :$src2),
- "cmp{b}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (loadi8 addr:$src1), GR8:$src2))]>;
-def CMP16mr : I<0x39, MRMDestMem,
- (outs), (ins i16mem:$src1, GR16:$src2),
- "cmp{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (loadi16 addr:$src1), GR16:$src2))]>,
- OpSize;
-def CMP32mr : I<0x39, MRMDestMem,
- (outs), (ins i32mem:$src1, GR32:$src2),
- "cmp{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (loadi32 addr:$src1), GR32:$src2))]>;
-def CMP8rm : I<0x3A, MRMSrcMem,
- (outs), (ins GR8 :$src1, i8mem :$src2),
- "cmp{b}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR8:$src1, (loadi8 addr:$src2)))]>;
-def CMP16rm : I<0x3B, MRMSrcMem,
- (outs), (ins GR16:$src1, i16mem:$src2),
- "cmp{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR16:$src1, (loadi16 addr:$src2)))]>,
- OpSize;
-def CMP32rm : I<0x3B, MRMSrcMem,
- (outs), (ins GR32:$src1, i32mem:$src2),
- "cmp{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR32:$src1, (loadi32 addr:$src2)))]>;
-
-// These are alternate spellings for use by the disassembler, we mark them as
-// code gen only to ensure they aren't matched by the assembler.
-let isCodeGenOnly = 1 in {
- def CMP8rr_alt : I<0x3A, MRMSrcReg, (outs), (ins GR8:$src1, GR8:$src2),
- "cmp{b}\t{$src2, $src1|$src1, $src2}", []>;
- def CMP16rr_alt : I<0x3B, MRMSrcReg, (outs), (ins GR16:$src1, GR16:$src2),
- "cmp{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize;
- def CMP32rr_alt : I<0x3B, MRMSrcReg, (outs), (ins GR32:$src1, GR32:$src2),
- "cmp{l}\t{$src2, $src1|$src1, $src2}", []>;
-}
-def CMP8ri : Ii8<0x80, MRM7r,
- (outs), (ins GR8:$src1, i8imm:$src2),
- "cmp{b}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR8:$src1, imm:$src2))]>;
-def CMP16ri : Ii16<0x81, MRM7r,
- (outs), (ins GR16:$src1, i16imm:$src2),
- "cmp{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR16:$src1, imm:$src2))]>, OpSize;
-def CMP32ri : Ii32<0x81, MRM7r,
- (outs), (ins GR32:$src1, i32imm:$src2),
- "cmp{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR32:$src1, imm:$src2))]>;
-def CMP8mi : Ii8 <0x80, MRM7m,
- (outs), (ins i8mem :$src1, i8imm :$src2),
- "cmp{b}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (loadi8 addr:$src1), imm:$src2))]>;
-def CMP16mi : Ii16<0x81, MRM7m,
- (outs), (ins i16mem:$src1, i16imm:$src2),
- "cmp{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (loadi16 addr:$src1), imm:$src2))]>,
- OpSize;
-def CMP32mi : Ii32<0x81, MRM7m,
- (outs), (ins i32mem:$src1, i32imm:$src2),
- "cmp{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (loadi32 addr:$src1), imm:$src2))]>;
-def CMP16ri8 : Ii8<0x83, MRM7r,
- (outs), (ins GR16:$src1, i16i8imm:$src2),
- "cmp{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR16:$src1, i16immSExt8:$src2))]>,
- OpSize;
-def CMP16mi8 : Ii8<0x83, MRM7m,
- (outs), (ins i16mem:$src1, i16i8imm:$src2),
- "cmp{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (loadi16 addr:$src1),
- i16immSExt8:$src2))]>, OpSize;
-def CMP32mi8 : Ii8<0x83, MRM7m,
- (outs), (ins i32mem:$src1, i32i8imm:$src2),
- "cmp{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (loadi32 addr:$src1),
- i32immSExt8:$src2))]>;
-def CMP32ri8 : Ii8<0x83, MRM7r,
- (outs), (ins GR32:$src1, i32i8imm:$src2),
- "cmp{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR32:$src1, i32immSExt8:$src2))]>;
-} // Defs = [EFLAGS]
+//===----------------------------------------------------------------------===//
+// Bit tests instructions: BT, BTS, BTR, BTC.
-// Bit tests.
-// TODO: BTC, BTR, and BTS
let Defs = [EFLAGS] in {
def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
@@ -3680,6 +936,9 @@ def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
"bt{l}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR32:$src1, GR32:$src2))]>, TB;
+def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))]>, TB;
// Unlike with the register+register form, the memory+register form of the
// bt instruction does not ignore the high bits of the index. From ISel's
@@ -3687,17 +946,23 @@ def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
// only for now.
def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
- "bt{w}\t{$src2, $src1|$src1, $src2}",
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
// [(X86bt (loadi16 addr:$src1), GR16:$src2),
// (implicit EFLAGS)]
[]
>, OpSize, TB, Requires<[FastBTMem]>;
def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
- "bt{l}\t{$src2, $src1|$src1, $src2}",
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
// [(X86bt (loadi32 addr:$src1), GR32:$src2),
// (implicit EFLAGS)]
[]
>, TB, Requires<[FastBTMem]>;
+def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+// [(X86bt (loadi64 addr:$src1), GR64:$src2),
+// (implicit EFLAGS)]
+ []
+ >, TB;
def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
@@ -3706,6 +971,10 @@ def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
def BT32ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR32:$src1, i32i8imm:$src2),
"bt{l}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR32:$src1, i32immSExt8:$src2))]>, TB;
+def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB;
+
// Note that these instructions don't need FastBTMem because that
// only applies when the other operand is in a register. When it's
// an immediate, bt is still fast.
@@ -3717,307 +986,129 @@ def BT32mi8 : Ii8<0xBA, MRM4m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
"bt{l}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt (loadi32 addr:$src1), i32immSExt8:$src2))
]>, TB;
+def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt (loadi64 addr:$src1),
+ i64immSExt8:$src2))]>, TB;
+
def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTC32rr : I<0xBB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTC32mr : I<0xBB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTC16ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTC32ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR32:$src1, i32i8imm:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTR16rr : I<0xB3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTR32rr : I<0xB3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTR32mr : I<0xB3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTR16ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTR32ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR32:$src1, i32i8imm:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTR32mi8 : Ii8<0xBA, MRM6m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTS16rr : I<0xAB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTS32rr : I<0xAB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTS32mr : I<0xAB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTS16ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTS32ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR32:$src1, i32i8imm:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
} // Defs = [EFLAGS]
-// Sign/Zero extenders
-// Use movsbl intead of movsbw; we don't care about the high 16 bits
-// of the register here. This has a smaller encoding and avoids a
-// partial-register update. Actual movsbw included for the disassembler.
-def MOVSX16rr8W : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
- "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def MOVSX16rm8W : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
- "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
- "", [(set GR16:$dst, (sext GR8:$src))]>, TB;
-def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
- "", [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB;
-def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
- "movs{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sext GR8:$src))]>, TB;
-def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
- "movs{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sextloadi32i8 addr:$src))]>, TB;
-def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
- "movs{wl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sext GR16:$src))]>, TB;
-def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
- "movs{wl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB;
-
-// Use movzbl intead of movzbw; we don't care about the high 16 bits
-// of the register here. This has a smaller encoding and avoids a
-// partial-register update. Actual movzbw included for the disassembler.
-def MOVZX16rr8W : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
- "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def MOVZX16rm8W : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
- "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
- "", [(set GR16:$dst, (zext GR8:$src))]>, TB;
-def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
- "", [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB;
-def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
- "movz{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zext GR8:$src))]>, TB;
-def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
- "movz{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zextloadi32i8 addr:$src))]>, TB;
-def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
- "movz{wl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zext GR16:$src))]>, TB;
-def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
- "movz{wl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB;
-
-// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
-// except that they use GR32_NOREX for the output operand register class
-// instead of GR32. This allows them to operate on h registers on x86-64.
-def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
- (outs GR32_NOREX:$dst), (ins GR8:$src),
- "movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX",
- []>, TB;
-let mayLoad = 1 in
-def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
- (outs GR32_NOREX:$dst), (ins i8mem:$src),
- "movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX",
- []>, TB;
-
-let neverHasSideEffects = 1 in {
- let Defs = [AX], Uses = [AL] in
- def CBW : I<0x98, RawFrm, (outs), (ins),
- "{cbtw|cbw}", []>, OpSize; // AX = signext(AL)
- let Defs = [EAX], Uses = [AX] in
- def CWDE : I<0x98, RawFrm, (outs), (ins),
- "{cwtl|cwde}", []>; // EAX = signext(AX)
-
- let Defs = [AX,DX], Uses = [AX] in
- def CWD : I<0x99, RawFrm, (outs), (ins),
- "{cwtd|cwd}", []>, OpSize; // DX:AX = signext(AX)
- let Defs = [EAX,EDX], Uses = [EAX] in
- def CDQ : I<0x99, RawFrm, (outs), (ins),
- "{cltd|cdq}", []>; // EDX:EAX = signext(EAX)
-}
-
-//===----------------------------------------------------------------------===//
-// Alias Instructions
-//===----------------------------------------------------------------------===//
-
-// Alias instructions that map movr0 to xor.
-// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
-// FIXME: Set encoding to pseudo.
-let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
- isCodeGenOnly = 1 in {
-def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
- [(set GR8:$dst, 0)]>;
-
-// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
-// encoding and avoids a partial-register update sometimes, but doing so
-// at isel time interferes with rematerialization in the current register
-// allocator. For now, this is rewritten when the instruction is lowered
-// to an MCInst.
-def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
- "",
- [(set GR16:$dst, 0)]>, OpSize;
-
-// FIXME: Set encoding to pseudo.
-def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
- [(set GR32:$dst, 0)]>;
-}
-
-//===----------------------------------------------------------------------===//
-// Thread Local Storage Instructions
-//
-
-// ELF TLS Support
-// All calls clobber the non-callee saved registers. ESP is marked as
-// a use to prevent stack-pointer assignments that appear immediately
-// before calls from potentially appearing dead.
-let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
- MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
- Uses = [ESP] in
-def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
- "leal\t$sym, %eax; "
- "call\t___tls_get_addr@PLT",
- [(X86tlsaddr tls32addr:$sym)]>,
- Requires<[In32BitMode]>;
-
-// Darwin TLS Support
-// For i386, the address of the thunk is passed on the stack, on return the
-// address of the variable is in %eax. %ecx is trashed during the function
-// call. All other registers are preserved.
-let Defs = [EAX, ECX],
- Uses = [ESP],
- usesCustomInserter = 1 in
-def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
- "# TLSCall_32",
- [(X86TLSCall addr:$sym)]>,
- Requires<[In32BitMode]>;
-
-let AddedComplexity = 5, isCodeGenOnly = 1 in
-def GS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "movl\t%gs:$src, $dst",
- [(set GR32:$dst, (gsload addr:$src))]>, SegGS;
-
-let AddedComplexity = 5, isCodeGenOnly = 1 in
-def FS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "movl\t%fs:$src, $dst",
- [(set GR32:$dst, (fsload addr:$src))]>, SegFS;
-
-//===----------------------------------------------------------------------===//
-// EH Pseudo Instructions
-//
-let isTerminator = 1, isReturn = 1, isBarrier = 1,
- hasCtrlDep = 1, isCodeGenOnly = 1 in {
-def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
- "ret\t#eh_return, addr: $addr",
- [(X86ehret GR32:$addr)]>;
-
-}
//===----------------------------------------------------------------------===//
// Atomic support
//
-// Memory barriers
-
-// TODO: Get this to fold the constant into the instruction.
-def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
- "lock\n\t"
- "or{l}\t{$zero, $dst|$dst, $zero}",
- []>, Requires<[In32BitMode]>, LOCK;
-
-let hasSideEffects = 1 in {
-def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
- "#MEMBARRIER",
- [(X86MemBarrier)]>, Requires<[HasSSE2]>;
-}
// Atomic swap. These are just normal xchg instructions. But since a memory
// operand is referenced, the atomicity is ensured.
let Constraints = "$val = $dst" in {
-def XCHG32rm : I<0x87, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$val, i32mem:$ptr),
- "xchg{l}\t{$val, $ptr|$ptr, $val}",
- [(set GR32:$dst, (atomic_swap_32 addr:$ptr, GR32:$val))]>;
-def XCHG16rm : I<0x87, MRMSrcMem, (outs GR16:$dst),
- (ins GR16:$val, i16mem:$ptr),
- "xchg{w}\t{$val, $ptr|$ptr, $val}",
- [(set GR16:$dst, (atomic_swap_16 addr:$ptr, GR16:$val))]>,
- OpSize;
def XCHG8rm : I<0x86, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr),
- "xchg{b}\t{$val, $ptr|$ptr, $val}",
+ "xchg{b}\t{$val, $ptr|$ptr, $val}",
[(set GR8:$dst, (atomic_swap_8 addr:$ptr, GR8:$val))]>;
+def XCHG16rm : I<0x87, MRMSrcMem, (outs GR16:$dst),(ins GR16:$val, i16mem:$ptr),
+ "xchg{w}\t{$val, $ptr|$ptr, $val}",
+ [(set GR16:$dst, (atomic_swap_16 addr:$ptr, GR16:$val))]>,
+ OpSize;
+def XCHG32rm : I<0x87, MRMSrcMem, (outs GR32:$dst),(ins GR32:$val, i32mem:$ptr),
+ "xchg{l}\t{$val, $ptr|$ptr, $val}",
+ [(set GR32:$dst, (atomic_swap_32 addr:$ptr, GR32:$val))]>;
+def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst),(ins GR64:$val,i64mem:$ptr),
+ "xchg{q}\t{$val, $ptr|$ptr, $val}",
+ [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))]>;
-def XCHG32rr : I<0x87, MRMSrcReg, (outs GR32:$dst), (ins GR32:$val, GR32:$src),
- "xchg{l}\t{$val, $src|$src, $val}", []>;
-def XCHG16rr : I<0x87, MRMSrcReg, (outs GR16:$dst), (ins GR16:$val, GR16:$src),
- "xchg{w}\t{$val, $src|$src, $val}", []>, OpSize;
def XCHG8rr : I<0x86, MRMSrcReg, (outs GR8:$dst), (ins GR8:$val, GR8:$src),
"xchg{b}\t{$val, $src|$src, $val}", []>;
+def XCHG16rr : I<0x87, MRMSrcReg, (outs GR16:$dst), (ins GR16:$val, GR16:$src),
+ "xchg{w}\t{$val, $src|$src, $val}", []>, OpSize;
+def XCHG32rr : I<0x87, MRMSrcReg, (outs GR32:$dst), (ins GR32:$val, GR32:$src),
+ "xchg{l}\t{$val, $src|$src, $val}", []>;
+def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src),
+ "xchg{q}\t{$val, $src|$src, $val}", []>;
}
def XCHG16ar : I<0x90, AddRegFrm, (outs), (ins GR16:$src),
"xchg{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
def XCHG32ar : I<0x90, AddRegFrm, (outs), (ins GR32:$src),
"xchg{l}\t{$src, %eax|%eax, $src}", []>;
+def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src),
+ "xchg{q}\t{$src, %rax|%rax, $src}", []>;
-// Atomic compare and swap.
-let Defs = [EAX, EFLAGS], Uses = [EAX] in {
-def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap),
- "lock\n\t"
- "cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}",
- [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK;
-}
-let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in {
-def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),
- "lock\n\t"
- "cmpxchg8b\t$ptr",
- [(X86cas8 addr:$ptr)]>, TB, LOCK;
-}
-
-let Defs = [AX, EFLAGS], Uses = [AX] in {
-def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap),
- "lock\n\t"
- "cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}",
- [(X86cas addr:$ptr, GR16:$swap, 2)]>, TB, OpSize, LOCK;
-}
-let Defs = [AL, EFLAGS], Uses = [AL] in {
-def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap),
- "lock\n\t"
- "cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}",
- [(X86cas addr:$ptr, GR8:$swap, 1)]>, TB, LOCK;
-}
-// Atomic exchange and add
-let Constraints = "$val = $dst", Defs = [EFLAGS] in {
-def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr),
- "lock\n\t"
- "xadd{l}\t{$val, $ptr|$ptr, $val}",
- [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>,
- TB, LOCK;
-def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins GR16:$val, i16mem:$ptr),
- "lock\n\t"
- "xadd{w}\t{$val, $ptr|$ptr, $val}",
- [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>,
- TB, OpSize, LOCK;
-def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr),
- "lock\n\t"
- "xadd{b}\t{$val, $ptr|$ptr, $val}",
- [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>,
- TB, LOCK;
-}
def XADD8rr : I<0xC0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
"xadd{b}\t{$src, $dst|$dst, $src}", []>, TB;
@@ -4025,6 +1116,8 @@ def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
"xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
def XADD32rr : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
"xadd{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
let mayLoad = 1, mayStore = 1 in {
def XADD8rm : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
@@ -4033,6 +1126,9 @@ def XADD16rm : I<0xC1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
"xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
def XADD32rm : I<0xC1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"xadd{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
}
def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
@@ -4041,6 +1137,8 @@ def CMPXCHG16rr : I<0xB1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
"cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
def CMPXCHG32rr : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
"cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
let mayLoad = 1, mayStore = 1 in {
def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
@@ -4049,284 +1147,29 @@ def CMPXCHG16rm : I<0xB1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
"cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
def CMPXCHG32rm : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
}
let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in
def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),
"cmpxchg8b\t$dst", []>, TB;
-// Optimized codegen when the non-memory output is not used.
-// FIXME: Use normal add / sub instructions and add lock prefix dynamically.
-let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in {
-def LOCK_ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
- "lock\n\t"
- "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD16mr : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "lock\n\t"
- "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_ADD32mr : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "lock\n\t"
- "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD8mi : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2),
- "lock\n\t"
- "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD16mi : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2),
- "lock\n\t"
- "add{w}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD32mi : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2),
- "lock\n\t"
- "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
- "lock\n\t"
- "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
- "lock\n\t"
- "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-
-def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
- "lock\n\t"
- "inc{b}\t$dst", []>, LOCK;
-def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
- "lock\n\t"
- "inc{w}\t$dst", []>, OpSize, LOCK;
-def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
- "lock\n\t"
- "inc{l}\t$dst", []>, LOCK;
-
-def LOCK_SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
- "lock\n\t"
- "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB16mr : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "lock\n\t"
- "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_SUB32mr : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "lock\n\t"
- "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB8mi : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2),
- "lock\n\t"
- "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB16mi : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2),
- "lock\n\t"
- "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_SUB32mi : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2),
- "lock\n\t"
- "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
- "lock\n\t"
- "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
- "lock\n\t"
- "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-
-def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst),
- "lock\n\t"
- "dec{b}\t$dst", []>, LOCK;
-def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
- "lock\n\t"
- "dec{w}\t$dst", []>, OpSize, LOCK;
-def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
- "lock\n\t"
- "dec{l}\t$dst", []>, LOCK;
-}
+let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in
+def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
+ "cmpxchg16b\t$dst", []>, TB;
-// Atomic exchange, and, or, xor
-let Constraints = "$val = $dst", Defs = [EFLAGS],
- usesCustomInserter = 1 in {
-def ATOMAND32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMAND32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_and_32 addr:$ptr, GR32:$val))]>;
-def ATOMOR32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMOR32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_or_32 addr:$ptr, GR32:$val))]>;
-def ATOMXOR32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMXOR32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_xor_32 addr:$ptr, GR32:$val))]>;
-def ATOMNAND32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMNAND32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_nand_32 addr:$ptr, GR32:$val))]>;
-def ATOMMIN32: I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val),
- "#ATOMMIN32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_min_32 addr:$ptr, GR32:$val))]>;
-def ATOMMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMMAX32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_max_32 addr:$ptr, GR32:$val))]>;
-def ATOMUMIN32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMUMIN32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_umin_32 addr:$ptr, GR32:$val))]>;
-def ATOMUMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMUMAX32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_umax_32 addr:$ptr, GR32:$val))]>;
-
-def ATOMAND16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
- "#ATOMAND16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_and_16 addr:$ptr, GR16:$val))]>;
-def ATOMOR16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
- "#ATOMOR16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_or_16 addr:$ptr, GR16:$val))]>;
-def ATOMXOR16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
- "#ATOMXOR16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_xor_16 addr:$ptr, GR16:$val))]>;
-def ATOMNAND16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
- "#ATOMNAND16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_nand_16 addr:$ptr, GR16:$val))]>;
-def ATOMMIN16: I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val),
- "#ATOMMIN16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_min_16 addr:$ptr, GR16:$val))]>;
-def ATOMMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
- "#ATOMMAX16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_max_16 addr:$ptr, GR16:$val))]>;
-def ATOMUMIN16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
- "#ATOMUMIN16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_umin_16 addr:$ptr, GR16:$val))]>;
-def ATOMUMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
- "#ATOMUMAX16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_umax_16 addr:$ptr, GR16:$val))]>;
-
-def ATOMAND8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
- "#ATOMAND8 PSEUDO!",
- [(set GR8:$dst, (atomic_load_and_8 addr:$ptr, GR8:$val))]>;
-def ATOMOR8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
- "#ATOMOR8 PSEUDO!",
- [(set GR8:$dst, (atomic_load_or_8 addr:$ptr, GR8:$val))]>;
-def ATOMXOR8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
- "#ATOMXOR8 PSEUDO!",
- [(set GR8:$dst, (atomic_load_xor_8 addr:$ptr, GR8:$val))]>;
-def ATOMNAND8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
- "#ATOMNAND8 PSEUDO!",
- [(set GR8:$dst, (atomic_load_nand_8 addr:$ptr, GR8:$val))]>;
-}
-let Constraints = "$val1 = $dst1, $val2 = $dst2",
- Defs = [EFLAGS, EAX, EBX, ECX, EDX],
- Uses = [EAX, EBX, ECX, EDX],
- mayLoad = 1, mayStore = 1,
- usesCustomInserter = 1 in {
-def ATOMAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- "#ATOMAND6432 PSEUDO!", []>;
-def ATOMOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- "#ATOMOR6432 PSEUDO!", []>;
-def ATOMXOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- "#ATOMXOR6432 PSEUDO!", []>;
-def ATOMNAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- "#ATOMNAND6432 PSEUDO!", []>;
-def ATOMADD6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- "#ATOMADD6432 PSEUDO!", []>;
-def ATOMSUB6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- "#ATOMSUB6432 PSEUDO!", []>;
-def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- "#ATOMSWAP6432 PSEUDO!", []>;
-}
-// Segmentation support instructions.
-
-def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
- "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
- "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-
-// i16mem operand in LAR32rm and GR32 operand in LAR32rr is not a typo.
-def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
- "lar{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "lar{l}\t{$src, $dst|$dst, $src}", []>, TB;
-
-def LSL16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
- "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
- "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB;
-
-def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB;
-
-def STRr : I<0x00, MRM1r, (outs GR16:$dst), (ins),
- "str{w}\t{$dst}", []>, TB;
-def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins),
- "str{w}\t{$dst}", []>, TB;
-def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src),
- "ltr{w}\t{$src}", []>, TB;
-def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src),
- "ltr{w}\t{$src}", []>, TB;
-
-def PUSHFS16 : I<0xa0, RawFrm, (outs), (ins),
- "push{w}\t%fs", []>, OpSize, TB;
-def PUSHFS32 : I<0xa0, RawFrm, (outs), (ins),
- "push{l}\t%fs", []>, TB;
-def PUSHGS16 : I<0xa8, RawFrm, (outs), (ins),
- "push{w}\t%gs", []>, OpSize, TB;
-def PUSHGS32 : I<0xa8, RawFrm, (outs), (ins),
- "push{l}\t%gs", []>, TB;
-
-def POPFS16 : I<0xa1, RawFrm, (outs), (ins),
- "pop{w}\t%fs", []>, OpSize, TB;
-def POPFS32 : I<0xa1, RawFrm, (outs), (ins),
- "pop{l}\t%fs", []>, TB;
-def POPGS16 : I<0xa9, RawFrm, (outs), (ins),
- "pop{w}\t%gs", []>, OpSize, TB;
-def POPGS32 : I<0xa9, RawFrm, (outs), (ins),
- "pop{l}\t%gs", []>, TB;
-
-def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
- "lds{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def LDS32rm : I<0xc5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
- "lds{l}\t{$src, $dst|$dst, $src}", []>;
-def LSS16rm : I<0xb2, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
- "lss{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def LSS32rm : I<0xb2, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
- "lss{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def LES16rm : I<0xc4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
- "les{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def LES32rm : I<0xc4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
- "les{l}\t{$src, $dst|$dst, $src}", []>;
-def LFS16rm : I<0xb4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
- "lfs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def LFS32rm : I<0xb4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
- "lfs{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def LGS16rm : I<0xb5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
- "lgs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def LGS32rm : I<0xb5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
- "lgs{l}\t{$src, $dst|$dst, $src}", []>, TB;
-
-def VERRr : I<0x00, MRM4r, (outs), (ins GR16:$seg),
- "verr\t$seg", []>, TB;
-def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg),
- "verr\t$seg", []>, TB;
-def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg),
- "verw\t$seg", []>, TB;
-def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg),
- "verw\t$seg", []>, TB;
-
-// Descriptor-table support instructions
-
-def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
- "sgdt\t$dst", []>, TB;
-def SIDTm : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins),
- "sidt\t$dst", []>, TB;
-def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins),
- "sldt{w}\t$dst", []>, TB;
-def SLDT16m : I<0x00, MRM0m, (outs i16mem:$dst), (ins),
- "sldt{w}\t$dst", []>, TB;
-def LGDTm : I<0x01, MRM2m, (outs), (ins opaque48mem:$src),
- "lgdt\t$src", []>, TB;
-def LIDTm : I<0x01, MRM3m, (outs), (ins opaque48mem:$src),
- "lidt\t$src", []>, TB;
-def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src),
- "lldt{w}\t$src", []>, TB;
-def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src),
- "lldt{w}\t$src", []>, TB;
-
// Lock instruction prefix
def LOCK_PREFIX : I<0xF0, RawFrm, (outs), (ins), "lock", []>;
+// Rex64 instruction prefix
+def REX64_PREFIX : I<0x48, RawFrm, (outs), (ins), "rex64", []>;
+
+// Data16 instruction prefix
+def DATA16_PREFIX : I<0x66, RawFrm, (outs), (ins), "data16", []>;
+
// Repeat string operation instruction prefixes
// These uses the DF flag in the EFLAGS register to inc or dec ECX
let Defs = [ECX], Uses = [ECX,EFLAGS] in {
@@ -4336,35 +1179,19 @@ def REP_PREFIX : I<0xF3, RawFrm, (outs), (ins), "rep", []>;
def REPNE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "repne", []>;
}
-// Segment override instruction prefixes
-def CS_PREFIX : I<0x2E, RawFrm, (outs), (ins), "cs", []>;
-def SS_PREFIX : I<0x36, RawFrm, (outs), (ins), "ss", []>;
-def DS_PREFIX : I<0x3E, RawFrm, (outs), (ins), "ds", []>;
-def ES_PREFIX : I<0x26, RawFrm, (outs), (ins), "es", []>;
-def FS_PREFIX : I<0x64, RawFrm, (outs), (ins), "fs", []>;
-def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>;
// String manipulation instructions
-
def LODSB : I<0xAC, RawFrm, (outs), (ins), "lodsb", []>;
def LODSW : I<0xAD, RawFrm, (outs), (ins), "lodsw", []>, OpSize;
def LODSD : I<0xAD, RawFrm, (outs), (ins), "lods{l|d}", []>;
+def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", []>;
def OUTSB : I<0x6E, RawFrm, (outs), (ins), "outsb", []>;
def OUTSW : I<0x6F, RawFrm, (outs), (ins), "outsw", []>, OpSize;
def OUTSD : I<0x6F, RawFrm, (outs), (ins), "outs{l|d}", []>;
-// CPU flow control instructions
-
-def HLT : I<0xF4, RawFrm, (outs), (ins), "hlt", []>;
-def RSM : I<0xAA, RawFrm, (outs), (ins), "rsm", []>, TB;
-
-// FPU control instructions
-
-def FNINIT : I<0xE3, RawFrm, (outs), (ins), "fninit", []>, DB;
// Flag instructions
-
def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", []>;
def STC : I<0xF9, RawFrm, (outs), (ins), "stc", []>;
def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", []>;
@@ -4376,620 +1203,423 @@ def CMC : I<0xF5, RawFrm, (outs), (ins), "cmc", []>;
def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", []>, TB;
// Table lookup instructions
-
def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", []>;
-// Specialized register support
-
-def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", []>, TB;
-def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", []>, TB;
-def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", []>, TB;
-
-def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins),
- "smsw{w}\t$dst", []>, OpSize, TB;
-def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins),
- "smsw{l}\t$dst", []>, TB;
-// For memory operands, there is only a 16-bit form
-def SMSW16m : I<0x01, MRM4m, (outs i16mem:$dst), (ins),
- "smsw{w}\t$dst", []>, TB;
-
-def LMSW16r : I<0x01, MRM6r, (outs), (ins GR16:$src),
- "lmsw{w}\t$src", []>, TB;
-def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src),
- "lmsw{w}\t$src", []>, TB;
-
-def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB;
-
-// Cache instructions
-
-def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB;
-def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB;
-
-// VMX instructions
-
-// 66 0F 38 80
-def INVEPT : I<0x80, RawFrm, (outs), (ins), "invept", []>, OpSize, T8;
-// 66 0F 38 81
-def INVVPID : I<0x81, RawFrm, (outs), (ins), "invvpid", []>, OpSize, T8;
-// 0F 01 C1
-def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
-def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
- "vmclear\t$vmcs", []>, OpSize, TB;
-// 0F 01 C2
-def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB;
-// 0F 01 C3
-def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB;
-def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
- "vmptrld\t$vmcs", []>, TB;
-def VMPTRSTm : I<0xC7, MRM7m, (outs i64mem:$vmcs), (ins),
- "vmptrst\t$vmcs", []>, TB;
-def VMREAD64rm : I<0x78, MRMDestMem, (outs i64mem:$dst), (ins GR64:$src),
- "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
- "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMREAD32rm : I<0x78, MRMDestMem, (outs i32mem:$dst), (ins GR32:$src),
- "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
- "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB;
-// 0F 01 C4
-def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB;
-def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon),
- "vmxon\t{$vmxon}", []>, XS;
+// ASCII Adjust After Addition
+// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS
+def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", []>, Requires<[In32BitMode]>;
-//===----------------------------------------------------------------------===//
-// Non-Instruction Patterns
-//===----------------------------------------------------------------------===//
+// ASCII Adjust AX Before Division
+// sets AL, AH and EFLAGS and uses AL and AH
+def AAD8i8 : Ii8<0xD5, RawFrm, (outs), (ins i8imm:$src),
+ "aad\t$src", []>, Requires<[In32BitMode]>;
-// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable
-def : Pat<(i32 (X86Wrapper tconstpool :$dst)), (MOV32ri tconstpool :$dst)>;
-def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>;
-def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>;
-def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
-def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
-def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>;
-
-def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),
- (ADD32ri GR32:$src1, tconstpool:$src2)>;
-def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)),
- (ADD32ri GR32:$src1, tjumptable:$src2)>;
-def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)),
- (ADD32ri GR32:$src1, tglobaladdr:$src2)>;
-def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),
- (ADD32ri GR32:$src1, texternalsym:$src2)>;
-def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)),
- (ADD32ri GR32:$src1, tblockaddress:$src2)>;
-
-def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),
- (MOV32mi addr:$dst, tglobaladdr:$src)>;
-def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
- (MOV32mi addr:$dst, texternalsym:$src)>;
-def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),
- (MOV32mi addr:$dst, tblockaddress:$src)>;
-
-// Calls
-// tailcall stuff
-def : Pat<(X86tcret GR32_TC:$dst, imm:$off),
- (TCRETURNri GR32_TC:$dst, imm:$off)>,
- Requires<[In32BitMode]>;
-
-// FIXME: This is disabled for 32-bit PIC mode because the global base
-// register which is part of the address mode may be assigned a
-// callee-saved register.
-def : Pat<(X86tcret (load addr:$dst), imm:$off),
- (TCRETURNmi addr:$dst, imm:$off)>,
- Requires<[In32BitMode, IsNotPIC]>;
-
-def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
- (TCRETURNdi texternalsym:$dst, imm:$off)>,
- Requires<[In32BitMode]>;
-
-def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
- (TCRETURNdi texternalsym:$dst, imm:$off)>,
- Requires<[In32BitMode]>;
-
-// Normal calls, with various flavors of addresses.
-def : Pat<(X86call (i32 tglobaladdr:$dst)),
- (CALLpcrel32 tglobaladdr:$dst)>;
-def : Pat<(X86call (i32 texternalsym:$dst)),
- (CALLpcrel32 texternalsym:$dst)>;
-def : Pat<(X86call (i32 imm:$dst)),
- (CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>;
-
-// X86 specific add which produces a flag.
-def : Pat<(addc GR32:$src1, GR32:$src2),
- (ADD32rr GR32:$src1, GR32:$src2)>;
-def : Pat<(addc GR32:$src1, (load addr:$src2)),
- (ADD32rm GR32:$src1, addr:$src2)>;
-def : Pat<(addc GR32:$src1, imm:$src2),
- (ADD32ri GR32:$src1, imm:$src2)>;
-def : Pat<(addc GR32:$src1, i32immSExt8:$src2),
- (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-def : Pat<(subc GR32:$src1, GR32:$src2),
- (SUB32rr GR32:$src1, GR32:$src2)>;
-def : Pat<(subc GR32:$src1, (load addr:$src2)),
- (SUB32rm GR32:$src1, addr:$src2)>;
-def : Pat<(subc GR32:$src1, imm:$src2),
- (SUB32ri GR32:$src1, imm:$src2)>;
-def : Pat<(subc GR32:$src1, i32immSExt8:$src2),
- (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-// Comparisons.
-
-// TEST R,R is smaller than CMP R,0
-def : Pat<(X86cmp GR8:$src1, 0),
- (TEST8rr GR8:$src1, GR8:$src1)>;
-def : Pat<(X86cmp GR16:$src1, 0),
- (TEST16rr GR16:$src1, GR16:$src1)>;
-def : Pat<(X86cmp GR32:$src1, 0),
- (TEST32rr GR32:$src1, GR32:$src1)>;
-
-// Conditional moves with folded loads with operands swapped and conditions
-// inverted.
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_B, EFLAGS),
- (CMOVAE16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_B, EFLAGS),
- (CMOVAE32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_AE, EFLAGS),
- (CMOVB16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_AE, EFLAGS),
- (CMOVB32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_E, EFLAGS),
- (CMOVNE16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_E, EFLAGS),
- (CMOVNE32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NE, EFLAGS),
- (CMOVE16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NE, EFLAGS),
- (CMOVE32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_BE, EFLAGS),
- (CMOVA16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_BE, EFLAGS),
- (CMOVA32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_A, EFLAGS),
- (CMOVBE16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_A, EFLAGS),
- (CMOVBE32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_L, EFLAGS),
- (CMOVGE16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_L, EFLAGS),
- (CMOVGE32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_GE, EFLAGS),
- (CMOVL16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_GE, EFLAGS),
- (CMOVL32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_LE, EFLAGS),
- (CMOVG16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_LE, EFLAGS),
- (CMOVG32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_G, EFLAGS),
- (CMOVLE16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_G, EFLAGS),
- (CMOVLE32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_P, EFLAGS),
- (CMOVNP16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_P, EFLAGS),
- (CMOVNP32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NP, EFLAGS),
- (CMOVP16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NP, EFLAGS),
- (CMOVP32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_S, EFLAGS),
- (CMOVNS16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_S, EFLAGS),
- (CMOVNS32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NS, EFLAGS),
- (CMOVS16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NS, EFLAGS),
- (CMOVS32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_O, EFLAGS),
- (CMOVNO16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_O, EFLAGS),
- (CMOVNO32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NO, EFLAGS),
- (CMOVO16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NO, EFLAGS),
- (CMOVO32rm GR32:$src2, addr:$src1)>;
-
-// zextload bool -> zextload byte
-def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
-def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
-def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
-
-// extload bool -> extload byte
-def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;
-def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
-def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
-def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
-def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
-def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
-
-// anyext. Define these to do an explicit zero-extend to
-// avoid partial-register updates.
-def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>;
-def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
-
-// Except for i16 -> i32 since isel expect i16 ops to be promoted to i32.
-def : Pat<(i32 (anyext GR16:$src)),
- (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;
+// ASCII Adjust AX After Multiply
+// sets AL, AH and EFLAGS and uses AL
+def AAM8i8 : Ii8<0xD4, RawFrm, (outs), (ins i8imm:$src),
+ "aam\t$src", []>, Requires<[In32BitMode]>;
+// ASCII Adjust AL After Subtraction - sets
+// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS
+def AAS : I<0x3F, RawFrm, (outs), (ins), "aas", []>, Requires<[In32BitMode]>;
-//===----------------------------------------------------------------------===//
-// Some peepholes
-//===----------------------------------------------------------------------===//
-
-// Odd encoding trick: -128 fits into an 8-bit immediate field while
-// +128 doesn't, so in this special case use a sub instead of an add.
-def : Pat<(add GR16:$src1, 128),
- (SUB16ri8 GR16:$src1, -128)>;
-def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),
- (SUB16mi8 addr:$dst, -128)>;
-def : Pat<(add GR32:$src1, 128),
- (SUB32ri8 GR32:$src1, -128)>;
-def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
- (SUB32mi8 addr:$dst, -128)>;
-
-// r & (2^16-1) ==> movz
-def : Pat<(and GR32:$src1, 0xffff),
- (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>;
-// r & (2^8-1) ==> movz
-def : Pat<(and GR32:$src1, 0xff),
- (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1,
- GR32_ABCD)),
- sub_8bit))>,
- Requires<[In32BitMode]>;
-// r & (2^8-1) ==> movz
-def : Pat<(and GR16:$src1, 0xff),
- (MOVZX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src1,
- GR16_ABCD)),
- sub_8bit))>,
- Requires<[In32BitMode]>;
-
-// sext_inreg patterns
-def : Pat<(sext_inreg GR32:$src, i16),
- (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>;
-def : Pat<(sext_inreg GR32:$src, i8),
- (MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
- GR32_ABCD)),
- sub_8bit))>,
- Requires<[In32BitMode]>;
-def : Pat<(sext_inreg GR16:$src, i8),
- (MOVSX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
- GR16_ABCD)),
- sub_8bit))>,
- Requires<[In32BitMode]>;
-
-// trunc patterns
-def : Pat<(i16 (trunc GR32:$src)),
- (EXTRACT_SUBREG GR32:$src, sub_16bit)>;
-def : Pat<(i8 (trunc GR32:$src)),
- (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
- sub_8bit)>,
- Requires<[In32BitMode]>;
-def : Pat<(i8 (trunc GR16:$src)),
- (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- sub_8bit)>,
- Requires<[In32BitMode]>;
-
-// h-register tricks
-def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
- (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- sub_8bit_hi)>,
- Requires<[In32BitMode]>;
-def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
- (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
- sub_8bit_hi)>,
- Requires<[In32BitMode]>;
-def : Pat<(srl GR16:$src, (i8 8)),
- (EXTRACT_SUBREG
- (MOVZX32rr8
- (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- sub_8bit_hi)),
- sub_16bit)>,
- Requires<[In32BitMode]>;
-def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
- (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
- GR16_ABCD)),
- sub_8bit_hi))>,
- Requires<[In32BitMode]>;
-def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
- (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
- GR16_ABCD)),
- sub_8bit_hi))>,
- Requires<[In32BitMode]>;
-def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
- (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
- GR32_ABCD)),
- sub_8bit_hi))>,
- Requires<[In32BitMode]>;
-def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
- (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
- GR32_ABCD)),
- sub_8bit_hi))>,
- Requires<[In32BitMode]>;
-
-// (shl x, 1) ==> (add x, x)
-def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
-def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
-def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
-
-// (shl x (and y, 31)) ==> (shl x, y)
-def : Pat<(shl GR8:$src1, (and CL, 31)),
- (SHL8rCL GR8:$src1)>;
-def : Pat<(shl GR16:$src1, (and CL, 31)),
- (SHL16rCL GR16:$src1)>;
-def : Pat<(shl GR32:$src1, (and CL, 31)),
- (SHL32rCL GR32:$src1)>;
-def : Pat<(store (shl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
- (SHL8mCL addr:$dst)>;
-def : Pat<(store (shl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
- (SHL16mCL addr:$dst)>;
-def : Pat<(store (shl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
- (SHL32mCL addr:$dst)>;
-
-def : Pat<(srl GR8:$src1, (and CL, 31)),
- (SHR8rCL GR8:$src1)>;
-def : Pat<(srl GR16:$src1, (and CL, 31)),
- (SHR16rCL GR16:$src1)>;
-def : Pat<(srl GR32:$src1, (and CL, 31)),
- (SHR32rCL GR32:$src1)>;
-def : Pat<(store (srl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
- (SHR8mCL addr:$dst)>;
-def : Pat<(store (srl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
- (SHR16mCL addr:$dst)>;
-def : Pat<(store (srl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
- (SHR32mCL addr:$dst)>;
-
-def : Pat<(sra GR8:$src1, (and CL, 31)),
- (SAR8rCL GR8:$src1)>;
-def : Pat<(sra GR16:$src1, (and CL, 31)),
- (SAR16rCL GR16:$src1)>;
-def : Pat<(sra GR32:$src1, (and CL, 31)),
- (SAR32rCL GR32:$src1)>;
-def : Pat<(store (sra (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
- (SAR8mCL addr:$dst)>;
-def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
- (SAR16mCL addr:$dst)>;
-def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
- (SAR32mCL addr:$dst)>;
-
-// (anyext (setcc_carry)) -> (setcc_carry)
-def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
- (SETB_C16r)>;
-def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
- (SETB_C32r)>;
-def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),
- (SETB_C32r)>;
-
-// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
-let AddedComplexity = 5 in { // Try this before the selecting to OR
-def : Pat<(or_is_add GR16:$src1, imm:$src2),
- (ADD16ri GR16:$src1, imm:$src2)>;
-def : Pat<(or_is_add GR32:$src1, imm:$src2),
- (ADD32ri GR32:$src1, imm:$src2)>;
-def : Pat<(or_is_add GR16:$src1, i16immSExt8:$src2),
- (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(or_is_add GR32:$src1, i32immSExt8:$src2),
- (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
-def : Pat<(or_is_add GR16:$src1, GR16:$src2),
- (ADD16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(or_is_add GR32:$src1, GR32:$src2),
- (ADD32rr GR32:$src1, GR32:$src2)>;
-} // AddedComplexity
+// Decimal Adjust AL after Addition
+// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS
+def DAA : I<0x27, RawFrm, (outs), (ins), "daa", []>, Requires<[In32BitMode]>;
-//===----------------------------------------------------------------------===//
-// EFLAGS-defining Patterns
-//===----------------------------------------------------------------------===//
+// Decimal Adjust AL after Subtraction
+// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS
+def DAS : I<0x2F, RawFrm, (outs), (ins), "das", []>, Requires<[In32BitMode]>;
-// add reg, reg
-def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr GR8 :$src1, GR8 :$src2)>;
-def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>;
-
-// add reg, mem
-def : Pat<(add GR8:$src1, (loadi8 addr:$src2)),
- (ADD8rm GR8:$src1, addr:$src2)>;
-def : Pat<(add GR16:$src1, (loadi16 addr:$src2)),
- (ADD16rm GR16:$src1, addr:$src2)>;
-def : Pat<(add GR32:$src1, (loadi32 addr:$src2)),
- (ADD32rm GR32:$src1, addr:$src2)>;
-
-// add reg, imm
-def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri GR8:$src1 , imm:$src2)>;
-def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>;
-def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>;
-def : Pat<(add GR16:$src1, i16immSExt8:$src2),
- (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(add GR32:$src1, i32immSExt8:$src2),
- (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-// sub reg, reg
-def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr GR8 :$src1, GR8 :$src2)>;
-def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>;
-
-// sub reg, mem
-def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)),
- (SUB8rm GR8:$src1, addr:$src2)>;
-def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)),
- (SUB16rm GR16:$src1, addr:$src2)>;
-def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)),
- (SUB32rm GR32:$src1, addr:$src2)>;
-
-// sub reg, imm
-def : Pat<(sub GR8:$src1, imm:$src2),
- (SUB8ri GR8:$src1, imm:$src2)>;
-def : Pat<(sub GR16:$src1, imm:$src2),
- (SUB16ri GR16:$src1, imm:$src2)>;
-def : Pat<(sub GR32:$src1, imm:$src2),
- (SUB32ri GR32:$src1, imm:$src2)>;
-def : Pat<(sub GR16:$src1, i16immSExt8:$src2),
- (SUB16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(sub GR32:$src1, i32immSExt8:$src2),
- (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-// mul reg, reg
-def : Pat<(mul GR16:$src1, GR16:$src2),
- (IMUL16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(mul GR32:$src1, GR32:$src2),
- (IMUL32rr GR32:$src1, GR32:$src2)>;
-
-// mul reg, mem
-def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)),
- (IMUL16rm GR16:$src1, addr:$src2)>;
-def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)),
- (IMUL32rm GR32:$src1, addr:$src2)>;
-
-// mul reg, imm
-def : Pat<(mul GR16:$src1, imm:$src2),
- (IMUL16rri GR16:$src1, imm:$src2)>;
-def : Pat<(mul GR32:$src1, imm:$src2),
- (IMUL32rri GR32:$src1, imm:$src2)>;
-def : Pat<(mul GR16:$src1, i16immSExt8:$src2),
- (IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(mul GR32:$src1, i32immSExt8:$src2),
- (IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>;
-
-// reg = mul mem, imm
-def : Pat<(mul (loadi16 addr:$src1), imm:$src2),
- (IMUL16rmi addr:$src1, imm:$src2)>;
-def : Pat<(mul (loadi32 addr:$src1), imm:$src2),
- (IMUL32rmi addr:$src1, imm:$src2)>;
-def : Pat<(mul (loadi16 addr:$src1), i16immSExt8:$src2),
- (IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>;
-def : Pat<(mul (loadi32 addr:$src1), i32immSExt8:$src2),
- (IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>;
-
-// Optimize multiply by 2 with EFLAGS result.
-let AddedComplexity = 2 in {
-def : Pat<(X86smul_flag GR16:$src1, 2), (ADD16rr GR16:$src1, GR16:$src1)>;
-def : Pat<(X86smul_flag GR32:$src1, 2), (ADD32rr GR32:$src1, GR32:$src1)>;
-}
+// Check Array Index Against Bounds
+def BOUNDS16rm : I<0x62, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "bound\t{$src, $dst|$dst, $src}", []>, OpSize,
+ Requires<[In32BitMode]>;
+def BOUNDS32rm : I<0x62, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "bound\t{$src, $dst|$dst, $src}", []>,
+ Requires<[In32BitMode]>;
-// Patterns for nodes that do not produce flags, for instructions that do.
-
-// Increment reg.
-def : Pat<(add GR8:$src1 , 1), (INC8r GR8:$src1)>;
-def : Pat<(add GR16:$src1, 1), (INC16r GR16:$src1)>, Requires<[In32BitMode]>;
-def : Pat<(add GR32:$src1, 1), (INC32r GR32:$src1)>, Requires<[In32BitMode]>;
-
-// Decrement reg.
-def : Pat<(add GR8:$src1 , -1), (DEC8r GR8:$src1)>;
-def : Pat<(add GR16:$src1, -1), (DEC16r GR16:$src1)>, Requires<[In32BitMode]>;
-def : Pat<(add GR32:$src1, -1), (DEC32r GR32:$src1)>, Requires<[In32BitMode]>;
-
-// or reg/reg.
-def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>;
-def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>;
-
-// or reg/mem
-def : Pat<(or GR8:$src1, (loadi8 addr:$src2)),
- (OR8rm GR8:$src1, addr:$src2)>;
-def : Pat<(or GR16:$src1, (loadi16 addr:$src2)),
- (OR16rm GR16:$src1, addr:$src2)>;
-def : Pat<(or GR32:$src1, (loadi32 addr:$src2)),
- (OR32rm GR32:$src1, addr:$src2)>;
-
-// or reg/imm
-def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri GR8 :$src1, imm:$src2)>;
-def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>;
-def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>;
-def : Pat<(or GR16:$src1, i16immSExt8:$src2),
- (OR16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(or GR32:$src1, i32immSExt8:$src2),
- (OR32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-// xor reg/reg
-def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr GR8 :$src1, GR8 :$src2)>;
-def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>;
-
-// xor reg/mem
-def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)),
- (XOR8rm GR8:$src1, addr:$src2)>;
-def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)),
- (XOR16rm GR16:$src1, addr:$src2)>;
-def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)),
- (XOR32rm GR32:$src1, addr:$src2)>;
-
-// xor reg/imm
-def : Pat<(xor GR8:$src1, imm:$src2),
- (XOR8ri GR8:$src1, imm:$src2)>;
-def : Pat<(xor GR16:$src1, imm:$src2),
- (XOR16ri GR16:$src1, imm:$src2)>;
-def : Pat<(xor GR32:$src1, imm:$src2),
- (XOR32ri GR32:$src1, imm:$src2)>;
-def : Pat<(xor GR16:$src1, i16immSExt8:$src2),
- (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(xor GR32:$src1, i32immSExt8:$src2),
- (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-// and reg/reg
-def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr GR8 :$src1, GR8 :$src2)>;
-def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>;
-
-// and reg/mem
-def : Pat<(and GR8:$src1, (loadi8 addr:$src2)),
- (AND8rm GR8:$src1, addr:$src2)>;
-def : Pat<(and GR16:$src1, (loadi16 addr:$src2)),
- (AND16rm GR16:$src1, addr:$src2)>;
-def : Pat<(and GR32:$src1, (loadi32 addr:$src2)),
- (AND32rm GR32:$src1, addr:$src2)>;
-
-// and reg/imm
-def : Pat<(and GR8:$src1, imm:$src2),
- (AND8ri GR8:$src1, imm:$src2)>;
-def : Pat<(and GR16:$src1, imm:$src2),
- (AND16ri GR16:$src1, imm:$src2)>;
-def : Pat<(and GR32:$src1, imm:$src2),
- (AND32ri GR32:$src1, imm:$src2)>;
-def : Pat<(and GR16:$src1, i16immSExt8:$src2),
- (AND16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(and GR32:$src1, i32immSExt8:$src2),
- (AND32ri8 GR32:$src1, i32immSExt8:$src2)>;
+// Adjust RPL Field of Segment Selector
+def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$src), (ins GR16:$dst),
+ "arpl\t{$src, $dst|$dst, $src}", []>, Requires<[In32BitMode]>;
+def ARPL16mr : I<0x63, MRMSrcMem, (outs GR16:$src), (ins i16mem:$dst),
+ "arpl\t{$src, $dst|$dst, $src}", []>, Requires<[In32BitMode]>;
//===----------------------------------------------------------------------===//
-// Floating Point Stack Support
+// Subsystems.
//===----------------------------------------------------------------------===//
-include "X86InstrFPStack.td"
-
-//===----------------------------------------------------------------------===//
-// X86-64 Support
-//===----------------------------------------------------------------------===//
+include "X86InstrArithmetic.td"
+include "X86InstrCMovSetCC.td"
+include "X86InstrExtension.td"
+include "X86InstrControl.td"
+include "X86InstrShiftRotate.td"
-include "X86Instr64bit.td"
+// X87 Floating Point Stack.
+include "X86InstrFPStack.td"
-//===----------------------------------------------------------------------===//
// SIMD support (SSE, MMX and AVX)
-//===----------------------------------------------------------------------===//
-
include "X86InstrFragmentsSIMD.td"
-//===----------------------------------------------------------------------===//
// FMA - Fused Multiply-Add support (requires FMA)
-//===----------------------------------------------------------------------===//
-
include "X86InstrFMA.td"
+// SSE, MMX and 3DNow! vector support.
+include "X86InstrSSE.td"
+include "X86InstrMMX.td"
+include "X86Instr3DNow.td"
+
+include "X86InstrVMX.td"
+
+// System instructions.
+include "X86InstrSystem.td"
+
+// Compiler Pseudo Instructions and Pat Patterns
+include "X86InstrCompiler.td"
+
//===----------------------------------------------------------------------===//
-// XMM Floating point support (requires SSE / SSE2)
+// Assembler Mnemonic Aliases
//===----------------------------------------------------------------------===//
-include "X86InstrSSE.td"
+def : MnemonicAlias<"call", "calll">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"call", "callq">, Requires<[In64BitMode]>;
+
+def : MnemonicAlias<"cbw", "cbtw">;
+def : MnemonicAlias<"cwd", "cwtd">;
+def : MnemonicAlias<"cdq", "cltd">;
+def : MnemonicAlias<"cwde", "cwtl">;
+def : MnemonicAlias<"cdqe", "cltq">;
+
+// lret maps to lretl, it is not ambiguous with lretq.
+def : MnemonicAlias<"lret", "lretl">;
+
+def : MnemonicAlias<"leavel", "leave">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"leaveq", "leave">, Requires<[In64BitMode]>;
+
+def : MnemonicAlias<"pop", "popl">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"pop", "popq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"popf", "popfl">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"popf", "popfq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"popfd", "popfl">;
+
+// FIXME: This is wrong for "push reg". "push %bx" should turn into pushw in
+// all modes. However: "push (addr)" and "push $42" should default to
+// pushl/pushq depending on the current mode. Similar for "pop %bx"
+def : MnemonicAlias<"push", "pushl">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"push", "pushq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"pushf", "pushfl">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"pushf", "pushfq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"pushfd", "pushfl">;
+
+def : MnemonicAlias<"repe", "rep">;
+def : MnemonicAlias<"repz", "rep">;
+def : MnemonicAlias<"repnz", "repne">;
+
+def : MnemonicAlias<"retl", "ret">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"retq", "ret">, Requires<[In64BitMode]>;
+
+def : MnemonicAlias<"salb", "shlb">;
+def : MnemonicAlias<"salw", "shlw">;
+def : MnemonicAlias<"sall", "shll">;
+def : MnemonicAlias<"salq", "shlq">;
+
+def : MnemonicAlias<"smovb", "movsb">;
+def : MnemonicAlias<"smovw", "movsw">;
+def : MnemonicAlias<"smovl", "movsl">;
+def : MnemonicAlias<"smovq", "movsq">;
+
+def : MnemonicAlias<"ud2a", "ud2">;
+def : MnemonicAlias<"verrw", "verr">;
+
+// System instruction aliases.
+def : MnemonicAlias<"iret", "iretl">;
+def : MnemonicAlias<"sysret", "sysretl">;
+
+def : MnemonicAlias<"lgdtl", "lgdt">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"lgdtq", "lgdt">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"lidtl", "lidt">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"lidtq", "lidt">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"sgdtl", "sgdt">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"sgdtq", "sgdt">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"sidtl", "sidt">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"sidtq", "sidt">, Requires<[In64BitMode]>;
+
+
+// Floating point stack aliases.
+def : MnemonicAlias<"fcmovz", "fcmove">;
+def : MnemonicAlias<"fcmova", "fcmovnbe">;
+def : MnemonicAlias<"fcmovnae", "fcmovb">;
+def : MnemonicAlias<"fcmovna", "fcmovbe">;
+def : MnemonicAlias<"fcmovae", "fcmovnb">;
+def : MnemonicAlias<"fcomip", "fcompi">;
+def : MnemonicAlias<"fildq", "fildll">;
+def : MnemonicAlias<"fldcww", "fldcw">;
+def : MnemonicAlias<"fnstcww", "fnstcw">;
+def : MnemonicAlias<"fnstsww", "fnstsw">;
+def : MnemonicAlias<"fucomip", "fucompi">;
+def : MnemonicAlias<"fwait", "wait">;
+
+
+class CondCodeAlias<string Prefix,string Suffix, string OldCond, string NewCond>
+ : MnemonicAlias<!strconcat(Prefix, OldCond, Suffix),
+ !strconcat(Prefix, NewCond, Suffix)>;
+
+/// IntegerCondCodeMnemonicAlias - This multiclass defines a bunch of
+/// MnemonicAlias's that canonicalize the condition code in a mnemonic, for
+/// example "setz" -> "sete".
+multiclass IntegerCondCodeMnemonicAlias<string Prefix, string Suffix> {
+ def C : CondCodeAlias<Prefix, Suffix, "c", "b">; // setc -> setb
+ def Z : CondCodeAlias<Prefix, Suffix, "z" , "e">; // setz -> sete
+ def NA : CondCodeAlias<Prefix, Suffix, "na", "be">; // setna -> setbe
+ def NB : CondCodeAlias<Prefix, Suffix, "nb", "ae">; // setnb -> setae
+ def NC : CondCodeAlias<Prefix, Suffix, "nc", "ae">; // setnc -> setae
+ def NG : CondCodeAlias<Prefix, Suffix, "ng", "le">; // setng -> setle
+ def NL : CondCodeAlias<Prefix, Suffix, "nl", "ge">; // setnl -> setge
+ def NZ : CondCodeAlias<Prefix, Suffix, "nz", "ne">; // setnz -> setne
+ def PE : CondCodeAlias<Prefix, Suffix, "pe", "p">; // setpe -> setp
+ def PO : CondCodeAlias<Prefix, Suffix, "po", "np">; // setpo -> setnp
+
+ def NAE : CondCodeAlias<Prefix, Suffix, "nae", "b">; // setnae -> setb
+ def NBE : CondCodeAlias<Prefix, Suffix, "nbe", "a">; // setnbe -> seta
+ def NGE : CondCodeAlias<Prefix, Suffix, "nge", "l">; // setnge -> setl
+ def NLE : CondCodeAlias<Prefix, Suffix, "nle", "g">; // setnle -> setg
+}
+
+// Aliases for set<CC>
+defm : IntegerCondCodeMnemonicAlias<"set", "">;
+// Aliases for j<CC>
+defm : IntegerCondCodeMnemonicAlias<"j", "">;
+// Aliases for cmov<CC>{w,l,q}
+defm : IntegerCondCodeMnemonicAlias<"cmov", "w">;
+defm : IntegerCondCodeMnemonicAlias<"cmov", "l">;
+defm : IntegerCondCodeMnemonicAlias<"cmov", "q">;
+
//===----------------------------------------------------------------------===//
-// MMX and XMM Packed Integer support (requires MMX, SSE, and SSE2)
+// Assembler Instruction Aliases
//===----------------------------------------------------------------------===//
-include "X86InstrMMX.td"
+// aad/aam default to base 10 if no operand is specified.
+def : InstAlias<"aad", (AAD8i8 10)>;
+def : InstAlias<"aam", (AAM8i8 10)>;
+
+// Disambiguate the mem/imm form of bt-without-a-suffix as btl.
+def : InstAlias<"bt $imm, $mem", (BT32mi8 i32mem:$mem, i32i8imm:$imm)>;
+
+// clr aliases.
+def : InstAlias<"clrb $reg", (XOR8rr GR8 :$reg, GR8 :$reg)>;
+def : InstAlias<"clrw $reg", (XOR16rr GR16:$reg, GR16:$reg)>;
+def : InstAlias<"clrl $reg", (XOR32rr GR32:$reg, GR32:$reg)>;
+def : InstAlias<"clrq $reg", (XOR64rr GR64:$reg, GR64:$reg)>;
+
+// div and idiv aliases for explicit A register.
+def : InstAlias<"divb $src, %al", (DIV8r GR8 :$src)>;
+def : InstAlias<"divw $src, %ax", (DIV16r GR16:$src)>;
+def : InstAlias<"divl $src, %eax", (DIV32r GR32:$src)>;
+def : InstAlias<"divq $src, %rax", (DIV64r GR64:$src)>;
+def : InstAlias<"divb $src, %al", (DIV8m i8mem :$src)>;
+def : InstAlias<"divw $src, %ax", (DIV16m i16mem:$src)>;
+def : InstAlias<"divl $src, %eax", (DIV32m i32mem:$src)>;
+def : InstAlias<"divq $src, %rax", (DIV64m i64mem:$src)>;
+def : InstAlias<"idivb $src, %al", (IDIV8r GR8 :$src)>;
+def : InstAlias<"idivw $src, %ax", (IDIV16r GR16:$src)>;
+def : InstAlias<"idivl $src, %eax", (IDIV32r GR32:$src)>;
+def : InstAlias<"idivq $src, %rax", (IDIV64r GR64:$src)>;
+def : InstAlias<"idivb $src, %al", (IDIV8m i8mem :$src)>;
+def : InstAlias<"idivw $src, %ax", (IDIV16m i16mem:$src)>;
+def : InstAlias<"idivl $src, %eax", (IDIV32m i32mem:$src)>;
+def : InstAlias<"idivq $src, %rax", (IDIV64m i64mem:$src)>;
+
+
+
+// Various unary fpstack operations default to operating on on ST1.
+// For example, "fxch" -> "fxch %st(1)"
+def : InstAlias<"faddp", (ADD_FPrST0 ST1)>;
+def : InstAlias<"fsubp", (SUBR_FPrST0 ST1)>;
+def : InstAlias<"fsubrp", (SUB_FPrST0 ST1)>;
+def : InstAlias<"fmulp", (MUL_FPrST0 ST1)>;
+def : InstAlias<"fdivp", (DIVR_FPrST0 ST1)>;
+def : InstAlias<"fdivrp", (DIV_FPrST0 ST1)>;
+def : InstAlias<"fxch", (XCH_F ST1)>;
+def : InstAlias<"fcomi", (COM_FIr ST1)>;
+def : InstAlias<"fcompi", (COM_FIPr ST1)>;
+def : InstAlias<"fucom", (UCOM_Fr ST1)>;
+def : InstAlias<"fucomp", (UCOM_FPr ST1)>;
+def : InstAlias<"fucomi", (UCOM_FIr ST1)>;
+def : InstAlias<"fucompi", (UCOM_FIPr ST1)>;
+
+// Handle fmul/fadd/fsub/fdiv instructions with explicitly written st(0) op.
+// For example, "fadd %st(4), %st(0)" -> "fadd %st(4)". We also disambiguate
+// instructions like "fadd %st(0), %st(0)" as "fadd %st(0)" for consistency with
+// gas.
+multiclass FpUnaryAlias<string Mnemonic, Instruction Inst> {
+ def : InstAlias<!strconcat(Mnemonic, " $op, %st(0)"), (Inst RST:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, " %st(0), %st(0)"), (Inst ST0)>;
+}
+
+defm : FpUnaryAlias<"fadd", ADD_FST0r>;
+defm : FpUnaryAlias<"faddp", ADD_FPrST0>;
+defm : FpUnaryAlias<"fsub", SUB_FST0r>;
+defm : FpUnaryAlias<"fsubp", SUBR_FPrST0>;
+defm : FpUnaryAlias<"fsubr", SUBR_FST0r>;
+defm : FpUnaryAlias<"fsubrp", SUB_FPrST0>;
+defm : FpUnaryAlias<"fmul", MUL_FST0r>;
+defm : FpUnaryAlias<"fmulp", MUL_FPrST0>;
+defm : FpUnaryAlias<"fdiv", DIV_FST0r>;
+defm : FpUnaryAlias<"fdivp", DIVR_FPrST0>;
+defm : FpUnaryAlias<"fdivr", DIVR_FST0r>;
+defm : FpUnaryAlias<"fdivrp", DIV_FPrST0>;
+defm : FpUnaryAlias<"fcomi", COM_FIr>;
+defm : FpUnaryAlias<"fucomi", UCOM_FIr>;
+defm : FpUnaryAlias<"fcompi", COM_FIPr>;
+defm : FpUnaryAlias<"fucompi", UCOM_FIPr>;
+
+
+// Handle "f{mulp,addp} st(0), $op" the same as "f{mulp,addp} $op", since they
+// commute. We also allow fdiv[r]p/fsubrp even though they don't commute,
+// solely because gas supports it.
+def : InstAlias<"faddp %st(0), $op", (ADD_FPrST0 RST:$op)>;
+def : InstAlias<"fmulp %st(0), $op", (MUL_FPrST0 RST:$op)>;
+def : InstAlias<"fsubrp %st(0), $op", (SUB_FPrST0 RST:$op)>;
+def : InstAlias<"fdivp %st(0), $op", (DIVR_FPrST0 RST:$op)>;
+def : InstAlias<"fdivrp %st(0), $op", (DIV_FPrST0 RST:$op)>;
+
+// We accept "fnstsw %eax" even though it only writes %ax.
+def : InstAlias<"fnstsw %eax", (FNSTSW8r)>;
+def : InstAlias<"fnstsw %al" , (FNSTSW8r)>;
+def : InstAlias<"fnstsw" , (FNSTSW8r)>;
+
+// lcall and ljmp aliases. This seems to be an odd mapping in 64-bit mode, but
+// this is compatible with what GAS does.
+def : InstAlias<"lcall $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"ljmp $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"lcall *$dst", (FARCALL32m opaque48mem:$dst)>;
+def : InstAlias<"ljmp *$dst", (FARJMP32m opaque48mem:$dst)>;
+
+// "imul <imm>, B" is an alias for "imul <imm>, B, B".
+def : InstAlias<"imulw $imm, $r", (IMUL16rri GR16:$r, GR16:$r, i16imm:$imm)>;
+def : InstAlias<"imulw $imm, $r", (IMUL16rri8 GR16:$r, GR16:$r, i16i8imm:$imm)>;
+def : InstAlias<"imull $imm, $r", (IMUL32rri GR32:$r, GR32:$r, i32imm:$imm)>;
+def : InstAlias<"imull $imm, $r", (IMUL32rri8 GR32:$r, GR32:$r, i32i8imm:$imm)>;
+def : InstAlias<"imulq $imm, $r",(IMUL64rri32 GR64:$r, GR64:$r,i64i32imm:$imm)>;
+def : InstAlias<"imulq $imm, $r", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm)>;
+
+// inb %dx -> inb %al, %dx
+def : InstAlias<"inb %dx", (IN8rr)>;
+def : InstAlias<"inw %dx", (IN16rr)>;
+def : InstAlias<"inl %dx", (IN32rr)>;
+def : InstAlias<"inb $port", (IN8ri i8imm:$port)>;
+def : InstAlias<"inw $port", (IN16ri i8imm:$port)>;
+def : InstAlias<"inl $port", (IN32ri i8imm:$port)>;
+
+
+// jmp and call aliases for lcall and ljmp. jmp $42,$5 -> ljmp
+def : InstAlias<"call $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"jmp $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"callw $seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>;
+def : InstAlias<"jmpw $seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>;
+def : InstAlias<"calll $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"jmpl $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>;
+
+// Force mov without a suffix with a segment and mem to prefer the 'l' form of
+// the move. All segment/mem forms are equivalent, this has the shortest
+// encoding.
+def : InstAlias<"mov $mem, $seg", (MOV32sm SEGMENT_REG:$seg, i32mem:$mem)>;
+def : InstAlias<"mov $seg, $mem", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg)>;
+
+// Match 'movq <largeimm>, <reg>' as an alias for movabsq.
+def : InstAlias<"movq $imm, $reg", (MOV64ri GR64:$reg, i64imm:$imm)>;
+
+// Match 'movq GR64, MMX' as an alias for movd.
+def : InstAlias<"movq $src, $dst", (MMX_MOVD64to64rr VR64:$dst, GR64:$src)>;
+def : InstAlias<"movq $src, $dst", (MMX_MOVD64from64rr GR64:$dst, VR64:$src)>;
+
+// movsd with no operands (as opposed to the SSE scalar move of a double) is an
+// alias for movsl. (as in rep; movsd)
+def : InstAlias<"movsd", (MOVSD)>;
+
+// movsx aliases
+def : InstAlias<"movsx $src, $dst", (MOVSX16rr8W GR16:$dst, GR8:$src)>;
+def : InstAlias<"movsx $src, $dst", (MOVSX16rm8W GR16:$dst, i8mem:$src)>;
+def : InstAlias<"movsx $src, $dst", (MOVSX32rr8 GR32:$dst, GR8:$src)>;
+def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16:$src)>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr8 GR64:$dst, GR8:$src)>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16:$src)>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32:$src)>;
+
+// movzx aliases
+def : InstAlias<"movzx $src, $dst", (MOVZX16rr8W GR16:$dst, GR8:$src)>;
+def : InstAlias<"movzx $src, $dst", (MOVZX16rm8W GR16:$dst, i8mem:$src)>;
+def : InstAlias<"movzx $src, $dst", (MOVZX32rr8 GR32:$dst, GR8:$src)>;
+def : InstAlias<"movzx $src, $dst", (MOVZX32rr16 GR32:$dst, GR16:$src)>;
+def : InstAlias<"movzx $src, $dst", (MOVZX64rr8_Q GR64:$dst, GR8:$src)>;
+def : InstAlias<"movzx $src, $dst", (MOVZX64rr16_Q GR64:$dst, GR16:$src)>;
+// Note: No GR32->GR64 movzx form.
+
+// outb %dx -> outb %al, %dx
+def : InstAlias<"outb %dx", (OUT8rr)>;
+def : InstAlias<"outw %dx", (OUT16rr)>;
+def : InstAlias<"outl %dx", (OUT32rr)>;
+def : InstAlias<"outb $port", (OUT8ir i8imm:$port)>;
+def : InstAlias<"outw $port", (OUT16ir i8imm:$port)>;
+def : InstAlias<"outl $port", (OUT32ir i8imm:$port)>;
+
+// 'sldt <mem>' can be encoded with either sldtw or sldtq with the same
+// effect (both store to a 16-bit mem). Force to sldtw to avoid ambiguity
+// errors, since its encoding is the most compact.
+def : InstAlias<"sldt $mem", (SLDT16m i16mem:$mem)>;
+
+// shld/shrd op,op -> shld op, op, 1
+def : InstAlias<"shldw $r1, $r2", (SHLD16rri8 GR16:$r1, GR16:$r2, 1)>;
+def : InstAlias<"shldl $r1, $r2", (SHLD32rri8 GR32:$r1, GR32:$r2, 1)>;
+def : InstAlias<"shldq $r1, $r2", (SHLD64rri8 GR64:$r1, GR64:$r2, 1)>;
+def : InstAlias<"shrdw $r1, $r2", (SHRD16rri8 GR16:$r1, GR16:$r2, 1)>;
+def : InstAlias<"shrdl $r1, $r2", (SHRD32rri8 GR32:$r1, GR32:$r2, 1)>;
+def : InstAlias<"shrdq $r1, $r2", (SHRD64rri8 GR64:$r1, GR64:$r2, 1)>;
+
+def : InstAlias<"shldw $mem, $reg", (SHLD16mri8 i16mem:$mem, GR16:$reg, 1)>;
+def : InstAlias<"shldl $mem, $reg", (SHLD32mri8 i32mem:$mem, GR32:$reg, 1)>;
+def : InstAlias<"shldq $mem, $reg", (SHLD64mri8 i64mem:$mem, GR64:$reg, 1)>;
+def : InstAlias<"shrdw $mem, $reg", (SHRD16mri8 i16mem:$mem, GR16:$reg, 1)>;
+def : InstAlias<"shrdl $mem, $reg", (SHRD32mri8 i32mem:$mem, GR32:$reg, 1)>;
+def : InstAlias<"shrdq $mem, $reg", (SHRD64mri8 i64mem:$mem, GR64:$reg, 1)>;
+
+/* FIXME: This is disabled because the asm matcher is currently incapable of
+ * matching a fixed immediate like $1.
+// "shl X, $1" is an alias for "shl X".
+multiclass ShiftRotateByOneAlias<string Mnemonic, string Opc> {
+ def : InstAlias<!strconcat(Mnemonic, "b $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "8r1")) GR8:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "w $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "16r1")) GR16:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "l $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "32r1")) GR32:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "q $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "64r1")) GR64:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "b $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "8m1")) i8mem:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "w $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "16m1")) i16mem:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "l $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "32m1")) i32mem:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "q $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "64m1")) i64mem:$op)>;
+}
+
+defm : ShiftRotateByOneAlias<"rcl", "RCL">;
+defm : ShiftRotateByOneAlias<"rcr", "RCR">;
+defm : ShiftRotateByOneAlias<"rol", "ROL">;
+defm : ShiftRotateByOneAlias<"ror", "ROR">;
+FIXME */
+
+// test: We accept "testX <reg>, <mem>" and "testX <mem>, <reg>" as synonyms.
+def : InstAlias<"testb $val, $mem", (TEST8rm GR8 :$val, i8mem :$mem)>;
+def : InstAlias<"testw $val, $mem", (TEST16rm GR16:$val, i16mem:$mem)>;
+def : InstAlias<"testl $val, $mem", (TEST32rm GR32:$val, i32mem:$mem)>;
+def : InstAlias<"testq $val, $mem", (TEST64rm GR64:$val, i64mem:$mem)>;
+
+// xchg: We accept "xchgX <reg>, <mem>" and "xchgX <mem>, <reg>" as synonyms.
+def : InstAlias<"xchgb $mem, $val", (XCHG8rm GR8 :$val, i8mem :$mem)>;
+def : InstAlias<"xchgw $mem, $val", (XCHG16rm GR16:$val, i16mem:$mem)>;
+def : InstAlias<"xchgl $mem, $val", (XCHG32rm GR32:$val, i32mem:$mem)>;
+def : InstAlias<"xchgq $mem, $val", (XCHG64rm GR64:$val, i64mem:$mem)>;
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 11d4179..bb2165a 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -1,4 +1,4 @@
-//====- X86InstrMMX.td - Describe the X86 Instruction Set --*- tablegen -*-===//
+//====- X86InstrMMX.td - Describe the MMX Instruction Set --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,6 +11,9 @@
// and properties of the instructions which are needed for code generation,
// machine code emission, and analysis.
//
+// All instructions that use MMX should be in this file, even if they also use
+// SSE.
+//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -18,58 +21,23 @@
//===----------------------------------------------------------------------===//
let Constraints = "$src1 = $dst" in {
- // MMXI_binop_rm - Simple MMX binary operator.
- multiclass MMXI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType OpVT, bit Commutable = 0> {
- def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src1, VR64:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (OpVT (OpNode VR64:$src1, VR64:$src2)))]> {
- let isCommutable = Commutable;
- }
- def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
- (ins VR64:$src1, i64mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (OpVT (OpNode VR64:$src1,
- (bitconvert
- (load_mmx addr:$src2)))))]>;
- }
-
+ // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
+ // When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp.
multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
bit Commutable = 0> {
- def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
+ def irr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]> {
let isCommutable = Commutable;
}
- def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
+ def irm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>;
}
- // MMXI_binop_rm_v1i64 - Simple MMX binary operator whose type is v1i64.
- //
- // FIXME: we could eliminate this and use MMXI_binop_rm instead if tblgen knew
- // to collapse (bitconvert VT to VT) into its operand.
- //
- multiclass MMXI_binop_rm_v1i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
- bit Commutable = 0> {
- def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src1, VR64:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (v1i64 (OpNode VR64:$src1, VR64:$src2)))]> {
- let isCommutable = Commutable;
- }
- def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
- (ins VR64:$src1, i64mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst,
- (OpNode VR64:$src1,(load_mmx addr:$src2)))]>;
- }
-
multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
string OpcodeStr, Intrinsic IntId,
Intrinsic IntId2> {
@@ -89,14 +57,75 @@ let Constraints = "$src1 = $dst" in {
}
}
+/// Unary MMX instructions requiring SSSE3.
+multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId64> {
+ def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR64:$dst, (IntId64 VR64:$src))]>;
+
+ def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR64:$dst,
+ (IntId64 (bitconvert (memopmmx addr:$src))))]>;
+}
+
+/// Binary MMX instructions requiring SSSE3.
+let ImmT = NoImm, Constraints = "$src1 = $dst" in {
+multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId64> {
+ let isCommutable = 0 in
+ def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>;
+ def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst,
+ (IntId64 VR64:$src1,
+ (bitconvert (memopmmx addr:$src2))))]>;
+}
+}
+
+/// PALIGN MMX instructions (require SSSE3).
+multiclass ssse3_palign_mm<string asm, Intrinsic IntId> {
+ def R64irr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>;
+ def R64irm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR64:$dst, (IntId VR64:$src1,
+ (bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>;
+}
+
+multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm, Domain d> {
+ def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ [(set DstRC:$dst, (Int SrcRC:$src))], d>;
+ def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>;
+}
+
+multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
+ RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
+ PatFrag ld_frag, string asm, Domain d> {
+ def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst),(ins DstRC:$src1, SrcRC:$src2),
+ asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>;
+ def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst),
+ (ins DstRC:$src1, x86memop:$src2), asm,
+ [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>;
+}
+
//===----------------------------------------------------------------------===//
-// MMX EMMS & FEMMS Instructions
+// MMX EMMS Instruction
//===----------------------------------------------------------------------===//
def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms",
[(int_x86_mmx_emms)]>;
-def MMX_FEMMS : MMXI<0x0E, RawFrm, (outs), (ins), "femms",
- [(int_x86_mmx_femms)]>;
//===----------------------------------------------------------------------===//
// MMX Scalar Instructions
@@ -106,12 +135,12 @@ def MMX_FEMMS : MMXI<0x0E, RawFrm, (outs), (ins), "femms",
def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
- (v2i32 (scalar_to_vector GR32:$src)))]>;
-let canFoldAsLoad = 1, isReMaterializable = 1 in
+ (x86mmx (scalar_to_vector GR32:$src)))]>;
+let canFoldAsLoad = 1 in
def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
- (v2i32 (scalar_to_vector (loadi32 addr:$src))))]>;
+ (x86mmx (scalar_to_vector (loadi32 addr:$src))))]>;
let mayStore = 1 in
def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
"movd\t{$src, $dst|$dst, $src}", []>;
@@ -123,42 +152,41 @@ def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
"movd\t{$src, $dst|$dst, $src}",
[]>;
-let neverHasSideEffects = 1 in
// These are 64 bit moves, but since the OS X assembler doesn't
// recognize a register-register movq, we write them as
// movd.
def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
(outs GR64:$dst), (ins VR64:$src),
- "movd\t{$src, $dst|$dst, $src}", []>;
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst,
+ (bitconvert VR64:$src))]>;
def MMX_MOVD64rrv164 : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
- (v1i64 (scalar_to_vector GR64:$src)))]>;
-
+ (bitconvert GR64:$src))]>;
let neverHasSideEffects = 1 in
def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
"movq\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1 in
+let canFoldAsLoad = 1 in
def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR64:$dst, (load_mmx addr:$src))]>;
def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movq\t{$src, $dst|$dst, $src}",
- [(store (v1i64 VR64:$src), addr:$dst)]>;
+ [(store (x86mmx VR64:$src), addr:$dst)]>;
def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
"movdq2q\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
- (v1i64 (bitconvert
+ (x86mmx (bitconvert
(i64 (vector_extract (v2i64 VR128:$src),
(iPTR 0))))))]>;
def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
"movq2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (movl immAllZerosV,
- (v2i64 (scalar_to_vector
- (i64 (bitconvert (v1i64 VR64:$src)))))))]>;
+ (v2i64 (scalar_to_vector
+ (i64 (bitconvert (x86mmx VR64:$src))))))]>;
let neverHasSideEffects = 1 in
def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src),
@@ -176,34 +204,40 @@ let AddedComplexity = 15 in
def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
- (v2i32 (X86vzmovl (v2i32 (scalar_to_vector GR32:$src)))))]>;
+ (x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))))]>;
let AddedComplexity = 20 in
def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst),
(ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
- (v2i32 (X86vzmovl (v2i32
+ (x86mmx (X86vzmovl (x86mmx
(scalar_to_vector (loadi32 addr:$src))))))]>;
// Arithmetic Instructions
-
+defm MMX_PABSB : SS3I_unop_rm_int_mm<0x1C, "pabsb", int_x86_ssse3_pabs_b>;
+defm MMX_PABSW : SS3I_unop_rm_int_mm<0x1D, "pabsw", int_x86_ssse3_pabs_w>;
+defm MMX_PABSD : SS3I_unop_rm_int_mm<0x1E, "pabsd", int_x86_ssse3_pabs_d>;
// -- Addition
-defm MMX_PADDB : MMXI_binop_rm<0xFC, "paddb", add, v8i8, 1>;
-defm MMX_PADDW : MMXI_binop_rm<0xFD, "paddw", add, v4i16, 1>;
-defm MMX_PADDD : MMXI_binop_rm<0xFE, "paddd", add, v2i32, 1>;
-defm MMX_PADDQ : MMXI_binop_rm<0xD4, "paddq", add, v1i64, 1>;
-
+defm MMX_PADDB : MMXI_binop_rm_int<0xFC, "paddb", int_x86_mmx_padd_b, 1>;
+defm MMX_PADDW : MMXI_binop_rm_int<0xFD, "paddw", int_x86_mmx_padd_w, 1>;
+defm MMX_PADDD : MMXI_binop_rm_int<0xFE, "paddd", int_x86_mmx_padd_d, 1>;
+defm MMX_PADDQ : MMXI_binop_rm_int<0xD4, "paddq", int_x86_mmx_padd_q, 1>;
defm MMX_PADDSB : MMXI_binop_rm_int<0xEC, "paddsb" , int_x86_mmx_padds_b, 1>;
defm MMX_PADDSW : MMXI_binop_rm_int<0xED, "paddsw" , int_x86_mmx_padds_w, 1>;
defm MMX_PADDUSB : MMXI_binop_rm_int<0xDC, "paddusb", int_x86_mmx_paddus_b, 1>;
defm MMX_PADDUSW : MMXI_binop_rm_int<0xDD, "paddusw", int_x86_mmx_paddus_w, 1>;
+defm MMX_PHADDW : SS3I_binop_rm_int_mm<0x01, "phaddw", int_x86_ssse3_phadd_w>;
+defm MMX_PHADD : SS3I_binop_rm_int_mm<0x02, "phaddd", int_x86_ssse3_phadd_d>;
+defm MMX_PHADDSW : SS3I_binop_rm_int_mm<0x03, "phaddsw",int_x86_ssse3_phadd_sw>;
+
+
// -- Subtraction
-defm MMX_PSUBB : MMXI_binop_rm<0xF8, "psubb", sub, v8i8>;
-defm MMX_PSUBW : MMXI_binop_rm<0xF9, "psubw", sub, v4i16>;
-defm MMX_PSUBD : MMXI_binop_rm<0xFA, "psubd", sub, v2i32>;
-defm MMX_PSUBQ : MMXI_binop_rm<0xFB, "psubq", sub, v1i64>;
+defm MMX_PSUBB : MMXI_binop_rm_int<0xF8, "psubb", int_x86_mmx_psub_b>;
+defm MMX_PSUBW : MMXI_binop_rm_int<0xF9, "psubw", int_x86_mmx_psub_w>;
+defm MMX_PSUBD : MMXI_binop_rm_int<0xFA, "psubd", int_x86_mmx_psub_d>;
+defm MMX_PSUBQ : MMXI_binop_rm_int<0xFB, "psubq", int_x86_mmx_psub_q>;
defm MMX_PSUBSB : MMXI_binop_rm_int<0xE8, "psubsb" , int_x86_mmx_psubs_b>;
defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w>;
@@ -211,16 +245,25 @@ defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w>;
defm MMX_PSUBUSB : MMXI_binop_rm_int<0xD8, "psubusb", int_x86_mmx_psubus_b>;
defm MMX_PSUBUSW : MMXI_binop_rm_int<0xD9, "psubusw", int_x86_mmx_psubus_w>;
+defm MMX_PHSUBW : SS3I_binop_rm_int_mm<0x05, "phsubw", int_x86_ssse3_phsub_w>;
+defm MMX_PHSUBD : SS3I_binop_rm_int_mm<0x06, "phsubd", int_x86_ssse3_phsub_d>;
+defm MMX_PHSUBSW : SS3I_binop_rm_int_mm<0x07, "phsubsw",int_x86_ssse3_phsub_sw>;
+
// -- Multiplication
-defm MMX_PMULLW : MMXI_binop_rm<0xD5, "pmullw", mul, v4i16, 1>;
+defm MMX_PMULLW : MMXI_binop_rm_int<0xD5, "pmullw", int_x86_mmx_pmull_w, 1>;
defm MMX_PMULHW : MMXI_binop_rm_int<0xE5, "pmulhw", int_x86_mmx_pmulh_w, 1>;
defm MMX_PMULHUW : MMXI_binop_rm_int<0xE4, "pmulhuw", int_x86_mmx_pmulhu_w, 1>;
defm MMX_PMULUDQ : MMXI_binop_rm_int<0xF4, "pmuludq", int_x86_mmx_pmulu_dq, 1>;
+let isCommutable = 1 in
+defm MMX_PMULHRSW : SS3I_binop_rm_int_mm<0x0B, "pmulhrsw",
+ int_x86_ssse3_pmul_hr_sw>;
// -- Miscellanea
defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd, 1>;
+defm MMX_PMADDUBSW : SS3I_binop_rm_int_mm<0x04, "pmaddubsw",
+ int_x86_ssse3_pmadd_ub_sw>;
defm MMX_PAVGB : MMXI_binop_rm_int<0xE0, "pavgb", int_x86_mmx_pavg_b, 1>;
defm MMX_PAVGW : MMXI_binop_rm_int<0xE3, "pavgw", int_x86_mmx_pavg_w, 1>;
@@ -232,23 +275,17 @@ defm MMX_PMAXSW : MMXI_binop_rm_int<0xEE, "pmaxsw", int_x86_mmx_pmaxs_w, 1>;
defm MMX_PSADBW : MMXI_binop_rm_int<0xF6, "psadbw", int_x86_mmx_psad_bw, 1>;
-// Logical Instructions
-defm MMX_PAND : MMXI_binop_rm_v1i64<0xDB, "pand", and, 1>;
-defm MMX_POR : MMXI_binop_rm_v1i64<0xEB, "por" , or, 1>;
-defm MMX_PXOR : MMXI_binop_rm_v1i64<0xEF, "pxor", xor, 1>;
+defm MMX_PSIGNB : SS3I_binop_rm_int_mm<0x08, "psignb", int_x86_ssse3_psign_b>;
+defm MMX_PSIGNW : SS3I_binop_rm_int_mm<0x09, "psignw", int_x86_ssse3_psign_w>;
+defm MMX_PSIGND : SS3I_binop_rm_int_mm<0x0A, "psignd", int_x86_ssse3_psign_d>;
+let Constraints = "$src1 = $dst" in
+ defm MMX_PALIGN : ssse3_palign_mm<"palignr", int_x86_mmx_palignr_b>;
-let Constraints = "$src1 = $dst" in {
- def MMX_PANDNrr : MMXI<0xDF, MRMSrcReg,
- (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1),
- VR64:$src2)))]>;
- def MMX_PANDNrm : MMXI<0xDF, MRMSrcMem,
- (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1),
- (load addr:$src2))))]>;
-}
+// Logical Instructions
+defm MMX_PAND : MMXI_binop_rm_int<0xDB, "pand", int_x86_mmx_pand, 1>;
+defm MMX_POR : MMXI_binop_rm_int<0xEB, "por" , int_x86_mmx_por, 1>;
+defm MMX_PXOR : MMXI_binop_rm_int<0xEF, "pxor", int_x86_mmx_pxor, 1>;
+defm MMX_PANDN : MMXI_binop_rm_int<0xDF, "pandn", int_x86_mmx_pandn, 1>;
// Shift Instructions
defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
@@ -270,12 +307,6 @@ defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
int_x86_mmx_psra_d, int_x86_mmx_psrai_d>;
-// Shift up / down and insert zero's.
-def : Pat<(v1i64 (X86vshl VR64:$src, (i8 imm:$amt))),
- (MMX_PSLLQri VR64:$src, (GetLo32XForm imm:$amt))>;
-def : Pat<(v1i64 (X86vshr VR64:$src, (i8 imm:$amt))),
- (MMX_PSRLQri VR64:$src, (GetLo32XForm imm:$amt))>;
-
// Comparison Instructions
defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b>;
defm MMX_PCMPEQW : MMXI_binop_rm_int<0x75, "pcmpeqw", int_x86_mmx_pcmpeq_w>;
@@ -285,84 +316,19 @@ defm MMX_PCMPGTB : MMXI_binop_rm_int<0x64, "pcmpgtb", int_x86_mmx_pcmpgt_b>;
defm MMX_PCMPGTW : MMXI_binop_rm_int<0x65, "pcmpgtw", int_x86_mmx_pcmpgt_w>;
defm MMX_PCMPGTD : MMXI_binop_rm_int<0x66, "pcmpgtd", int_x86_mmx_pcmpgt_d>;
-// Conversion Instructions
-
// -- Unpack Instructions
-let Constraints = "$src1 = $dst" in {
- // Unpack High Packed Data Instructions
- def MMX_PUNPCKHBWrr : MMXI<0x68, MRMSrcReg,
- (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
- "punpckhbw\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v8i8 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
- def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem,
- (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
- "punpckhbw\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v8i8 (mmx_unpckh VR64:$src1,
- (bc_v8i8 (load_mmx addr:$src2)))))]>;
-
- def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg,
- (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
- "punpckhwd\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v4i16 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
- def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem,
- (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
- "punpckhwd\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v4i16 (mmx_unpckh VR64:$src1,
- (bc_v4i16 (load_mmx addr:$src2)))))]>;
-
- def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg,
- (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
- "punpckhdq\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v2i32 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
- def MMX_PUNPCKHDQrm : MMXI<0x6A, MRMSrcMem,
- (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
- "punpckhdq\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v2i32 (mmx_unpckh VR64:$src1,
- (bc_v2i32 (load_mmx addr:$src2)))))]>;
-
- // Unpack Low Packed Data Instructions
- def MMX_PUNPCKLBWrr : MMXI<0x60, MRMSrcReg,
- (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
- "punpcklbw\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v8i8 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
- def MMX_PUNPCKLBWrm : MMXI<0x60, MRMSrcMem,
- (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
- "punpcklbw\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v8i8 (mmx_unpckl VR64:$src1,
- (bc_v8i8 (load_mmx addr:$src2)))))]>;
-
- def MMX_PUNPCKLWDrr : MMXI<0x61, MRMSrcReg,
- (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
- "punpcklwd\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v4i16 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
- def MMX_PUNPCKLWDrm : MMXI<0x61, MRMSrcMem,
- (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
- "punpcklwd\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v4i16 (mmx_unpckl VR64:$src1,
- (bc_v4i16 (load_mmx addr:$src2)))))]>;
-
- def MMX_PUNPCKLDQrr : MMXI<0x62, MRMSrcReg,
- (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
- "punpckldq\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v2i32 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
- def MMX_PUNPCKLDQrm : MMXI<0x62, MRMSrcMem,
- (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
- "punpckldq\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v2i32 (mmx_unpckl VR64:$src1,
- (bc_v2i32 (load_mmx addr:$src2)))))]>;
-}
+defm MMX_PUNPCKHBW : MMXI_binop_rm_int<0x68, "punpckhbw",
+ int_x86_mmx_punpckhbw>;
+defm MMX_PUNPCKHWD : MMXI_binop_rm_int<0x69, "punpckhwd",
+ int_x86_mmx_punpckhwd>;
+defm MMX_PUNPCKHDQ : MMXI_binop_rm_int<0x6A, "punpckhdq",
+ int_x86_mmx_punpckhdq>;
+defm MMX_PUNPCKLBW : MMXI_binop_rm_int<0x60, "punpcklbw",
+ int_x86_mmx_punpcklbw>;
+defm MMX_PUNPCKLWD : MMXI_binop_rm_int<0x61, "punpcklwd",
+ int_x86_mmx_punpcklwd>;
+defm MMX_PUNPCKLDQ : MMXI_binop_rm_int<0x62, "punpckldq",
+ int_x86_mmx_punpckldq>;
// -- Pack Instructions
defm MMX_PACKSSWB : MMXI_binop_rm_int<0x63, "packsswb", int_x86_mmx_packsswb>;
@@ -370,93 +336,80 @@ defm MMX_PACKSSDW : MMXI_binop_rm_int<0x6B, "packssdw", int_x86_mmx_packssdw>;
defm MMX_PACKUSWB : MMXI_binop_rm_int<0x67, "packuswb", int_x86_mmx_packuswb>;
// -- Shuffle Instructions
+defm MMX_PSHUFB : SS3I_binop_rm_int_mm<0x00, "pshufb", int_x86_ssse3_pshuf_b>;
+
def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, i8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
- (v4i16 (mmx_pshufw:$src2 VR64:$src1, (undef))))]>;
+ (int_x86_sse_pshuf_w VR64:$src1, imm:$src2))]>;
def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
(outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
- (mmx_pshufw:$src2 (bc_v4i16 (load_mmx addr:$src1)),
- (undef)))]>;
+ (int_x86_sse_pshuf_w (load_mmx addr:$src1),
+ imm:$src2))]>;
-// -- Conversion Instructions
-let neverHasSideEffects = 1 in {
-def MMX_CVTPD2PIrr : MMX2I<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
- "cvtpd2pi\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-def MMX_CVTPD2PIrm : MMX2I<0x2D, MRMSrcMem, (outs VR64:$dst),
- (ins f128mem:$src),
- "cvtpd2pi\t{$src, $dst|$dst, $src}", []>;
-
-def MMX_CVTPI2PDrr : MMX2I<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
- "cvtpi2pd\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-def MMX_CVTPI2PDrm : MMX2I<0x2A, MRMSrcMem, (outs VR128:$dst),
- (ins i64mem:$src),
- "cvtpi2pd\t{$src, $dst|$dst, $src}", []>;
-
-def MMX_CVTPI2PSrr : MMXI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
- "cvtpi2ps\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-def MMX_CVTPI2PSrm : MMXI<0x2A, MRMSrcMem, (outs VR128:$dst),
- (ins i64mem:$src),
- "cvtpi2ps\t{$src, $dst|$dst, $src}", []>;
-
-def MMX_CVTPS2PIrr : MMXI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
- "cvtps2pi\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-def MMX_CVTPS2PIrm : MMXI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
- "cvtps2pi\t{$src, $dst|$dst, $src}", []>;
-
-def MMX_CVTTPD2PIrr : MMX2I<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
- "cvttpd2pi\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-def MMX_CVTTPD2PIrm : MMX2I<0x2C, MRMSrcMem, (outs VR64:$dst),
- (ins f128mem:$src),
- "cvttpd2pi\t{$src, $dst|$dst, $src}", []>;
-
-def MMX_CVTTPS2PIrr : MMXI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
- "cvttps2pi\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-def MMX_CVTTPS2PIrm : MMXI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
- "cvttps2pi\t{$src, $dst|$dst, $src}", []>;
-} // end neverHasSideEffects
-// Extract / Insert
-def MMX_X86pinsrw : SDNode<"X86ISD::MMX_PINSRW",
- SDTypeProfile<1, 3, [SDTCisVT<0, v4i16>, SDTCisSameAs<0,1>,
- SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
-def MMX_PEXTRWri : MMXIi8<0xC5, MRMSrcReg,
- (outs GR32:$dst), (ins VR64:$src1, i16i8imm:$src2),
+// -- Conversion Instructions
+defm MMX_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi,
+ f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB;
+defm MMX_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi,
+ f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedDouble>, TB, OpSize;
+defm MMX_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi,
+ f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB;
+defm MMX_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi,
+ f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedDouble>, TB, OpSize;
+defm MMX_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd,
+ i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}",
+ SSEPackedDouble>, TB, OpSize;
+let Constraints = "$src1 = $dst" in {
+ defm MMX_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128,
+ int_x86_sse_cvtpi2ps,
+ i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
+ SSEPackedSingle>, TB;
+}
+
+// Extract / Insert
+def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg,
+ (outs GR32:$dst), (ins VR64:$src1, i32i8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, (X86pextrw (v4i16 VR64:$src1),
+ [(set GR32:$dst, (int_x86_mmx_pextr_w VR64:$src1,
(iPTR imm:$src2)))]>;
let Constraints = "$src1 = $dst" in {
- def MMX_PINSRWrri : MMXIi8<0xC4, MRMSrcReg,
+ def MMX_PINSRWirri : MMXIi8<0xC4, MRMSrcReg,
(outs VR64:$dst),
- (ins VR64:$src1, GR32:$src2,i16i8imm:$src3),
+ (ins VR64:$src1, GR32:$src2, i32i8imm:$src3),
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR64:$dst, (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1),
- GR32:$src2,(iPTR imm:$src3))))]>;
- def MMX_PINSRWrmi : MMXIi8<0xC4, MRMSrcMem,
+ [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
+ GR32:$src2, (iPTR imm:$src3)))]>;
+
+ def MMX_PINSRWirmi : MMXIi8<0xC4, MRMSrcMem,
(outs VR64:$dst),
- (ins VR64:$src1, i16mem:$src2, i16i8imm:$src3),
+ (ins VR64:$src1, i16mem:$src2, i32i8imm:$src3),
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR64:$dst,
- (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1),
- (i32 (anyext (loadi16 addr:$src2))),
- (iPTR imm:$src3))))]>;
+ [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
+ (i32 (anyext (loadi16 addr:$src2))),
+ (iPTR imm:$src3)))]>;
}
+// Mask creation
+def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst,
+ (int_x86_mmx_pmovmskb VR64:$src))]>;
+
+
// MMX to XMM for vector types
def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1,
- [SDTCisVT<0, v2i64>, SDTCisVT<1, v1i64>]>>;
+ [SDTCisVT<0, v2i64>, SDTCisVT<1, x86mmx>]>>;
def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)),
(v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
@@ -464,14 +417,19 @@ def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)),
def : Pat<(v2i64 (MMX_X86movq2dq (load_mmx addr:$src))),
(v2i64 (MOVQI2PQIrm addr:$src))>;
-def : Pat<(v2i64 (MMX_X86movq2dq (v1i64 (bitconvert
- (v2i32 (scalar_to_vector (loadi32 addr:$src))))))),
+def : Pat<(v2i64 (MMX_X86movq2dq
+ (x86mmx (scalar_to_vector (loadi32 addr:$src))))),
(v2i64 (MOVDI2PDIrm addr:$src))>;
-// Mask creation
-def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src),
- "pmovmskb\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_mmx_pmovmskb VR64:$src))]>;
+// Low word of XMM to MMX.
+def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1,
+ [SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>;
+
+def : Pat<(x86mmx (MMX_X86movdq2q VR128:$src)),
+ (x86mmx (MMX_MOVDQ2Qrr VR128:$src))>;
+
+def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))),
+ (x86mmx (MMX_MOVQ64rm addr:$src))>;
// Misc.
let Uses = [EDI] in
@@ -483,181 +441,14 @@ def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
"maskmovq\t{$mask, $src|$src, $mask}",
[(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)]>;
-//===----------------------------------------------------------------------===//
-// Alias Instructions
-//===----------------------------------------------------------------------===//
-
-// Alias instructions that map zero vector to pxor.
-let isReMaterializable = 1, isCodeGenOnly = 1 in {
- // FIXME: Change encoding to pseudo.
- def MMX_V_SET0 : MMXI<0xEF, MRMInitReg, (outs VR64:$dst), (ins), "",
- [(set VR64:$dst, (v2i32 immAllZerosV))]>;
- def MMX_V_SETALLONES : MMXI<0x76, MRMInitReg, (outs VR64:$dst), (ins), "",
- [(set VR64:$dst, (v2i32 immAllOnesV))]>;
-}
-
-let Predicates = [HasMMX] in {
- def : Pat<(v1i64 immAllZerosV), (MMX_V_SET0)>;
- def : Pat<(v4i16 immAllZerosV), (MMX_V_SET0)>;
- def : Pat<(v8i8 immAllZerosV), (MMX_V_SET0)>;
-}
-
-//===----------------------------------------------------------------------===//
-// Non-Instruction Patterns
-//===----------------------------------------------------------------------===//
-
-// Store 64-bit integer vector values.
-def : Pat<(store (v8i8 VR64:$src), addr:$dst),
- (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
-def : Pat<(store (v4i16 VR64:$src), addr:$dst),
- (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
-def : Pat<(store (v2i32 VR64:$src), addr:$dst),
- (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
-def : Pat<(store (v1i64 VR64:$src), addr:$dst),
- (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
-
-// Bit convert.
-def : Pat<(v8i8 (bitconvert (v1i64 VR64:$src))), (v8i8 VR64:$src)>;
-def : Pat<(v8i8 (bitconvert (v2i32 VR64:$src))), (v8i8 VR64:$src)>;
-def : Pat<(v8i8 (bitconvert (v4i16 VR64:$src))), (v8i8 VR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v1i64 VR64:$src))), (v4i16 VR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v2i32 VR64:$src))), (v4i16 VR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v8i8 VR64:$src))), (v4i16 VR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v1i64 VR64:$src))), (v2i32 VR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v4i16 VR64:$src))), (v2i32 VR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v8i8 VR64:$src))), (v2i32 VR64:$src)>;
-def : Pat<(v1i64 (bitconvert (v2i32 VR64:$src))), (v1i64 VR64:$src)>;
-def : Pat<(v1i64 (bitconvert (v4i16 VR64:$src))), (v1i64 VR64:$src)>;
-def : Pat<(v1i64 (bitconvert (v8i8 VR64:$src))), (v1i64 VR64:$src)>;
-
// 64-bit bit convert.
-def : Pat<(v1i64 (bitconvert (i64 GR64:$src))),
+def : Pat<(x86mmx (bitconvert (i64 GR64:$src))),
(MMX_MOVD64to64rr GR64:$src)>;
-def : Pat<(v2i32 (bitconvert (i64 GR64:$src))),
- (MMX_MOVD64to64rr GR64:$src)>;
-def : Pat<(v4i16 (bitconvert (i64 GR64:$src))),
- (MMX_MOVD64to64rr GR64:$src)>;
-def : Pat<(v8i8 (bitconvert (i64 GR64:$src))),
- (MMX_MOVD64to64rr GR64:$src)>;
-def : Pat<(i64 (bitconvert (v1i64 VR64:$src))),
- (MMX_MOVD64from64rr VR64:$src)>;
-def : Pat<(i64 (bitconvert (v2i32 VR64:$src))),
+def : Pat<(i64 (bitconvert (x86mmx VR64:$src))),
(MMX_MOVD64from64rr VR64:$src)>;
-def : Pat<(i64 (bitconvert (v4i16 VR64:$src))),
- (MMX_MOVD64from64rr VR64:$src)>;
-def : Pat<(i64 (bitconvert (v8i8 VR64:$src))),
- (MMX_MOVD64from64rr VR64:$src)>;
-def : Pat<(f64 (bitconvert (v1i64 VR64:$src))),
- (MMX_MOVQ2FR64rr VR64:$src)>;
-def : Pat<(f64 (bitconvert (v2i32 VR64:$src))),
- (MMX_MOVQ2FR64rr VR64:$src)>;
-def : Pat<(f64 (bitconvert (v4i16 VR64:$src))),
+def : Pat<(f64 (bitconvert (x86mmx VR64:$src))),
(MMX_MOVQ2FR64rr VR64:$src)>;
-def : Pat<(f64 (bitconvert (v8i8 VR64:$src))),
- (MMX_MOVQ2FR64rr VR64:$src)>;
-def : Pat<(v1i64 (bitconvert (f64 FR64:$src))),
- (MMX_MOVFR642Qrr FR64:$src)>;
-def : Pat<(v2i32 (bitconvert (f64 FR64:$src))),
- (MMX_MOVFR642Qrr FR64:$src)>;
-def : Pat<(v4i16 (bitconvert (f64 FR64:$src))),
+def : Pat<(x86mmx (bitconvert (f64 FR64:$src))),
(MMX_MOVFR642Qrr FR64:$src)>;
-def : Pat<(v8i8 (bitconvert (f64 FR64:$src))),
- (MMX_MOVFR642Qrr FR64:$src)>;
-
-let AddedComplexity = 20 in {
- def : Pat<(v2i32 (X86vzmovl (bc_v2i32 (load_mmx addr:$src)))),
- (MMX_MOVZDI2PDIrm addr:$src)>;
-}
-
-// Clear top half.
-let AddedComplexity = 15 in {
- def : Pat<(v2i32 (X86vzmovl VR64:$src)),
- (MMX_PUNPCKLDQrr VR64:$src, (v2i32 (MMX_V_SET0)))>;
-}
-
-// Patterns to perform canonical versions of vector shuffling.
-let AddedComplexity = 10 in {
- def : Pat<(v8i8 (mmx_unpckl_undef VR64:$src, (undef))),
- (MMX_PUNPCKLBWrr VR64:$src, VR64:$src)>;
- def : Pat<(v4i16 (mmx_unpckl_undef VR64:$src, (undef))),
- (MMX_PUNPCKLWDrr VR64:$src, VR64:$src)>;
- def : Pat<(v2i32 (mmx_unpckl_undef VR64:$src, (undef))),
- (MMX_PUNPCKLDQrr VR64:$src, VR64:$src)>;
-}
-let AddedComplexity = 10 in {
- def : Pat<(v8i8 (mmx_unpckh_undef VR64:$src, (undef))),
- (MMX_PUNPCKHBWrr VR64:$src, VR64:$src)>;
- def : Pat<(v4i16 (mmx_unpckh_undef VR64:$src, (undef))),
- (MMX_PUNPCKHWDrr VR64:$src, VR64:$src)>;
- def : Pat<(v2i32 (mmx_unpckh_undef VR64:$src, (undef))),
- (MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>;
-}
-// Some special case PANDN patterns.
-// FIXME: Get rid of these.
-def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))),
- VR64:$src2)),
- (MMX_PANDNrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))),
- (load addr:$src2))),
- (MMX_PANDNrm VR64:$src1, addr:$src2)>;
-
-// Move MMX to lower 64-bit of XMM
-def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v8i8 VR64:$src))))),
- (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
-def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v4i16 VR64:$src))))),
- (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
-def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v2i32 VR64:$src))))),
- (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
-def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v1i64 VR64:$src))))),
- (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
-
-// Move lower 64-bit of XMM to MMX.
-def : Pat<(v2i32 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
- (iPTR 0))))),
- (v2i32 (MMX_MOVDQ2Qrr VR128:$src))>;
-def : Pat<(v4i16 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
- (iPTR 0))))),
- (v4i16 (MMX_MOVDQ2Qrr VR128:$src))>;
-def : Pat<(v8i8 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
- (iPTR 0))))),
- (v8i8 (MMX_MOVDQ2Qrr VR128:$src))>;
-
-// Patterns for vector comparisons
-def : Pat<(v8i8 (X86pcmpeqb VR64:$src1, VR64:$src2)),
- (MMX_PCMPEQBrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v8i8 (X86pcmpeqb VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
- (MMX_PCMPEQBrm VR64:$src1, addr:$src2)>;
-def : Pat<(v4i16 (X86pcmpeqw VR64:$src1, VR64:$src2)),
- (MMX_PCMPEQWrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v4i16 (X86pcmpeqw VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
- (MMX_PCMPEQWrm VR64:$src1, addr:$src2)>;
-def : Pat<(v2i32 (X86pcmpeqd VR64:$src1, VR64:$src2)),
- (MMX_PCMPEQDrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v2i32 (X86pcmpeqd VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
- (MMX_PCMPEQDrm VR64:$src1, addr:$src2)>;
-
-def : Pat<(v8i8 (X86pcmpgtb VR64:$src1, VR64:$src2)),
- (MMX_PCMPGTBrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v8i8 (X86pcmpgtb VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
- (MMX_PCMPGTBrm VR64:$src1, addr:$src2)>;
-def : Pat<(v4i16 (X86pcmpgtw VR64:$src1, VR64:$src2)),
- (MMX_PCMPGTWrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v4i16 (X86pcmpgtw VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
- (MMX_PCMPGTWrm VR64:$src1, addr:$src2)>;
-def : Pat<(v2i32 (X86pcmpgtd VR64:$src1, VR64:$src2)),
- (MMX_PCMPGTDrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v2i32 (X86pcmpgtd VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
- (MMX_PCMPGTDrm VR64:$src1, addr:$src2)>;
-
-// CMOV* - Used to implement the SELECT DAG operation. Expanded after
-// instruction selection into a branch sequence.
-let Uses = [EFLAGS], usesCustomInserter = 1 in {
- def CMOV_V1I64 : I<0, Pseudo,
- (outs VR64:$dst), (ins VR64:$t, VR64:$f, i8imm:$cond),
- "#CMOV_V1I64 PSEUDO!",
- [(set VR64:$dst,
- (v1i64 (X86cmov VR64:$t, VR64:$f, imm:$cond,
- EFLAGS)))]>;
-}
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index f5466f8..b912949 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -15,43 +15,6 @@
//===----------------------------------------------------------------------===//
-// SSE scalar FP Instructions
-//===----------------------------------------------------------------------===//
-
-// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded after
-// instruction selection into a branch sequence.
-let Uses = [EFLAGS], usesCustomInserter = 1 in {
- def CMOV_FR32 : I<0, Pseudo,
- (outs FR32:$dst), (ins FR32:$t, FR32:$f, i8imm:$cond),
- "#CMOV_FR32 PSEUDO!",
- [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond,
- EFLAGS))]>;
- def CMOV_FR64 : I<0, Pseudo,
- (outs FR64:$dst), (ins FR64:$t, FR64:$f, i8imm:$cond),
- "#CMOV_FR64 PSEUDO!",
- [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond,
- EFLAGS))]>;
- def CMOV_V4F32 : I<0, Pseudo,
- (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
- "#CMOV_V4F32 PSEUDO!",
- [(set VR128:$dst,
- (v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond,
- EFLAGS)))]>;
- def CMOV_V2F64 : I<0, Pseudo,
- (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
- "#CMOV_V2F64 PSEUDO!",
- [(set VR128:$dst,
- (v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
- EFLAGS)))]>;
- def CMOV_V2I64 : I<0, Pseudo,
- (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
- "#CMOV_V2I64 PSEUDO!",
- [(set VR128:$dst,
- (v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
- EFLAGS)))]>;
-}
-
-//===----------------------------------------------------------------------===//
// SSE 1 & 2 Instructions Classes
//===----------------------------------------------------------------------===//
@@ -82,17 +45,15 @@ multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse",
- !strconcat(SSEVer, !strconcat("_",
- !strconcat(OpcodeStr, FPSizeStr))))
+ [(set RC:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
RC:$src1, RC:$src2))]>;
def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse",
- !strconcat(SSEVer, !strconcat("_",
- !strconcat(OpcodeStr, FPSizeStr))))
+ [(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",
+ SSEVer, "_", OpcodeStr, FPSizeStr))
RC:$src1, mem_cpat:$src2))]>;
}
@@ -142,17 +103,15 @@ multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_",
- !strconcat(SSEVer, !strconcat("_",
- !strconcat(OpcodeStr, FPSizeStr))))
+ [(set RC:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
RC:$src1, RC:$src2))], d>;
def rm_Int : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1,x86memop:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_",
- !strconcat(SSEVer, !strconcat("_",
- !strconcat(OpcodeStr, FPSizeStr))))
+ [(set RC:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
RC:$src1, (mem_frag addr:$src2)))], d>;
}
@@ -221,6 +180,12 @@ def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
// Implicitly promote a 64-bit scalar to a vector.
def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
(INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
+// Implicitly promote a 32-bit scalar to a vector.
+def : Pat<(v8f32 (scalar_to_vector FR32:$src)),
+ (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
+// Implicitly promote a 64-bit scalar to a vector.
+def : Pat<(v4f64 (scalar_to_vector FR64:$src)),
+ (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
let AddedComplexity = 20 in {
// MOVSSrm zeros the high parts of the register; represent this
@@ -403,7 +368,7 @@ multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
string asm_opr> {
def PSrm : PI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
- !strconcat(!strconcat(base_opc,"s"), asm_opr),
+ !strconcat(base_opc, "s", asm_opr),
[(set RC:$dst,
(mov_frag RC:$src1,
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
@@ -411,7 +376,7 @@ multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
def PDrm : PI<opc, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, f64mem:$src2),
- !strconcat(!strconcat(base_opc,"d"), asm_opr),
+ !strconcat(base_opc, "d", asm_opr),
[(set RC:$dst, (v2f64 (mov_frag RC:$src1,
(scalar_to_vector (loadf64 addr:$src2)))))],
SSEPackedDouble>, TB, OpSize;
@@ -598,14 +563,6 @@ defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
// Conversion Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).
-multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
- Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
- string asm, Domain d> {
- def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- [(set DstRC:$dst, (Int SrcRC:$src))], d>;
- def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>;
-}
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
@@ -618,16 +575,6 @@ multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
[(set DstRC:$dst, (Int (ld_frag addr:$src)))]>;
}
-multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
- RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
- PatFrag ld_frag, string asm, Domain d> {
- def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
- asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>;
- def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst),
- (ins DstRC:$src1, x86memop:$src2), asm,
- [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>;
-}
-
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
PatFrag ld_frag, string asm, bit Is2Addr = 1> {
@@ -669,13 +616,11 @@ defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
f32mem, load, "cvtss2si">, XS;
defm Int_CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
f32mem, load, "cvtss2si{q}">, XS, REX_W;
-defm Int_CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
- f128mem, load, "cvtsd2si">, XD;
-defm Int_CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
- f128mem, load, "cvtsd2si">, XD, REX_W;
+defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+ f128mem, load, "cvtsd2si{l}">, XD;
+defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
+ f128mem, load, "cvtsd2si{q}">, XD, REX_W;
-defm CVTSD2SI64 : sse12_cvt_s_np<0x2D, VR128, GR64, f64mem, "cvtsd2si{q}">, XD,
- REX_W;
let isAsmParserOnly = 1 in {
defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
@@ -705,29 +650,6 @@ let Constraints = "$src1 = $dst" in {
"cvtsi2sd">, XD, REX_W;
}
-// Instructions below don't have an AVX form.
-defm Int_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi,
- f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}",
- SSEPackedSingle>, TB;
-defm Int_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi,
- f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}",
- SSEPackedDouble>, TB, OpSize;
-defm Int_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi,
- f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}",
- SSEPackedSingle>, TB;
-defm Int_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi,
- f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}",
- SSEPackedDouble>, TB, OpSize;
-defm Int_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd,
- i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}",
- SSEPackedDouble>, TB, OpSize;
-let Constraints = "$src1 = $dst" in {
- defm Int_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128,
- int_x86_sse_cvtpi2ps,
- i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
- SSEPackedSingle>, TB;
-}
-
/// SSE 1 Only
// Aliases for intrinsics
@@ -738,10 +660,10 @@ defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse_cvttss2si64, f32mem, load,
"cvttss2si">, XS, VEX, VEX_W;
defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
- f128mem, load, "cvttss2si">, XD, VEX;
+ f128mem, load, "cvttsd2si">, XD, VEX;
defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse2_cvttsd2si64, f128mem, load,
- "cvttss2si">, XD, VEX, VEX_W;
+ "cvttsd2si">, XD, VEX, VEX_W;
}
defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
f32mem, load, "cvttss2si">, XS;
@@ -749,10 +671,10 @@ defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse_cvttss2si64, f32mem, load,
"cvttss2si{q}">, XS, REX_W;
defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
- f128mem, load, "cvttss2si">, XD;
+ f128mem, load, "cvttsd2si">, XD;
defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse2_cvttsd2si64, f128mem, load,
- "cvttss2si{q}">, XD, REX_W;
+ "cvttsd2si{q}">, XD, REX_W;
let isAsmParserOnly = 1, Pattern = []<dag> in {
defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
@@ -790,6 +712,9 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V;
}
+def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
+ Requires<[HasAVX]>;
+
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fround FR64:$src))]>;
@@ -817,6 +742,9 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, XS, VEX_4V, Requires<[HasAVX, OptForSize]>;
}
+def : Pat<(f64 (fextend FR32:$src)), (VCVTSS2SDrr FR32:$src, FR32:$src)>,
+ Requires<[HasAVX]>;
+
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (fextend FR32:$src))]>, XS,
@@ -973,9 +901,13 @@ def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
}
def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}", []>;
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttps2dq VR128:$src))]>;
def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}", []>;
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttps2dq (memop addr:$src)))]>;
let isAsmParserOnly = 1 in {
@@ -990,16 +922,6 @@ def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
(memop addr:$src)))]>,
XS, VEX, Requires<[HasAVX]>;
}
-def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (int_x86_sse2_cvttps2dq VR128:$src))]>,
- XS, Requires<[HasSSE2]>;
-def Int_CVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttps2dq
- (memop addr:$src)))]>,
- XS, Requires<[HasSSE2]>;
let isAsmParserOnly = 1 in {
def Int_VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst),
@@ -1013,13 +935,13 @@ def Int_VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst),
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
(memop addr:$src)))]>, VEX;
}
-def Int_CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvttpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
-def Int_CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
- "cvttpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
- (memop addr:$src)))]>;
+def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
+def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
+ (memop addr:$src)))]>;
let isAsmParserOnly = 1 in {
// The assembler can recognize rr 256-bit instructions by seeing a ymm
@@ -1469,9 +1391,11 @@ let AddedComplexity = 10 in {
/// sse12_extr_sign_mask - sse 1 & 2 unpack and interleave
multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
Domain d> {
- def rr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
- !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(set GR32:$dst, (Int RC:$src))], d>;
+ def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>, REX_W;
}
// Mask creation
@@ -1522,6 +1446,12 @@ def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
[(set FR64:$dst, fpimm0)]>,
Requires<[HasSSE2]>, TB, OpSize;
+def VFsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
+ [(set FR32:$dst, fp32imm0)]>,
+ Requires<[HasAVX]>, TB, OpSize, VEX_4V;
+def VFsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
+ [(set FR64:$dst, fpimm0)]>,
+ Requires<[HasAVX]>, TB, OpSize, VEX_4V;
}
// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
@@ -1654,19 +1584,13 @@ defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>;
let isCommutable = 0 in
defm ANDN : sse12_fp_packed_logical<0x55, "andn", undef /* dummy */, 1, [
// single r+r
- [(set VR128:$dst, (v2i64 (and (xor VR128:$src1,
- (bc_v2i64 (v4i32 immAllOnesV))),
- VR128:$src2)))],
+ [(set VR128:$dst, (X86pandn VR128:$src1, VR128:$src2))],
// double r+r
- [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
- (bc_v2i64 (v2f64 VR128:$src2))))],
+ [],
// single r+m
- [(set VR128:$dst, (v2i64 (and (xor (bc_v2i64 (v4f32 VR128:$src1)),
- (bc_v2i64 (v4i32 immAllOnesV))),
- (memopv2i64 addr:$src2))))],
+ [(set VR128:$dst, (X86pandn VR128:$src1, (memopv2i64 addr:$src2)))],
// double r+m
- [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
- (memopv2i64 addr:$src2)))]]>;
+ []]>;
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Arithmetic Instructions
@@ -2170,7 +2094,7 @@ def : Pat<(X86SFence), (SFENCE)>;
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
// FIXME: Change encoding to pseudo! This is blocked right now by the x86
-// JIT implementatioan, it does not expand the instructions below like
+// JIT implementation, it does not expand the instructions below like
// X86MCInstLower does.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isCodeGenOnly = 1 in {
@@ -2277,6 +2201,10 @@ let neverHasSideEffects = 1 in
def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", []>;
+def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}",
+ []>, XS, Requires<[HasSSE2]>;
+
let canFoldAsLoad = 1, mayLoad = 1 in {
def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqa\t{$src, $dst|$dst, $src}",
@@ -2606,15 +2534,11 @@ let ExeDomain = SSEPackedInt in {
}
def PANDNrr : PDI<0xDF, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
- VR128:$src2)))]>;
+ "pandn\t{$src2, $dst|$dst, $src2}", []>;
def PANDNrm : PDI<0xDF, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
- (memopv2i64 addr:$src2))))]>;
+ "pandn\t{$src2, $dst|$dst, $src2}", []>;
}
} // Constraints = "$src1 = $dst"
@@ -3009,6 +2933,13 @@ def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
+def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector GR64:$src)))]>;
+def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert GR64:$src))]>;
// Move Int Doubleword to Single Scalar
@@ -3051,6 +2982,21 @@ def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
[(store (i32 (vector_extract (v4i32 VR128:$src),
(iPTR 0))), addr:$dst)]>;
+def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
+ (iPTR 0)))]>;
+def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>;
+
+def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (bitconvert FR64:$src))]>;
+def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
+
// Move Scalar Single to Double Int
let isAsmParserOnly = 1 in {
def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
@@ -3532,18 +3478,6 @@ let Constraints = "$src1 = $dst" in {
// SSSE3 - Packed Absolute Instructions
//===---------------------------------------------------------------------===//
-/// SS3I_unop_rm_int_mm - Simple SSSE3 unary whose type can be v*{i8,i16,i32}.
-multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag64, Intrinsic IntId64> {
- def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR64:$dst, (IntId64 VR64:$src))]>;
-
- def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR64:$dst,
- (IntId64 (bitconvert (mem_frag64 addr:$src))))]>;
-}
/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
@@ -3572,19 +3506,11 @@ let isAsmParserOnly = 1, Predicates = [HasAVX] in {
}
defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv16i8,
- int_x86_ssse3_pabs_b_128>,
- SS3I_unop_rm_int_mm<0x1C, "pabsb", memopv8i8,
- int_x86_ssse3_pabs_b>;
-
+ int_x86_ssse3_pabs_b_128>;
defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv8i16,
- int_x86_ssse3_pabs_w_128>,
- SS3I_unop_rm_int_mm<0x1D, "pabsw", memopv4i16,
- int_x86_ssse3_pabs_w>;
-
+ int_x86_ssse3_pabs_w_128>;
defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv4i32,
- int_x86_ssse3_pabs_d_128>,
- SS3I_unop_rm_int_mm<0x1E, "pabsd", memopv2i32,
- int_x86_ssse3_pabs_d>;
+ int_x86_ssse3_pabs_d_128>;
//===---------------------------------------------------------------------===//
// SSSE3 - Packed Binary Operator Instructions
@@ -3611,20 +3537,6 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
(IntId128 VR128:$src1,
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
-multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag64, Intrinsic IntId64> {
- let isCommutable = 1 in
- def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src1, VR64:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>;
- def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
- (ins VR64:$src1, i64mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst,
- (IntId64 VR64:$src1,
- (bitconvert (memopv8i8 addr:$src2))))]>;
-}
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
let isCommutable = 0 in {
@@ -3659,54 +3571,30 @@ defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv8i16,
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv8i16,
- int_x86_ssse3_phadd_w_128>,
- SS3I_binop_rm_int_mm<0x01, "phaddw", memopv4i16,
- int_x86_ssse3_phadd_w>;
+ int_x86_ssse3_phadd_w_128>;
defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv4i32,
- int_x86_ssse3_phadd_d_128>,
- SS3I_binop_rm_int_mm<0x02, "phaddd", memopv2i32,
- int_x86_ssse3_phadd_d>;
+ int_x86_ssse3_phadd_d_128>;
defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv8i16,
- int_x86_ssse3_phadd_sw_128>,
- SS3I_binop_rm_int_mm<0x03, "phaddsw", memopv4i16,
- int_x86_ssse3_phadd_sw>;
+ int_x86_ssse3_phadd_sw_128>;
defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv8i16,
- int_x86_ssse3_phsub_w_128>,
- SS3I_binop_rm_int_mm<0x05, "phsubw", memopv4i16,
- int_x86_ssse3_phsub_w>;
+ int_x86_ssse3_phsub_w_128>;
defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv4i32,
- int_x86_ssse3_phsub_d_128>,
- SS3I_binop_rm_int_mm<0x06, "phsubd", memopv2i32,
- int_x86_ssse3_phsub_d>;
+ int_x86_ssse3_phsub_d_128>;
defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv8i16,
- int_x86_ssse3_phsub_sw_128>,
- SS3I_binop_rm_int_mm<0x07, "phsubsw", memopv4i16,
- int_x86_ssse3_phsub_sw>;
+ int_x86_ssse3_phsub_sw_128>;
defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv16i8,
- int_x86_ssse3_pmadd_ub_sw_128>,
- SS3I_binop_rm_int_mm<0x04, "pmaddubsw", memopv8i8,
- int_x86_ssse3_pmadd_ub_sw>;
- defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv8i8,
- int_x86_ssse3_pshuf_b_128>,
- SS3I_binop_rm_int_mm<0x00, "pshufb", memopv8i8,
- int_x86_ssse3_pshuf_b>;
+ int_x86_ssse3_pmadd_ub_sw_128>;
+ defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv16i8,
+ int_x86_ssse3_pshuf_b_128>;
defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv16i8,
- int_x86_ssse3_psign_b_128>,
- SS3I_binop_rm_int_mm<0x08, "psignb", memopv8i8,
- int_x86_ssse3_psign_b>;
+ int_x86_ssse3_psign_b_128>;
defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv8i16,
- int_x86_ssse3_psign_w_128>,
- SS3I_binop_rm_int_mm<0x09, "psignw", memopv4i16,
- int_x86_ssse3_psign_w>;
+ int_x86_ssse3_psign_w_128>;
defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv4i32,
- int_x86_ssse3_psign_d_128>,
- SS3I_binop_rm_int_mm<0x0A, "psignd", memopv2i32,
- int_x86_ssse3_psign_d>;
+ int_x86_ssse3_psign_d_128>;
}
defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv8i16,
- int_x86_ssse3_pmul_hr_sw_128>,
- SS3I_binop_rm_int_mm<0x0B, "pmulhrsw", memopv4i16,
- int_x86_ssse3_pmul_hr_sw>;
+ int_x86_ssse3_pmul_hr_sw_128>;
}
def : Pat<(X86pshufb VR128:$src, VR128:$mask),
@@ -3714,19 +3602,17 @@ def : Pat<(X86pshufb VR128:$src, VR128:$mask),
def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
(PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>;
+def : Pat<(X86psignb VR128:$src1, VR128:$src2),
+ (PSIGNBrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>;
+def : Pat<(X86psignw VR128:$src1, VR128:$src2),
+ (PSIGNWrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>;
+def : Pat<(X86psignd VR128:$src1, VR128:$src2),
+ (PSIGNDrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>;
+
//===---------------------------------------------------------------------===//
// SSSE3 - Packed Align Instruction Patterns
//===---------------------------------------------------------------------===//
-multiclass ssse3_palign_mm<string asm> {
- def R64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src1, VR64:$src2, i8imm:$src3),
- !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
- def R64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
- (ins VR64:$src1, i64mem:$src2, i8imm:$src3),
- !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
-}
-
multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
@@ -3747,28 +3633,9 @@ multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
let isAsmParserOnly = 1, Predicates = [HasAVX] in
defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
- defm PALIGN : ssse3_palign<"palignr">,
- ssse3_palign_mm<"palignr">;
+ defm PALIGN : ssse3_palign<"palignr">;
let AddedComplexity = 5 in {
-
-def : Pat<(v1i64 (palign:$src3 VR64:$src1, VR64:$src2)),
- (PALIGNR64rr VR64:$src2, VR64:$src1,
- (SHUFFLE_get_palign_imm VR64:$src3))>,
- Requires<[HasSSSE3]>;
-def : Pat<(v2i32 (palign:$src3 VR64:$src1, VR64:$src2)),
- (PALIGNR64rr VR64:$src2, VR64:$src1,
- (SHUFFLE_get_palign_imm VR64:$src3))>,
- Requires<[HasSSSE3]>;
-def : Pat<(v4i16 (palign:$src3 VR64:$src1, VR64:$src2)),
- (PALIGNR64rr VR64:$src2, VR64:$src1,
- (SHUFFLE_get_palign_imm VR64:$src3))>,
- Requires<[HasSSSE3]>;
-def : Pat<(v8i8 (palign:$src3 VR64:$src1, VR64:$src2)),
- (PALIGNR64rr VR64:$src2, VR64:$src1,
- (SHUFFLE_get_palign_imm VR64:$src3))>,
- Requires<[HasSSSE3]>;
-
def : Pat<(v4i32 (palign:$src3 VR128:$src1, VR128:$src2)),
(PALIGNR128rr VR128:$src2, VR128:$src1,
(SHUFFLE_get_palign_imm VR128:$src3))>,
@@ -3792,10 +3659,27 @@ def : Pat<(v16i8 (palign:$src3 VR128:$src1, VR128:$src2)),
//===---------------------------------------------------------------------===//
// Thread synchronization
-def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor",
- [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>;
-def MWAIT : I<0x01, MRM_C9, (outs), (ins), "mwait",
- [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>;
+let usesCustomInserter = 1 in {
+def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
+ [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>;
+def MWAIT : PseudoI<(outs), (ins GR32:$src1, GR32:$src2),
+ [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>;
+}
+
+let Uses = [EAX, ECX, EDX] in
+def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, TB,
+ Requires<[HasSSE3]>;
+let Uses = [ECX, EAX] in
+def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", []>, TB,
+ Requires<[HasSSE3]>;
+
+def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>;
+def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>;
+
+def : InstAlias<"monitor %eax, %ecx, %edx", (MONITORrrr)>,
+ Requires<[In32BitMode]>;
+def : InstAlias<"monitor %rax, %rcx, %rdx", (MONITORrrr)>,
+ Requires<[In64BitMode]>;
//===---------------------------------------------------------------------===//
// Non-Instruction Patterns
@@ -3811,7 +3695,7 @@ let Predicates = [HasSSE2] in
(CVTSS2SDrm addr:$src)>;
// bit_convert
-let Predicates = [HasSSE2] in {
+let Predicates = [HasXMMInt] in {
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
@@ -3844,6 +3728,10 @@ let Predicates = [HasSSE2] in {
def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
}
+let Predicates = [HasAVX] in {
+ def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
+}
+
// Move scalar to XMM zero-extended
// movd to XMM register zero-extends
let AddedComplexity = 15 in {
@@ -4017,36 +3905,11 @@ def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
(MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
-// Some special case pandn patterns.
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
- VR128:$src2)),
- (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
- VR128:$src2)),
- (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
- VR128:$src2)),
- (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
- (memop addr:$src2))),
- (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
- (memop addr:$src2))),
- (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
- (memop addr:$src2))),
- (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-
// vector -> vector casts
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
(Int_CVTDQ2PSrr VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
- (Int_CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v2f64 (sint_to_fp (v2i32 VR64:$src))),
- (Int_CVTPI2PDrr VR64:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v2i32 (fp_to_sint (v2f64 VR128:$src))),
- (Int_CVTTPD2PIrr VR128:$src)>, Requires<[HasSSE2]>;
+ (CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>;
// Use movaps / movups for SSE integer load / store (one byte shorter).
let Predicates = [HasSSE1] in {
@@ -4504,7 +4367,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
Intrinsic V4F32Int, Intrinsic V2F64Int> {
// Intrinsic operation, reg.
// Vector intrinsic operation, reg
- def PSr_Int : SS4AIi8<opcps, MRMSrcReg,
+ def PSr : SS4AIi8<opcps, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -4512,7 +4375,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
OpSize;
// Vector intrinsic operation, mem
- def PSm_Int : Ii8<opcps, MRMSrcMem,
+ def PSm : Ii8<opcps, MRMSrcMem,
(outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -4522,7 +4385,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
Requires<[HasSSE41]>;
// Vector intrinsic operation, reg
- def PDr_Int : SS4AIi8<opcpd, MRMSrcReg,
+ def PDr : SS4AIi8<opcpd, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -4530,7 +4393,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
OpSize;
// Vector intrinsic operation, mem
- def PDm_Int : SS4AIi8<opcpd, MRMSrcMem,
+ def PDm : SS4AIi8<opcpd, MRMSrcMem,
(outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -4543,28 +4406,28 @@ multiclass sse41_fp_unop_rm_avx_p<bits<8> opcps, bits<8> opcpd,
RegisterClass RC, X86MemOperand x86memop, string OpcodeStr> {
// Intrinsic operation, reg.
// Vector intrinsic operation, reg
- def PSr : SS4AIi8<opcps, MRMSrcReg,
+ def PSr_AVX : SS4AIi8<opcps, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, OpSize;
// Vector intrinsic operation, mem
- def PSm : Ii8<opcps, MRMSrcMem,
+ def PSm_AVX : Ii8<opcps, MRMSrcMem,
(outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, TA, OpSize, Requires<[HasSSE41]>;
// Vector intrinsic operation, reg
- def PDr : SS4AIi8<opcpd, MRMSrcReg,
+ def PDr_AVX : SS4AIi8<opcpd, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, OpSize;
// Vector intrinsic operation, mem
- def PDm : SS4AIi8<opcpd, MRMSrcMem,
+ def PDm_AVX : SS4AIi8<opcpd, MRMSrcMem,
(outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -4576,7 +4439,7 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
Intrinsic F32Int,
Intrinsic F64Int, bit Is2Addr = 1> {
// Intrinsic operation, reg.
- def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
+ def SSr : SS4AIi8<opcss, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
@@ -4587,7 +4450,7 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
OpSize;
// Intrinsic operation, mem.
- def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
+ def SSm : SS4AIi8<opcss, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
@@ -4599,7 +4462,7 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
OpSize;
// Intrinsic operation, reg.
- def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
+ def SDr : SS4AIi8<opcsd, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
@@ -4610,7 +4473,7 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
OpSize;
// Intrinsic operation, mem.
- def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
+ def SDm : SS4AIi8<opcsd, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
@@ -4625,28 +4488,28 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
multiclass sse41_fp_binop_rm_avx_s<bits<8> opcss, bits<8> opcsd,
string OpcodeStr> {
// Intrinsic operation, reg.
- def SSr : SS4AIi8<opcss, MRMSrcReg,
+ def SSr_AVX : SS4AIi8<opcss, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, OpSize;
// Intrinsic operation, mem.
- def SSm : SS4AIi8<opcss, MRMSrcMem,
+ def SSm_AVX : SS4AIi8<opcss, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, OpSize;
// Intrinsic operation, reg.
- def SDr : SS4AIi8<opcsd, MRMSrcReg,
+ def SDr_AVX : SS4AIi8<opcsd, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, OpSize;
// Intrinsic operation, mem.
- def SDm : SS4AIi8<opcsd, MRMSrcMem,
+ def SDm_AVX : SS4AIi8<opcsd, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
@@ -4743,6 +4606,29 @@ defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>;
// SSE4.1 - Misc Instructions
//===----------------------------------------------------------------------===//
+def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "popcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (ctpop GR16:$src))]>, OpSize, XS;
+def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "popcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (ctpop (loadi16 addr:$src)))]>, OpSize, XS;
+
+def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "popcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (ctpop GR32:$src))]>, XS;
+def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "popcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (ctpop (loadi32 addr:$src)))]>, XS;
+
+def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "popcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (ctpop GR64:$src))]>, XS;
+def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "popcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (ctpop (loadi64 addr:$src)))]>, XS;
+
+
+
// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
Intrinsic IntId128> {
@@ -4981,6 +4867,9 @@ defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
+def : Pat<(X86pblendv VR128:$src1, VR128:$src2, XMM0),
+ (PBLENDVBrr0 VR128:$src1, VR128:$src2)>;
+
let isAsmParserOnly = 1, Predicates = [HasAVX] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}",
@@ -5032,12 +4921,12 @@ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
// Packed Compare Implicit Length Strings, Return Mask
multiclass pseudo_pcmpistrm<string asm> {
- def REG : Ii8<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3), !strconcat(asm, "rr PSEUDO"),
+ def REG : PseudoI<(outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
[(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
imm:$src3))]>;
- def MEM : Ii8<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, i8imm:$src3), !strconcat(asm, "rm PSEUDO"),
+ def MEM : PseudoI<(outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
[(set VR128:$dst, (int_x86_sse42_pcmpistrm128
VR128:$src1, (load addr:$src2), imm:$src3))]>;
}
@@ -5068,12 +4957,12 @@ let Defs = [XMM0, EFLAGS] in {
// Packed Compare Explicit Length Strings, Return Mask
multiclass pseudo_pcmpestrm<string asm> {
- def REG : Ii8<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src3, i8imm:$src5), !strconcat(asm, "rr PSEUDO"),
+ def REG : PseudoI<(outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
[(set VR128:$dst, (int_x86_sse42_pcmpestrm128
VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
- def MEM : Ii8<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src3, i8imm:$src5), !strconcat(asm, "rm PSEUDO"),
+ def MEM : PseudoI<(outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
[(set VR128:$dst, (int_x86_sse42_pcmpestrm128
VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>;
}
@@ -5555,6 +5444,23 @@ def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3),
def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3),
(VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+
def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2),
(VEXTRACTF128rr VR256:$src1, imm:$src2)>;
def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
@@ -5562,6 +5468,23 @@ def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2),
(VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4f32 (VEXTRACTF128rr
+ (v8f32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v2f64 (VEXTRACTF128rr
+ (v4f64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4i32 (VEXTRACTF128rr
+ (v8i32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v2i64 (VEXTRACTF128rr
+ (v4i64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+
def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
(VBROADCASTF128 addr:$src)>;
@@ -5673,19 +5596,14 @@ def : Pat<(X86Movddup (memopv2f64 addr:$src)),
def : Pat<(X86Movddup (memopv2f64 addr:$src)),
(MOVDDUPrm addr:$src)>;
-def : Pat<(X86Movddup (bc_v4f32 (memopv2f64 addr:$src))),
- (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-def : Pat<(X86Movddup (bc_v4f32 (memopv2f64 addr:$src))),
- (MOVDDUPrm addr:$src)>;
-
-def : Pat<(X86Movddup (memopv2i64 addr:$src)),
+def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-def : Pat<(X86Movddup (memopv2i64 addr:$src)),
+def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
(MOVDDUPrm addr:$src)>;
-def : Pat<(X86Movddup (bc_v4i32 (memopv2i64 addr:$src))),
+def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-def : Pat<(X86Movddup (bc_v4i32 (memopv2i64 addr:$src))),
+def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
(MOVDDUPrm addr:$src)>;
def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
@@ -5700,6 +5618,7 @@ def : Pat<(X86Movddup (bc_v2f64
(v2i64 (scalar_to_vector (loadi64 addr:$src))))),
(MOVDDUPrm addr:$src)>;
+
// Shuffle with UNPCKLPS
def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
(VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
@@ -5724,9 +5643,9 @@ def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
// Shuffle with UNPCKLPD
def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
- (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+ (VUNPCKLPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
- (UNPCKLPSrm VR128:$src1, addr:$src2)>;
+ (UNPCKLPDrm VR128:$src1, addr:$src2)>;
def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
(VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
@@ -5735,9 +5654,9 @@ def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
// Shuffle with UNPCKHPD
def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
- (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+ (VUNPCKHPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
- (UNPCKLPSrm VR128:$src1, addr:$src2)>;
+ (UNPCKHPDrm VR128:$src1, addr:$src2)>;
def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
(VUNPCKHPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
@@ -5812,10 +5731,18 @@ def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
(MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
+// FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the problem
+// is during lowering, where it's not possible to recognize the load fold cause
+// it has two uses through a bitcast. One use disappears at isel time and the
+// fold opportunity reappears.
+def : Pat<(v2f64 (X86Movddup VR128:$src)),
+ (UNPCKLPDrr VR128:$src, VR128:$src)>;
+
// Shuffle with MOVLHPD
def : Pat<(v2f64 (X86Movlhpd VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))),
(MOVHPDrm VR128:$src1, addr:$src2)>;
+
// FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem
// is during lowering, where it's not possible to recognize the load fold cause
// it has two uses through a bitcast. One use disappears at isel time and the
@@ -5878,31 +5805,18 @@ def : Pat<(X86Movsldup (memopv4f32 addr:$src)),
(MOVSLDUPrm addr:$src)>;
// Shuffle with PSHUFHW
-def : Pat<(v8i16 (X86PShufhwLd addr:$src, (i8 imm:$imm))),
- (PSHUFHWmi addr:$src, imm:$imm)>;
def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))),
(PSHUFHWri VR128:$src, imm:$imm)>;
def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))),
(PSHUFHWmi addr:$src, imm:$imm)>;
// Shuffle with PSHUFLW
-def : Pat<(v8i16 (X86PShuflwLd addr:$src, (i8 imm:$imm))),
- (PSHUFLWmi addr:$src, imm:$imm)>;
def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))),
(PSHUFLWri VR128:$src, imm:$imm)>;
def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))),
(PSHUFLWmi addr:$src, imm:$imm)>;
// Shuffle with PALIGN
-def : Pat<(v1i64 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))),
- (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>;
-def : Pat<(v2i32 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))),
- (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>;
-def : Pat<(v4i16 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))),
- (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>;
-def : Pat<(v8i8 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))),
- (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>;
-
def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
@@ -5920,6 +5834,15 @@ def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
def : Pat<(X86Movlps VR128:$src1,
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
(MOVLPSrm VR128:$src1, addr:$src2)>;
+// FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
+// is during lowering, where it's not possible to recognize the load fold cause
+// it has two uses through a bitcast. One use disappears at isel time and the
+// fold opportunity reappears.
+def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>;
+
+def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>;
// Shuffle with MOVLPD
def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td
new file mode 100644
index 0000000..8278568
--- /dev/null
+++ b/lib/Target/X86/X86InstrShiftRotate.td
@@ -0,0 +1,746 @@
+//===- X86InstrShiftRotate.td - Shift and Rotate Instrs ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the shift and rotate instructions.
+//
+//===----------------------------------------------------------------------===//
+
+// FIXME: Someone needs to smear multipattern goodness all over this file.
+
+let Defs = [EFLAGS] in {
+
+let Constraints = "$src1 = $dst" in {
+let Uses = [CL] in {
+def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "shl{b}\t{%cl, $dst|$dst, CL}",
+ [(set GR8:$dst, (shl GR8:$src1, CL))]>;
+def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
+ "shl{w}\t{%cl, $dst|$dst, CL}",
+ [(set GR16:$dst, (shl GR16:$src1, CL))]>, OpSize;
+def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
+ "shl{l}\t{%cl, $dst|$dst, CL}",
+ [(set GR32:$dst, (shl GR32:$src1, CL))]>;
+def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
+ "shl{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (shl GR64:$src1, CL))]>;
+} // Uses = [CL]
+
+def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "shl{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>;
+
+let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
+def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "shl{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
+def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "shl{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))]>;
+def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst),
+ (ins GR64:$src1, i8imm:$src2),
+ "shl{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>;
+
+// NOTE: We don't include patterns for shifts of a register by one, because
+// 'add reg,reg' is cheaper (and we have a Pat pattern for shift-by-one).
+def SHL8r1 : I<0xD0, MRM4r, (outs GR8:$dst), (ins GR8:$src1),
+ "shl{b}\t$dst", []>;
+def SHL16r1 : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
+ "shl{w}\t$dst", []>, OpSize;
+def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
+ "shl{l}\t$dst", []>;
+def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
+ "shl{q}\t$dst", []>;
+} // isConvertibleToThreeAddress = 1
+} // Constraints = "$src = $dst"
+
+
+// FIXME: Why do we need an explicit "Uses = [CL]" when the instr has a pattern
+// using CL?
+let Uses = [CL] in {
+def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst),
+ "shl{b}\t{%cl, $dst|$dst, CL}",
+ [(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>;
+def SHL16mCL : I<0xD3, MRM4m, (outs), (ins i16mem:$dst),
+ "shl{w}\t{%cl, $dst|$dst, CL}",
+ [(store (shl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+def SHL32mCL : I<0xD3, MRM4m, (outs), (ins i32mem:$dst),
+ "shl{l}\t{%cl, $dst|$dst, CL}",
+ [(store (shl (loadi32 addr:$dst), CL), addr:$dst)]>;
+def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst),
+ "shl{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (shl (loadi64 addr:$dst), CL), addr:$dst)]>;
+}
+def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "shl{b}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHL16mi : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "shl{w}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+def SHL32mi : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "shl{l}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "shl{q}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+// Shift by 1
+def SHL8m1 : I<0xD0, MRM4m, (outs), (ins i8mem :$dst),
+ "shl{b}\t$dst",
+ [(store (shl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+def SHL16m1 : I<0xD1, MRM4m, (outs), (ins i16mem:$dst),
+ "shl{w}\t$dst",
+ [(store (shl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+def SHL32m1 : I<0xD1, MRM4m, (outs), (ins i32mem:$dst),
+ "shl{l}\t$dst",
+ [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst),
+ "shl{q}\t$dst",
+ [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let Constraints = "$src1 = $dst" in {
+let Uses = [CL] in {
+def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "shr{b}\t{%cl, $dst|$dst, CL}",
+ [(set GR8:$dst, (srl GR8:$src1, CL))]>;
+def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
+ "shr{w}\t{%cl, $dst|$dst, CL}",
+ [(set GR16:$dst, (srl GR16:$src1, CL))]>, OpSize;
+def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
+ "shr{l}\t{%cl, $dst|$dst, CL}",
+ [(set GR32:$dst, (srl GR32:$src1, CL))]>;
+def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
+ "shr{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (srl GR64:$src1, CL))]>;
+}
+
+def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "shr{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))]>;
+def SHR16ri : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "shr{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
+def SHR32ri : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "shr{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))]>;
+def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
+ "shr{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))]>;
+
+// Shift right by 1
+def SHR8r1 : I<0xD0, MRM5r, (outs GR8:$dst), (ins GR8:$src1),
+ "shr{b}\t$dst",
+ [(set GR8:$dst, (srl GR8:$src1, (i8 1)))]>;
+def SHR16r1 : I<0xD1, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
+ "shr{w}\t$dst",
+ [(set GR16:$dst, (srl GR16:$src1, (i8 1)))]>, OpSize;
+def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
+ "shr{l}\t$dst",
+ [(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>;
+def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
+ "shr{q}\t$dst",
+ [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>;
+} // Constraints = "$src = $dst"
+
+
+let Uses = [CL] in {
+def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
+ "shr{b}\t{%cl, $dst|$dst, CL}",
+ [(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>;
+def SHR16mCL : I<0xD3, MRM5m, (outs), (ins i16mem:$dst),
+ "shr{w}\t{%cl, $dst|$dst, CL}",
+ [(store (srl (loadi16 addr:$dst), CL), addr:$dst)]>,
+ OpSize;
+def SHR32mCL : I<0xD3, MRM5m, (outs), (ins i32mem:$dst),
+ "shr{l}\t{%cl, $dst|$dst, CL}",
+ [(store (srl (loadi32 addr:$dst), CL), addr:$dst)]>;
+def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst),
+ "shr{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (srl (loadi64 addr:$dst), CL), addr:$dst)]>;
+}
+def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "shr{b}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHR16mi : Ii8<0xC1, MRM5m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "shr{w}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+def SHR32mi : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "shr{l}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "shr{q}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+// Shift by 1
+def SHR8m1 : I<0xD0, MRM5m, (outs), (ins i8mem :$dst),
+ "shr{b}\t$dst",
+ [(store (srl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+def SHR16m1 : I<0xD1, MRM5m, (outs), (ins i16mem:$dst),
+ "shr{w}\t$dst",
+ [(store (srl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,OpSize;
+def SHR32m1 : I<0xD1, MRM5m, (outs), (ins i32mem:$dst),
+ "shr{l}\t$dst",
+ [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst),
+ "shr{q}\t$dst",
+ [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let Constraints = "$src1 = $dst" in {
+let Uses = [CL] in {
+def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "sar{b}\t{%cl, $dst|$dst, CL}",
+ [(set GR8:$dst, (sra GR8:$src1, CL))]>;
+def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
+ "sar{w}\t{%cl, $dst|$dst, CL}",
+ [(set GR16:$dst, (sra GR16:$src1, CL))]>, OpSize;
+def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
+ "sar{l}\t{%cl, $dst|$dst, CL}",
+ [(set GR32:$dst, (sra GR32:$src1, CL))]>;
+def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
+ "sar{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (sra GR64:$src1, CL))]>;
+}
+
+def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "sar{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))]>;
+def SAR16ri : Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "sar{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))]>,
+ OpSize;
+def SAR32ri : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "sar{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))]>;
+def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst),
+ (ins GR64:$src1, i8imm:$src2),
+ "sar{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))]>;
+
+// Shift by 1
+def SAR8r1 : I<0xD0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "sar{b}\t$dst",
+ [(set GR8:$dst, (sra GR8:$src1, (i8 1)))]>;
+def SAR16r1 : I<0xD1, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
+ "sar{w}\t$dst",
+ [(set GR16:$dst, (sra GR16:$src1, (i8 1)))]>, OpSize;
+def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
+ "sar{l}\t$dst",
+ [(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>;
+def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
+ "sar{q}\t$dst",
+ [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>;
+} // Constraints = "$src = $dst"
+
+
+let Uses = [CL] in {
+def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
+ "sar{b}\t{%cl, $dst|$dst, CL}",
+ [(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>;
+def SAR16mCL : I<0xD3, MRM7m, (outs), (ins i16mem:$dst),
+ "sar{w}\t{%cl, $dst|$dst, CL}",
+ [(store (sra (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst),
+ "sar{l}\t{%cl, $dst|$dst, CL}",
+ [(store (sra (loadi32 addr:$dst), CL), addr:$dst)]>;
+def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst),
+ "sar{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (sra (loadi64 addr:$dst), CL), addr:$dst)]>;
+}
+def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "sar{b}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SAR16mi : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "sar{w}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+def SAR32mi : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "sar{l}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SAR64mi : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "sar{q}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+// Shift by 1
+def SAR8m1 : I<0xD0, MRM7m, (outs), (ins i8mem :$dst),
+ "sar{b}\t$dst",
+ [(store (sra (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+def SAR16m1 : I<0xD1, MRM7m, (outs), (ins i16mem:$dst),
+ "sar{w}\t$dst",
+ [(store (sra (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+def SAR32m1 : I<0xD1, MRM7m, (outs), (ins i32mem:$dst),
+ "sar{l}\t$dst",
+ [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
+ "sar{q}\t$dst",
+ [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+//===----------------------------------------------------------------------===//
+// Rotate instructions
+//===----------------------------------------------------------------------===//
+
+let Constraints = "$src1 = $dst" in {
+def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
+ "rcl{b}\t$dst", []>;
+def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
+ "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
+ "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
+
+def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
+ "rcl{w}\t$dst", []>, OpSize;
+def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
+ "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+let Uses = [CL] in
+def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
+ "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+
+def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
+ "rcl{l}\t$dst", []>;
+def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
+ "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
+ "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
+
+
+def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src1),
+ "rcl{q}\t$dst", []>;
+def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt),
+ "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src1),
+ "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
+
+
+def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
+ "rcr{b}\t$dst", []>;
+def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
+ "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
+ "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
+
+def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
+ "rcr{w}\t$dst", []>, OpSize;
+def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
+ "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+let Uses = [CL] in
+def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
+ "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+
+def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
+ "rcr{l}\t$dst", []>;
+def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
+ "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
+ "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
+
+def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
+ "rcr{q}\t$dst", []>;
+def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt),
+ "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
+ "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
+
+} // Constraints = "$src = $dst"
+
+def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
+ "rcl{b}\t$dst", []>;
+def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt),
+ "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst),
+ "rcl{w}\t$dst", []>, OpSize;
+def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt),
+ "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst),
+ "rcl{l}\t$dst", []>;
+def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt),
+ "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst),
+ "rcl{q}\t$dst", []>;
+def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt),
+ "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+
+def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst),
+ "rcr{b}\t$dst", []>;
+def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt),
+ "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst),
+ "rcr{w}\t$dst", []>, OpSize;
+def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt),
+ "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst),
+ "rcr{l}\t$dst", []>;
+def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt),
+ "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst),
+ "rcr{q}\t$dst", []>;
+def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, i8imm:$cnt),
+ "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+
+let Uses = [CL] in {
+def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst),
+ "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
+def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst),
+ "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+def RCL32mCL : I<0xD3, MRM2m, (outs), (ins i32mem:$dst),
+ "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
+def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst),
+ "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
+
+def RCR8mCL : I<0xD2, MRM3m, (outs), (ins i8mem:$dst),
+ "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
+def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst),
+ "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst),
+ "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
+def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst),
+ "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
+}
+
+let Constraints = "$src1 = $dst" in {
+// FIXME: provide shorter instructions when imm8 == 1
+let Uses = [CL] in {
+def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "rol{b}\t{%cl, $dst|$dst, CL}",
+ [(set GR8:$dst, (rotl GR8:$src1, CL))]>;
+def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
+ "rol{w}\t{%cl, $dst|$dst, CL}",
+ [(set GR16:$dst, (rotl GR16:$src1, CL))]>, OpSize;
+def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
+ "rol{l}\t{%cl, $dst|$dst, CL}",
+ [(set GR32:$dst, (rotl GR32:$src1, CL))]>;
+def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
+ "rol{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (rotl GR64:$src1, CL))]>;
+}
+
+def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "rol{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))]>;
+def ROL16ri : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "rol{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))]>,
+ OpSize;
+def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "rol{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))]>;
+def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst),
+ (ins GR64:$src1, i8imm:$src2),
+ "rol{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))]>;
+
+// Rotate by 1
+def ROL8r1 : I<0xD0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "rol{b}\t$dst",
+ [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))]>;
+def ROL16r1 : I<0xD1, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
+ "rol{w}\t$dst",
+ [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))]>, OpSize;
+def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
+ "rol{l}\t$dst",
+ [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>;
+def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
+ "rol{q}\t$dst",
+ [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>;
+} // Constraints = "$src = $dst"
+
+let Uses = [CL] in {
+def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
+ "rol{b}\t{%cl, $dst|$dst, CL}",
+ [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>;
+def ROL16mCL : I<0xD3, MRM0m, (outs), (ins i16mem:$dst),
+ "rol{w}\t{%cl, $dst|$dst, CL}",
+ [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+def ROL32mCL : I<0xD3, MRM0m, (outs), (ins i32mem:$dst),
+ "rol{l}\t{%cl, $dst|$dst, CL}",
+ [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)]>;
+def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst),
+ "rol{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>;
+}
+def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src1),
+ "rol{b}\t{$src1, $dst|$dst, $src1}",
+ [(store (rotl (loadi8 addr:$dst), (i8 imm:$src1)), addr:$dst)]>;
+def ROL16mi : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, i8imm:$src1),
+ "rol{w}\t{$src1, $dst|$dst, $src1}",
+ [(store (rotl (loadi16 addr:$dst), (i8 imm:$src1)), addr:$dst)]>,
+ OpSize;
+def ROL32mi : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, i8imm:$src1),
+ "rol{l}\t{$src1, $dst|$dst, $src1}",
+ [(store (rotl (loadi32 addr:$dst), (i8 imm:$src1)), addr:$dst)]>;
+def ROL64mi : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, i8imm:$src1),
+ "rol{q}\t{$src1, $dst|$dst, $src1}",
+ [(store (rotl (loadi64 addr:$dst), (i8 imm:$src1)), addr:$dst)]>;
+
+// Rotate by 1
+def ROL8m1 : I<0xD0, MRM0m, (outs), (ins i8mem :$dst),
+ "rol{b}\t$dst",
+ [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+def ROL16m1 : I<0xD1, MRM0m, (outs), (ins i16mem:$dst),
+ "rol{w}\t$dst",
+ [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+def ROL32m1 : I<0xD1, MRM0m, (outs), (ins i32mem:$dst),
+ "rol{l}\t$dst",
+ [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst),
+ "rol{q}\t$dst",
+ [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let Constraints = "$src1 = $dst" in {
+let Uses = [CL] in {
+def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "ror{b}\t{%cl, $dst|$dst, CL}",
+ [(set GR8:$dst, (rotr GR8:$src1, CL))]>;
+def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
+ "ror{w}\t{%cl, $dst|$dst, CL}",
+ [(set GR16:$dst, (rotr GR16:$src1, CL))]>, OpSize;
+def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
+ "ror{l}\t{%cl, $dst|$dst, CL}",
+ [(set GR32:$dst, (rotr GR32:$src1, CL))]>;
+def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
+ "ror{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (rotr GR64:$src1, CL))]>;
+}
+
+def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "ror{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))]>;
+def ROR16ri : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "ror{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))]>,
+ OpSize;
+def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "ror{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))]>;
+def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst),
+ (ins GR64:$src1, i8imm:$src2),
+ "ror{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))]>;
+
+// Rotate by 1
+def ROR8r1 : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "ror{b}\t$dst",
+ [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))]>;
+def ROR16r1 : I<0xD1, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
+ "ror{w}\t$dst",
+ [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))]>, OpSize;
+def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
+ "ror{l}\t$dst",
+ [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>;
+def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
+ "ror{q}\t$dst",
+ [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>;
+} // Constraints = "$src = $dst"
+
+let Uses = [CL] in {
+def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
+ "ror{b}\t{%cl, $dst|$dst, CL}",
+ [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>;
+def ROR16mCL : I<0xD3, MRM1m, (outs), (ins i16mem:$dst),
+ "ror{w}\t{%cl, $dst|$dst, CL}",
+ [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst),
+ "ror{l}\t{%cl, $dst|$dst, CL}",
+ [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)]>;
+def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst),
+ "ror{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)]>;
+}
+def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "ror{b}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def ROR16mi : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "ror{w}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+def ROR32mi : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "ror{l}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "ror{q}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+// Rotate by 1
+def ROR8m1 : I<0xD0, MRM1m, (outs), (ins i8mem :$dst),
+ "ror{b}\t$dst",
+ [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+def ROR16m1 : I<0xD1, MRM1m, (outs), (ins i16mem:$dst),
+ "ror{w}\t$dst",
+ [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+def ROR32m1 : I<0xD1, MRM1m, (outs), (ins i32mem:$dst),
+ "ror{l}\t$dst",
+ [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
+ "ror{q}\t$dst",
+ [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+
+//===----------------------------------------------------------------------===//
+// Double shift instructions (generalizations of rotate)
+//===----------------------------------------------------------------------===//
+
+let Constraints = "$src1 = $dst" in {
+
+let Uses = [CL] in {
+def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst),
+ (ins GR16:$src1, GR16:$src2),
+ "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>,
+ TB, OpSize;
+def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst),
+ (ins GR16:$src1, GR16:$src2),
+ "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>,
+ TB, OpSize;
+def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2),
+ "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>, TB;
+def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2),
+ "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>, TB;
+def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2),
+ "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))]>,
+ TB;
+def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2),
+ "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))]>,
+ TB;
+}
+
+let isCommutable = 1 in { // These instructions commute to each other.
+def SHLD16rri8 : Ii8<0xA4, MRMDestReg,
+ (outs GR16:$dst),
+ (ins GR16:$src1, GR16:$src2, i8imm:$src3),
+ "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2,
+ (i8 imm:$src3)))]>,
+ TB, OpSize;
+def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
+ (outs GR16:$dst),
+ (ins GR16:$src1, GR16:$src2, i8imm:$src3),
+ "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2,
+ (i8 imm:$src3)))]>,
+ TB, OpSize;
+def SHLD32rri8 : Ii8<0xA4, MRMDestReg,
+ (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2, i8imm:$src3),
+ "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2,
+ (i8 imm:$src3)))]>,
+ TB;
+def SHRD32rri8 : Ii8<0xAC, MRMDestReg,
+ (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2, i8imm:$src3),
+ "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2,
+ (i8 imm:$src3)))]>,
+ TB;
+def SHLD64rri8 : RIi8<0xA4, MRMDestReg,
+ (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2, i8imm:$src3),
+ "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2,
+ (i8 imm:$src3)))]>,
+ TB;
+def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
+ (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2, i8imm:$src3),
+ "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2,
+ (i8 imm:$src3)))]>,
+ TB;
+}
+} // Constraints = "$src = $dst"
+
+let Uses = [CL] in {
+def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL),
+ addr:$dst)]>, TB, OpSize;
+def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL),
+ addr:$dst)]>, TB, OpSize;
+
+def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL),
+ addr:$dst)]>, TB;
+def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL),
+ addr:$dst)]>, TB;
+
+def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(store (X86shld (loadi64 addr:$dst), GR64:$src2, CL),
+ addr:$dst)]>, TB;
+def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, CL),
+ addr:$dst)]>, TB;
+}
+
+def SHLD16mri8 : Ii8<0xA4, MRMDestMem,
+ (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
+ "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shld (loadi16 addr:$dst), GR16:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB, OpSize;
+def SHRD16mri8 : Ii8<0xAC, MRMDestMem,
+ (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
+ "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB, OpSize;
+
+def SHLD32mri8 : Ii8<0xA4, MRMDestMem,
+ (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
+ "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shld (loadi32 addr:$dst), GR32:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB;
+def SHRD32mri8 : Ii8<0xAC, MRMDestMem,
+ (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
+ "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shrd (loadi32 addr:$dst), GR32:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB;
+
+def SHLD64mri8 : RIi8<0xA4, MRMDestMem,
+ (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
+ "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shld (loadi64 addr:$dst), GR64:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB;
+def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
+ (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
+ "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shrd (loadi64 addr:$dst), GR64:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB;
+
+} // Defs = [EFLAGS]
+
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
new file mode 100644
index 0000000..1a58ba0
--- /dev/null
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -0,0 +1,390 @@
+//===- X86InstrSystem.td - System Instructions -------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 instructions that are generally used in
+// privileged modes. These are not typically used by the compiler, but are
+// supported for the assembler and disassembler.
+//
+//===----------------------------------------------------------------------===//
+
+let Defs = [RAX, RDX] in
+ def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, TB;
+
+let Defs = [RAX, RCX, RDX] in
+ def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB;
+
+// CPU flow control instructions
+
+let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in {
+ def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB;
+ def UD2B : I<0xB9, RawFrm, (outs), (ins), "ud2b", []>, TB;
+}
+
+def HLT : I<0xF4, RawFrm, (outs), (ins), "hlt", []>;
+def RSM : I<0xAA, RawFrm, (outs), (ins), "rsm", []>, TB;
+
+// Interrupt and SysCall Instructions.
+let Uses = [EFLAGS] in
+ def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
+def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3",
+ [(int_x86_int (i8 3))]>;
+def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap",
+ [(int_x86_int imm:$trap)]>;
+
+def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB;
+def SYSRETL : I<0x07, RawFrm, (outs), (ins), "sysretl", []>, TB;
+def SYSRETQ :RI<0x07, RawFrm, (outs), (ins), "sysretq", []>, TB,
+ Requires<[In64BitMode]>;
+
+def SYSENTER : I<0x34, RawFrm, (outs), (ins), "sysenter", []>, TB;
+
+def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexit", []>, TB,
+ Requires<[In32BitMode]>;
+def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexit", []>, TB,
+ Requires<[In64BitMode]>;
+
+def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iretw", []>, OpSize;
+def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", []>;
+def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", []>,
+ Requires<[In64BitMode]>;
+
+
+//===----------------------------------------------------------------------===//
+// Input/Output Instructions.
+//
+let Defs = [AL], Uses = [DX] in
+def IN8rr : I<0xEC, RawFrm, (outs), (ins),
+ "in{b}\t{%dx, %al|%AL, %DX}", []>;
+let Defs = [AX], Uses = [DX] in
+def IN16rr : I<0xED, RawFrm, (outs), (ins),
+ "in{w}\t{%dx, %ax|%AX, %DX}", []>, OpSize;
+let Defs = [EAX], Uses = [DX] in
+def IN32rr : I<0xED, RawFrm, (outs), (ins),
+ "in{l}\t{%dx, %eax|%EAX, %DX}", []>;
+
+let Defs = [AL] in
+def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins i8imm:$port),
+ "in{b}\t{$port, %al|%AL, $port}", []>;
+let Defs = [AX] in
+def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port),
+ "in{w}\t{$port, %ax|%AX, $port}", []>, OpSize;
+let Defs = [EAX] in
+def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port),
+ "in{l}\t{$port, %eax|%EAX, $port}", []>;
+
+let Uses = [DX, AL] in
+def OUT8rr : I<0xEE, RawFrm, (outs), (ins),
+ "out{b}\t{%al, %dx|%DX, %AL}", []>;
+let Uses = [DX, AX] in
+def OUT16rr : I<0xEF, RawFrm, (outs), (ins),
+ "out{w}\t{%ax, %dx|%DX, %AX}", []>, OpSize;
+let Uses = [DX, EAX] in
+def OUT32rr : I<0xEF, RawFrm, (outs), (ins),
+ "out{l}\t{%eax, %dx|%DX, %EAX}", []>;
+
+let Uses = [AL] in
+def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins i8imm:$port),
+ "out{b}\t{%al, $port|$port, %AL}", []>;
+let Uses = [AX] in
+def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port),
+ "out{w}\t{%ax, $port|$port, %AX}", []>, OpSize;
+let Uses = [EAX] in
+def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port),
+ "out{l}\t{%eax, $port|$port, %EAX}", []>;
+
+def IN8 : I<0x6C, RawFrm, (outs), (ins), "ins{b}", []>;
+def IN16 : I<0x6D, RawFrm, (outs), (ins), "ins{w}", []>, OpSize;
+def IN32 : I<0x6D, RawFrm, (outs), (ins), "ins{l}", []>;
+
+//===----------------------------------------------------------------------===//
+// Moves to and from debug registers
+
+def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// Moves to and from control registers
+
+def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// Segment override instruction prefixes
+
+def CS_PREFIX : I<0x2E, RawFrm, (outs), (ins), "cs", []>;
+def SS_PREFIX : I<0x36, RawFrm, (outs), (ins), "ss", []>;
+def DS_PREFIX : I<0x3E, RawFrm, (outs), (ins), "ds", []>;
+def ES_PREFIX : I<0x26, RawFrm, (outs), (ins), "es", []>;
+def FS_PREFIX : I<0x64, RawFrm, (outs), (ins), "fs", []>;
+def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>;
+
+
+//===----------------------------------------------------------------------===//
+// Moves to and from segment registers.
+//
+
+def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+def MOV16ms : I<0x8C, MRMDestMem, (outs i16mem:$dst), (ins SEGMENT_REG:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32ms : I<0x8C, MRMDestMem, (outs i32mem:$dst), (ins SEGMENT_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64ms : RI<0x8C, MRMDestMem, (outs i64mem:$dst), (ins SEGMENT_REG:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+//===----------------------------------------------------------------------===//
+// Segmentation support instructions.
+
+def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", []>, TB;
+
+def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+
+// i16mem operand in LAR32rm and GR32 operand in LAR32rr is not a typo.
+def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
+ "lar{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "lar{l}\t{$src, $dst|$dst, $src}", []>, TB;
+// i16mem operand in LAR64rm and GR32 operand in LAR32rr is not a typo.
+def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "lar{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
+ "lar{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def LSL16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB;
+
+def STRr : I<0x00, MRM1r, (outs GR16:$dst), (ins),
+ "str{w}\t{$dst}", []>, TB;
+def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins),
+ "str{w}\t{$dst}", []>, TB;
+def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src),
+ "ltr{w}\t{$src}", []>, TB;
+def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src),
+ "ltr{w}\t{$src}", []>, TB;
+
+def PUSHCS16 : I<0x0E, RawFrm, (outs), (ins),
+ "push{w}\t%cs", []>, Requires<[In32BitMode]>, OpSize;
+def PUSHCS32 : I<0x0E, RawFrm, (outs), (ins),
+ "push{l}\t%cs", []>, Requires<[In32BitMode]>;
+def PUSHSS16 : I<0x16, RawFrm, (outs), (ins),
+ "push{w}\t%ss", []>, Requires<[In32BitMode]>, OpSize;
+def PUSHSS32 : I<0x16, RawFrm, (outs), (ins),
+ "push{l}\t%ss", []>, Requires<[In32BitMode]>;
+def PUSHDS16 : I<0x1E, RawFrm, (outs), (ins),
+ "push{w}\t%ds", []>, Requires<[In32BitMode]>, OpSize;
+def PUSHDS32 : I<0x1E, RawFrm, (outs), (ins),
+ "push{l}\t%ds", []>, Requires<[In32BitMode]>;
+def PUSHES16 : I<0x06, RawFrm, (outs), (ins),
+ "push{w}\t%es", []>, Requires<[In32BitMode]>, OpSize;
+def PUSHES32 : I<0x06, RawFrm, (outs), (ins),
+ "push{l}\t%es", []>, Requires<[In32BitMode]>;
+
+def PUSHFS16 : I<0xa0, RawFrm, (outs), (ins),
+ "push{w}\t%fs", []>, OpSize, TB;
+def PUSHFS32 : I<0xa0, RawFrm, (outs), (ins),
+ "push{l}\t%fs", []>, TB, Requires<[In32BitMode]>;
+def PUSHGS16 : I<0xa8, RawFrm, (outs), (ins),
+ "push{w}\t%gs", []>, OpSize, TB;
+def PUSHGS32 : I<0xa8, RawFrm, (outs), (ins),
+ "push{l}\t%gs", []>, TB, Requires<[In32BitMode]>;
+
+def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins),
+ "push{q}\t%fs", []>, TB;
+def PUSHGS64 : I<0xa8, RawFrm, (outs), (ins),
+ "push{q}\t%gs", []>, TB;
+
+// No "pop cs" instruction.
+def POPSS16 : I<0x17, RawFrm, (outs), (ins),
+ "pop{w}\t%ss", []>, OpSize, Requires<[In32BitMode]>;
+def POPSS32 : I<0x17, RawFrm, (outs), (ins),
+ "pop{l}\t%ss", []> , Requires<[In32BitMode]>;
+
+def POPDS16 : I<0x1F, RawFrm, (outs), (ins),
+ "pop{w}\t%ds", []>, OpSize, Requires<[In32BitMode]>;
+def POPDS32 : I<0x1F, RawFrm, (outs), (ins),
+ "pop{l}\t%ds", []> , Requires<[In32BitMode]>;
+
+def POPES16 : I<0x07, RawFrm, (outs), (ins),
+ "pop{w}\t%es", []>, OpSize, Requires<[In32BitMode]>;
+def POPES32 : I<0x07, RawFrm, (outs), (ins),
+ "pop{l}\t%es", []> , Requires<[In32BitMode]>;
+
+def POPFS16 : I<0xa1, RawFrm, (outs), (ins),
+ "pop{w}\t%fs", []>, OpSize, TB;
+def POPFS32 : I<0xa1, RawFrm, (outs), (ins),
+ "pop{l}\t%fs", []>, TB , Requires<[In32BitMode]>;
+def POPFS64 : I<0xa1, RawFrm, (outs), (ins),
+ "pop{q}\t%fs", []>, TB;
+
+def POPGS16 : I<0xa9, RawFrm, (outs), (ins),
+ "pop{w}\t%gs", []>, OpSize, TB;
+def POPGS32 : I<0xa9, RawFrm, (outs), (ins),
+ "pop{l}\t%gs", []>, TB , Requires<[In32BitMode]>;
+def POPGS64 : I<0xa9, RawFrm, (outs), (ins),
+ "pop{q}\t%gs", []>, TB;
+
+
+def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+ "lds{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def LDS32rm : I<0xc5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+ "lds{l}\t{$src, $dst|$dst, $src}", []>;
+
+def LSS16rm : I<0xb2, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+ "lss{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LSS32rm : I<0xb2, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+ "lss{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LSS64rm : RI<0xb2, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
+ "lss{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def LES16rm : I<0xc4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+ "les{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def LES32rm : I<0xc4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+ "les{l}\t{$src, $dst|$dst, $src}", []>;
+
+def LFS16rm : I<0xb4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+ "lfs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LFS32rm : I<0xb4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+ "lfs{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LFS64rm : RI<0xb4, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
+ "lfs{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def LGS16rm : I<0xb5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+ "lgs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LGS32rm : I<0xb5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+ "lgs{l}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
+ "lgs{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+
+def VERRr : I<0x00, MRM4r, (outs), (ins GR16:$seg),
+ "verr\t$seg", []>, TB;
+def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg),
+ "verr\t$seg", []>, TB;
+def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg),
+ "verw\t$seg", []>, TB;
+def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg),
+ "verw\t$seg", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// Descriptor-table support instructions
+
+def SGDT16m : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
+ "sgdtw\t$dst", []>, TB, OpSize, Requires<[In32BitMode]>;
+def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
+ "sgdt\t$dst", []>, TB;
+def SIDT16m : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins),
+ "sidtw\t$dst", []>, TB, OpSize, Requires<[In32BitMode]>;
+def SIDTm : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins),
+ "sidt\t$dst", []>, TB;
+def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins),
+ "sldt{w}\t$dst", []>, TB, OpSize;
+def SLDT16m : I<0x00, MRM0m, (outs i16mem:$dst), (ins),
+ "sldt{w}\t$dst", []>, TB;
+def SLDT32r : I<0x00, MRM0r, (outs GR32:$dst), (ins),
+ "sldt{l}\t$dst", []>, TB;
+
+// LLDT is not interpreted specially in 64-bit mode because there is no sign
+// extension.
+def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins),
+ "sldt{q}\t$dst", []>, TB;
+def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins),
+ "sldt{q}\t$dst", []>, TB;
+
+def LGDT16m : I<0x01, MRM2m, (outs), (ins opaque48mem:$src),
+ "lgdtw\t$src", []>, TB, OpSize, Requires<[In32BitMode]>;
+def LGDTm : I<0x01, MRM2m, (outs), (ins opaque48mem:$src),
+ "lgdt\t$src", []>, TB;
+def LIDT16m : I<0x01, MRM3m, (outs), (ins opaque48mem:$src),
+ "lidtw\t$src", []>, TB, OpSize, Requires<[In32BitMode]>;
+def LIDTm : I<0x01, MRM3m, (outs), (ins opaque48mem:$src),
+ "lidt\t$src", []>, TB;
+def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src),
+ "lldt{w}\t$src", []>, TB;
+def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src),
+ "lldt{w}\t$src", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// Specialized register support
+def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", []>, TB;
+def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", []>, TB;
+def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", []>, TB;
+
+def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins),
+ "smsw{w}\t$dst", []>, OpSize, TB;
+def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins),
+ "smsw{l}\t$dst", []>, TB;
+// no m form encodable; use SMSW16m
+def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins),
+ "smsw{q}\t$dst", []>, TB;
+
+// For memory operands, there is only a 16-bit form
+def SMSW16m : I<0x01, MRM4m, (outs i16mem:$dst), (ins),
+ "smsw{w}\t$dst", []>, TB;
+
+def LMSW16r : I<0x01, MRM6r, (outs), (ins GR16:$src),
+ "lmsw{w}\t$src", []>, TB;
+def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src),
+ "lmsw{w}\t$src", []>, TB;
+
+def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// Cache instructions
+def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB;
+def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB;
+
diff --git a/lib/Target/X86/X86InstrVMX.td b/lib/Target/X86/X86InstrVMX.td
new file mode 100644
index 0000000..daf61e4
--- /dev/null
+++ b/lib/Target/X86/X86InstrVMX.td
@@ -0,0 +1,54 @@
+//===- X86InstrVMX.td - VMX Instruction Set Extension ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the instructions that make up the Intel VMX instruction
+// set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// VMX instructions
+
+// 66 0F 38 80
+def INVEPT : I<0x80, RawFrm, (outs), (ins), "invept", []>, OpSize, T8;
+// 66 0F 38 81
+def INVVPID : I<0x81, RawFrm, (outs), (ins), "invvpid", []>, OpSize, T8;
+// 0F 01 C1
+def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
+def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
+ "vmclear\t$vmcs", []>, OpSize, TB;
+// 0F 01 C2
+def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB;
+// 0F 01 C3
+def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB;
+def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
+ "vmptrld\t$vmcs", []>, TB;
+def VMPTRSTm : I<0xC7, MRM7m, (outs i64mem:$vmcs), (ins),
+ "vmptrst\t$vmcs", []>, TB;
+def VMREAD64rm : I<0x78, MRMDestMem, (outs i64mem:$dst), (ins GR64:$src),
+ "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMREAD32rm : I<0x78, MRMDestMem, (outs i32mem:$dst), (ins GR32:$src),
+ "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
+ "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB;
+// 0F 01 C4
+def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB;
+def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon),
+ "vmxon\t{$vmxon}", []>, XS;
+
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index 6f0a8d9..3f88fa6 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -19,7 +19,7 @@
#include "llvm/Function.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/System/Valgrind.h"
+#include "llvm/Support/Valgrind.h"
#include <cstdlib>
#include <cstring>
using namespace llvm;
@@ -127,9 +127,17 @@ extern "C" {
"movaps %xmm6, 96(%rsp)\n"
"movaps %xmm7, 112(%rsp)\n"
// JIT callee
+#ifdef _WIN64
+ "subq $32, %rsp\n"
+ "movq %rbp, %rcx\n" // Pass prev frame and return address
+ "movq 8(%rbp), %rdx\n"
+ "call " ASMPREFIX "X86CompilationCallback2\n"
+ "addq $32, %rsp\n"
+#else
"movq %rbp, %rdi\n" // Pass prev frame and return address
"movq 8(%rbp), %rsi\n"
"call " ASMPREFIX "X86CompilationCallback2\n"
+#endif
// Restore all XMM arg registers
"movaps 112(%rsp), %xmm7\n"
"movaps 96(%rsp), %xmm6\n"
@@ -333,11 +341,11 @@ extern "C" {
extern "C" {
#if !(defined (X86_64_JIT) && defined(_MSC_VER))
// the following function is called only from this translation unit,
- // unless we are under 64bit Windows with MSC, where there is
+ // unless we are under 64bit Windows with MSC, where there is
// no support for inline assembly
static
#endif
-void ATTRIBUTE_USED
+void LLVM_ATTRIBUTE_USED
X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
intptr_t *RetAddrLoc = &StackPtr[1];
assert(*RetAddrLoc == RetAddr &&
@@ -462,7 +470,7 @@ TargetJITInfo::StubLayout X86JITInfo::getStubLayout() {
void *X86JITInfo::emitFunctionStub(const Function* F, void *Target,
JITCodeEmitter &JCE) {
- // Note, we cast to intptr_t here to silence a -pedantic warning that
+ // Note, we cast to intptr_t here to silence a -pedantic warning that
// complains about casting a function pointer to a normal pointer.
#if defined (X86_32_JIT) && !defined (_MSC_VER)
bool NotCC = (Target != (void*)(intptr_t)X86CompilationCallback &&
diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp
index 36badb4..6686214 100644
--- a/lib/Target/X86/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/X86MCAsmInfo.cpp
@@ -17,6 +17,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
using namespace llvm;
enum AsmWriterFlavorTy {
@@ -68,7 +69,7 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) {
DwarfUsesInlineInfoSection = true;
// Exceptions handling
- ExceptionsType = ExceptionHandling::Dwarf;
+ ExceptionsType = ExceptionHandling::DwarfTable;
}
X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
@@ -88,8 +89,8 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
SupportsDebugInformation = true;
// Exceptions handling
- ExceptionsType = ExceptionHandling::Dwarf;
-
+ ExceptionsType = ExceptionHandling::DwarfTable;
+
// OpenBSD has buggy support for .quad in 32-bit mode, just split into two
// .words.
if (T.getOS() == Triple::OpenBSD && T.getArch() == Triple::x86)
@@ -98,13 +99,15 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
const MCSection *X86ELFMCAsmInfo::
getNonexecutableStackSection(MCContext &Ctx) const {
- return Ctx.getELFSection(".note.GNU-stack", MCSectionELF::SHT_PROGBITS,
- 0, SectionKind::getMetadata(), false);
+ return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS,
+ 0, SectionKind::getMetadata());
}
X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) {
- if (Triple.getArch() == Triple::x86_64)
+ if (Triple.getArch() == Triple::x86_64) {
GlobalPrefix = "";
+ PrivateGlobalPrefix = ".L";
+ }
AsmTransCBE = x86_asm_table;
AssemblerDialect = AsmWriterFlavor;
diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp
index 9564fe0..e6dc74e 100644
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -11,13 +11,14 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "x86-emitter"
+#define DEBUG_TYPE "mccodeemitter"
#include "X86.h"
#include "X86InstrInfo.h"
#include "X86FixupKinds.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -37,27 +38,6 @@ public:
~X86MCCodeEmitter() {}
- unsigned getNumFixupKinds() const {
- return 5;
- }
-
- const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
- const static MCFixupKindInfo Infos[] = {
- { "reloc_pcrel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel },
- { "reloc_pcrel_1byte", 0, 1 * 8, MCFixupKindInfo::FKF_IsPCRel },
- { "reloc_pcrel_2byte", 0, 2 * 8, MCFixupKindInfo::FKF_IsPCRel },
- { "reloc_riprel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel },
- { "reloc_riprel_4byte_movq_load", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel }
- };
-
- if (Kind < FirstTargetFixupKind)
- return MCCodeEmitter::getFixupKindInfo(Kind);
-
- assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
- "Invalid kind!");
- return Infos[Kind - FirstTargetFixupKind];
- }
-
static unsigned GetX86RegNum(const MCOperand &MO) {
return X86RegisterInfo::getX86RegNum(MO.getReg());
}
@@ -170,41 +150,77 @@ static MCFixupKind getImmFixupKind(uint64_t TSFlags) {
unsigned Size = X86II::getSizeOfImm(TSFlags);
bool isPCRel = X86II::isImmPCRel(TSFlags);
- switch (Size) {
- default: assert(0 && "Unknown immediate size");
- case 1: return isPCRel ? MCFixupKind(X86::reloc_pcrel_1byte) : FK_Data_1;
- case 2: return isPCRel ? MCFixupKind(X86::reloc_pcrel_2byte) : FK_Data_2;
- case 4: return isPCRel ? MCFixupKind(X86::reloc_pcrel_4byte) : FK_Data_4;
- case 8: assert(!isPCRel); return FK_Data_8;
- }
+ return MCFixup::getKindForSize(Size, isPCRel);
+}
+
+/// Is32BitMemOperand - Return true if the specified instruction with a memory
+/// operand should emit the 0x67 prefix byte in 64-bit mode due to a 32-bit
+/// memory operand. Op specifies the operand # of the memoperand.
+static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) {
+ const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
+ const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+
+ if ((BaseReg.getReg() != 0 && X86::GR32RegClass.contains(BaseReg.getReg())) ||
+ (IndexReg.getReg() != 0 && X86::GR32RegClass.contains(IndexReg.getReg())))
+ return true;
+ return false;
}
+/// StartsWithGlobalOffsetTable - Return true for the simple cases where this
+/// expression starts with _GLOBAL_OFFSET_TABLE_. This is a needed to support
+/// PIC on ELF i386 as that symbol is magic. We check only simple case that
+/// are know to be used: _GLOBAL_OFFSET_TABLE_ by itself or at the start
+/// of a binary expression.
+static bool StartsWithGlobalOffsetTable(const MCExpr *Expr) {
+ if (Expr->getKind() == MCExpr::Binary) {
+ const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(Expr);
+ Expr = BE->getLHS();
+ }
+
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ return false;
+
+ const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
+ const MCSymbol &S = Ref->getSymbol();
+ return S.getName() == "_GLOBAL_OFFSET_TABLE_";
+}
void X86MCCodeEmitter::
EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind,
unsigned &CurByte, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const {
- // If this is a simple integer displacement that doesn't require a relocation,
- // emit it now.
+ const MCExpr *Expr = NULL;
if (DispOp.isImm()) {
- // FIXME: is this right for pc-rel encoding?? Probably need to emit this as
- // a fixup if so.
- EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS);
- return;
+ // If this is a simple integer displacement that doesn't require a relocation,
+ // emit it now.
+ if (FixupKind != FK_PCRel_1 &&
+ FixupKind != FK_PCRel_2 &&
+ FixupKind != FK_PCRel_4) {
+ EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS);
+ return;
+ }
+ Expr = MCConstantExpr::Create(DispOp.getImm(), Ctx);
+ } else {
+ Expr = DispOp.getExpr();
}
// If we have an immoffset, add it to the expression.
- const MCExpr *Expr = DispOp.getExpr();
+ if (FixupKind == FK_Data_4 && StartsWithGlobalOffsetTable(Expr)) {
+ assert(ImmOffset == 0);
+
+ FixupKind = MCFixupKind(X86::reloc_global_offset_table);
+ ImmOffset = CurByte;
+ }
// If the fixup is pc-relative, we need to bias the value to be relative to
// the start of the field, not the end of the field.
- if (FixupKind == MCFixupKind(X86::reloc_pcrel_4byte) ||
+ if (FixupKind == FK_PCRel_4 ||
FixupKind == MCFixupKind(X86::reloc_riprel_4byte) ||
FixupKind == MCFixupKind(X86::reloc_riprel_4byte_movq_load))
ImmOffset -= 4;
- if (FixupKind == MCFixupKind(X86::reloc_pcrel_2byte))
+ if (FixupKind == FK_PCRel_2)
ImmOffset -= 2;
- if (FixupKind == MCFixupKind(X86::reloc_pcrel_1byte))
+ if (FixupKind == FK_PCRel_1)
ImmOffset -= 1;
if (ImmOffset)
@@ -221,10 +237,10 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
uint64_t TSFlags, unsigned &CurByte,
raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const{
- const MCOperand &Disp = MI.getOperand(Op+3);
- const MCOperand &Base = MI.getOperand(Op);
- const MCOperand &Scale = MI.getOperand(Op+1);
- const MCOperand &IndexReg = MI.getOperand(Op+2);
+ const MCOperand &Disp = MI.getOperand(Op+X86::AddrDisp);
+ const MCOperand &Base = MI.getOperand(Op+X86::AddrBaseReg);
+ const MCOperand &Scale = MI.getOperand(Op+X86::AddrScaleAmt);
+ const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
unsigned BaseReg = Base.getReg();
// Handle %rip relative addressing.
@@ -238,8 +254,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
// movq loads are handled with a special relocation form which allows the
// linker to eliminate some loads for GOT references which end up in the
// same linkage unit.
- if (MI.getOpcode() == X86::MOV64rm ||
- MI.getOpcode() == X86::MOV64rm_TC)
+ if (MI.getOpcode() == X86::MOV64rm)
FixupKind = X86::reloc_riprel_4byte_movq_load;
// rip-relative addressing is actually relative to the *next* instruction.
@@ -295,7 +310,8 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
// Otherwise, emit the most general non-SIB encoding: [REG+disp32]
EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS);
- EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups);
+ EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS,
+ Fixups);
return;
}
@@ -355,7 +371,8 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
if (ForceDisp8)
EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups);
else if (ForceDisp32 || Disp.getImm() != 0)
- EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups);
+ EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS,
+ Fixups);
}
/// EmitVEXOpcodePrefix - AVX instructions are encoded using a opcode prefix
@@ -708,14 +725,15 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
if ((TSFlags & X86II::Op0Mask) == X86II::REP)
EmitByte(0xF3, CurByte, OS);
+ // Emit the address size opcode prefix as needed.
+ if ((TSFlags & X86II::AdSize) ||
+ (MemOperand != -1 && Is64BitMode && Is32BitMemOperand(MI, MemOperand)))
+ EmitByte(0x67, CurByte, OS);
+
// Emit the operand size opcode prefix as needed.
if (TSFlags & X86II::OpSize)
EmitByte(0x66, CurByte, OS);
- // Emit the address size opcode prefix as needed.
- if (TSFlags & X86II::AdSize)
- EmitByte(0x67, CurByte, OS);
-
bool Need0FPrefix = false;
switch (TSFlags & X86II::Op0Mask) {
default: assert(0 && "Invalid prefix!");
@@ -806,6 +824,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
if ((TSFlags >> 32) & X86II::VEX_4V)
HasVEX_4V = true;
+
// Determine where the memory operand starts, if present.
int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
if (MemoryOperand != -1) MemoryOperand += CurOp;
@@ -815,7 +834,12 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
else
EmitVEXOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS);
+
unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
+
+ if ((TSFlags >> 32) & X86II::Has3DNow0F0FOpcode)
+ BaseOpcode = 0x0F; // Weird 3DNow! encoding.
+
unsigned SrcRegNum = 0;
switch (TSFlags & X86II::FormMask) {
case X86II::MRMInitReg:
@@ -828,6 +852,13 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
EmitByte(BaseOpcode, CurByte, OS);
break;
+ case X86II::RawFrmImm8:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitImmediate(MI.getOperand(CurOp++),
+ X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
+ CurByte, OS, Fixups);
+ EmitImmediate(MI.getOperand(CurOp++), 1, FK_Data_1, CurByte, OS, Fixups);
+ break;
case X86II::RawFrmImm16:
EmitByte(BaseOpcode, CurByte, OS);
EmitImmediate(MI.getOperand(CurOp++),
@@ -963,12 +994,24 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
RegNum |= GetX86RegNum(MO) << 4;
EmitImmediate(MCOperand::CreateImm(RegNum), 1, FK_Data_1, CurByte, OS,
Fixups);
- } else
+ } else {
+ unsigned FixupKind;
+ // FIXME: Is there a better way to know that we need a signed relocation?
+ if (MI.getOpcode() == X86::MOV64ri32 ||
+ MI.getOpcode() == X86::MOV64mi32 ||
+ MI.getOpcode() == X86::PUSH64i32)
+ FixupKind = X86::reloc_signed_4byte;
+ else
+ FixupKind = getImmFixupKind(TSFlags);
EmitImmediate(MI.getOperand(CurOp++),
- X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
+ X86II::getSizeOfImm(TSFlags), MCFixupKind(FixupKind),
CurByte, OS, Fixups);
+ }
}
+ if ((TSFlags >> 32) & X86II::Has3DNow0F0FOpcode)
+ EmitByte(X86II::getBaseOpcodeFor(TSFlags), CurByte, OS);
+
#ifndef NDEBUG
// FIXME: Verify.
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 8c4620f..cbe6db2 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -12,6 +12,7 @@
//
//===----------------------------------------------------------------------===//
+#include "InstPrinter/X86ATTInstPrinter.h"
#include "X86MCInstLower.h"
#include "X86AsmPrinter.h"
#include "X86COFFMachineModuleInfo.h"
@@ -38,11 +39,6 @@ MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
}
-MCSymbol *X86MCInstLower::GetPICBaseSymbol() const {
- return static_cast<const X86TargetLowering*>(TM.getTargetLowering())->
- getPICBaseSymbol(&MF, Ctx);
-}
-
/// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
/// operand to an MCSymbol.
MCSymbol *X86MCInstLower::
@@ -154,7 +150,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
// Subtract the pic base.
Expr = MCBinaryExpr::CreateSub(Expr,
- MCSymbolRefExpr::Create(GetPICBaseSymbol(),
+ MCSymbolRefExpr::Create(MF.getPICBaseSymbol(),
Ctx),
Ctx);
break;
@@ -173,7 +169,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
Expr = MCSymbolRefExpr::Create(Sym, Ctx);
// Subtract the pic base.
Expr = MCBinaryExpr::CreateSub(Expr,
- MCSymbolRefExpr::Create(GetPICBaseSymbol(), Ctx),
+ MCSymbolRefExpr::Create(MF.getPICBaseSymbol(), Ctx),
Ctx);
if (MO.isJTI() && MAI.hasSetDirective()) {
// If .set directive is supported, use it to reduce the number of
@@ -326,8 +322,6 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
MO.getMBB()->getSymbol(), Ctx));
break;
case MachineOperand::MO_GlobalAddress:
- MCOp = LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
- break;
case MachineOperand::MO_ExternalSymbol:
MCOp = LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
break;
@@ -347,6 +341,7 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
}
// Handle a few special cases to eliminate operand modifiers.
+ReSimplify:
switch (OutMI.getOpcode()) {
case X86::LEA64_32r: // Handle 'subreg rewriting' for the lea64_32mem operand.
lower_lea64_32mem(&OutMI, 1);
@@ -377,11 +372,10 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
case X86::SETB_C64r: LowerUnaryToTwoAddr(OutMI, X86::SBB64rr); break;
case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break;
case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break;
- case X86::MMX_V_SET0: LowerUnaryToTwoAddr(OutMI, X86::MMX_PXORrr); break;
- case X86::MMX_V_SETALLONES:
- LowerUnaryToTwoAddr(OutMI, X86::MMX_PCMPEQDrr); break;
case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
+ case X86::VFsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
+ case X86::VFsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
case X86::V_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
case X86::V_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break;
case X86::V_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
@@ -417,6 +411,13 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
break;
}
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64: {
+ OutMI = MCInst();
+ OutMI.setOpcode(X86::RET);
+ break;
+ }
+
// TAILJMPd, TAILJMPd64 - Lower to the correct jump instructions.
case X86::TAILJMPr:
case X86::TAILJMPd:
@@ -436,6 +437,19 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
break;
}
+ // These are pseudo-ops for OR to help with the OR->ADD transformation. We do
+ // this with an ugly goto in case the resultant OR uses EAX and needs the
+ // short form.
+ case X86::ADD16rr_DB: OutMI.setOpcode(X86::OR16rr); goto ReSimplify;
+ case X86::ADD32rr_DB: OutMI.setOpcode(X86::OR32rr); goto ReSimplify;
+ case X86::ADD64rr_DB: OutMI.setOpcode(X86::OR64rr); goto ReSimplify;
+ case X86::ADD16ri_DB: OutMI.setOpcode(X86::OR16ri); goto ReSimplify;
+ case X86::ADD32ri_DB: OutMI.setOpcode(X86::OR32ri); goto ReSimplify;
+ case X86::ADD64ri32_DB: OutMI.setOpcode(X86::OR64ri32); goto ReSimplify;
+ case X86::ADD16ri8_DB: OutMI.setOpcode(X86::OR16ri8); goto ReSimplify;
+ case X86::ADD32ri8_DB: OutMI.setOpcode(X86::OR32ri8); goto ReSimplify;
+ case X86::ADD64ri8_DB: OutMI.setOpcode(X86::OR64ri8); goto ReSimplify;
+
// The assembler backend wants to see branches in their small form and relax
// them to their large form. The JIT can only handle the large form because
// it does not do relaxation. For now, translate the large form to the
@@ -513,6 +527,66 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
}
}
+static void LowerTlsAddr(MCStreamer &OutStreamer,
+ X86MCInstLower &MCInstLowering,
+ const MachineInstr &MI) {
+ bool is64Bits = MI.getOpcode() == X86::TLS_addr64;
+ MCContext &context = OutStreamer.getContext();
+
+ if (is64Bits) {
+ MCInst prefix;
+ prefix.setOpcode(X86::DATA16_PREFIX);
+ OutStreamer.EmitInstruction(prefix);
+ }
+ MCSymbol *sym = MCInstLowering.GetSymbolFromOperand(MI.getOperand(3));
+ const MCSymbolRefExpr *symRef =
+ MCSymbolRefExpr::Create(sym, MCSymbolRefExpr::VK_TLSGD, context);
+
+ MCInst LEA;
+ if (is64Bits) {
+ LEA.setOpcode(X86::LEA64r);
+ LEA.addOperand(MCOperand::CreateReg(X86::RDI)); // dest
+ LEA.addOperand(MCOperand::CreateReg(X86::RIP)); // base
+ LEA.addOperand(MCOperand::CreateImm(1)); // scale
+ LEA.addOperand(MCOperand::CreateReg(0)); // index
+ LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp
+ LEA.addOperand(MCOperand::CreateReg(0)); // seg
+ } else {
+ LEA.setOpcode(X86::LEA32r);
+ LEA.addOperand(MCOperand::CreateReg(X86::EAX)); // dest
+ LEA.addOperand(MCOperand::CreateReg(0)); // base
+ LEA.addOperand(MCOperand::CreateImm(1)); // scale
+ LEA.addOperand(MCOperand::CreateReg(X86::EBX)); // index
+ LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp
+ LEA.addOperand(MCOperand::CreateReg(0)); // seg
+ }
+ OutStreamer.EmitInstruction(LEA);
+
+ if (is64Bits) {
+ MCInst prefix;
+ prefix.setOpcode(X86::DATA16_PREFIX);
+ OutStreamer.EmitInstruction(prefix);
+ prefix.setOpcode(X86::DATA16_PREFIX);
+ OutStreamer.EmitInstruction(prefix);
+ prefix.setOpcode(X86::REX64_PREFIX);
+ OutStreamer.EmitInstruction(prefix);
+ }
+
+ MCInst call;
+ if (is64Bits)
+ call.setOpcode(X86::CALL64pcrel32);
+ else
+ call.setOpcode(X86::CALLpcrel32);
+ StringRef name = is64Bits ? "__tls_get_addr" : "___tls_get_addr";
+ MCSymbol *tlsGetAddr = context.GetOrCreateSymbol(name);
+ const MCSymbolRefExpr *tlsRef =
+ MCSymbolRefExpr::Create(tlsGetAddr,
+ MCSymbolRefExpr::VK_PLT,
+ context);
+
+ call.addOperand(MCOperand::CreateExpr(tlsRef));
+ OutStreamer.EmitInstruction(call);
+}
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
X86MCInstLower MCInstLowering(Mang, *MF, *this);
@@ -532,13 +606,26 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer.EmitRawText(StringRef("\t#MEMBARRIER"));
return;
+
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64: {
+ // Lower these as normal, but add some comments.
+ unsigned Reg = MI->getOperand(0).getReg();
+ OutStreamer.AddComment(StringRef("eh_return, addr: %") +
+ X86ATTInstPrinter::getRegisterName(Reg));
+ break;
+ }
case X86::TAILJMPr:
case X86::TAILJMPd:
case X86::TAILJMPd64:
// Lower these as normal, but add some comments.
OutStreamer.AddComment("TAILCALL");
break;
-
+
+ case X86::TLS_addr32:
+ case X86::TLS_addr64:
+ return LowerTlsAddr(OutStreamer, MCInstLowering, *MI);
+
case X86::MOVPC32r: {
MCInst TmpInst;
// This is a pseudo op for a two instruction sequence with a label, which
@@ -548,7 +635,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
// popl %esi
// Emit the call.
- MCSymbol *PICBase = MCInstLowering.GetPICBaseSymbol();
+ MCSymbol *PICBase = MF->getPICBaseSymbol();
TmpInst.setOpcode(X86::CALLpcrel32);
// FIXME: We would like an efficient form for this, so we don't have to do a
// lot of extra uniquing.
@@ -586,7 +673,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext);
const MCExpr *PICBase =
- MCSymbolRefExpr::Create(MCInstLowering.GetPICBaseSymbol(), OutContext);
+ MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), OutContext);
DotExpr = MCBinaryExpr::CreateSub(DotExpr, PICBase, OutContext);
DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext),
diff --git a/lib/Target/X86/X86MCInstLower.h b/lib/Target/X86/X86MCInstLower.h
index 539b09b..0210072 100644
--- a/lib/Target/X86/X86MCInstLower.h
+++ b/lib/Target/X86/X86MCInstLower.h
@@ -40,8 +40,6 @@ public:
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
- MCSymbol *GetPICBaseSymbol() const;
-
MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
diff --git a/lib/Target/X86/X86MachObjectWriter.cpp b/lib/Target/X86/X86MachObjectWriter.cpp
new file mode 100644
index 0000000..8f3dd32
--- /dev/null
+++ b/lib/Target/X86/X86MachObjectWriter.cpp
@@ -0,0 +1,32 @@
+//===-- X86MachObjectWriter.cpp - X86 Mach-O Writer -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+using namespace llvm;
+
+namespace {
+class X86MachObjectWriter : public MCMachObjectTargetWriter {
+public:
+ X86MachObjectWriter(bool Is64Bit, uint32_t CPUType,
+ uint32_t CPUSubtype)
+ : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
+ /*UseAggressiveSymbolFolding=*/Is64Bit) {}
+};
+}
+
+MCObjectWriter *llvm::createX86MachObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype) {
+ return createMachObjectWriter(new X86MachObjectWriter(Is64Bit,
+ CPUType,
+ CPUSubtype),
+ OS, /*IsLittleEndian=*/true);
+}
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index fedd49e..2f6bd88 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -31,7 +31,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -41,7 +41,7 @@
#include "llvm/Support/CommandLine.h"
using namespace llvm;
-static cl::opt<bool>
+cl::opt<bool>
ForceStackAlign("force-align-stack",
cl::desc("Force align the stack to the minimum alignment"
" needed for the function."),
@@ -60,7 +60,7 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
Is64Bit = Subtarget->is64Bit();
IsWin64 = Subtarget->isTargetWin64();
- StackAlign = TM.getFrameInfo()->getStackAlignment();
+ StackAlign = TM.getFrameLowering()->getStackAlignment();
if (Is64Bit) {
SlotSize = 8;
@@ -159,46 +159,21 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
case X86::YMM7: case X86::YMM15: case X86::MM7:
return 7;
- case X86::ES:
- return 0;
- case X86::CS:
- return 1;
- case X86::SS:
- return 2;
- case X86::DS:
- return 3;
- case X86::FS:
- return 4;
- case X86::GS:
- return 5;
-
- case X86::CR0:
- return 0;
- case X86::CR1:
- return 1;
- case X86::CR2:
- return 2;
- case X86::CR3:
- return 3;
- case X86::CR4:
- return 4;
-
- case X86::DR0:
- return 0;
- case X86::DR1:
- return 1;
- case X86::DR2:
- return 2;
- case X86::DR3:
- return 3;
- case X86::DR4:
- return 4;
- case X86::DR5:
- return 5;
- case X86::DR6:
- return 6;
- case X86::DR7:
- return 7;
+ case X86::ES: return 0;
+ case X86::CS: return 1;
+ case X86::SS: return 2;
+ case X86::DS: return 3;
+ case X86::FS: return 4;
+ case X86::GS: return 5;
+
+ case X86::CR0: case X86::CR8 : case X86::DR0: return 0;
+ case X86::CR1: case X86::CR9 : case X86::DR1: return 1;
+ case X86::CR2: case X86::CR10: case X86::DR2: return 2;
+ case X86::CR3: case X86::CR11: case X86::DR3: return 3;
+ case X86::CR4: case X86::CR12: case X86::DR4: return 4;
+ case X86::CR5: case X86::CR13: case X86::DR5: return 5;
+ case X86::CR6: case X86::CR14: case X86::DR6: return 6;
+ case X86::CR7: case X86::CR15: case X86::DR7: return 7;
// Pseudo index registers are equivalent to a "none"
// scaled index (See Intel Manual 2A, table 2-3)
@@ -295,9 +270,14 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
}
break;
case X86::sub_32bit:
- if (B == &X86::GR32RegClass || B == &X86::GR32_NOSPRegClass) {
+ if (B == &X86::GR32RegClass) {
if (A->getSize() == 8)
return A;
+ } else if (B == &X86::GR32_NOSPRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_NOSPRegClass)
+ return &X86::GR64_NOSPRegClass;
+ if (A->getSize() == 8)
+ return getCommonSubClass(A, &X86::GR64_NOSPRegClass);
} else if (B == &X86::GR32_ABCDRegClass) {
if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
A == &X86::GR64_NOREXRegClass ||
@@ -336,10 +316,16 @@ X86RegisterInfo::getPointerRegClass(unsigned Kind) const {
if (TM.getSubtarget<X86Subtarget>().is64Bit())
return &X86::GR64RegClass;
return &X86::GR32RegClass;
- case 1: // Normal GRPs except the stack pointer (for encoding reasons).
+ case 1: // Normal GPRs except the stack pointer (for encoding reasons).
if (TM.getSubtarget<X86Subtarget>().is64Bit())
return &X86::GR64_NOSPRegClass;
return &X86::GR32_NOSPRegClass;
+ case 2: // Available for tailcall (not callee-saved GPRs).
+ if (TM.getSubtarget<X86Subtarget>().isTargetWin64())
+ return &X86::GR64_TCW64RegClass;
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ return &X86::GR64_TCRegClass;
+ return &X86::GR32_TCRegClass;
}
}
@@ -408,6 +394,8 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
// Set the stack-pointer register and its aliases as reserved.
Reserved.set(X86::RSP);
Reserved.set(X86::ESP);
@@ -420,7 +408,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(X86::IP);
// Set the frame-pointer register and its aliases as reserved if needed.
- if (hasFP(MF)) {
+ if (TFI->hasFP(MF)) {
Reserved.set(X86::RBP);
Reserved.set(X86::EBP);
Reserved.set(X86::BP);
@@ -445,21 +433,6 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
-/// hasFP - Return true if the specified function should have a dedicated frame
-/// pointer register. This is true if the function has variable sized allocas
-/// or if frame pointer elimination is disabled.
-bool X86RegisterInfo::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const MachineModuleInfo &MMI = MF.getMMI();
-
- return (DisableFramePointerElim(MF) ||
- needsStackRealignment(MF) ||
- MFI->hasVarSizedObjects() ||
- MFI->isFrameAddressTaken() ||
- MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
- MMI.callsUnwindInit());
-}
-
bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
return (RealignStack &&
@@ -478,62 +451,25 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
if (0 && requiresRealignment && MFI->hasVarSizedObjects())
report_fatal_error(
"Stack realignment in presense of dynamic allocas is not supported");
-
+
// If we've requested that we force align the stack do so now.
if (ForceStackAlign)
return canRealignStack(MF);
-
- return requiresRealignment && canRealignStack(MF);
-}
-bool X86RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const {
- return !MF.getFrameInfo()->hasVarSizedObjects();
+ return requiresRealignment && canRealignStack(MF);
}
bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
unsigned Reg, int &FrameIdx) const {
- if (Reg == FramePtr && hasFP(MF)) {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (Reg == FramePtr && TFI->hasFP(MF)) {
FrameIdx = MF.getFrameInfo()->getObjectIndexBegin();
return true;
}
return false;
}
-int
-X86RegisterInfo::getFrameIndexOffset(const MachineFunction &MF, int FI) const {
- const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- int Offset = MFI->getObjectOffset(FI) - TFI.getOffsetOfLocalArea();
- uint64_t StackSize = MFI->getStackSize();
-
- if (needsStackRealignment(MF)) {
- if (FI < 0) {
- // Skip the saved EBP.
- Offset += SlotSize;
- } else {
- unsigned Align = MFI->getObjectAlignment(FI);
- assert((-(Offset + StackSize)) % Align == 0);
- Align = 0;
- return Offset + StackSize;
- }
- // FIXME: Support tail calls
- } else {
- if (!hasFP(MF))
- return Offset + StackSize;
-
- // Skip the saved EBP.
- Offset += SlotSize;
-
- // Skip the RETADDR move area
- const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
- if (TailCallReturnAddrDelta < 0)
- Offset -= TailCallReturnAddrDelta;
- }
-
- return Offset;
-}
-
static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
if (is64Bit) {
if (isInt<8>(Imm))
@@ -561,69 +497,70 @@ static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
void X86RegisterInfo::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- if (!hasReservedCallFrame(MF)) {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ bool reseveCallFrame = TFI->hasReservedCallFrame(MF);
+ int Opcode = I->getOpcode();
+ bool isDestroy = Opcode == getCallFrameDestroyOpcode();
+ DebugLoc DL = I->getDebugLoc();
+ uint64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0;
+ uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0;
+ I = MBB.erase(I);
+
+ if (!reseveCallFrame) {
// If the stack pointer can be changed after prologue, turn the
// adjcallstackup instruction into a 'sub ESP, <amt>' and the
// adjcallstackdown instruction into 'add ESP, <amt>'
// TODO: consider using push / pop instead of sub + store / add
- MachineInstr *Old = I;
- uint64_t Amount = Old->getOperand(0).getImm();
- if (Amount != 0) {
- // We need to keep the stack aligned properly. To do this, we round the
- // amount of space needed for the outgoing arguments up to the next
- // alignment boundary.
- Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
-
- MachineInstr *New = 0;
- if (Old->getOpcode() == getCallFrameSetupOpcode()) {
- New = BuildMI(MF, Old->getDebugLoc(),
- TII.get(getSUBriOpcode(Is64Bit, Amount)),
- StackPtr)
- .addReg(StackPtr)
- .addImm(Amount);
- } else {
- assert(Old->getOpcode() == getCallFrameDestroyOpcode());
-
- // Factor out the amount the callee already popped.
- uint64_t CalleeAmt = Old->getOperand(1).getImm();
- Amount -= CalleeAmt;
-
- if (Amount) {
- unsigned Opc = getADDriOpcode(Is64Bit, Amount);
- New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), StackPtr)
- .addReg(StackPtr)
- .addImm(Amount);
- }
- }
+ if (Amount == 0)
+ return;
+
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
+
+ MachineInstr *New = 0;
+ if (Opcode == getCallFrameSetupOpcode()) {
+ New = BuildMI(MF, DL, TII.get(getSUBriOpcode(Is64Bit, Amount)),
+ StackPtr)
+ .addReg(StackPtr)
+ .addImm(Amount);
+ } else {
+ assert(Opcode == getCallFrameDestroyOpcode());
- if (New) {
- // The EFLAGS implicit def is dead.
- New->getOperand(3).setIsDead();
+ // Factor out the amount the callee already popped.
+ Amount -= CalleeAmt;
- // Replace the pseudo instruction with a new instruction.
- MBB.insert(I, New);
+ if (Amount) {
+ unsigned Opc = getADDriOpcode(Is64Bit, Amount);
+ New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr).addImm(Amount);
}
}
- } else if (I->getOpcode() == getCallFrameDestroyOpcode()) {
- // If we are performing frame pointer elimination and if the callee pops
- // something off the stack pointer, add it back. We do this until we have
- // more advanced stack pointer tracking ability.
- if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
- unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt);
- MachineInstr *Old = I;
- MachineInstr *New =
- BuildMI(MF, Old->getDebugLoc(), TII.get(Opc),
- StackPtr)
- .addReg(StackPtr)
- .addImm(CalleeAmt);
+ if (New) {
// The EFLAGS implicit def is dead.
New->getOperand(3).setIsDead();
+
+ // Replace the pseudo instruction with a new instruction.
MBB.insert(I, New);
}
+
+ return;
}
- MBB.erase(I);
+ if (Opcode == getCallFrameDestroyOpcode() && CalleeAmt) {
+ // If we are performing frame pointer elimination and if the callee pops
+ // something off the stack pointer, add it back. We do this until we have
+ // more advanced stack pointer tracking ability.
+ unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt);
+ MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr).addImm(CalleeAmt);
+
+ // The EFLAGS implicit def is dead.
+ New->getOperand(3).setIsDead();
+ MBB.insert(I, New);
+ }
}
void
@@ -634,6 +571,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned i = 0;
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
while (!MI.getOperand(i).isFI()) {
++i;
@@ -650,7 +588,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
else if (AfterFPPop)
BasePtr = StackPtr;
else
- BasePtr = (hasFP(MF) ? FramePtr : StackPtr);
+ BasePtr = (TFI->hasFP(MF) ? FramePtr : StackPtr);
// This must be part of a four operand memory reference. Replace the
// FrameIndex with base register with EBP. Add an offset to the offset.
@@ -660,11 +598,10 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int FIOffset;
if (AfterFPPop) {
// Tail call jmp happens after FP is popped.
- const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
const MachineFrameInfo *MFI = MF.getFrameInfo();
- FIOffset = MFI->getObjectOffset(FrameIndex) - TFI.getOffsetOfLocalArea();
+ FIOffset = MFI->getObjectOffset(FrameIndex) - TFI->getOffsetOfLocalArea();
} else
- FIOffset = getFrameIndexOffset(MF, FrameIndex);
+ FIOffset = TFI->getFrameIndexOffset(MF, FrameIndex);
if (MI.getOperand(i+3).isImm()) {
// Offset is a 32-bit integer.
@@ -677,710 +614,14 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
}
-void
-X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
- MachineFrameInfo *MFI = MF.getFrameInfo();
-
- X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
-
- if (TailCallReturnAddrDelta < 0) {
- // create RETURNADDR area
- // arg
- // arg
- // RETADDR
- // { ...
- // RETADDR area
- // ...
- // }
- // [EBP]
- MFI->CreateFixedObject(-TailCallReturnAddrDelta,
- (-1U*SlotSize)+TailCallReturnAddrDelta, true);
- }
-
- if (hasFP(MF)) {
- assert((TailCallReturnAddrDelta <= 0) &&
- "The Delta should always be zero or negative");
- const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
-
- // Create a frame entry for the EBP register that must be saved.
- int FrameIdx = MFI->CreateFixedObject(SlotSize,
- -(int)SlotSize +
- TFI.getOffsetOfLocalArea() +
- TailCallReturnAddrDelta,
- true);
- assert(FrameIdx == MFI->getObjectIndexBegin() &&
- "Slot for EBP register must be last in order to be found!");
- FrameIdx = 0;
- }
-}
-
-/// emitSPUpdate - Emit a series of instructions to increment / decrement the
-/// stack pointer by a constant value.
-static
-void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- unsigned StackPtr, int64_t NumBytes, bool Is64Bit,
- const TargetInstrInfo &TII) {
- bool isSub = NumBytes < 0;
- uint64_t Offset = isSub ? -NumBytes : NumBytes;
- unsigned Opc = isSub ?
- getSUBriOpcode(Is64Bit, Offset) :
- getADDriOpcode(Is64Bit, Offset);
- uint64_t Chunk = (1LL << 31) - 1;
- DebugLoc DL = MBB.findDebugLoc(MBBI);
-
- while (Offset) {
- uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
- MachineInstr *MI =
- BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr)
- .addImm(ThisVal);
- MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
- Offset -= ThisVal;
- }
-}
-
-/// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator.
-static
-void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- unsigned StackPtr, uint64_t *NumBytes = NULL) {
- if (MBBI == MBB.begin()) return;
-
- MachineBasicBlock::iterator PI = prior(MBBI);
- unsigned Opc = PI->getOpcode();
- if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
- Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
- PI->getOperand(0).getReg() == StackPtr) {
- if (NumBytes)
- *NumBytes += PI->getOperand(2).getImm();
- MBB.erase(PI);
- } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
- Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
- PI->getOperand(0).getReg() == StackPtr) {
- if (NumBytes)
- *NumBytes -= PI->getOperand(2).getImm();
- MBB.erase(PI);
- }
-}
-
-/// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower iterator.
-static
-void mergeSPUpdatesDown(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- unsigned StackPtr, uint64_t *NumBytes = NULL) {
- // FIXME: THIS ISN'T RUN!!!
- return;
-
- if (MBBI == MBB.end()) return;
-
- MachineBasicBlock::iterator NI = llvm::next(MBBI);
- if (NI == MBB.end()) return;
-
- unsigned Opc = NI->getOpcode();
- if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
- Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
- NI->getOperand(0).getReg() == StackPtr) {
- if (NumBytes)
- *NumBytes -= NI->getOperand(2).getImm();
- MBB.erase(NI);
- MBBI = NI;
- } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
- Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
- NI->getOperand(0).getReg() == StackPtr) {
- if (NumBytes)
- *NumBytes += NI->getOperand(2).getImm();
- MBB.erase(NI);
- MBBI = NI;
- }
-}
-
-/// mergeSPUpdates - Checks the instruction before/after the passed
-/// instruction. If it is an ADD/SUB instruction it is deleted argument and the
-/// stack adjustment is returned as a positive value for ADD and a negative for
-/// SUB.
-static int mergeSPUpdates(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- unsigned StackPtr,
- bool doMergeWithPrevious) {
- if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
- (!doMergeWithPrevious && MBBI == MBB.end()))
- return 0;
-
- MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
- MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : llvm::next(MBBI);
- unsigned Opc = PI->getOpcode();
- int Offset = 0;
-
- if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
- Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
- PI->getOperand(0).getReg() == StackPtr){
- Offset += PI->getOperand(2).getImm();
- MBB.erase(PI);
- if (!doMergeWithPrevious) MBBI = NI;
- } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
- Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
- PI->getOperand(0).getReg() == StackPtr) {
- Offset -= PI->getOperand(2).getImm();
- MBB.erase(PI);
- if (!doMergeWithPrevious) MBBI = NI;
- }
-
- return Offset;
-}
-
-void X86RegisterInfo::emitCalleeSavedFrameMoves(MachineFunction &MF,
- MCSymbol *Label,
- unsigned FramePtr) const {
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MachineModuleInfo &MMI = MF.getMMI();
-
- // Add callee saved registers to move list.
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
- if (CSI.empty()) return;
-
- std::vector<MachineMove> &Moves = MMI.getFrameMoves();
- const TargetData *TD = MF.getTarget().getTargetData();
- bool HasFP = hasFP(MF);
-
- // Calculate amount of bytes used for return address storing.
- int stackGrowth =
- (MF.getTarget().getFrameInfo()->getStackGrowthDirection() ==
- TargetFrameInfo::StackGrowsUp ?
- TD->getPointerSize() : -TD->getPointerSize());
-
- // FIXME: This is dirty hack. The code itself is pretty mess right now.
- // It should be rewritten from scratch and generalized sometimes.
-
- // Determine maximum offset (minumum due to stack growth).
- int64_t MaxOffset = 0;
- for (std::vector<CalleeSavedInfo>::const_iterator
- I = CSI.begin(), E = CSI.end(); I != E; ++I)
- MaxOffset = std::min(MaxOffset,
- MFI->getObjectOffset(I->getFrameIdx()));
-
- // Calculate offsets.
- int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth;
- for (std::vector<CalleeSavedInfo>::const_iterator
- I = CSI.begin(), E = CSI.end(); I != E; ++I) {
- int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
- unsigned Reg = I->getReg();
- Offset = MaxOffset - Offset + saveAreaOffset;
-
- // Don't output a new machine move if we're re-saving the frame
- // pointer. This happens when the PrologEpilogInserter has inserted an extra
- // "PUSH" of the frame pointer -- the "emitPrologue" method automatically
- // generates one when frame pointers are used. If we generate a "machine
- // move" for this extra "PUSH", the linker will lose track of the fact that
- // the frame pointer should have the value of the first "PUSH" when it's
- // trying to unwind.
- //
- // FIXME: This looks inelegant. It's possibly correct, but it's covering up
- // another bug. I.e., one where we generate a prolog like this:
- //
- // pushl %ebp
- // movl %esp, %ebp
- // pushl %ebp
- // pushl %esi
- // ...
- //
- // The immediate re-push of EBP is unnecessary. At the least, it's an
- // optimization bug. EBP can be used as a scratch register in certain
- // cases, but probably not when we have a frame pointer.
- if (HasFP && FramePtr == Reg)
- continue;
-
- MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
- MachineLocation CSSrc(Reg);
- Moves.push_back(MachineMove(Label, CSDst, CSSrc));
- }
-}
-
-/// emitPrologue - Push callee-saved registers onto the stack, which
-/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
-/// space for local variables. Also emit labels used by the exception handler to
-/// generate the exception handling frames.
-void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
- MachineBasicBlock::iterator MBBI = MBB.begin();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const Function *Fn = MF.getFunction();
- const X86Subtarget *Subtarget = &MF.getTarget().getSubtarget<X86Subtarget>();
- MachineModuleInfo &MMI = MF.getMMI();
- X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- bool needsFrameMoves = MMI.hasDebugInfo() ||
- !Fn->doesNotThrow() || UnwindTablesMandatory;
- uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
- uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
- bool HasFP = hasFP(MF);
- DebugLoc DL;
-
- // If we're forcing a stack realignment we can't rely on just the frame
- // info, we need to know the ABI stack alignment as well in case we
- // have a call out. Otherwise just make sure we have some alignment - we'll
- // go with the minimum SlotSize.
- if (ForceStackAlign) {
- if (MFI->hasCalls())
- MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
- else if (MaxAlign < SlotSize)
- MaxAlign = SlotSize;
- }
-
- // Add RETADDR move area to callee saved frame size.
- int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
- if (TailCallReturnAddrDelta < 0)
- X86FI->setCalleeSavedFrameSize(
- X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
-
- // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
- // function, and use up to 128 bytes of stack space, don't have a frame
- // pointer, calls, or dynamic alloca then we do not need to adjust the
- // stack pointer (we fit in the Red Zone).
- if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
- !needsStackRealignment(MF) &&
- !MFI->hasVarSizedObjects() && // No dynamic alloca.
- !MFI->adjustsStack() && // No calls.
- !Subtarget->isTargetWin64()) { // Win64 has no Red Zone
- uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
- if (HasFP) MinSize += SlotSize;
- StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
- MFI->setStackSize(StackSize);
- } else if (Subtarget->isTargetWin64()) {
- // We need to always allocate 32 bytes as register spill area.
- // FIXME: We might reuse these 32 bytes for leaf functions.
- StackSize += 32;
- MFI->setStackSize(StackSize);
- }
-
- // Insert stack pointer adjustment for later moving of return addr. Only
- // applies to tail call optimized functions where the callee argument stack
- // size is bigger than the callers.
- if (TailCallReturnAddrDelta < 0) {
- MachineInstr *MI =
- BuildMI(MBB, MBBI, DL,
- TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)),
- StackPtr)
- .addReg(StackPtr)
- .addImm(-TailCallReturnAddrDelta);
- MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
- }
-
- // Mapping for machine moves:
- //
- // DST: VirtualFP AND
- // SRC: VirtualFP => DW_CFA_def_cfa_offset
- // ELSE => DW_CFA_def_cfa
- //
- // SRC: VirtualFP AND
- // DST: Register => DW_CFA_def_cfa_register
- //
- // ELSE
- // OFFSET < 0 => DW_CFA_offset_extended_sf
- // REG < 64 => DW_CFA_offset + Reg
- // ELSE => DW_CFA_offset_extended
-
- std::vector<MachineMove> &Moves = MMI.getFrameMoves();
- const TargetData *TD = MF.getTarget().getTargetData();
- uint64_t NumBytes = 0;
- int stackGrowth = -TD->getPointerSize();
-
- if (HasFP) {
- // Calculate required stack adjustment.
- uint64_t FrameSize = StackSize - SlotSize;
- if (needsStackRealignment(MF))
- FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
-
- NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
-
- // Get the offset of the stack slot for the EBP register, which is
- // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
- // Update the frame offset adjustment.
- MFI->setOffsetAdjustment(-NumBytes);
-
- // Save EBP/RBP into the appropriate stack slot.
- BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
- .addReg(FramePtr, RegState::Kill);
-
- if (needsFrameMoves) {
- // Mark the place where EBP/RBP was saved.
- MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
-
- // Define the current CFA rule to use the provided offset.
- if (StackSize) {
- MachineLocation SPDst(MachineLocation::VirtualFP);
- MachineLocation SPSrc(MachineLocation::VirtualFP, 2 * stackGrowth);
- Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
- } else {
- // FIXME: Verify & implement for FP
- MachineLocation SPDst(StackPtr);
- MachineLocation SPSrc(StackPtr, stackGrowth);
- Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
- }
-
- // Change the rule for the FramePtr to be an "offset" rule.
- MachineLocation FPDst(MachineLocation::VirtualFP, 2 * stackGrowth);
- MachineLocation FPSrc(FramePtr);
- Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
- }
-
- // Update EBP with the new base value...
- BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
- .addReg(StackPtr);
-
- if (needsFrameMoves) {
- // Mark effective beginning of when frame pointer becomes valid.
- MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
-
- // Define the current CFA to use the EBP/RBP register.
- MachineLocation FPDst(FramePtr);
- MachineLocation FPSrc(MachineLocation::VirtualFP);
- Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
- }
-
- // Mark the FramePtr as live-in in every block except the entry.
- for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
- I != E; ++I)
- I->addLiveIn(FramePtr);
-
- // Realign stack
- if (needsStackRealignment(MF)) {
- MachineInstr *MI =
- BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri),
- StackPtr).addReg(StackPtr).addImm(-MaxAlign);
-
- // The EFLAGS implicit def is dead.
- MI->getOperand(3).setIsDead();
- }
- } else {
- NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
- }
-
- // Skip the callee-saved push instructions.
- bool PushedRegs = false;
- int StackOffset = 2 * stackGrowth;
-
- while (MBBI != MBB.end() &&
- (MBBI->getOpcode() == X86::PUSH32r ||
- MBBI->getOpcode() == X86::PUSH64r)) {
- PushedRegs = true;
- ++MBBI;
-
- if (!HasFP && needsFrameMoves) {
- // Mark callee-saved push instruction.
- MCSymbol *Label = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
-
- // Define the current CFA rule to use the provided offset.
- unsigned Ptr = StackSize ?
- MachineLocation::VirtualFP : StackPtr;
- MachineLocation SPDst(Ptr);
- MachineLocation SPSrc(Ptr, StackOffset);
- Moves.push_back(MachineMove(Label, SPDst, SPSrc));
- StackOffset += stackGrowth;
- }
- }
-
- DL = MBB.findDebugLoc(MBBI);
-
- // Adjust stack pointer: ESP -= numbytes.
-
- // Windows and cygwin/mingw require a prologue helper routine when allocating
- // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
- // uses __alloca. __alloca and the 32-bit version of __chkstk will probe
- // the stack and adjust the stack pointer in one go. The 64-bit version
- // of __chkstk is only responsible for probing the stack. The 64-bit
- // prologue is responsible for adjusting the stack pointer. Touching the
- // stack at 4K increments is necessary to ensure that the guard pages used
- // by the OS virtual memory manager are allocated in correct sequence.
- if (NumBytes >= 4096 &&
- (Subtarget->isTargetCygMing() || Subtarget->isTargetWin32())) {
- // Check, whether EAX is livein for this function.
- bool isEAXAlive = false;
- for (MachineRegisterInfo::livein_iterator
- II = MF.getRegInfo().livein_begin(),
- EE = MF.getRegInfo().livein_end(); (II != EE) && !isEAXAlive; ++II) {
- unsigned Reg = II->first;
- isEAXAlive = (Reg == X86::EAX || Reg == X86::AX ||
- Reg == X86::AH || Reg == X86::AL);
- }
-
-
- const char *StackProbeSymbol =
- Subtarget->isTargetWindows() ? "_chkstk" : "_alloca";
- if (!isEAXAlive) {
- BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
- .addImm(NumBytes);
- BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
- .addExternalSymbol(StackProbeSymbol)
- .addReg(StackPtr, RegState::Define | RegState::Implicit)
- .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
- } else {
- // Save EAX
- BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
- .addReg(X86::EAX, RegState::Kill);
-
- // Allocate NumBytes-4 bytes on stack. We'll also use 4 already
- // allocated bytes for EAX.
- BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
- .addImm(NumBytes - 4);
- BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
- .addExternalSymbol(StackProbeSymbol)
- .addReg(StackPtr, RegState::Define | RegState::Implicit)
- .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
-
- // Restore EAX
- MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
- X86::EAX),
- StackPtr, false, NumBytes - 4);
- MBB.insert(MBBI, MI);
- }
- } else if (NumBytes) {
- // If there is an SUB32ri of ESP immediately before this instruction, merge
- // the two. This can be the case when tail call elimination is enabled and
- // the callee has more arguments then the caller.
- NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
-
- // If there is an ADD32ri or SUB32ri of ESP immediately after this
- // instruction, merge the two instructions.
- mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
-
- if (NumBytes)
- emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
- }
-
- if ((NumBytes || PushedRegs) && needsFrameMoves) {
- // Mark end of stack pointer adjustment.
- MCSymbol *Label = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
-
- if (!HasFP && NumBytes) {
- // Define the current CFA rule to use the provided offset.
- if (StackSize) {
- MachineLocation SPDst(MachineLocation::VirtualFP);
- MachineLocation SPSrc(MachineLocation::VirtualFP,
- -StackSize + stackGrowth);
- Moves.push_back(MachineMove(Label, SPDst, SPSrc));
- } else {
- // FIXME: Verify & implement for FP
- MachineLocation SPDst(StackPtr);
- MachineLocation SPSrc(StackPtr, stackGrowth);
- Moves.push_back(MachineMove(Label, SPDst, SPSrc));
- }
- }
-
- // Emit DWARF info specifying the offsets of the callee-saved registers.
- if (PushedRegs)
- emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr);
- }
-}
-
-void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- MachineBasicBlock::iterator MBBI = prior(MBB.end());
- unsigned RetOpcode = MBBI->getOpcode();
- DebugLoc DL = MBBI->getDebugLoc();
-
- switch (RetOpcode) {
- default:
- llvm_unreachable("Can only insert epilog into returning blocks");
- case X86::RET:
- case X86::RETI:
- case X86::TCRETURNdi:
- case X86::TCRETURNri:
- case X86::TCRETURNmi:
- case X86::TCRETURNdi64:
- case X86::TCRETURNri64:
- case X86::TCRETURNmi64:
- case X86::EH_RETURN:
- case X86::EH_RETURN64:
- break; // These are ok
- }
-
- // Get the number of bytes to allocate from the FrameInfo.
- uint64_t StackSize = MFI->getStackSize();
- uint64_t MaxAlign = MFI->getMaxAlignment();
- unsigned CSSize = X86FI->getCalleeSavedFrameSize();
- uint64_t NumBytes = 0;
-
- // If we're forcing a stack realignment we can't rely on just the frame
- // info, we need to know the ABI stack alignment as well in case we
- // have a call out. Otherwise just make sure we have some alignment - we'll
- // go with the minimum.
- if (ForceStackAlign) {
- if (MFI->hasCalls())
- MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
- else
- MaxAlign = MaxAlign ? MaxAlign : 4;
- }
-
- if (hasFP(MF)) {
- // Calculate required stack adjustment.
- uint64_t FrameSize = StackSize - SlotSize;
- if (needsStackRealignment(MF))
- FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;
-
- NumBytes = FrameSize - CSSize;
-
- // Pop EBP.
- BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr);
- } else {
- NumBytes = StackSize - CSSize;
- }
-
- // Skip the callee-saved pop instructions.
- MachineBasicBlock::iterator LastCSPop = MBBI;
- while (MBBI != MBB.begin()) {
- MachineBasicBlock::iterator PI = prior(MBBI);
- unsigned Opc = PI->getOpcode();
-
- if (Opc != X86::POP32r && Opc != X86::POP64r &&
- !PI->getDesc().isTerminator())
- break;
-
- --MBBI;
- }
-
- DL = MBBI->getDebugLoc();
-
- // If there is an ADD32ri or SUB32ri of ESP immediately before this
- // instruction, merge the two instructions.
- if (NumBytes || MFI->hasVarSizedObjects())
- mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
-
- // If dynamic alloca is used, then reset esp to point to the last callee-saved
- // slot before popping them off! Same applies for the case, when stack was
- // realigned.
- if (needsStackRealignment(MF)) {
- // We cannot use LEA here, because stack pointer was realigned. We need to
- // deallocate local frame back.
- if (CSSize) {
- emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
- MBBI = prior(LastCSPop);
- }
-
- BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
- StackPtr).addReg(FramePtr);
- } else if (MFI->hasVarSizedObjects()) {
- if (CSSize) {
- unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
- MachineInstr *MI =
- addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
- FramePtr, false, -CSSize);
- MBB.insert(MBBI, MI);
- } else {
- BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr)
- .addReg(FramePtr);
- }
- } else if (NumBytes) {
- // Adjust stack pointer back: ESP += numbytes.
- emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
- }
-
- // We're returning from function via eh_return.
- if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
- MBBI = prior(MBB.end());
- MachineOperand &DestAddr = MBBI->getOperand(0);
- assert(DestAddr.isReg() && "Offset should be in register!");
- BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
- StackPtr).addReg(DestAddr.getReg());
- } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
- RetOpcode == X86::TCRETURNmi ||
- RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 ||
- RetOpcode == X86::TCRETURNmi64) {
- bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
- // Tail call return: adjust the stack pointer and jump to callee.
- MBBI = prior(MBB.end());
- MachineOperand &JumpTarget = MBBI->getOperand(0);
- MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
- assert(StackAdjust.isImm() && "Expecting immediate value.");
-
- // Adjust stack pointer.
- int StackAdj = StackAdjust.getImm();
- int MaxTCDelta = X86FI->getTCReturnAddrDelta();
- int Offset = 0;
- assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
-
- // Incoporate the retaddr area.
- Offset = StackAdj-MaxTCDelta;
- assert(Offset >= 0 && "Offset should never be negative");
-
- if (Offset) {
- // Check for possible merge with preceeding ADD instruction.
- Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
- emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII);
- }
-
- // Jump to label or value in register.
- if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
- BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
- ? X86::TAILJMPd : X86::TAILJMPd64)).
- addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
- JumpTarget.getTargetFlags());
- } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
- ? X86::TAILJMPm : X86::TAILJMPm64));
- for (unsigned i = 0; i != 5; ++i)
- MIB.addOperand(MBBI->getOperand(i));
- } else if (RetOpcode == X86::TCRETURNri64) {
- BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)).
- addReg(JumpTarget.getReg(), RegState::Kill);
- } else {
- BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)).
- addReg(JumpTarget.getReg(), RegState::Kill);
- }
-
- MachineInstr *NewMI = prior(MBBI);
- for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
- NewMI->addOperand(MBBI->getOperand(i));
-
- // Delete the pseudo instruction TCRETURN.
- MBB.erase(MBBI);
- } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) &&
- (X86FI->getTCReturnAddrDelta() < 0)) {
- // Add the return addr area delta back since we are not tail calling.
- int delta = -1*X86FI->getTCReturnAddrDelta();
- MBBI = prior(MBB.end());
-
- // Check for possible merge with preceeding ADD instruction.
- delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
- emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII);
- }
-}
-
unsigned X86RegisterInfo::getRARegister() const {
return Is64Bit ? X86::RIP // Should have dwarf #16.
: X86::EIP; // Should have dwarf #8.
}
unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- return hasFP(MF) ? FramePtr : StackPtr;
-}
-
-void
-X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const {
- // Calculate amount of bytes used for return address storing
- int stackGrowth = (Is64Bit ? -8 : -4);
-
- // Initial state of the frame pointer is esp+stackGrowth.
- MachineLocation Dst(MachineLocation::VirtualFP);
- MachineLocation Src(StackPtr, stackGrowth);
- Moves.push_back(MachineMove(0, Dst, Src));
-
- // Add return address to move list
- MachineLocation CSDst(StackPtr, stackGrowth);
- MachineLocation CSSrc(getRARegister());
- Moves.push_back(MachineMove(0, CSDst, CSSrc));
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ return TFI->hasFP(MF) ? FramePtr : StackPtr;
}
unsigned X86RegisterInfo::getEHExceptionRegister() const {
@@ -1579,13 +820,13 @@ namespace {
// Be over-conservative: scan over all vreg defs and find whether vector
// registers are used. If yes, there is a possibility that vector register
// will be spilled and thus require dynamic stack realignment.
- for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister;
- RegNum < RI.getLastVirtReg(); ++RegNum)
- if (RI.getRegClass(RegNum)->getAlignment() > StackAlignment) {
+ for (unsigned i = 0, e = RI.getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (RI.getRegClass(Reg)->getAlignment() > StackAlignment) {
FuncInfo->setReserveFP(true);
return true;
}
-
+ }
// Nothing to do
return false;
}
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 527df05..064be64 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -111,14 +111,10 @@ public:
/// register scavenger to determine what registers are free.
BitVector getReservedRegs(const MachineFunction &MF) const;
- bool hasFP(const MachineFunction &MF) const;
-
bool canRealignStack(const MachineFunction &MF) const;
bool needsStackRealignment(const MachineFunction &MF) const;
- bool hasReservedCallFrame(const MachineFunction &MF) const;
-
bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
int &FrameIdx) const;
@@ -129,19 +125,12 @@ public:
void eliminateFrameIndex(MachineBasicBlock::iterator MI,
int SPAdj, RegScavenger *RS = NULL) const;
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS = NULL) const;
-
- void emitCalleeSavedFrameMoves(MachineFunction &MF, MCSymbol *Label,
- unsigned FramePtr) const;
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
// Debug information queries.
unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
- int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
- void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+ unsigned getStackRegister() const { return StackPtr; }
+ // FIXME: Move to FrameInfok
+ unsigned getSlotSize() const { return SlotSize; }
// Exception handling queries.
unsigned getEHExceptionRegister() const;
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 95269b1..612fac2 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -1,10 +1,10 @@
//===- X86RegisterInfo.td - Describe the X86 Register File --*- tablegen -*-==//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 Register file, defining the registers themselves,
@@ -34,8 +34,8 @@ let Namespace = "X86" in {
// because the register file generator is smart enough to figure out that
// AL aliases AX if we tell it that AX aliased AL (for example).
- // Dwarf numbering is different for 32-bit and 64-bit, and there are
- // variations by target as well. Currently the first entry is for X86-64,
+ // Dwarf numbering is different for 32-bit and 64-bit, and there are
+ // variations by target as well. Currently the first entry is for X86-64,
// second - for EH on X86-32/Darwin and third is 'generic' one (X86-32/Linux
// and debug information on X86-32/Darwin)
@@ -81,7 +81,7 @@ let Namespace = "X86" in {
def SP : RegisterWithSubRegs<"sp", [SPL]>, DwarfRegNum<[7, 5, 4]>;
}
def IP : Register<"ip">, DwarfRegNum<[16]>;
-
+
// X86-64 only
let SubRegIndices = [sub_8bit] in {
def R8W : RegisterWithSubRegs<"r8w", [R8B]>, DwarfRegNum<[8, -2, -2]>;
@@ -103,8 +103,8 @@ let Namespace = "X86" in {
def EDI : RegisterWithSubRegs<"edi", [DI]>, DwarfRegNum<[5, 7, 7]>;
def EBP : RegisterWithSubRegs<"ebp", [BP]>, DwarfRegNum<[6, 4, 5]>;
def ESP : RegisterWithSubRegs<"esp", [SP]>, DwarfRegNum<[7, 5, 4]>;
- def EIP : RegisterWithSubRegs<"eip", [IP]>, DwarfRegNum<[16, 8, 8]>;
-
+ def EIP : RegisterWithSubRegs<"eip", [IP]>, DwarfRegNum<[16, 8, 8]>;
+
// X86-64 only
def R8D : RegisterWithSubRegs<"r8d", [R8W]>, DwarfRegNum<[8, -2, -2]>;
def R9D : RegisterWithSubRegs<"r9d", [R9W]>, DwarfRegNum<[9, -2, -2]>;
@@ -208,7 +208,7 @@ let Namespace = "X86" in {
def ST4 : Register<"st(4)">, DwarfRegNum<[37, 16, 15]>;
def ST5 : Register<"st(5)">, DwarfRegNum<[38, 17, 16]>;
def ST6 : Register<"st(6)">, DwarfRegNum<[39, 18, 17]>;
- def ST7 : Register<"st(7)">, DwarfRegNum<[40, 19, 18]>;
+ def ST7 : Register<"st(7)">, DwarfRegNum<[40, 19, 18]>;
// Status flags register
def EFLAGS : Register<"flags">;
@@ -220,7 +220,7 @@ let Namespace = "X86" in {
def ES : Register<"es">;
def FS : Register<"fs">;
def GS : Register<"gs">;
-
+
// Debug registers
def DR0 : Register<"dr0">;
def DR1 : Register<"dr1">;
@@ -230,8 +230,8 @@ let Namespace = "X86" in {
def DR5 : Register<"dr5">;
def DR6 : Register<"dr6">;
def DR7 : Register<"dr7">;
-
- // Condition registers
+
+ // Control registers
def CR0 : Register<"cr0">;
def CR1 : Register<"cr1">;
def CR2 : Register<"cr2">;
@@ -241,6 +241,13 @@ let Namespace = "X86" in {
def CR6 : Register<"cr6">;
def CR7 : Register<"cr7">;
def CR8 : Register<"cr8">;
+ def CR9 : Register<"cr9">;
+ def CR10 : Register<"cr10">;
+ def CR11 : Register<"cr11">;
+ def CR12 : Register<"cr12">;
+ def CR13 : Register<"cr13">;
+ def CR14 : Register<"cr14">;
+ def CR15 : Register<"cr15">;
// Pseudo index registers
def EIZ : Register<"eiz">;
@@ -254,10 +261,10 @@ let Namespace = "X86" in {
// implicitly defined to be the register allocation order.
//
-// List call-clobbered registers before callee-save registers. RBX, RBP, (and
+// List call-clobbered registers before callee-save registers. RBX, RBP, (and
// R12, R13, R14, and R15 for X86-64) are callee-save registers.
// In 64-mode, there are 12 additional i8 registers, SIL, DIL, BPL, SPL, and
-// R8B, ... R15B.
+// R8B, ... R15B.
// Allocate R12 and R13 last, as these require an extra byte when
// encoded in x86_64 instructions.
// FIXME: Allow AH, CH, DH, BH to be used as general-purpose registers in
@@ -292,14 +299,14 @@ def GR8 : RegisterClass<"X86", [i8], 8,
GR8Class::iterator
GR8Class::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
// Does the function dedicate RBP / EBP to being a frame ptr?
if (!Subtarget.is64Bit())
// In 32-mode, none of the 8-bit registers aliases EBP or ESP.
return begin() + 8;
- else if (RI->hasFP(MF) || MFI->getReserveFP())
+ else if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate SPL or BPL.
return array_endof(X86_GR8_AO_64) - 1;
else
@@ -337,12 +344,12 @@ def GR16 : RegisterClass<"X86", [i16], 16,
GR16Class::iterator
GR16Class::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
if (Subtarget.is64Bit()) {
// Does the function dedicate RBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate SP or BP.
return array_endof(X86_GR16_AO_64) - 1;
else
@@ -350,7 +357,7 @@ def GR16 : RegisterClass<"X86", [i16], 16,
return array_endof(X86_GR16_AO_64);
} else {
// Does the function dedicate EBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate SP or BP.
return begin() + 6;
else
@@ -389,12 +396,12 @@ def GR32 : RegisterClass<"X86", [i32], 32,
GR32Class::iterator
GR32Class::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
if (Subtarget.is64Bit()) {
// Does the function dedicate RBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate ESP or EBP.
return array_endof(X86_GR32_AO_64) - 1;
else
@@ -402,7 +409,7 @@ def GR32 : RegisterClass<"X86", [i32], 32,
return array_endof(X86_GR32_AO_64);
} else {
// Does the function dedicate EBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate ESP or EBP.
return begin() + 6;
else
@@ -429,13 +436,13 @@ def GR64 : RegisterClass<"X86", [i64], 64,
GR64Class::iterator
GR64Class::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
if (!Subtarget.is64Bit())
return begin(); // None of these are allocatable in 32-bit.
// Does the function dedicate RBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
return end()-3; // If so, don't allocate RIP, RSP or RBP
else
return end()-2; // If not, just don't allocate RIP or RSP
@@ -446,18 +453,16 @@ def GR64 : RegisterClass<"X86", [i64], 64,
// Segment registers for use by MOV instructions (and others) that have a
// segment register as one operand. Always contain a 16-bit segment
// descriptor.
-def SEGMENT_REG : RegisterClass<"X86", [i16], 16, [CS, DS, SS, ES, FS, GS]> {
-}
+def SEGMENT_REG : RegisterClass<"X86", [i16], 16, [CS, DS, SS, ES, FS, GS]>;
// Debug registers.
def DEBUG_REG : RegisterClass<"X86", [i32], 32,
- [DR0, DR1, DR2, DR3, DR4, DR5, DR6, DR7]> {
-}
+ [DR0, DR1, DR2, DR3, DR4, DR5, DR6, DR7]>;
// Control registers.
def CONTROL_REG : RegisterClass<"X86", [i64], 64,
- [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7, CR8]> {
-}
+ [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7, CR8,
+ CR9, CR10, CR11, CR12, CR13, CR14, CR15]>;
// GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of
// GR8, GR16, GR32, and GR64 which contain just the "a" "b", "c", and "d"
@@ -465,10 +470,8 @@ def CONTROL_REG : RegisterClass<"X86", [i64], 64,
// that support 8-bit subreg operations. On x86-64, GR16_ABCD, GR32_ABCD,
// and GR64_ABCD are classes for registers that support 8-bit h-register
// operations.
-def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]> {
-}
-def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, [AH, CH, DH, BH]> {
-}
+def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]>;
+def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, [AH, CH, DH, BH]>;
def GR16_ABCD : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]> {
let SubRegClasses = [(GR8_ABCD_L sub_8bit), (GR8_ABCD_H sub_8bit_hi)];
}
@@ -493,6 +496,9 @@ def GR64_TC : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI,
(GR32_TC sub_32bit)];
}
+def GR64_TCW64 : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX,
+ R8, R9, R11]>;
+
// GR8_NOREX - GR8 registers which do not require a REX prefix.
def GR8_NOREX : RegisterClass<"X86", [i8], 8,
[AL, CL, DL, AH, CH, DH, BL, BH]> {
@@ -538,10 +544,10 @@ def GR16_NOREX : RegisterClass<"X86", [i16], 16,
GR16_NOREXClass::iterator
GR16_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
// Does the function dedicate RBP / EBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate SP or BP.
return end() - 2;
else
@@ -562,10 +568,10 @@ def GR32_NOREX : RegisterClass<"X86", [i32], 32,
GR32_NOREXClass::iterator
GR32_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
// Does the function dedicate RBP / EBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate ESP or EBP.
return end() - 2;
else
@@ -587,10 +593,10 @@ def GR64_NOREX : RegisterClass<"X86", [i64], 64,
GR64_NOREXClass::iterator
GR64_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
// Does the function dedicate RBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate RIP, RSP or RBP.
return end() - 3;
else
@@ -629,12 +635,12 @@ def GR32_NOSP : RegisterClass<"X86", [i32], 32,
GR32_NOSPClass::iterator
GR32_NOSPClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
if (Subtarget.is64Bit()) {
// Does the function dedicate RBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate EBP.
return array_endof(X86_GR32_NOSP_AO_64) - 1;
else
@@ -642,7 +648,7 @@ def GR32_NOSP : RegisterClass<"X86", [i32], 32,
return array_endof(X86_GR32_NOSP_AO_64);
} else {
// Does the function dedicate EBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate EBP.
return begin() + 6;
else
@@ -667,13 +673,13 @@ def GR64_NOSP : RegisterClass<"X86", [i64], 64,
GR64_NOSPClass::iterator
GR64_NOSPClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
if (!Subtarget.is64Bit())
return begin(); // None of these are allocatable in 32-bit.
// Does the function dedicate RBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
return end()-1; // If so, don't allocate RBP
else
return end(); // If not, any reg in this class is ok.
@@ -695,10 +701,10 @@ def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
GR64_NOREX_NOSPClass::allocation_order_end(const MachineFunction &MF) const
{
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
// Does the function dedicate RBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate RBP.
return end() - 1;
else
@@ -784,7 +790,7 @@ def RST : RegisterClass<"X86", [f80, f64, f32], 32,
}
// Generic vector registers: VR64 and VR128.
-def VR64 : RegisterClass<"X86", [v8i8, v4i16, v2i32, v1i64], 64,
+def VR64: RegisterClass<"X86", [x86mmx], 64,
[MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7]>;
def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index 6297a27..42e8193 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -32,10 +32,13 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
SDValue Dst, SDValue Src,
SDValue Size, unsigned Align,
bool isVolatile,
- const Value *DstSV,
- uint64_t DstSVOff) const {
+ MachinePointerInfo DstPtrInfo) const {
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ // If to a segment-relative address space, use the default lowering.
+ if (DstPtrInfo.getAddrSpace() >= 256)
+ return SDValue();
+
// If not DWORD aligned or size is more than the threshold, call the library.
// The libc version is likely to be faster for these cases. It can use the
// address value and run time information about the CPU.
@@ -133,7 +136,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
Dst, InFlag);
InFlag = Chain.getValue(1);
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
@@ -147,7 +150,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
X86::ECX,
Left, InFlag);
InFlag = Chain.getValue(1);
- Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag };
Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
} else if (BytesLeft) {
@@ -161,7 +164,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
DAG.getConstant(Offset, AddrVT)),
Src,
DAG.getConstant(BytesLeft, SizeVT),
- Align, isVolatile, DstSV, DstSVOff + Offset);
+ Align, isVolatile, DstPtrInfo.getWithOffset(Offset));
}
// TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
@@ -173,10 +176,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
SDValue Chain, SDValue Dst, SDValue Src,
SDValue Size, unsigned Align,
bool isVolatile, bool AlwaysInline,
- const Value *DstSV,
- uint64_t DstSVOff,
- const Value *SrcSV,
- uint64_t SrcSVOff) const {
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const {
// This requires the copy size to be a constant, preferrably
// within a subtarget-specific limit.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
@@ -186,14 +187,29 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
return SDValue();
- /// If not DWORD aligned, call the library.
- if ((Align & 3) != 0)
+ /// If not DWORD aligned, it is more efficient to call the library. However
+ /// if calling the library is not allowed (AlwaysInline), then soldier on as
+ /// the code generated here is better than the long load-store sequence we
+ /// would otherwise get.
+ if (!AlwaysInline && (Align & 3) != 0)
+ return SDValue();
+
+ // If to a segment-relative address space, use the default lowering.
+ if (DstPtrInfo.getAddrSpace() >= 256 ||
+ SrcPtrInfo.getAddrSpace() >= 256)
return SDValue();
- // DWORD aligned
- EVT AVT = MVT::i32;
- if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned
- AVT = MVT::i64;
+ MVT AVT;
+ if (Align & 1)
+ AVT = MVT::i8;
+ else if (Align & 2)
+ AVT = MVT::i16;
+ else if (Align & 4)
+ // DWORD aligned
+ AVT = MVT::i32;
+ else
+ // QWORD aligned
+ AVT = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
unsigned UBytes = AVT.getSizeInBits() / 8;
unsigned CountVal = SizeVal / UBytes;
@@ -214,7 +230,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
Src, InFlag);
InFlag = Chain.getValue(1);
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops,
array_lengthof(Ops));
@@ -234,8 +250,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
DAG.getConstant(Offset, SrcVT)),
DAG.getConstant(BytesLeft, SizeVT),
Align, isVolatile, AlwaysInline,
- DstSV, DstSVOff + Offset,
- SrcSV, SrcSVOff + Offset));
+ DstPtrInfo.getWithOffset(Offset),
+ SrcPtrInfo.getWithOffset(Offset)));
}
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
diff --git a/lib/Target/X86/X86SelectionDAGInfo.h b/lib/Target/X86/X86SelectionDAGInfo.h
index 4f30f31..d1d66fe 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.h
+++ b/lib/Target/X86/X86SelectionDAGInfo.h
@@ -39,8 +39,7 @@ public:
SDValue Dst, SDValue Src,
SDValue Size, unsigned Align,
bool isVolatile,
- const Value *DstSV,
- uint64_t DstSVOff) const;
+ MachinePointerInfo DstPtrInfo) const;
virtual
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
@@ -48,10 +47,8 @@ public:
SDValue Dst, SDValue Src,
SDValue Size, unsigned Align,
bool isVolatile, bool AlwaysInline,
- const Value *DstSV,
- uint64_t DstSVOff,
- const Value *SrcSV,
- uint64_t SrcSVOff) const;
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const;
};
}
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 0d02e5e..de76856 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -1,4 +1,4 @@
-//===-- X86Subtarget.cpp - X86 Subtarget Information ------------*- C++ -*-===//
+//===-- X86Subtarget.cpp - X86 Subtarget Information ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,7 +18,7 @@
#include "llvm/GlobalValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Host.h"
+#include "llvm/Support/Host.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/SmallVector.h"
@@ -256,13 +256,14 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
if ((ECX >> 9) & 1) X86SSELevel = SSSE3;
if ((ECX >> 19) & 1) X86SSELevel = SSE41;
if ((ECX >> 20) & 1) X86SSELevel = SSE42;
+ // FIXME: AVX codegen support is not ready.
+ //if ((ECX >> 28) & 1) { HasAVX = true; X86SSELevel = NoMMXSSE; }
bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
HasCLMUL = IsIntel && ((ECX >> 1) & 0x1);
HasFMA3 = IsIntel && ((ECX >> 12) & 0x1);
- HasAVX = ((ECX >> 28) & 0x1);
HasAES = IsIntel && ((ECX >> 25) & 0x1);
if (IsIntel || IsAMD) {
@@ -289,6 +290,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
, X863DNowLevel(NoThreeDNow)
, HasCMov(false)
, HasX86_64(false)
+ , HasPOPCNT(false)
, HasSSE4A(false)
, HasAVX(false)
, HasAES(false)
@@ -315,11 +317,13 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
ParseSubtargetFeatures(FS, CPU);
// All X86-64 CPUs also have SSE2, however user might request no SSE via
// -mattr, so don't force SSELevel here.
+ if (HasAVX)
+ X86SSELevel = NoMMXSSE;
} else {
// Otherwise, use CPUID to auto-detect feature set.
AutoDetectSubtargetFeatures();
// Make sure SSE2 is enabled; it is available on all X86-64 CPUs.
- if (Is64Bit && X86SSELevel < SSE2)
+ if (Is64Bit && !HasAVX && X86SSELevel < SSE2)
X86SSELevel = SSE2;
}
@@ -338,9 +342,9 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
assert((!Is64Bit || HasX86_64) &&
"64-bit code requested on a subtarget that doesn't support it!");
- // Stack alignment is 16 bytes on Darwin (both 32 and 64 bit) and for all 64
- // bit targets.
- if (isTargetDarwin() || Is64Bit)
+ // Stack alignment is 16 bytes on Darwin and Linux (both 32 and 64 bit) and
+ // for all 64-bit targets.
+ if (isTargetDarwin() || isTargetLinux() || Is64Bit)
stackAlignment = 16;
if (StackAlignment)
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 0ee91ab..8a119b4 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -65,6 +65,9 @@ protected:
///
bool HasX86_64;
+ /// HasPOPCNT - True if the processor supports POPCNT.
+ bool HasPOPCNT;
+
/// HasSSE4A - True if the processor supports SSE4A instructions.
bool HasSSE4A;
@@ -100,7 +103,7 @@ protected:
/// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops.
///
unsigned MaxInlineSizeThreshold;
-
+
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
@@ -150,7 +153,10 @@ public:
bool hasSSE4A() const { return HasSSE4A; }
bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
+ bool hasPOPCNT() const { return HasPOPCNT; }
bool hasAVX() const { return HasAVX; }
+ bool hasXMM() const { return hasSSE1() || hasAVX(); }
+ bool hasXMMInt() const { return hasSSE2() || hasAVX(); }
bool hasAES() const { return HasAES; }
bool hasCLMUL() const { return HasCLMUL; }
bool hasFMA3() const { return HasFMA3; }
@@ -160,23 +166,21 @@ public:
bool hasVectorUAMem() const { return HasVectorUAMem; }
bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; }
-
+
// ELF is a reasonably sane default and the only other X86 targets we
// support are Darwin and Windows. Just use "not those".
- bool isTargetELF() const {
+ bool isTargetELF() const {
return !isTargetDarwin() && !isTargetWindows() && !isTargetCygMing();
}
bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
bool isTargetWindows() const { return TargetTriple.getOS() == Triple::Win32; }
- bool isTargetMingw() const {
- return TargetTriple.getOS() == Triple::MinGW32 ||
- TargetTriple.getOS() == Triple::MinGW64; }
+ bool isTargetMingw() const { return TargetTriple.getOS() == Triple::MinGW32; }
bool isTargetCygwin() const { return TargetTriple.getOS() == Triple::Cygwin; }
bool isTargetCygMing() const {
return isTargetMingw() || isTargetCygwin();
}
-
+
/// isTargetCOFF - Return true if this is any COFF/Windows target variant.
bool isTargetCOFF() const {
return isTargetMingw() || isTargetCygwin() || isTargetWindows();
@@ -186,22 +190,12 @@ public:
return Is64Bit && (isTargetMingw() || isTargetWindows());
}
- bool isTargetWin32() const {
- return !Is64Bit && (isTargetMingw() || isTargetWindows());
+ bool isTargetEnvMacho() const {
+ return isTargetDarwin() || (TargetTriple.getEnvironment() == Triple::MachO);
}
- std::string getDataLayout() const {
- const char *p;
- if (is64Bit())
- p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64";
- else if (isTargetDarwin())
- p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32";
- else if (isTargetMingw() || isTargetWindows())
- p = "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-n8:16:32";
- else
- p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32";
-
- return std::string(p);
+ bool isTargetWin32() const {
+ return !Is64Bit && (isTargetMingw() || isTargetWindows());
}
bool isPICStyleSet() const { return PICStyle != PICStyles::None; }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index ce8636eb..889c824 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -30,10 +30,12 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
case Triple::Darwin:
return new X86MCAsmInfoDarwin(TheTriple);
case Triple::MinGW32:
- case Triple::MinGW64:
case Triple::Cygwin:
case Triple::Win32:
- return new X86MCAsmInfoCOFF(TheTriple);
+ if (TheTriple.getEnvironment() == Triple::MachO)
+ return new X86MCAsmInfoDarwin(TheTriple);
+ else
+ return new X86MCAsmInfoCOFF(TheTriple);
default:
return new X86ELFMCAsmInfo(TheTriple);
}
@@ -43,22 +45,25 @@ static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
MCContext &Ctx, TargetAsmBackend &TAB,
raw_ostream &_OS,
MCCodeEmitter *_Emitter,
- bool RelaxAll) {
+ bool RelaxAll,
+ bool NoExecStack) {
Triple TheTriple(TT);
switch (TheTriple.getOS()) {
case Triple::Darwin:
return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
case Triple::MinGW32:
- case Triple::MinGW64:
case Triple::Cygwin:
case Triple::Win32:
- return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll);
+ if (TheTriple.getEnvironment() == Triple::MachO)
+ return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
+ else
+ return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll);
default:
- return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
+ return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack);
}
}
-extern "C" void LLVMInitializeX86Target() {
+extern "C" void LLVMInitializeX86Target() {
// Register the target.
RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target);
RegisterTargetMachine<X86_64TargetMachine> Y(TheX86_64Target);
@@ -89,28 +94,38 @@ extern "C" void LLVMInitializeX86Target() {
X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT,
const std::string &FS)
- : X86TargetMachine(T, TT, FS, false) {
+ : X86TargetMachine(T, TT, FS, false),
+ DataLayout(getSubtargetImpl()->isTargetDarwin() ?
+ "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32" :
+ (getSubtargetImpl()->isTargetCygMing() ||
+ getSubtargetImpl()->isTargetWindows()) ?
+ "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-n8:16:32" :
+ "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32"),
+ InstrInfo(*this),
+ TSInfo(*this),
+ TLInfo(*this),
+ JITInfo(*this) {
}
X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT,
const std::string &FS)
- : X86TargetMachine(T, TT, FS, true) {
+ : X86TargetMachine(T, TT, FS, true),
+ DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64"),
+ InstrInfo(*this),
+ TSInfo(*this),
+ TLInfo(*this),
+ JITInfo(*this) {
}
/// X86TargetMachine ctor - Create an X86 target.
///
-X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
+X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
const std::string &FS, bool is64Bit)
- : LLVMTargetMachine(T, TT),
+ : LLVMTargetMachine(T, TT),
Subtarget(TT, FS, is64Bit),
- DataLayout(Subtarget.getDataLayout()),
- FrameInfo(TargetFrameInfo::StackGrowsDown,
- Subtarget.getStackAlignment(),
- (Subtarget.isTargetWin64() ? -40 :
- (Subtarget.is64Bit() ? -8 : -4))),
- InstrInfo(*this), JITInfo(*this), TLInfo(*this), TSInfo(*this),
- ELFWriterInfo(*this) {
+ FrameLowering(*this, Subtarget),
+ ELFWriterInfo(is64Bit, true) {
DefRelocModel = getRelocationModel();
// If no relocation model was picked, default as appropriate for the target.
@@ -217,12 +232,12 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
JITCodeEmitter &JCE) {
// FIXME: Move this to TargetJITInfo!
// On Darwin, do not override 64-bit setting made in X86TargetMachine().
- if (DefRelocModel == Reloc::Default &&
+ if (DefRelocModel == Reloc::Default &&
(!Subtarget.isTargetDarwin() || !Subtarget.is64Bit())) {
setRelocationModel(Reloc::Static);
Subtarget.setPICStyle(PICStyles::None);
}
-
+
PM.add(createX86JITCodeEmitterPass(*this, JCE));
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index f9fb424..5973922 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -14,16 +14,17 @@
#ifndef X86TARGETMACHINE_H
#define X86TARGETMACHINE_H
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
#include "X86.h"
#include "X86ELFWriterInfo.h"
#include "X86InstrInfo.h"
-#include "X86JITInfo.h"
-#include "X86Subtarget.h"
#include "X86ISelLowering.h"
+#include "X86FrameLowering.h"
+#include "X86JITInfo.h"
#include "X86SelectionDAGInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
@@ -31,12 +32,7 @@ class formatted_raw_ostream;
class X86TargetMachine : public LLVMTargetMachine {
X86Subtarget Subtarget;
- const TargetData DataLayout; // Calculates type size & alignment
- TargetFrameInfo FrameInfo;
- X86InstrInfo InstrInfo;
- X86JITInfo JITInfo;
- X86TargetLowering TLInfo;
- X86SelectionDAGInfo TSInfo;
+ X86FrameLowering FrameLowering;
X86ELFWriterInfo ELFWriterInfo;
Reloc::Model DefRelocModel; // Reloc model before it's overridden.
@@ -49,20 +45,25 @@ public:
X86TargetMachine(const Target &T, const std::string &TT,
const std::string &FS, bool is64Bit);
- virtual const X86InstrInfo *getInstrInfo() const { return &InstrInfo; }
- virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
- virtual X86JITInfo *getJITInfo() { return &JITInfo; }
+ virtual const X86InstrInfo *getInstrInfo() const {
+ llvm_unreachable("getInstrInfo not implemented");
+ }
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual X86JITInfo *getJITInfo() {
+ llvm_unreachable("getJITInfo not implemented");
+ }
virtual const X86Subtarget *getSubtargetImpl() const{ return &Subtarget; }
- virtual const X86TargetLowering *getTargetLowering() const {
- return &TLInfo;
+ virtual const X86TargetLowering *getTargetLowering() const {
+ llvm_unreachable("getTargetLowering not implemented");
}
virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
- return &TSInfo;
+ llvm_unreachable("getSelectionDAGInfo not implemented");
}
virtual const X86RegisterInfo *getRegisterInfo() const {
- return &InstrInfo.getRegisterInfo();
+ return &getInstrInfo()->getRegisterInfo();
}
- virtual const TargetData *getTargetData() const { return &DataLayout; }
virtual const X86ELFWriterInfo *getELFWriterInfo() const {
return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
}
@@ -79,17 +80,53 @@ public:
/// X86_32TargetMachine - X86 32-bit target machine.
///
class X86_32TargetMachine : public X86TargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment
+ X86InstrInfo InstrInfo;
+ X86SelectionDAGInfo TSInfo;
+ X86TargetLowering TLInfo;
+ X86JITInfo JITInfo;
public:
X86_32TargetMachine(const Target &T, const std::string &M,
const std::string &FS);
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const X86TargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+ virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const X86InstrInfo *getInstrInfo() const {
+ return &InstrInfo;
+ }
+ virtual X86JITInfo *getJITInfo() {
+ return &JITInfo;
+ }
};
/// X86_64TargetMachine - X86 64-bit target machine.
///
class X86_64TargetMachine : public X86TargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment
+ X86InstrInfo InstrInfo;
+ X86SelectionDAGInfo TSInfo;
+ X86TargetLowering TLInfo;
+ X86JITInfo JITInfo;
public:
X86_64TargetMachine(const Target &T, const std::string &TT,
const std::string &FS);
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const X86TargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+ virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const X86InstrInfo *getInstrInfo() const {
+ return &InstrInfo;
+ }
+ virtual X86JITInfo *getJITInfo() {
+ return &JITInfo;
+ }
};
} // End llvm namespace
OpenPOWER on IntegriCloud