| author | dim <dim@FreeBSD.org> | 2011-02-20 12:57:14 +0000 |
|---|---|---|
| committer | dim <dim@FreeBSD.org> | 2011-02-20 12:57:14 +0000 |
| commit | cbb70ce070d220642b038ea101d9c0f9fbf860d6 (patch) | |
| tree | d2b61ce94e654cb01a254d2195259db5f9cc3f3c /lib/Target/X86 | |
| parent | 4ace901e87dac5bbbac78ed325e75462e48e386e (diff) | |
| download | FreeBSD-src-cbb70ce070d220642b038ea101d9c0f9fbf860d6.zip FreeBSD-src-cbb70ce070d220642b038ea101d9c0f9fbf860d6.tar.gz | |
Vendor import of llvm trunk r126079:
http://llvm.org/svn/llvm-project/llvm/trunk@126079
Diffstat (limited to 'lib/Target/X86')
76 files changed, 11446 insertions, 10602 deletions
diff --git a/lib/Target/X86/AsmParser/X86AsmLexer.cpp b/lib/Target/X86/AsmParser/X86AsmLexer.cpp index 26797ab..ec73087 100644 --- a/lib/Target/X86/AsmParser/X86AsmLexer.cpp +++ b/lib/Target/X86/AsmParser/X86AsmLexer.cpp @@ -65,9 +65,10 @@ public: } }; -} +} // end anonymous namespace -static unsigned MatchRegisterName(StringRef Name); +#define GET_REGISTER_MATCHER +#include "X86GenAsmMatcher.inc" AsmToken X86AsmLexer::LexTokenATT() { AsmToken lexedToken = lexDefinite(); @@ -162,7 +163,3 @@ extern "C" void LLVMInitializeX86AsmLexer() { RegisterAsmLexer<X86AsmLexer> X(TheX86_32Target); RegisterAsmLexer<X86AsmLexer> Y(TheX86_64Target); } - -#define REGISTERS_ONLY -#include "X86GenAsmMatcher.inc" -#undef REGISTERS_ONLY diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index f8588d8..1cac07a 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -10,20 +10,21 @@ #include "llvm/Target/TargetAsmParser.h" #include "X86.h" #include "X86Subtarget.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Twine.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Target/TargetAsmParser.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/Target/TargetAsmParser.h" using namespace llvm; namespace { @@ -43,35 +44,32 @@ private: bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } - bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); - X86Operand *ParseOperand(); X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); bool ParseDirectiveWord(unsigned Size, SMLoc L); - bool MatchInstruction(SMLoc IDLoc, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCInst &Inst); + bool MatchAndEmitInstruction(SMLoc IDLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands, + MCStreamer &Out); /// @name Auto-generated Matcher Functions /// { - unsigned ComputeAvailableFeatures(const X86Subtarget *Subtarget) const; - - bool MatchInstructionImpl( - const SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCInst &Inst); +#define GET_ASSEMBLER_HEADER +#include "X86GenAsmMatcher.inc" /// } public: - X86ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM) - : TargetAsmParser(T), Parser(_Parser), TM(TM) { + X86ATTAsmParser(const Target &T, MCAsmParser &parser, TargetMachine &TM) + : TargetAsmParser(T), Parser(parser), TM(TM) { // Initialize the set of available features. 
setAvailableFeatures(ComputeAvailableFeatures( &TM.getSubtarget<X86Subtarget>())); } + virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands); @@ -81,16 +79,16 @@ public: class X86_32ATTAsmParser : public X86ATTAsmParser { public: - X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM) - : X86ATTAsmParser(T, _Parser, TM) { + X86_32ATTAsmParser(const Target &T, MCAsmParser &Parser, TargetMachine &TM) + : X86ATTAsmParser(T, Parser, TM) { Is64Bit = false; } }; class X86_64ATTAsmParser : public X86ATTAsmParser { public: - X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM) - : X86ATTAsmParser(T, _Parser, TM) { + X86_64ATTAsmParser(const Target &T, MCAsmParser &Parser, TargetMachine &TM) + : X86ATTAsmParser(T, Parser, TM) { Is64Bit = true; } }; @@ -375,14 +373,18 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, // validation later, so maybe there is no need for this here. RegNo = MatchRegisterName(Tok.getString()); + // If the match failed, try the register name as lowercase. + if (RegNo == 0) + RegNo = MatchRegisterName(LowercaseString(Tok.getString())); + // FIXME: This should be done using Requires<In32BitMode> and // Requires<In64BitMode> so "eiz" usage in 64-bit instructions // can be also checked. if (RegNo == X86::RIZ && !Is64Bit) return Error(Tok.getLoc(), "riz register in 64-bit mode only"); - // Parse %st(1) and "%st" as "%st(0)" - if (RegNo == 0 && Tok.getString() == "st") { + // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens. + if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) { RegNo = X86::ST0; EndLoc = Tok.getLoc(); Parser.Lex(); // Eat 'st' @@ -617,88 +619,13 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { bool X86ATTAsmParser:: ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // The various flavors of pushf and popf use Requires<In32BitMode> and - // Requires<In64BitMode>, but the assembler doesn't yet implement that. - // For now, just do a manual check to prevent silent misencoding. - if (Is64Bit) { - if (Name == "popfl") - return Error(NameLoc, "popfl cannot be encoded in 64-bit mode"); - else if (Name == "pushfl") - return Error(NameLoc, "pushfl cannot be encoded in 64-bit mode"); - else if (Name == "pusha") - return Error(NameLoc, "pusha cannot be encoded in 64-bit mode"); - } else { - if (Name == "popfq") - return Error(NameLoc, "popfq cannot be encoded in 32-bit mode"); - else if (Name == "pushfq") - return Error(NameLoc, "pushfq cannot be encoded in 32-bit mode"); - } - - // The "Jump if rCX Zero" form jcxz is not allowed in 64-bit mode and - // the form jrcxz is not allowed in 32-bit mode. - if (Is64Bit) { - if (Name == "jcxz") - return Error(NameLoc, "jcxz cannot be encoded in 64-bit mode"); - } else { - if (Name == "jrcxz") - return Error(NameLoc, "jrcxz cannot be encoded in 32-bit mode"); - } - - // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to - // represent alternative syntaxes in the .td file, without requiring - // instruction duplication. - StringRef PatchedName = StringSwitch<StringRef>(Name) - .Case("sal", "shl") - .Case("salb", "shlb") - .Case("sall", "shll") - .Case("salq", "shlq") - .Case("salw", "shlw") - .Case("repe", "rep") - .Case("repz", "rep") - .Case("repnz", "repne") - .Case("pushf", Is64Bit ? 
"pushfq" : "pushfl") - .Case("popf", Is64Bit ? "popfq" : "popfl") - .Case("retl", Is64Bit ? "retl" : "ret") - .Case("retq", Is64Bit ? "ret" : "retq") - .Case("setz", "sete") - .Case("setnz", "setne") - .Case("jz", "je") - .Case("jnz", "jne") - .Case("jc", "jb") - // FIXME: in 32-bit mode jcxz requires an AdSize prefix. In 64-bit mode - // jecxz requires an AdSize prefix but jecxz does not have a prefix in - // 32-bit mode. - .Case("jecxz", "jcxz") - .Case("jrcxz", "jcxz") - .Case("jna", "jbe") - .Case("jnae", "jb") - .Case("jnb", "jae") - .Case("jnbe", "ja") - .Case("jnc", "jae") - .Case("jng", "jle") - .Case("jnge", "jl") - .Case("jnl", "jge") - .Case("jnle", "jg") - .Case("jpe", "jp") - .Case("jpo", "jnp") - .Case("cmovcl", "cmovbl") - .Case("cmovcl", "cmovbl") - .Case("cmovnal", "cmovbel") - .Case("cmovnbl", "cmovael") - .Case("cmovnbel", "cmoval") - .Case("cmovncl", "cmovael") - .Case("cmovngl", "cmovlel") - .Case("cmovnl", "cmovgel") - .Case("cmovngl", "cmovlel") - .Case("cmovngel", "cmovll") - .Case("cmovnll", "cmovgel") - .Case("cmovnlel", "cmovgl") - .Case("cmovnzl", "cmovnel") - .Case("cmovzl", "cmovel") - .Case("fwait", "wait") - .Case("movzx", "movzb") - .Default(Name); + StringRef PatchedName = Name; + // FIXME: Hack to recognize setneb as setne. + if (PatchedName.startswith("set") && PatchedName.endswith("b") && + PatchedName != "setb" && PatchedName != "setnb") + PatchedName = PatchedName.substr(0, Name.size()-1); + // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}. const MCExpr *ExtraImmOp = 0; if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) && @@ -773,12 +700,26 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, PatchedName = "vpclmulqdq"; } } + Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); if (ExtraImmOp) Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc)); - if (getLexer().isNot(AsmToken::EndOfStatement)) { + + // Determine whether this is an instruction prefix. + bool isPrefix = + Name == "lock" || Name == "rep" || + Name == "repe" || Name == "repz" || + Name == "repne" || Name == "repnz" || + Name == "rex64" || Name == "data16"; + + + // This does the actual operand parsing. Don't parse any more if we have a + // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we + // just want to parse the "lock" as the first instruction and the "incl" as + // the next one. + if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) { // Parse '*' modifier. if (getLexer().is(AsmToken::Star)) { @@ -790,8 +731,10 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, // Read the first operand. if (X86Operand *Op = ParseOperand()) Operands.push_back(Op); - else + else { + Parser.EatToEndOfStatement(); return true; + } while (getLexer().is(AsmToken::Comma)) { Parser.Lex(); // Eat the comma. @@ -799,23 +742,27 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, // Parse and remember the operand. if (X86Operand *Op = ParseOperand()) Operands.push_back(Op); - else + else { + Parser.EatToEndOfStatement(); return true; + } } - } - // FIXME: Hack to handle recognizing s{hr,ar,hl}? $1. 
- if ((Name.startswith("shr") || Name.startswith("sar") || - Name.startswith("shl")) && - Operands.size() == 3 && - static_cast<X86Operand*>(Operands[1])->isImm() && - isa<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm()) && - cast<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm())->getValue() == 1) { - delete Operands[1]; - Operands.erase(Operands.begin() + 1); + if (getLexer().isNot(AsmToken::EndOfStatement)) { + SMLoc Loc = getLexer().getLoc(); + Parser.EatToEndOfStatement(); + return Error(Loc, "unexpected token in argument list"); + } } - // FIXME: Hack to handle "out[bwl]? %al, (%dx)" -> "outb %al, %dx". + if (getLexer().is(AsmToken::EndOfStatement)) + Parser.Lex(); // Consume the EndOfStatement + else if (isPrefix && getLexer().is(AsmToken::Slash)) + Parser.Lex(); // Consume the prefix separator Slash + + // This is a terrible hack to handle "out[bwl]? %al, (%dx)" -> + // "outb %al, %dx". Out doesn't take a memory form, but this is a widely + // documented form in various unofficial manuals, so a lot of code uses it. if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") && Operands.size() == 3) { X86Operand &Op = *(X86Operand*)Operands.back(); @@ -829,76 +776,80 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, } } - // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as - // "f{mul*,add*,sub*,div*} $op" - if ((Name.startswith("fmul") || Name.startswith("fadd") || - Name.startswith("fsub") || Name.startswith("fdiv")) && - Operands.size() == 3 && - static_cast<X86Operand*>(Operands[2])->isReg() && - static_cast<X86Operand*>(Operands[2])->getReg() == X86::ST0) { - delete Operands[2]; - Operands.erase(Operands.begin() + 2); - } - - // FIXME: Hack to handle "imul <imm>, B" which is an alias for "imul <imm>, B, - // B". - if (Name.startswith("imul") && Operands.size() == 3 && - static_cast<X86Operand*>(Operands[1])->isImm() && - static_cast<X86Operand*>(Operands.back())->isReg()) { - X86Operand *Op = static_cast<X86Operand*>(Operands.back()); - Operands.push_back(X86Operand::CreateReg(Op->getReg(), Op->getStartLoc(), - Op->getEndLoc())); - } - - return false; -} - -bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) { - StringRef IDVal = DirectiveID.getIdentifier(); - if (IDVal == ".word") - return ParseDirectiveWord(2, DirectiveID.getLoc()); - return true; -} - -/// ParseDirectiveWord -/// ::= .word [ expression (, expression)* ] -bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { - if (getLexer().isNot(AsmToken::EndOfStatement)) { - for (;;) { - const MCExpr *Value; - if (getParser().ParseExpression(Value)) - return true; - - getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/); - - if (getLexer().is(AsmToken::EndOfStatement)) - break; - - // FIXME: Improve diagnostic. - if (getLexer().isNot(AsmToken::Comma)) - return Error(L, "unexpected token in directive"); - Parser.Lex(); + // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to + // "shift <op>". 
+ if ((Name.startswith("shr") || Name.startswith("sar") || + Name.startswith("shl") || Name.startswith("sal") || + Name.startswith("rcl") || Name.startswith("rcr") || + Name.startswith("rol") || Name.startswith("ror")) && + Operands.size() == 3) { + X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]); + if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) && + cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) { + delete Operands[1]; + Operands.erase(Operands.begin() + 1); } } - Parser.Lex(); return false; } - -bool -X86ATTAsmParser::MatchInstruction(SMLoc IDLoc, - const SmallVectorImpl<MCParsedAsmOperand*> - &Operands, - MCInst &Inst) { +bool X86ATTAsmParser:: +MatchAndEmitInstruction(SMLoc IDLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands, + MCStreamer &Out) { assert(!Operands.empty() && "Unexpect empty operand list!"); - X86Operand *Op = static_cast<X86Operand*>(Operands[0]); assert(Op->isToken() && "Leading operand should always be a mnemonic!"); + // First, handle aliases that expand to multiple instructions. + // FIXME: This should be replaced with a real .td file alias mechanism. + // Also, MatchInstructionImpl should do actually *do* the EmitInstruction + // call. + if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" || + Op->getToken() == "fstsww" || Op->getToken() == "fstcww" || + Op->getToken() == "finit" || Op->getToken() == "fsave" || + Op->getToken() == "fstenv" || Op->getToken() == "fclex") { + MCInst Inst; + Inst.setOpcode(X86::WAIT); + Out.EmitInstruction(Inst); + + const char *Repl = + StringSwitch<const char*>(Op->getToken()) + .Case("finit", "fninit") + .Case("fsave", "fnsave") + .Case("fstcw", "fnstcw") + .Case("fstcww", "fnstcw") + .Case("fstenv", "fnstenv") + .Case("fstsw", "fnstsw") + .Case("fstsww", "fnstsw") + .Case("fclex", "fnclex") + .Default(0); + assert(Repl && "Unknown wait-prefixed instruction"); + delete Operands[0]; + Operands[0] = X86Operand::CreateToken(Repl, IDLoc); + } + + bool WasOriginallyInvalidOperand = false; + unsigned OrigErrorInfo; + MCInst Inst; + // First, try a direct match. - if (!MatchInstructionImpl(Operands, Inst)) + switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo)) { + case Match_Success: + Out.EmitInstruction(Inst); return false; + case Match_MissingFeature: + Error(IDLoc, "instruction requires a CPU feature not currently enabled"); + return true; + case Match_ConversionFail: + return Error(IDLoc, "unable to convert operands to instruction"); + case Match_InvalidOperand: + WasOriginallyInvalidOperand = true; + break; + case Match_MnemonicFail: + break; + } // FIXME: Ideally, we would only attempt suffix matches for things which are // valid prefixes, and we could just infer the right unambiguous @@ -912,15 +863,26 @@ X86ATTAsmParser::MatchInstruction(SMLoc IDLoc, Tmp += ' '; Op->setTokenValue(Tmp.str()); + // If this instruction starts with an 'f', then it is a floating point stack + // instruction. These come in up to three forms for 32-bit, 64-bit, and + // 80-bit floating point, which use the suffixes s,l,t respectively. + // + // Otherwise, we assume that this may be an integer instruction, which comes + // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively. + const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0"; + // Check for the various suffix matches. 
- Tmp[Base.size()] = 'b'; - bool MatchB = MatchInstructionImpl(Operands, Inst); - Tmp[Base.size()] = 'w'; - bool MatchW = MatchInstructionImpl(Operands, Inst); - Tmp[Base.size()] = 'l'; - bool MatchL = MatchInstructionImpl(Operands, Inst); - Tmp[Base.size()] = 'q'; - bool MatchQ = MatchInstructionImpl(Operands, Inst); + Tmp[Base.size()] = Suffixes[0]; + unsigned ErrorInfoIgnore; + MatchResultTy Match1, Match2, Match3, Match4; + + Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); + Tmp[Base.size()] = Suffixes[1]; + Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); + Tmp[Base.size()] = Suffixes[2]; + Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); + Tmp[Base.size()] = Suffixes[3]; + Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); // Restore the old token. Op->setTokenValue(Base); @@ -928,24 +890,25 @@ X86ATTAsmParser::MatchInstruction(SMLoc IDLoc, // If exactly one matched, then we treat that as a successful match (and the // instruction will already have been filled in correctly, since the failing // matches won't have modified it). - if (MatchB + MatchW + MatchL + MatchQ == 3) + unsigned NumSuccessfulMatches = + (Match1 == Match_Success) + (Match2 == Match_Success) + + (Match3 == Match_Success) + (Match4 == Match_Success); + if (NumSuccessfulMatches == 1) { + Out.EmitInstruction(Inst); return false; + } - // Otherwise, the match failed. + // Otherwise, the match failed, try to produce a decent error message. // If we had multiple suffix matches, then identify this as an ambiguous // match. - if (MatchB + MatchW + MatchL + MatchQ != 4) { + if (NumSuccessfulMatches > 1) { char MatchChars[4]; unsigned NumMatches = 0; - if (!MatchB) - MatchChars[NumMatches++] = 'b'; - if (!MatchW) - MatchChars[NumMatches++] = 'w'; - if (!MatchL) - MatchChars[NumMatches++] = 'l'; - if (!MatchQ) - MatchChars[NumMatches++] = 'q'; + if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0]; + if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1]; + if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2]; + if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3]; SmallString<126> Msg; raw_svector_ostream OS(Msg); @@ -959,14 +922,90 @@ X86ATTAsmParser::MatchInstruction(SMLoc IDLoc, } OS << ")"; Error(IDLoc, OS.str()); - } else { - // FIXME: We should give nicer diagnostics about the exact failure. - Error(IDLoc, "unrecognized instruction"); + return true; } + // Okay, we know that none of the variants matched successfully. + + // If all of the instructions reported an invalid mnemonic, then the original + // mnemonic was invalid. + if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) && + (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) { + if (!WasOriginallyInvalidOperand) { + Error(IDLoc, "invalid instruction mnemonic '" + Base + "'"); + return true; + } + + // Recover location info for the operand if we know which was the problem. + SMLoc ErrorLoc = IDLoc; + if (OrigErrorInfo != ~0U) { + if (OrigErrorInfo >= Operands.size()) + return Error(IDLoc, "too few operands for instruction"); + + ErrorLoc = ((X86Operand*)Operands[OrigErrorInfo])->getStartLoc(); + if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; + } + + return Error(ErrorLoc, "invalid operand for instruction"); + } + + // If one instruction matched with a missing feature, report this as a + // missing feature. 
+ if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) + + (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){ + Error(IDLoc, "instruction requires a CPU feature not currently enabled"); + return true; + } + + // If one instruction matched with an invalid operand, report this as an + // operand failure. + if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) + + (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){ + Error(IDLoc, "invalid operand for instruction"); + return true; + } + + // If all of these were an outright failure, report it in a useless way. + // FIXME: We should give nicer diagnostics about the exact failure. + Error(IDLoc, "unknown use of instruction mnemonic without a size suffix"); + return true; +} + + +bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) { + StringRef IDVal = DirectiveID.getIdentifier(); + if (IDVal == ".word") + return ParseDirectiveWord(2, DirectiveID.getLoc()); return true; } +/// ParseDirectiveWord +/// ::= .word [ expression (, expression)* ] +bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { + for (;;) { + const MCExpr *Value; + if (getParser().ParseExpression(Value)) + return true; + + getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/); + + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + // FIXME: Improve diagnostic. + if (getLexer().isNot(AsmToken::Comma)) + return Error(L, "unexpected token in directive"); + Parser.Lex(); + } + } + + Parser.Lex(); + return false; +} + + + extern "C" void LLVMInitializeX86AsmLexer(); @@ -977,4 +1016,6 @@ extern "C" void LLVMInitializeX86AsmParser() { LLVMInitializeX86AsmLexer(); } +#define GET_REGISTER_MATCHER +#define GET_MATCHER_IMPLEMENTATION #include "X86GenAsmMatcher.inc" diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index e9399f5..b5fa94f 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -24,10 +24,12 @@ set(sources X86ELFWriterInfo.cpp X86FastISel.cpp X86FloatingPoint.cpp + X86FrameLowering.cpp X86ISelDAGToDAG.cpp X86ISelLowering.cpp X86InstrInfo.cpp X86JITInfo.cpp + X86MachObjectWriter.cpp X86MCAsmInfo.cpp X86MCCodeEmitter.cpp X86MCInstLower.cpp @@ -39,14 +41,24 @@ set(sources ) if( CMAKE_CL_64 ) + # A workaround for a bug in cmake 2.8.3. See PR 8885. + if( CMAKE_VERSION STREQUAL "2.8.3" ) + include(CMakeDetermineCompilerId) + endif() + # end of workaround. 
enable_language(ASM_MASM) ADD_CUSTOM_COMMAND( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj + MAIN_DEPENDENCY X86CompilationCallback_Win64.asm COMMAND ${CMAKE_ASM_MASM_COMPILER} /Fo ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj /c ${CMAKE_CURRENT_SOURCE_DIR}/X86CompilationCallback_Win64.asm - DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/X86CompilationCallback_Win64.asm ) set(sources ${sources} ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj) endif() add_llvm_target(X86CodeGen ${sources}) +add_subdirectory(AsmParser) +add_subdirectory(Disassembler) +add_subdirectory(InstPrinter) +add_subdirectory(TargetInfo) +add_subdirectory(Utils) diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt b/lib/Target/X86/Disassembler/CMakeLists.txt index 97589c0..972a0d9 100644 --- a/lib/Target/X86/Disassembler/CMakeLists.txt +++ b/lib/Target/X86/Disassembler/CMakeLists.txt @@ -5,7 +5,7 @@ add_llvm_library(LLVMX86Disassembler X86DisassemblerDecoder.c ) # workaround for hanging compilation on MSVC9 and 10 -if( MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 ) +if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 ) set_property( SOURCE X86Disassembler.cpp PROPERTY COMPILE_FLAGS "/Od" diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 09f1584..691e2d7 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -157,9 +157,8 @@ static void translateRegister(MCInst &mcInst, Reg reg) { /// @param immediate - The immediate value to append. /// @param operand - The operand, as stored in the descriptor table. /// @param insn - The internal instruction. -static void translateImmediate(MCInst &mcInst, - uint64_t immediate, - OperandSpecifier &operand, +static void translateImmediate(MCInst &mcInst, uint64_t immediate, + const OperandSpecifier &operand, InternalInstruction &insn) { // Sign-extend the immediate if necessary. @@ -392,9 +391,8 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) { /// @param insn - The instruction to extract Mod, R/M, and SIB fields /// from. /// @return - 0 on success; nonzero otherwise -static bool translateRM(MCInst &mcInst, - OperandSpecifier &operand, - InternalInstruction &insn) { +static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, + InternalInstruction &insn) { switch (operand.type) { default: debug("Unexpected type for a R/M operand"); @@ -461,9 +459,8 @@ static bool translateFPRegister(MCInst &mcInst, /// @param operand - The operand, as stored in the descriptor table. /// @param insn - The internal instruction. /// @return - false on success; true otherwise. 
-static bool translateOperand(MCInst &mcInst, - OperandSpecifier &operand, - InternalInstruction &insn) { +static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, + InternalInstruction &insn) { switch (operand.encoding) { default: debug("Unhandled operand encoding during translation"); diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h index 9c54262..550cf9d 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.h +++ b/lib/Target/X86/Disassembler/X86Disassembler.h @@ -78,7 +78,7 @@ const char* name; #define INSTRUCTION_IDS \ - InstrUID* instructionIDs; + const InstrUID *instructionIDs; #include "X86DisassemblerDecoderCommon.h" diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index 6c3ff6b..b6546fc 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -27,12 +27,6 @@ typedef int8_t bool; -#ifdef __GNUC__ -#define NORETURN __attribute__((noreturn)) -#else -#define NORETURN -#endif - #ifndef NDEBUG #define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0) #else @@ -103,7 +97,7 @@ static InstrUID decode(OpcodeType type, InstructionContext insnContext, uint8_t opcode, uint8_t modRM) { - struct ModRMDecision* dec; + const struct ModRMDecision* dec; switch (type) { default: @@ -147,7 +141,7 @@ static InstrUID decode(OpcodeType type, * decode(); specifierForUID will not check bounds. * @return - A pointer to the specification for that instruction. */ -static struct InstructionSpecifier* specifierForUID(InstrUID uid) { +static const struct InstructionSpecifier *specifierForUID(InstrUID uid) { return &INSTRUCTIONS_SYM[uid]; } @@ -296,7 +290,7 @@ static int readPrefixes(struct InternalInstruction* insn) { BOOL isPrefix = TRUE; BOOL prefixGroups[4] = { FALSE }; uint64_t prefixLocation; - uint8_t byte; + uint8_t byte = 0; BOOL hasAdSize = FALSE; BOOL hasOpSize = FALSE; @@ -394,6 +388,7 @@ static int readPrefixes(struct InternalInstruction* insn) { } } else { unconsumeByte(insn); + insn->necessaryPrefixLocation = insn->readerCursor - 1; } if (insn->mode == MODE_16BIT) { @@ -405,7 +400,7 @@ static int readPrefixes(struct InternalInstruction* insn) { insn->registerSize = (hasOpSize ? 2 : 4); insn->addressSize = (hasAdSize ? 2 : 4); insn->displacementSize = (hasAdSize ? 2 : 4); - insn->immediateSize = (hasAdSize ? 2 : 4); + insn->immediateSize = (hasOpSize ? 2 : 4); } else if (insn->mode == MODE_64BIT) { if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { insn->registerSize = 8; @@ -517,7 +512,8 @@ static int getIDWithAttrMask(uint16_t* instructionID, insn->opcode); if (hasModRMExtension) { - readModRM(insn); + if (readModRM(insn)) + return -1; *instructionID = decode(insn->opcodeType, instructionClass, @@ -632,9 +628,9 @@ static int getID(struct InternalInstruction* insn) { * instead of F2 changes a 32 to a 64, we adopt the new encoding. */ - struct InstructionSpecifier* spec; + const struct InstructionSpecifier *spec; uint16_t instructionIDWithREXw; - struct InstructionSpecifier* specWithREXw; + const struct InstructionSpecifier *specWithREXw; spec = specifierForUID(instructionID); @@ -672,9 +668,9 @@ static int getID(struct InternalInstruction* insn) { * in the right place we check if there's a 16-bit operation. 
*/ - struct InstructionSpecifier* spec; + const struct InstructionSpecifier *spec; uint16_t instructionIDWithOpsize; - struct InstructionSpecifier* specWithOpsize; + const struct InstructionSpecifier *specWithOpsize; spec = specifierForUID(instructionID); @@ -866,7 +862,8 @@ static int readModRM(struct InternalInstruction* insn) { if (insn->consumedModRM) return 0; - consumeByte(insn, &insn->modRM); + if (consumeByte(insn, &insn->modRM)) + return -1; insn->consumedModRM = TRUE; mod = modFromModRM(insn->modRM); @@ -1067,7 +1064,7 @@ GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG) * invalid for its class. */ static int fixupReg(struct InternalInstruction *insn, - struct OperandSpecifier *op) { + const struct OperandSpecifier *op) { uint8_t valid; dbgprintf(insn, "fixupReg()"); diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index 28ba86b..4f4fbcd 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -24,7 +24,7 @@ extern "C" { const char* name; #define INSTRUCTION_IDS \ - InstrUID* instructionIDs; + const InstrUID *instructionIDs; #include "X86DisassemblerDecoderCommon.h" @@ -423,7 +423,7 @@ struct InternalInstruction { /* The instruction ID, extracted from the decode table */ uint16_t instructionID; /* The specifier for the instruction, from the instruction info table */ - struct InstructionSpecifier* spec; + const struct InstructionSpecifier *spec; /* state for additional bytes, consumed during operand decode. Pattern: consumed___ indicates that the byte was already consumed and does not diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index 0f33f52..1425b86 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -22,7 +22,7 @@ #ifndef X86DISASSEMBLERDECODERCOMMON_H #define X86DISASSEMBLERDECODERCOMMON_H -#include "llvm/System/DataTypes.h" +#include "llvm/Support/DataTypes.h" #define INSTRUCTIONS_SYM x86DisassemblerInstrSpecifiers #define CONTEXTS_SYM x86DisassemblerContexts @@ -248,6 +248,7 @@ struct ContextDecision { ENUM_ENTRY(TYPE_M64, "8-byte") \ ENUM_ENTRY(TYPE_LEA, "Effective address") \ ENUM_ENTRY(TYPE_M128, "16-byte (SSE/SSE2)") \ + ENUM_ENTRY(TYPE_M256, "256-byte (AVX)") \ ENUM_ENTRY(TYPE_M1616, "2+2-byte segment+offset address") \ ENUM_ENTRY(TYPE_M1632, "2+4-byte") \ ENUM_ENTRY(TYPE_M1664, "2+8-byte") \ diff --git a/lib/Target/X86/AsmPrinter/CMakeLists.txt b/lib/Target/X86/InstPrinter/CMakeLists.txt index 033973e..033973e 100644 --- a/lib/Target/X86/AsmPrinter/CMakeLists.txt +++ b/lib/Target/X86/InstPrinter/CMakeLists.txt diff --git a/lib/Target/X86/AsmPrinter/Makefile b/lib/Target/X86/InstPrinter/Makefile index c82aa33..c82aa33 100644 --- a/lib/Target/X86/AsmPrinter/Makefile +++ b/lib/Target/X86/InstPrinter/Makefile diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index 554b96c..d6950f4 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -25,10 +25,8 @@ using namespace llvm; // Include the auto-generated portion of the assembly writer. 
-#define MachineInstr MCInst #define GET_INSTRUCTION_NAME #include "X86GenAsmWriter.inc" -#undef MachineInstr void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) { printInstruction(MI, OS); diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h index eb98664..eb98664 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h diff --git a/lib/Target/X86/AsmPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index da9d5a3..12144e3 100644 --- a/lib/Target/X86/AsmPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -16,7 +16,7 @@ #include "X86GenInstrNames.inc" #include "llvm/MC/MCInst.h" #include "llvm/Support/raw_ostream.h" -#include "../X86ShuffleDecode.h" +#include "../Utils/X86ShuffleDecode.h" using namespace llvm; //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/AsmPrinter/X86InstComments.h b/lib/Target/X86/InstPrinter/X86InstComments.h index 6b86db4..6b86db4 100644 --- a/lib/Target/X86/AsmPrinter/X86InstComments.h +++ b/lib/Target/X86/InstPrinter/X86InstComments.h diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index 5625b0e..0484529 100644 --- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -21,13 +21,12 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "X86GenInstrNames.inc" +#include <cctype> using namespace llvm; // Include the auto-generated portion of the assembly writer. -#define MachineInstr MCInst #define GET_INSTRUCTION_NAME #include "X86GenAsmWriter1.inc" -#undef MachineInstr void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) { printInstruction(MI, OS); diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h index 6f12032..6f12032 100644 --- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile index f4ff894..12fb090 100644 --- a/lib/Target/X86/Makefile +++ b/lib/Target/X86/Makefile @@ -20,6 +20,6 @@ BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \ X86GenCallingConv.inc X86GenSubtarget.inc \ X86GenEDInfo.inc -DIRS = AsmPrinter AsmParser Disassembler TargetInfo +DIRS = InstPrinter AsmParser Disassembler TargetInfo Utils include $(LEVEL)/Makefile.common diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index f96b22f..f16ec02 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -20,7 +20,28 @@ __m128i shift_right(__m128i value, unsigned long offset) { //===---------------------------------------------------------------------===// SSE has instructions for doing operations on complex numbers, we should pattern -match them. Compiling this: +match them. 
For example, this should turn into a horizontal add: + +typedef float __attribute__((vector_size(16))) v4f32; +float f32(v4f32 A) { + return A[0]+A[1]+A[2]+A[3]; +} + +Instead we get this: + +_f32: ## @f32 + pshufd $1, %xmm0, %xmm1 ## xmm1 = xmm0[1,0,0,0] + addss %xmm0, %xmm1 + pshufd $3, %xmm0, %xmm2 ## xmm2 = xmm0[3,0,0,0] + movhlps %xmm0, %xmm0 ## xmm0 = xmm0[1,1] + movaps %xmm0, %xmm3 + addss %xmm1, %xmm3 + movdqa %xmm2, %xmm0 + addss %xmm3, %xmm0 + ret + +Also, there are cases where some simple local SLP would improve codegen a bit. +compiling this: _Complex float f32(_Complex float A, _Complex float B) { return A+B; @@ -28,19 +49,17 @@ _Complex float f32(_Complex float A, _Complex float B) { into: -_f32: +_f32: ## @f32 movdqa %xmm0, %xmm2 addss %xmm1, %xmm2 - pshufd $16, %xmm2, %xmm2 - pshufd $1, %xmm1, %xmm1 - pshufd $1, %xmm0, %xmm0 - addss %xmm1, %xmm0 - pshufd $16, %xmm0, %xmm1 - movdqa %xmm2, %xmm0 - unpcklps %xmm1, %xmm0 + pshufd $1, %xmm1, %xmm1 ## xmm1 = xmm1[1,0,0,0] + pshufd $1, %xmm0, %xmm3 ## xmm3 = xmm0[1,0,0,0] + addss %xmm1, %xmm3 + movaps %xmm2, %xmm0 + unpcklps %xmm3, %xmm0 ## xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] ret -seems silly. +seems silly when it could just be one addps. //===---------------------------------------------------------------------===// @@ -904,4 +923,15 @@ The insertps's of $0 are pointless complex copies. //===---------------------------------------------------------------------===// +If SSE4.1 is available we should inline rounding functions instead of emitting +a libcall. + +floor: roundsd $0x01, %xmm, %xmm +ceil: roundsd $0x02, %xmm, %xmm +and likewise for the single precision versions. + +Currently, SelectionDAGBuilder doesn't turn calls to these functions into the +corresponding nodes and some targets (including X86) aren't ready for them. + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt index 78c4dc0..e21d69a 100644 --- a/lib/Target/X86/README-X86-64.txt +++ b/lib/Target/X86/README-X86-64.txt @@ -41,50 +41,6 @@ saved a few instructions. //===---------------------------------------------------------------------===// -Poor codegen: - -int X[2]; -int b; -void test(void) { - memset(X, b, 2*sizeof(X[0])); -} - -llc: - movq _b@GOTPCREL(%rip), %rax - movzbq (%rax), %rax - movq %rax, %rcx - shlq $8, %rcx - orq %rax, %rcx - movq %rcx, %rax - shlq $16, %rax - orq %rcx, %rax - movq %rax, %rcx - shlq $32, %rcx - movq _X@GOTPCREL(%rip), %rdx - orq %rax, %rcx - movq %rcx, (%rdx) - ret - -gcc: - movq _b@GOTPCREL(%rip), %rax - movabsq $72340172838076673, %rdx - movzbq (%rax), %rax - imulq %rdx, %rax - movq _X@GOTPCREL(%rip), %rdx - movq %rax, (%rdx) - ret - -And the codegen is even worse for the following -(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33103): - void fill1(char *s, int a) - { - __builtin_memset(s, a, 15); - } - -For this version, we duplicate the computation of the constant to store. - -//===---------------------------------------------------------------------===// - It's not possible to reference AH, BH, CH, and DH registers in an instruction requiring REX prefix. However, divb and mulb both produce results in AH. 
If isel emits a CopyFromReg which gets turned into a movb and that can be allocated a diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index a305ae6..c10e170 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -67,19 +67,6 @@ cmovs, we should expand to a conditional branch like GCC produces. //===---------------------------------------------------------------------===// -Compile this: -_Bool f(_Bool a) { return a!=1; } - -into: - movzbl %dil, %eax - xorl $1, %eax - ret - -(Although note that this isn't a legal way to express the code that llvm-gcc -currently generates for that function.) - -//===---------------------------------------------------------------------===// - Some isel ideas: 1. Dynamic programming based approach when compile time if not an @@ -109,6 +96,37 @@ It appears icc use push for parameter passing. Need to investigate. //===---------------------------------------------------------------------===// +This: + +void foo(void); +void bar(int x, int *P) { + x >>= 2; + if (x) + foo(); + *P = x; +} + +compiles into: + + movq %rsi, %rbx + movl %edi, %r14d + sarl $2, %r14d + testl %r14d, %r14d + je LBB0_2 + +Instead of doing an explicit test, we can use the flags off the sar. This +occurs in a bigger testcase like this, which is pretty common: + +#include <vector> +int test1(std::vector<int> &X) { + int Sum = 0; + for (long i = 0, e = X.size(); i != e; ++i) + X[i] = 0; + return Sum; +} + +//===---------------------------------------------------------------------===// + Only use inc/neg/not instructions on processors where they are faster than add/sub/xor. They are slower on the P4 due to only updating some processor flags. @@ -394,72 +412,8 @@ boundary to improve performance. //===---------------------------------------------------------------------===// -Codegen: - -int f(int a, int b) { - if (a == 4 || a == 6) - b++; - return b; -} - - -as: - -or eax, 2 -cmp eax, 6 -jz label - -//===---------------------------------------------------------------------===// - GCC's ix86_expand_int_movcc function (in i386.c) has a ton of interesting -simplifications for integer "x cmp y ? a : b". For example, instead of: - -int G; -void f(int X, int Y) { - G = X < 0 ? 14 : 13; -} - -compiling to: - -_f: - movl $14, %eax - movl $13, %ecx - movl 4(%esp), %edx - testl %edx, %edx - cmovl %eax, %ecx - movl %ecx, _G - ret - -it could be: -_f: - movl 4(%esp), %eax - sarl $31, %eax - notl %eax - addl $14, %eax - movl %eax, _G - ret - -etc. - -Another is: -int usesbb(unsigned int a, unsigned int b) { - return (a < b ? -1 : 0); -} -to: -_usesbb: - movl 8(%esp), %eax - cmpl %eax, 4(%esp) - sbbl %eax, %eax - ret - -instead of: -_usesbb: - xorl %eax, %eax - movl 8(%esp), %ecx - cmpl %ecx, 4(%esp) - movl $4294967295, %ecx - cmovb %ecx, %eax - ret +simplifications for integer "x cmp y ? a : b". //===---------------------------------------------------------------------===// @@ -756,23 +710,17 @@ This: { return !full_add(a, b).second; } Should compile to: + addl %esi, %edi + setae %al + movzbl %al, %eax + ret - - _Z11no_overflowjj: - addl %edi, %esi - setae %al - ret - -FIXME: That code looks wrong; bool return is normally defined as zext. 
- -on x86-64, not: - -__Z11no_overflowjj: - addl %edi, %esi - cmpl %edi, %esi - setae %al - movzbl %al, %eax - ret +on x86-64, instead of the rather stupid-looking: + addl %esi, %edi + setb %al + xorb $1, %al + movzbl %al, %eax + ret //===---------------------------------------------------------------------===// @@ -1040,10 +988,10 @@ _foo: instead of: _foo: - movl $255, %eax - orl 4(%esp), %eax - andl $65535, %eax - ret + movl $65280, %eax + andl 4(%esp), %eax + orl $255, %eax + ret //===---------------------------------------------------------------------===// @@ -1165,58 +1113,6 @@ abs: //===---------------------------------------------------------------------===// -Consider: -int test(unsigned long a, unsigned long b) { return -(a < b); } - -We currently compile this to: - -define i32 @test(i32 %a, i32 %b) nounwind { - %tmp3 = icmp ult i32 %a, %b ; <i1> [#uses=1] - %tmp34 = zext i1 %tmp3 to i32 ; <i32> [#uses=1] - %tmp5 = sub i32 0, %tmp34 ; <i32> [#uses=1] - ret i32 %tmp5 -} - -and - -_test: - movl 8(%esp), %eax - cmpl %eax, 4(%esp) - setb %al - movzbl %al, %eax - negl %eax - ret - -Several deficiencies here. First, we should instcombine zext+neg into sext: - -define i32 @test2(i32 %a, i32 %b) nounwind { - %tmp3 = icmp ult i32 %a, %b ; <i1> [#uses=1] - %tmp34 = sext i1 %tmp3 to i32 ; <i32> [#uses=1] - ret i32 %tmp34 -} - -However, before we can do that, we have to fix the bad codegen that we get for -sext from bool: - -_test2: - movl 8(%esp), %eax - cmpl %eax, 4(%esp) - setb %al - movzbl %al, %eax - shll $31, %eax - sarl $31, %eax - ret - -This code should be at least as good as the code above. Once this is fixed, we -can optimize this specific case even more to: - - movl 8(%esp), %eax - xorl %ecx, %ecx - cmpl %eax, 4(%esp) - sbbl %ecx, %ecx - -//===---------------------------------------------------------------------===// - Take the following code (from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16541): @@ -1605,6 +1501,8 @@ loop, the value comes into the loop as two values, and RegsForValue::getCopyFromRegs doesn't know how to put an AssertSext on the constructed BUILD_PAIR which represents the cast value. +This can be handled by making CodeGenPrepare sink the cast. + //===---------------------------------------------------------------------===// Test instructions can be eliminated by using EFLAGS values from arithmetic @@ -1736,46 +1634,6 @@ Ideal output: //===---------------------------------------------------------------------===// -Testcase: -int x(int a) { return (a & 0x80) ? 0x100 : 0; } -int y(int a) { return (a & 0x80) *2; } - -Current: - testl $128, 4(%esp) - setne %al - movzbl %al, %eax - shll $8, %eax - ret - -Better: - movl 4(%esp), %eax - addl %eax, %eax - andl $256, %eax - ret - -This is another general instcombine transformation that is profitable on all -targets. In LLVM IR, these functions look like this: - -define i32 @x(i32 %a) nounwind readnone { -entry: - %0 = and i32 %a, 128 - %1 = icmp eq i32 %0, 0 - %iftmp.0.0 = select i1 %1, i32 0, i32 256 - ret i32 %iftmp.0.0 -} - -define i32 @y(i32 %a) nounwind readnone { -entry: - %0 = shl i32 %a, 1 - %1 = and i32 %0, 256 - ret i32 %1 -} - -Replacing an icmp+select with a shift should always be considered profitable in -instcombine. - -//===---------------------------------------------------------------------===// - Re-implement atomic builtins __sync_add_and_fetch() and __sync_sub_and_fetch properly. 
@@ -1960,3 +1818,100 @@ load, making it non-trivial to determine if there's anything between the load and the store which would prohibit narrowing. //===---------------------------------------------------------------------===// + +This code: +void foo(unsigned x) { + if (x == 0) bar(); + else if (x == 1) qux(); +} + +currently compiles into: +_foo: + movl 4(%esp), %eax + cmpl $1, %eax + je LBB0_3 + testl %eax, %eax + jne LBB0_4 + +the testl could be removed: +_foo: + movl 4(%esp), %eax + cmpl $1, %eax + je LBB0_3 + jb LBB0_4 + +0 is the only unsigned number < 1. + +//===---------------------------------------------------------------------===// + +This code: + +%0 = type { i32, i1 } + +define i32 @add32carry(i32 %sum, i32 %x) nounwind readnone ssp { +entry: + %uadd = tail call %0 @llvm.uadd.with.overflow.i32(i32 %sum, i32 %x) + %cmp = extractvalue %0 %uadd, 1 + %inc = zext i1 %cmp to i32 + %add = add i32 %x, %sum + %z.0 = add i32 %add, %inc + ret i32 %z.0 +} + +declare %0 @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone + +compiles to: + +_add32carry: ## @add32carry + addl %esi, %edi + sbbl %ecx, %ecx + movl %edi, %eax + subl %ecx, %eax + ret + +But it could be: + +_add32carry: + leal (%rsi,%rdi), %eax + cmpl %esi, %eax + adcl $0, %eax + ret + +//===---------------------------------------------------------------------===// + +This: +char t(char c) { + return c/3; +} + +Compiles to: $clang t.c -S -o - -O3 -mkernel -fomit-frame-pointer + +_t: ## @t + movslq %edi, %rax + imulq $-1431655765, %rax, %rcx ## imm = 0xFFFFFFFFAAAAAAAB + shrq $32, %rcx + addl %ecx, %eax + movl %eax, %ecx + shrl $31, %ecx + shrl %eax + addl %ecx, %eax + movsbl %al, %eax + ret + +GCC gets: + +_t: + movl $86, %eax + imulb %dil + shrw $8, %ax + sarb $7, %dil + subb %dil, %al + movsbl %al,%eax + ret + +which is nicer. This also happens for int, not just char. + +//===---------------------------------------------------------------------===// + + + diff --git a/lib/Target/X86/Utils/CMakeLists.txt b/lib/Target/X86/Utils/CMakeLists.txt new file mode 100644 index 0000000..3ad5f99 --- /dev/null +++ b/lib/Target/X86/Utils/CMakeLists.txt @@ -0,0 +1,6 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMX86Utils + X86ShuffleDecode.cpp + ) +add_dependencies(LLVMX86Utils X86CodeGenTable_gen) diff --git a/lib/Target/X86/Utils/Makefile b/lib/Target/X86/Utils/Makefile new file mode 100644 index 0000000..1df6f0f --- /dev/null +++ b/lib/Target/X86/Utils/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/X86/Utils/Makefile -----------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMX86Utils + +# Hack: we need to include 'main' x86 target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/X86/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index df04052..1287977 100644 --- a/lib/Target/X86/X86ShuffleDecode.h +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -12,21 +12,14 @@ // //===----------------------------------------------------------------------===// -#ifndef X86_SHUFFLE_DECODE_H -#define X86_SHUFFLE_DECODE_H - -#include "llvm/ADT/SmallVector.h" -using namespace llvm; +#include "X86ShuffleDecode.h" //===----------------------------------------------------------------------===// // Vector Mask Decoding //===----------------------------------------------------------------------===// -enum { - SM_SentinelZero = ~0U -}; +namespace llvm { -static inline void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) { // Defaults the copying the dest value. ShuffleMask.push_back(0); @@ -51,8 +44,8 @@ void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) { } // <3,1> or <6,7,2,3> -static void DecodeMOVHLPSMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodeMOVHLPSMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { for (unsigned i = NElts/2; i != NElts; ++i) ShuffleMask.push_back(NElts+i); @@ -61,8 +54,8 @@ static void DecodeMOVHLPSMask(unsigned NElts, } // <0,2> or <0,1,4,5> -static void DecodeMOVLHPSMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodeMOVLHPSMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { for (unsigned i = 0; i != NElts/2; ++i) ShuffleMask.push_back(i); @@ -70,16 +63,16 @@ static void DecodeMOVLHPSMask(unsigned NElts, ShuffleMask.push_back(NElts+i); } -static void DecodePSHUFMask(unsigned NElts, unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodePSHUFMask(unsigned NElts, unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask) { for (unsigned i = 0; i != NElts; ++i) { ShuffleMask.push_back(Imm % NElts); Imm /= NElts; } } -static void DecodePSHUFHWMask(unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodePSHUFHWMask(unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask) { ShuffleMask.push_back(0); ShuffleMask.push_back(1); ShuffleMask.push_back(2); @@ -90,8 +83,8 @@ static void DecodePSHUFHWMask(unsigned Imm, } } -static void DecodePSHUFLWMask(unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodePSHUFLWMask(unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask) { for (unsigned i = 0; i != 4; ++i) { ShuffleMask.push_back((Imm & 3)); Imm >>= 2; @@ -102,24 +95,24 @@ static void DecodePSHUFLWMask(unsigned Imm, ShuffleMask.push_back(7); } -static void DecodePUNPCKLMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodePUNPCKLMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { for (unsigned i = 0; i != NElts/2; ++i) { ShuffleMask.push_back(i); ShuffleMask.push_back(i+NElts); } } -static void DecodePUNPCKHMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodePUNPCKHMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { for (unsigned i = 0; i != NElts/2; ++i) { ShuffleMask.push_back(i+NElts/2); ShuffleMask.push_back(i+NElts+NElts/2); } } -static void DecodeSHUFPSMask(unsigned NElts, unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodeSHUFPSMask(unsigned NElts, unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask) { // Part that reads from dest. 
for (unsigned i = 0; i != NElts/2; ++i) { ShuffleMask.push_back(Imm % NElts); @@ -132,8 +125,8 @@ static void DecodeSHUFPSMask(unsigned NElts, unsigned Imm, } } -static void DecodeUNPCKHPMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodeUNPCKHPMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { for (unsigned i = 0; i != NElts/2; ++i) { ShuffleMask.push_back(i+NElts/2); // Reads from dest ShuffleMask.push_back(i+NElts+NElts/2); // Reads from src @@ -144,12 +137,12 @@ static void DecodeUNPCKHPMask(unsigned NElts, /// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd /// etc. NElts indicates the number of elements in the vector allowing it to /// handle different datatypes and vector widths. -static void DecodeUNPCKLPMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodeUNPCKLPMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { for (unsigned i = 0; i != NElts/2; ++i) { ShuffleMask.push_back(i); // Reads from dest ShuffleMask.push_back(i+NElts); // Reads from src } } -#endif +} // llvm namespace diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h new file mode 100644 index 0000000..50d9ccb --- /dev/null +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -0,0 +1,69 @@ +//===-- X86ShuffleDecode.h - X86 shuffle decode logic -----------*-C++-*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Define several functions to decode x86 specific shuffle semantics into a +// generic vector mask. +// +//===----------------------------------------------------------------------===// + +#ifndef X86_SHUFFLE_DECODE_H +#define X86_SHUFFLE_DECODE_H + +#include "llvm/ADT/SmallVector.h" + +//===----------------------------------------------------------------------===// +// Vector Mask Decoding +//===----------------------------------------------------------------------===// + +namespace llvm { +enum { + SM_SentinelZero = ~0U +}; + +void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask); + +// <3,1> or <6,7,2,3> +void DecodeMOVHLPSMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask); + +// <0,2> or <0,1,4,5> +void DecodeMOVLHPSMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask); + +void DecodePSHUFMask(unsigned NElts, unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask); + +void DecodePSHUFHWMask(unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask); + +void DecodePSHUFLWMask(unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask); + +void DecodePUNPCKLMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask); + +void DecodePUNPCKHMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask); + +void DecodeSHUFPSMask(unsigned NElts, unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask); + +void DecodeUNPCKHPMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask); + + +/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd +/// etc. NElts indicates the number of elements in the vector allowing it to +/// handle different datatypes and vector widths. 
+void DecodeUNPCKLPMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask); + +} // llvm namespace + +#endif diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 27e8850..0ca4366 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -15,6 +15,7 @@ #ifndef TARGET_X86_H #define TARGET_X86_H +#include "llvm/Support/DataTypes.h" #include "llvm/Target/TargetMachine.h" namespace llvm { @@ -23,11 +24,13 @@ class FunctionPass; class JITCodeEmitter; class MCCodeEmitter; class MCContext; +class MCObjectWriter; class MachineCodeEmitter; class Target; class TargetAsmBackend; class X86TargetMachine; class formatted_raw_ostream; +class raw_ostream; /// createX86ISelDag - This pass converts a legalized DAG into a /// X86-specific DAG, ready for instruction scheduling. @@ -74,6 +77,13 @@ FunctionPass *createEmitX86CodeToMemory(); /// FunctionPass *createX86MaxStackAlignmentHeuristicPass(); + +/// createX86MachObjectWriter - Construct an X86 Mach-O object writer. +MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS, + bool Is64Bit, + uint32_t CPUType, + uint32_t CPUSubtype); + extern Target TheX86_32Target, TheX86_64Target; } // End llvm namespace diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index a19f1ac..efb6c8c 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -23,6 +23,9 @@ include "llvm/Target/Target.td" def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true", "Enable conditional move instructions">; +def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true", + "Support POPCNT instruction">; + def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX", "Enable MMX instructions">; @@ -45,7 +48,7 @@ def FeatureSSE41 : SubtargetFeature<"sse41", "X86SSELevel", "SSE41", [FeatureSSSE3]>; def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42", "Enable SSE 4.2 instructions", - [FeatureSSE41]>; + [FeatureSSE41, FeaturePOPCNT]>; def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow", "Enable 3DNow! instructions">; def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA", @@ -63,7 +66,8 @@ def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem", "IsUAMemFast", "true", "Fast unaligned memory access">; def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true", - "Support SSE 4a instructions">; + "Support SSE 4a instructions", + [FeaturePOPCNT]>; def FeatureAVX : SubtargetFeature<"avx", "HasAVX", "true", "Enable AVX instructions">; @@ -112,11 +116,13 @@ def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem, FeatureFastUAMem]>; // Westmere is a similar machine to nehalem with some additional features. // Westmere is the corei3/i5/i7 path from nehalem to sandybridge -def : Proc<"westmere", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem, - FeatureFastUAMem, FeatureAES]>; -// Sandy Bridge does not have FMA -// FIXME: Wikipedia says it does... it should have AES as well. -def : Proc<"sandybridge", [FeatureSSE42, FeatureAVX, Feature64Bit]>; +def : Proc<"westmere", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem, + FeatureFastUAMem, FeatureAES, FeatureCLMUL]>; +// SSE is not listed here since llvm treats AVX as a reimplementation of SSE, +// rather than a superset. +// FIXME: Disabling AVX for now since it's not ready. 
+def : Proc<"sandybridge", [FeatureSSE42, Feature64Bit, + FeatureAES, FeatureCLMUL]>; def : Proc<"k6", [FeatureMMX]>; def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>; @@ -176,7 +182,7 @@ include "X86CallingConv.td" //===----------------------------------------------------------------------===// -// Assembly Printers +// Assembly Parser //===----------------------------------------------------------------------===// // Currently the X86 assembly parser only supports ATT syntax. @@ -191,15 +197,21 @@ def ATTAsmParser : AsmParser { string RegisterPrefix = "%"; } +//===----------------------------------------------------------------------===// +// Assembly Printers +//===----------------------------------------------------------------------===// + // The X86 target supports two different syntaxes for emitting machine code. // This is controlled by the -x86-asm-syntax={att|intel} def ATTAsmWriter : AsmWriter { string AsmWriterClassName = "ATTInstPrinter"; int Variant = 0; + bit isMCAsmWriter = 1; } def IntelAsmWriter : AsmWriter { string AsmWriterClassName = "IntelInstPrinter"; int Variant = 1; + bit isMCAsmWriter = 1; } def X86 : Target { diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp index 69dc967..da5f5b1 100644 --- a/lib/Target/X86/X86AsmBackend.cpp +++ b/lib/Target/X86/X86AsmBackend.cpp @@ -11,50 +11,83 @@ #include "X86.h" #include "X86FixupKinds.h" #include "llvm/ADT/Twine.h" -#include "llvm/MC/ELFObjectWriter.h" #include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" -#include "llvm/MC/MachObjectWriter.h" +#include "llvm/Object/MachOFormat.h" +#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/Target/TargetAsmBackend.h" using namespace llvm; - static unsigned getFixupKindLog2Size(unsigned Kind) { switch (Kind) { default: assert(0 && "invalid fixup kind!"); - case X86::reloc_pcrel_1byte: + case FK_PCRel_1: case FK_Data_1: return 0; - case X86::reloc_pcrel_2byte: + case FK_PCRel_2: case FK_Data_2: return 1; - case X86::reloc_pcrel_4byte: + case FK_PCRel_4: case X86::reloc_riprel_4byte: case X86::reloc_riprel_4byte_movq_load: + case X86::reloc_signed_4byte: + case X86::reloc_global_offset_table: case FK_Data_4: return 2; + case FK_PCRel_8: case FK_Data_8: return 3; } } namespace { + +class X86ELFObjectWriter : public MCELFObjectTargetWriter { +public: + X86ELFObjectWriter(bool is64Bit, Triple::OSType OSType, uint16_t EMachine, + bool HasRelocationAddend) + : MCELFObjectTargetWriter(is64Bit, OSType, EMachine, HasRelocationAddend) {} +}; + class X86AsmBackend : public TargetAsmBackend { public: X86AsmBackend(const Target &T) - : TargetAsmBackend(T) {} + : TargetAsmBackend() {} + + unsigned getNumFixupKinds() const { + return X86::NumTargetFixupKinds; + } + + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { + const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = { + { "reloc_riprel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel }, + { "reloc_riprel_4byte_movq_load", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel}, + { "reloc_signed_4byte", 0, 4 * 8, 0}, + { "reloc_global_offset_table", 0, 4 * 8, 0} + }; + + if (Kind < FirstTargetFixupKind) + return 
TargetAsmBackend::getFixupKindInfo(Kind); - void ApplyFixup(const MCFixup &Fixup, MCDataFragment &DF, + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + return Infos[Kind - FirstTargetFixupKind]; + } + + void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const { unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind()); - assert(Fixup.getOffset() + Size <= DF.getContents().size() && + assert(Fixup.getOffset() + Size <= DataSize && "Invalid fixup offset!"); for (unsigned i = 0; i != Size; ++i) - DF.getContents()[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8)); + Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8)); } bool MayNeedRelaxation(const MCInst &Inst) const; @@ -63,9 +96,9 @@ public: bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const; }; -} // end anonymous namespace +} // end anonymous namespace -static unsigned getRelaxedOpcode(unsigned Op) { +static unsigned getRelaxedOpcodeBranch(unsigned Op) { switch (Op) { default: return Op; @@ -90,16 +123,104 @@ static unsigned getRelaxedOpcode(unsigned Op) { } } +static unsigned getRelaxedOpcodeArith(unsigned Op) { + switch (Op) { + default: + return Op; + + // IMUL + case X86::IMUL16rri8: return X86::IMUL16rri; + case X86::IMUL16rmi8: return X86::IMUL16rmi; + case X86::IMUL32rri8: return X86::IMUL32rri; + case X86::IMUL32rmi8: return X86::IMUL32rmi; + case X86::IMUL64rri8: return X86::IMUL64rri32; + case X86::IMUL64rmi8: return X86::IMUL64rmi32; + + // AND + case X86::AND16ri8: return X86::AND16ri; + case X86::AND16mi8: return X86::AND16mi; + case X86::AND32ri8: return X86::AND32ri; + case X86::AND32mi8: return X86::AND32mi; + case X86::AND64ri8: return X86::AND64ri32; + case X86::AND64mi8: return X86::AND64mi32; + + // OR + case X86::OR16ri8: return X86::OR16ri; + case X86::OR16mi8: return X86::OR16mi; + case X86::OR32ri8: return X86::OR32ri; + case X86::OR32mi8: return X86::OR32mi; + case X86::OR64ri8: return X86::OR64ri32; + case X86::OR64mi8: return X86::OR64mi32; + + // XOR + case X86::XOR16ri8: return X86::XOR16ri; + case X86::XOR16mi8: return X86::XOR16mi; + case X86::XOR32ri8: return X86::XOR32ri; + case X86::XOR32mi8: return X86::XOR32mi; + case X86::XOR64ri8: return X86::XOR64ri32; + case X86::XOR64mi8: return X86::XOR64mi32; + + // ADD + case X86::ADD16ri8: return X86::ADD16ri; + case X86::ADD16mi8: return X86::ADD16mi; + case X86::ADD32ri8: return X86::ADD32ri; + case X86::ADD32mi8: return X86::ADD32mi; + case X86::ADD64ri8: return X86::ADD64ri32; + case X86::ADD64mi8: return X86::ADD64mi32; + + // SUB + case X86::SUB16ri8: return X86::SUB16ri; + case X86::SUB16mi8: return X86::SUB16mi; + case X86::SUB32ri8: return X86::SUB32ri; + case X86::SUB32mi8: return X86::SUB32mi; + case X86::SUB64ri8: return X86::SUB64ri32; + case X86::SUB64mi8: return X86::SUB64mi32; + + // CMP + case X86::CMP16ri8: return X86::CMP16ri; + case X86::CMP16mi8: return X86::CMP16mi; + case X86::CMP32ri8: return X86::CMP32ri; + case X86::CMP32mi8: return X86::CMP32mi; + case X86::CMP64ri8: return X86::CMP64ri32; + case X86::CMP64mi8: return X86::CMP64mi32; + + // PUSH + case X86::PUSHi8: return X86::PUSHi32; + } +} + +static unsigned getRelaxedOpcode(unsigned Op) { + unsigned R = getRelaxedOpcodeArith(Op); + if (R != Op) + return R; + return getRelaxedOpcodeBranch(Op); +} + bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const { + // Branches can always be relaxed. 
+ if (getRelaxedOpcodeBranch(Inst.getOpcode()) != Inst.getOpcode()) + return true; + // Check if this instruction is ever relaxable. - if (getRelaxedOpcode(Inst.getOpcode()) == Inst.getOpcode()) + if (getRelaxedOpcodeArith(Inst.getOpcode()) == Inst.getOpcode()) return false; - // If so, just assume it can be relaxed. Once we support relaxing more complex - // instructions we should check that the instruction actually has symbolic - // operands before doing this, but we need to be careful about things like - // PCrel. - return true; + + // Check if it has an expression and is not RIP relative. + bool hasExp = false; + bool hasRIP = false; + for (unsigned i = 0; i < Inst.getNumOperands(); ++i) { + const MCOperand &Op = Inst.getOperand(i); + if (Op.isExpr()) + hasExp = true; + + if (Op.isReg() && Op.getReg() == X86::RIP) + hasRIP = true; + } + + // FIXME: Why exactly do we need the !hasRIP? Is it just a limitation on + // how we do relaxations? + return hasExp && !hasRIP; } // FIXME: Can tblgen help at all here to verify there aren't other instructions @@ -123,10 +244,8 @@ void X86AsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const { /// WriteNopData - Write optimal nops to the output file for the \arg Count /// bytes. This returns the number of bytes written. It may return 0 if /// the \arg Count is more than the maximum optimal nops. -/// -/// FIXME this is X86 32-bit specific and should move to a better place. bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const { - static const uint8_t Nops[16][16] = { + static const uint8_t Nops[10][10] = { // nop {0x90}, // xchg %ax,%ax @@ -147,32 +266,16 @@ bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const { {0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, // nopw %cs:0L(%[re]ax,%[re]ax,1) {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, - // nopl 0(%[re]ax,%[re]ax,1) - // nopw 0(%[re]ax,%[re]ax,1) - {0x0f, 0x1f, 0x44, 0x00, 0x00, - 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00}, - // nopw 0(%[re]ax,%[re]ax,1) - // nopw 0(%[re]ax,%[re]ax,1) - {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00, - 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00}, - // nopw 0(%[re]ax,%[re]ax,1) - // nopl 0L(%[re]ax) */ - {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00, - 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00}, - // nopl 0L(%[re]ax) - // nopl 0L(%[re]ax) - {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00, - 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00}, - // nopl 0L(%[re]ax) - // nopl 0L(%[re]ax,%[re]ax,1) - {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00, - 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00} }; // Write an optimal sequence for the first 15 bytes. - uint64_t OptimalCount = (Count < 16) ? Count : 15; - for (uint64_t i = 0, e = OptimalCount; i != e; i++) - OW->Write8(Nops[OptimalCount - 1][i]); + const uint64_t OptimalCount = (Count < 16) ? Count : 15; + const uint64_t Prefixes = OptimalCount <= 10 ? 0 : OptimalCount - 10; + for (uint64_t i = 0, e = Prefixes; i != e; i++) + OW->Write8(0x66); + const uint64_t Rest = OptimalCount - Prefixes; + for (uint64_t i = 0, e = Rest; i != e; i++) + OW->Write8(Nops[Rest - 1][i]); // Finish with single byte nops. 
for (uint64_t i = OptimalCount, e = Count; i != e; ++i) @@ -186,75 +289,60 @@ bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const { namespace { class ELFX86AsmBackend : public X86AsmBackend { public: - ELFX86AsmBackend(const Target &T) - : X86AsmBackend(T) { - HasAbsolutizedSet = true; - HasScatteredSymbols = true; + Triple::OSType OSType; + ELFX86AsmBackend(const Target &T, Triple::OSType _OSType) + : X86AsmBackend(T), OSType(_OSType) { + HasReliableSymbolDifference = true; } - bool isVirtualSection(const MCSection &Section) const { - const MCSectionELF &SE = static_cast<const MCSectionELF&>(Section); - return SE.getType() == MCSectionELF::SHT_NOBITS;; + virtual bool doesSectionRequireSymbols(const MCSection &Section) const { + const MCSectionELF &ES = static_cast<const MCSectionELF&>(Section); + return ES.getFlags() & ELF::SHF_MERGE; } }; class ELFX86_32AsmBackend : public ELFX86AsmBackend { public: - ELFX86_32AsmBackend(const Target &T) - : ELFX86AsmBackend(T) {} + ELFX86_32AsmBackend(const Target &T, Triple::OSType OSType) + : ELFX86AsmBackend(T, OSType) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return new ELFObjectWriter(OS, /*Is64Bit=*/false, - /*IsLittleEndian=*/true, - /*HasRelocationAddend=*/false); + return createELFObjectWriter(new X86ELFObjectWriter(false, OSType, + ELF::EM_386, false), + OS, /*IsLittleEndian*/ true); } }; class ELFX86_64AsmBackend : public ELFX86AsmBackend { public: - ELFX86_64AsmBackend(const Target &T) - : ELFX86AsmBackend(T) {} + ELFX86_64AsmBackend(const Target &T, Triple::OSType OSType) + : ELFX86AsmBackend(T, OSType) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return new ELFObjectWriter(OS, /*Is64Bit=*/true, - /*IsLittleEndian=*/true, - /*HasRelocationAddend=*/true); + return createELFObjectWriter(new X86ELFObjectWriter(true, OSType, + ELF::EM_X86_64, true), + OS, /*IsLittleEndian*/ true); } }; class WindowsX86AsmBackend : public X86AsmBackend { bool Is64Bit; + public: WindowsX86AsmBackend(const Target &T, bool is64Bit) : X86AsmBackend(T) , Is64Bit(is64Bit) { - HasScatteredSymbols = true; } MCObjectWriter *createObjectWriter(raw_ostream &OS) const { return createWinCOFFObjectWriter(OS, Is64Bit); } - - bool isVirtualSection(const MCSection &Section) const { - const MCSectionCOFF &SE = static_cast<const MCSectionCOFF&>(Section); - return SE.getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA; - } }; class DarwinX86AsmBackend : public X86AsmBackend { public: DarwinX86AsmBackend(const Target &T) - : X86AsmBackend(T) { - HasAbsolutizedSet = true; - HasScatteredSymbols = true; - } - - bool isVirtualSection(const MCSection &Section) const { - const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section); - return (SMO.getType() == MCSectionMachO::S_ZEROFILL || - SMO.getType() == MCSectionMachO::S_GB_ZEROFILL || - SMO.getType() == MCSectionMachO::S_THREAD_LOCAL_ZEROFILL); - } + : X86AsmBackend(T) { } }; class DarwinX86_32AsmBackend : public DarwinX86AsmBackend { @@ -263,7 +351,9 @@ public: : DarwinX86AsmBackend(T) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return new MachObjectWriter(OS, /*Is64Bit=*/false); + return createX86MachObjectWriter(OS, /*Is64Bit=*/false, + object::mach::CTM_i386, + object::mach::CSX86_ALL); } }; @@ -275,7 +365,9 @@ public: } MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return new MachObjectWriter(OS, /*Is64Bit=*/true); + return createX86MachObjectWriter(OS, /*Is64Bit=*/true, + object::mach::CTM_x86_64, + 
object::mach::CSX86_ALL); } virtual bool doesSectionRequireSymbols(const MCSection &Section) const { @@ -312,7 +404,7 @@ public: } }; -} // end anonymous namespace +} // end anonymous namespace TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T, const std::string &TT) { @@ -322,9 +414,12 @@ TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T, case Triple::MinGW32: case Triple::Cygwin: case Triple::Win32: - return new WindowsX86AsmBackend(T, false); + if (Triple(TT).getEnvironment() == Triple::MachO) + return new DarwinX86_32AsmBackend(T); + else + return new WindowsX86AsmBackend(T, false); default: - return new ELFX86_32AsmBackend(T); + return new ELFX86_32AsmBackend(T, Triple(TT).getOS()); } } @@ -333,11 +428,14 @@ TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T, switch (Triple(TT).getOS()) { case Triple::Darwin: return new DarwinX86_64AsmBackend(T); - case Triple::MinGW64: + case Triple::MinGW32: case Triple::Cygwin: case Triple::Win32: - return new WindowsX86AsmBackend(T, true); + if (Triple(TT).getEnvironment() == Triple::MachO) + return new DarwinX86_64AsmBackend(T); + else + return new WindowsX86AsmBackend(T, true); default: - return new ELFX86_64AsmBackend(T); + return new ELFX86_64AsmBackend(T, Triple(TT).getOS()); } } diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 20110ad..99b4479 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -13,8 +13,8 @@ //===----------------------------------------------------------------------===// #include "X86AsmPrinter.h" -#include "AsmPrinter/X86ATTInstPrinter.h" -#include "AsmPrinter/X86IntelInstPrinter.h" +#include "InstPrinter/X86ATTInstPrinter.h" +#include "InstPrinter/X86IntelInstPrinter.h" #include "X86MCInstLower.h" #include "X86.h" #include "X86COFFMachineModuleInfo.h" @@ -48,21 +48,15 @@ using namespace llvm; // Primitive Helper Functions. //===----------------------------------------------------------------------===// -void X86AsmPrinter::PrintPICBaseSymbol(raw_ostream &O) const { - const TargetLowering *TLI = TM.getTargetLowering(); - O << *static_cast<const X86TargetLowering*>(TLI)->getPICBaseSymbol(MF, - OutContext); -} - /// runOnMachineFunction - Emit the function body. /// bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { SetupMachineFunction(MF); - if (Subtarget->isTargetCOFF()) { + if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) { bool Intrn = MF.getFunction()->hasInternalLinkage(); OutStreamer.BeginCOFFSymbolDef(CurrentFnSym); - OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::IMAGE_SYM_CLASS_STATIC + OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::IMAGE_SYM_CLASS_STATIC : COFF::IMAGE_SYM_CLASS_EXTERNAL); OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT); @@ -95,7 +89,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO, break; case MachineOperand::MO_GlobalAddress: { const GlobalValue *GV = MO.getGlobal(); - + MCSymbol *GVSym; if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) GVSym = GetSymbolWithGlobalValueBase(GV, "$stub"); @@ -109,11 +103,11 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO, // Handle dllimport linkage. 
if (MO.getTargetFlags() == X86II::MO_DLLIMPORT) GVSym = OutContext.GetOrCreateSymbol(Twine("__imp_") + GVSym->getName()); - + if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY || MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) { MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); - MachineModuleInfoImpl::StubValueTy &StubSym = + MachineModuleInfoImpl::StubValueTy &StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(Sym); if (StubSym.getPointer() == 0) StubSym = MachineModuleInfoImpl:: @@ -133,7 +127,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO, StubSym = MachineModuleInfoImpl:: StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage()); } - + // If the name begins with a dollar-sign, enclose it in parens. We do this // to avoid having it look like an integer immediate to the assembler. if (GVSym->getName()[0] != '$') @@ -149,7 +143,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO, SmallString<128> TempNameStr; TempNameStr += StringRef(MO.getSymbolName()); TempNameStr += StringRef("$stub"); - + MCSymbol *Sym = GetExternalSymbolSymbol(TempNameStr.str()); MachineModuleInfoImpl::StubValueTy &StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym); @@ -163,17 +157,17 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO, } else { SymToPrint = GetExternalSymbolSymbol(MO.getSymbolName()); } - + // If the name begins with a dollar-sign, enclose it in parens. We do this // to avoid having it look like an integer immediate to the assembler. - if (SymToPrint->getName()[0] != '$') + if (SymToPrint->getName()[0] != '$') O << *SymToPrint; else O << '(' << *SymToPrint << '('; break; } } - + switch (MO.getTargetFlags()) { default: llvm_unreachable("Unknown target flag on GV operand"); @@ -185,15 +179,12 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO, // These affect the name of the symbol, not any suffix. break; case X86II::MO_GOT_ABSOLUTE_ADDRESS: - O << " + [.-"; - PrintPICBaseSymbol(O); - O << ']'; - break; + O << " + [.-" << *MF->getPICBaseSymbol() << ']'; + break; case X86II::MO_PIC_BASE_OFFSET: case X86II::MO_DARWIN_NONLAZY_PIC_BASE: case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: - O << '-'; - PrintPICBaseSymbol(O); + O << '-' << *MF->getPICBaseSymbol(); break; case X86II::MO_TLSGD: O << "@TLSGD"; break; case X86II::MO_GOTTPOFF: O << "@GOTTPOFF"; break; @@ -206,8 +197,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO, case X86II::MO_PLT: O << "@PLT"; break; case X86II::MO_TLVP: O << "@TLVP"; break; case X86II::MO_TLVP_PIC_BASE: - O << "@TLVP" << '-'; - PrintPICBaseSymbol(O); + O << "@TLVP" << '-' << *MF->getPICBaseSymbol(); break; } } @@ -262,7 +252,7 @@ void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, case MachineOperand::MO_JumpTableIndex: case MachineOperand::MO_ConstantPoolIndex: - case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_GlobalAddress: case MachineOperand::MO_ExternalSymbol: { O << '$'; printSymbolOperand(MO, O); @@ -298,10 +288,10 @@ void X86AsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op, if (HasBaseReg && Modifier && !strcmp(Modifier, "no-rip") && BaseReg.getReg() == X86::RIP) HasBaseReg = false; - + // HasParenPart - True if we will print out the () part of the mem ref. 
bool HasParenPart = IndexReg.getReg() || HasBaseReg; - + if (DispSpec.isImm()) { int DispVal = DispSpec.getImm(); if (DispVal || !HasParenPart) @@ -312,6 +302,9 @@ void X86AsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op, printSymbolOperand(MI->getOperand(Op+3), O); } + if (Modifier && strcmp(Modifier, "H") == 0) + O << "+8"; + if (HasParenPart) { assert(IndexReg.getReg() != X86::ESP && "X86 doesn't allow scaling by ESP"); @@ -344,10 +337,8 @@ void X86AsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op, void X86AsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op, raw_ostream &O) { - PrintPICBaseSymbol(O); - O << '\n'; - PrintPICBaseSymbol(O); - O << ':'; + O << *MF->getPICBaseSymbol() << '\n'; + O << *MF->getPICBaseSymbol() << ':'; } bool X86AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode, @@ -386,14 +377,14 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, if (ExtraCode[1] != 0) return true; // Unknown modifier. const MachineOperand &MO = MI->getOperand(OpNo); - + switch (ExtraCode[0]) { default: return true; // Unknown modifier. case 'a': // This is an address. Currently only 'i' and 'r' are expected. if (MO.isImm()) { O << MO.getImm(); return false; - } + } if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol()) { printSymbolOperand(MO, O); if (Subtarget->isPICStyleRIPRel()) @@ -470,6 +461,9 @@ bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, case 'q': // Print SImode register // These only apply to registers, ignore on mem. break; + case 'H': + printMemReference(MI, OpNo, O, "H"); + return false; case 'P': // Don't print @PLT, but do print as memory. printMemReference(MI, OpNo, O, "no-rip"); return false; @@ -480,23 +474,23 @@ bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, } void X86AsmPrinter::EmitStartOfAsmFile(Module &M) { - if (Subtarget->isTargetDarwin()) + if (Subtarget->isTargetEnvMacho()) OutStreamer.SwitchSection(getObjFileLowering().getTextSection()); } void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { - if (Subtarget->isTargetDarwin()) { + if (Subtarget->isTargetEnvMacho()) { // All darwin targets use mach-o. MachineModuleInfoMachO &MMIMacho = MMI->getObjFileInfo<MachineModuleInfoMachO>(); - + // Output stubs for dynamically-linked functions. MachineModuleInfoMachO::SymbolListTy Stubs; Stubs = MMIMacho.GetFnStubList(); if (!Stubs.empty()) { - const MCSection *TheSection = + const MCSection *TheSection = OutContext.getMachOSection("__IMPORT", "__jump_table", MCSectionMachO::S_SYMBOL_STUBS | MCSectionMachO::S_ATTR_SELF_MODIFYING_CODE | @@ -514,7 +508,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { const char HltInsts[] = { -12, -12, -12, -12, -12 }; OutStreamer.EmitBytes(StringRef(HltInsts, 5), 0/*addrspace*/); } - + Stubs.clear(); OutStreamer.AddBlankLine(); } @@ -522,7 +516,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { // Output stubs for external and common global variables. 
Stubs = MMIMacho.GetGVStubList(); if (!Stubs.empty()) { - const MCSection *TheSection = + const MCSection *TheSection = OutContext.getMachOSection("__IMPORT", "__pointers", MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, SectionKind::getMetadata()); @@ -580,7 +574,14 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); } - if (Subtarget->isTargetCOFF()) { + if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing() && + MMI->callsExternalVAFunctionWithFloatingPointArguments()) { + StringRef SymbolName = Subtarget->is64Bit() ? "_fltused" : "__fltused"; + MCSymbol *S = MMI->getContext().GetOrCreateSymbol(SymbolName); + OutStreamer.EmitSymbolAttribute(S, MCSA_Global); + } + + if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) { X86COFFMachineModuleInfo &COFFMMI = MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); @@ -661,12 +662,12 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { } } -MachineLocation +MachineLocation X86AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { MachineLocation Location; assert (MI->getNumOperands() == 7 && "Invalid no. of machine operands!"); // Frame address. Currently handles register +- offset only. - + if (MI->getOperand(0).isReg() && MI->getOperand(3).isImm()) Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm()); else { @@ -690,9 +691,9 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, O << V.getName(); O << " <- "; // Frame address. Currently handles register +- offset only. - O << '['; + O << '['; if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg()) - printOperand(MI, 0, O); + printOperand(MI, 0, O); else O << "undef"; O << '+'; printOperand(MI, 3, O); @@ -718,10 +719,10 @@ static MCInstPrinter *createX86MCInstPrinter(const Target &T, } // Force static initialization. -extern "C" void LLVMInitializeX86AsmPrinter() { +extern "C" void LLVMInitializeX86AsmPrinter() { RegisterAsmPrinter<X86AsmPrinter> X(TheX86_32Target); RegisterAsmPrinter<X86AsmPrinter> Y(TheX86_64Target); - + TargetRegistry::RegisterMCInstPrinter(TheX86_32Target,createX86MCInstPrinter); TargetRegistry::RegisterMCInstPrinter(TheX86_64Target,createX86MCInstPrinter); } diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h index e61be66..3a50435 100644 --- a/lib/Target/X86/X86AsmPrinter.h +++ b/lib/Target/X86/X86AsmPrinter.h @@ -75,8 +75,6 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { void printPICLabel(const MachineInstr *MI, unsigned Op, raw_ostream &O); - void PrintPICBaseSymbol(raw_ostream &O) const; - bool runOnMachineFunction(MachineFunction &F); void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index e3409ef..a44fb69 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -48,7 +48,7 @@ def RetCC_X86Common : CallingConv<[ // MMX vector types are always returned in MM0. If the target doesn't have // MM0, it doesn't support these vector types. - CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToReg<[MM0]>>, + CCIfType<[x86mmx, v1i64], CCAssignToReg<[MM0]>>, // Long double types are always returned in ST0 (even with SSE). CCIfType<[f80], CCAssignToReg<[ST0, ST1]>> @@ -61,7 +61,7 @@ def RetCC_X86_32_C : CallingConv<[ // weirdly; this is really the sse-regparm calling convention) in which // case they use XMM0, otherwise it is the same as the common X86 calling // conv. 
- CCIfInReg<CCIfSubtarget<"hasSSE2()", + CCIfInReg<CCIfSubtarget<"hasXMMInt()", CCIfType<[f32, f64], CCAssignToReg<[XMM0,XMM1,XMM2]>>>>, CCIfType<[f32,f64], CCAssignToReg<[ST0, ST1]>>, CCDelegateTo<RetCC_X86Common> @@ -73,8 +73,8 @@ def RetCC_X86_32_Fast : CallingConv<[ // SSE2. // This can happen when a float, 2 x float, or 3 x float vector is split by // target lowering, and is returned in 1-3 sse regs. - CCIfType<[f32], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>, - CCIfType<[f64], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>, + CCIfType<[f32], CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>, + CCIfType<[f64], CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>, // For integers, ECX can be used as an extra return register CCIfType<[i8], CCAssignToReg<[AL, DL, CL]>>, @@ -95,14 +95,14 @@ def RetCC_X86_64_C : CallingConv<[ // returned in RAX. This disagrees with ABI documentation but is bug // compatible with gcc. CCIfType<[v1i64], CCAssignToReg<[RAX]>>, - CCIfType<[v8i8, v4i16, v2i32], CCAssignToReg<[XMM0, XMM1]>>, + CCIfType<[x86mmx], CCAssignToReg<[XMM0, XMM1]>>, CCDelegateTo<RetCC_X86Common> ]>; // X86-Win64 C return-value convention. def RetCC_X86_Win64_C : CallingConv<[ // The X86-Win64 calling convention always returns __m64 values in RAX. - CCIfType<[v8i8, v4i16, v2i32, v1i64], CCBitConvertToType<i64>>, + CCIfType<[x86mmx, v1i64], CCBitConvertToType<i64>>, // And FP in XMM0 only. CCIfType<[f32], CCAssignToReg<[XMM0]>>, @@ -161,14 +161,14 @@ def CC_X86_64_C : CallingConv<[ // The first 8 MMX (except for v1i64) vector arguments are passed in XMM // registers on Darwin. - CCIfType<[v8i8, v4i16, v2i32], + CCIfType<[x86mmx], CCIfSubtarget<"isTargetDarwin()", - CCIfSubtarget<"hasSSE2()", + CCIfSubtarget<"hasXMMInt()", CCPromoteToType<v2i64>>>>, // The first 8 FP/Vector arguments are passed in XMM registers. CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - CCIfSubtarget<"hasSSE1()", + CCIfSubtarget<"hasXMM()", CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>, // The first 8 256-bit vector arguments are passed in YMM registers. @@ -192,7 +192,7 @@ def CC_X86_64_C : CallingConv<[ CCAssignToStack<32, 32>>, // __m64 vectors get 8-byte stack slots that are 8-byte aligned. - CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>> + CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 8>> ]>; // Calling convention used on Win64 @@ -210,8 +210,7 @@ def CC_X86_Win64_C : CallingConv<[ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>, // The first 4 MMX vector arguments are passed in GPRs. - CCIfType<[v8i8, v4i16, v2i32, v1i64], - CCBitConvertToType<i64>>, + CCIfType<[x86mmx, v1i64], CCBitConvertToType<i64>>, // The first 4 integer arguments are passed in integer registers. CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ], @@ -233,7 +232,7 @@ def CC_X86_Win64_C : CallingConv<[ CCIfType<[f80], CCAssignToStack<0, 0>>, // __m64 vectors get 8-byte stack slots that are 8-byte aligned. 
- CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>> + CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 8>> ]>; def CC_X86_64_GHC : CallingConv<[ @@ -246,7 +245,7 @@ def CC_X86_64_GHC : CallingConv<[ // Pass in STG registers: F1, F2, F3, F4, D1, D2 CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - CCIfSubtarget<"hasSSE1()", + CCIfSubtarget<"hasXMM()", CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>> ]>; @@ -264,12 +263,12 @@ def CC_X86_32_Common : CallingConv<[ // The first 3 float or double arguments, if marked 'inreg' and if the call // is not a vararg call and if SSE2 is available, are passed in SSE registers. CCIfNotVarArg<CCIfInReg<CCIfType<[f32,f64], - CCIfSubtarget<"hasSSE2()", + CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>>>, // The first 3 __m64 (except for v1i64) vector arguments are passed in mmx // registers if the call is not a vararg call. - CCIfNotVarArg<CCIfType<[v8i8, v4i16, v2i32], + CCIfNotVarArg<CCIfType<[x86mmx], CCAssignToReg<[MM0, MM1, MM2]>>>, // Integer/Float values get stored in stack slots that are 4 bytes in @@ -300,7 +299,7 @@ def CC_X86_32_Common : CallingConv<[ // __m64 vectors get 8-byte stack slots that are 4-byte aligned. They are // passed in the parameter area. - CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 4>>]>; + CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 4>>]>; def CC_X86_32_C : CallingConv<[ // Promote i8/i16 arguments to i32. @@ -363,7 +362,7 @@ def CC_X86_32_FastCC : CallingConv<[ // The first 3 float or double arguments, if the call is not a vararg // call and if SSE2 is available, are passed in SSE registers. CCIfNotVarArg<CCIfType<[f32,f64], - CCIfSubtarget<"hasSSE2()", + CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>>, // Doubles get 8-byte slots that are 8-byte aligned. @@ -380,3 +379,35 @@ def CC_X86_32_GHC : CallingConv<[ // Pass in STG registers: Base, Sp, Hp, R1 CCIfType<[i32], CCAssignToReg<[EBX, EBP, EDI, ESI]>> ]>; + +//===----------------------------------------------------------------------===// +// X86 Root Argument Calling Conventions +//===----------------------------------------------------------------------===// + +// This is the root argument convention for the X86-32 backend. +def CC_X86_32 : CallingConv<[ + CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo<CC_X86_32_FastCall>>, + CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>, + CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>, + CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>, + + // Otherwise, drop to normal X86-32 CC + CCDelegateTo<CC_X86_32_C> +]>; + +// This is the root argument convention for the X86-64 backend. +def CC_X86_64 : CallingConv<[ + CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_64_GHC>>, + + // Mingw64 and native Win64 use Win64 CC + CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>, + + // Otherwise, drop to normal X86-64 CC + CCDelegateTo<CC_X86_64_C> +]>; + +// This is the argument convention used for the entire X86 backend. 
+def CC_X86 : CallingConv<[ + CCIfSubtarget<"is64Bit()", CCDelegateTo<CC_X86_64>>, + CCDelegateTo<CC_X86_32> +]>; diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 824021c..60d9d4a 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -68,8 +68,7 @@ namespace { return "X86 Machine Code Emitter"; } - void emitInstruction(const MachineInstr &MI, - const TargetInstrDesc *Desc); + void emitInstruction(MachineInstr &MI, const TargetInstrDesc *Desc); void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); @@ -131,7 +130,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); MBB != E; ++MBB) { MCE.StartMachineBasicBlock(MBB); - for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { const TargetInstrDesc &Desc = I->getDesc(); emitInstruction(*I, &Desc); @@ -598,9 +597,23 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, } template<class CodeEmitter> -void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, +void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, const TargetInstrDesc *Desc) { DEBUG(dbgs() << MI); + + // If this is a pseudo instruction, lower it. + switch (Desc->getOpcode()) { + case X86::ADD16rr_DB: Desc = &II->get(X86::OR16rr); MI.setDesc(*Desc);break; + case X86::ADD32rr_DB: Desc = &II->get(X86::OR32rr); MI.setDesc(*Desc);break; + case X86::ADD64rr_DB: Desc = &II->get(X86::OR64rr); MI.setDesc(*Desc);break; + case X86::ADD16ri_DB: Desc = &II->get(X86::OR16ri); MI.setDesc(*Desc);break; + case X86::ADD32ri_DB: Desc = &II->get(X86::OR32ri); MI.setDesc(*Desc);break; + case X86::ADD64ri32_DB:Desc = &II->get(X86::OR64ri32);MI.setDesc(*Desc);break; + case X86::ADD16ri8_DB: Desc = &II->get(X86::OR16ri8);MI.setDesc(*Desc);break; + case X86::ADD32ri8_DB: Desc = &II->get(X86::OR32ri8);MI.setDesc(*Desc);break; + case X86::ADD64ri8_DB: Desc = &II->get(X86::OR64ri8);MI.setDesc(*Desc);break; + } + MCE.processDebugLoc(MI.getDebugLoc(), true); diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp index f84995d..f1d7ede 100644 --- a/lib/Target/X86/X86ELFWriterInfo.cpp +++ b/lib/Target/X86/X86ELFWriterInfo.cpp @@ -14,6 +14,7 @@ #include "X86ELFWriterInfo.h" #include "X86Relocations.h" #include "llvm/Function.h" +#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" @@ -24,8 +25,8 @@ using namespace llvm; // Implementation of the X86ELFWriterInfo class //===----------------------------------------------------------------------===// -X86ELFWriterInfo::X86ELFWriterInfo(TargetMachine &TM) - : TargetELFWriterInfo(TM) { +X86ELFWriterInfo::X86ELFWriterInfo(bool is64Bit_, bool isLittleEndian_) + : TargetELFWriterInfo(is64Bit_, isLittleEndian_) { EMachine = is64Bit ? 
EM_X86_64 : EM_386; } @@ -35,13 +36,13 @@ unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const { if (is64Bit) { switch(MachineRelTy) { case X86::reloc_pcrel_word: - return R_X86_64_PC32; + return ELF::R_X86_64_PC32; case X86::reloc_absolute_word: - return R_X86_64_32; + return ELF::R_X86_64_32; case X86::reloc_absolute_word_sext: - return R_X86_64_32S; + return ELF::R_X86_64_32S; case X86::reloc_absolute_dword: - return R_X86_64_64; + return ELF::R_X86_64_64; case X86::reloc_picrel_word: default: llvm_unreachable("unknown x86_64 machine relocation type"); @@ -49,9 +50,9 @@ unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const { } else { switch(MachineRelTy) { case X86::reloc_pcrel_word: - return R_386_PC32; + return ELF::R_386_PC32; case X86::reloc_absolute_word: - return R_386_32; + return ELF::R_386_32; case X86::reloc_absolute_word_sext: case X86::reloc_absolute_dword: case X86::reloc_picrel_word: @@ -66,18 +67,18 @@ long int X86ELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy, long int Modifier) const { if (is64Bit) { switch(RelTy) { - case R_X86_64_PC32: return Modifier - 4; - case R_X86_64_32: - case R_X86_64_32S: - case R_X86_64_64: + case ELF::R_X86_64_PC32: return Modifier - 4; + case ELF::R_X86_64_32: + case ELF::R_X86_64_32S: + case ELF::R_X86_64_64: return Modifier; default: llvm_unreachable("unknown x86_64 relocation type"); } } else { switch(RelTy) { - case R_386_PC32: return Modifier - 4; - case R_386_32: return Modifier; + case ELF::R_386_PC32: return Modifier - 4; + case ELF::R_386_32: return Modifier; default: llvm_unreachable("unknown x86 relocation type"); } @@ -88,19 +89,19 @@ long int X86ELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy, unsigned X86ELFWriterInfo::getRelocationTySize(unsigned RelTy) const { if (is64Bit) { switch(RelTy) { - case R_X86_64_PC32: - case R_X86_64_32: - case R_X86_64_32S: + case ELF::R_X86_64_PC32: + case ELF::R_X86_64_32: + case ELF::R_X86_64_32S: return 32; - case R_X86_64_64: + case ELF::R_X86_64_64: return 64; default: llvm_unreachable("unknown x86_64 relocation type"); } } else { switch(RelTy) { - case R_386_PC32: - case R_386_32: + case ELF::R_386_PC32: + case ELF::R_386_32: return 32; default: llvm_unreachable("unknown x86 relocation type"); @@ -112,20 +113,20 @@ unsigned X86ELFWriterInfo::getRelocationTySize(unsigned RelTy) const { bool X86ELFWriterInfo::isPCRelativeRel(unsigned RelTy) const { if (is64Bit) { switch(RelTy) { - case R_X86_64_PC32: + case ELF::R_X86_64_PC32: return true; - case R_X86_64_32: - case R_X86_64_32S: - case R_X86_64_64: + case ELF::R_X86_64_32: + case ELF::R_X86_64_32S: + case ELF::R_X86_64_64: return false; default: llvm_unreachable("unknown x86_64 relocation type"); } } else { switch(RelTy) { - case R_386_PC32: + case ELF::R_386_PC32: return true; - case R_386_32: + case ELF::R_386_32: return false; default: llvm_unreachable("unknown x86 relocation type"); @@ -143,7 +144,7 @@ long int X86ELFWriterInfo::computeRelocation(unsigned SymOffset, unsigned RelOffset, unsigned RelTy) const { - if (RelTy == R_X86_64_PC32 || RelTy == R_386_PC32) + if (RelTy == ELF::R_X86_64_PC32 || RelTy == ELF::R_386_PC32) return SymOffset - (RelOffset + 4); else assert("computeRelocation unknown for this relocation type"); diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h index 342e6e6..a45b5bb 100644 --- a/lib/Target/X86/X86ELFWriterInfo.h +++ b/lib/Target/X86/X86ELFWriterInfo.h @@ -20,25 +20,8 @@ namespace llvm { class X86ELFWriterInfo : public 
TargetELFWriterInfo { - // ELF Relocation types for X86 - enum X86RelocationType { - R_386_NONE = 0, - R_386_32 = 1, - R_386_PC32 = 2 - }; - - // ELF Relocation types for X86_64 - enum X86_64RelocationType { - R_X86_64_NONE = 0, - R_X86_64_64 = 1, - R_X86_64_PC32 = 2, - R_X86_64_32 = 10, - R_X86_64_32S = 11, - R_X86_64_PC64 = 24 - }; - public: - X86ELFWriterInfo(TargetMachine &TM); + X86ELFWriterInfo(bool is64Bit_, bool isLittleEndian_); virtual ~X86ELFWriterInfo(); /// getRelocationType - Returns the target specific ELF Relocation type. diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 0c70eec..9d42ac2 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -36,7 +36,7 @@ using namespace llvm; namespace { - + class X86FastISel : public FastISel { /// Subtarget - Keep a pointer to the X86Subtarget around so that we can /// make the right decision when generating code for different targets. @@ -46,7 +46,7 @@ class X86FastISel : public FastISel { /// unsigned StackPtr; - /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87 + /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87 /// floating point ops. /// When SSE is available, use it for f32 operations. /// When SSE2 is available, use it for f64 operations. @@ -63,11 +63,18 @@ public: virtual bool TargetSelectInstruction(const Instruction *I); + /// TryToFoldLoad - The specified machine instr operand is a vreg, and that + /// vreg is being provided by the specified load instruction. If possible, + /// try to fold the load as an operand to the instruction, returning true if + /// possible. + virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI); + #include "X86GenFastISel.inc" private: bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT); - + bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR); bool X86FastEmitStore(EVT VT, const Value *Val, @@ -77,12 +84,12 @@ private: bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT, unsigned &ResultReg); - + bool X86SelectAddress(const Value *V, X86AddressMode &AM); bool X86SelectCallAddress(const Value *V, X86AddressMode &AM); bool X86SelectLoad(const Instruction *I); - + bool X86SelectStore(const Instruction *I); bool X86SelectRet(const Instruction *I); @@ -98,7 +105,7 @@ private: bool X86SelectSelect(const Instruction *I); bool X86SelectTrunc(const Instruction *I); - + bool X86SelectFPExt(const Instruction *I); bool X86SelectFPTrunc(const Instruction *I); @@ -107,9 +114,6 @@ private: bool X86VisitIntrinsicCall(const IntrinsicInst &I); bool X86SelectCall(const Instruction *I); - CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isTailCall = false); - CCAssignFn *CCAssignFnForRet(CallingConv::ID CC, bool isTailCall = false); - const X86InstrInfo *getInstrInfo() const { return getTargetMachine()->getInstrInfo(); } @@ -128,17 +132,18 @@ private: (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1 } - bool isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1 = false); + bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false); }; - + } // end anonymous namespace. -bool X86FastISel::isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1) { - VT = TLI.getValueType(Ty, /*HandleUnknown=*/true); - if (VT == MVT::Other || !VT.isSimple()) +bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) { + EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true); + if (evt == MVT::Other || !evt.isSimple()) // Unhandled type. 
Halt "fast" selection and bail. return false; - + + VT = evt.getSimpleVT(); // For now, require SSE/SSE2 for performing floating-point operations, // since x87 requires additional work. if (VT == MVT::f64 && !X86ScalarSSEf64) @@ -157,45 +162,6 @@ bool X86FastISel::isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1) { #include "X86GenCallingConv.inc" -/// CCAssignFnForCall - Selects the correct CCAssignFn for a given calling -/// convention. -CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC, - bool isTaillCall) { - if (Subtarget->is64Bit()) { - if (CC == CallingConv::GHC) - return CC_X86_64_GHC; - else if (Subtarget->isTargetWin64()) - return CC_X86_Win64_C; - else - return CC_X86_64_C; - } - - if (CC == CallingConv::X86_FastCall) - return CC_X86_32_FastCall; - else if (CC == CallingConv::X86_ThisCall) - return CC_X86_32_ThisCall; - else if (CC == CallingConv::Fast) - return CC_X86_32_FastCC; - else if (CC == CallingConv::GHC) - return CC_X86_32_GHC; - else - return CC_X86_32_C; -} - -/// CCAssignFnForRet - Selects the correct CCAssignFn for a given calling -/// convention. -CCAssignFn *X86FastISel::CCAssignFnForRet(CallingConv::ID CC, - bool isTaillCall) { - if (Subtarget->is64Bit()) { - if (Subtarget->isTargetWin64()) - return RetCC_X86_Win64_C; - else - return RetCC_X86_64_C; - } - - return RetCC_X86_32_C; -} - /// X86FastEmitLoad - Emit a machine instruction to load a value of type VT. /// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV. /// Return true and the result register by reference if it is possible. @@ -284,7 +250,7 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, Opc = Subtarget->hasSSE2() ? X86::MOVSDmr : X86::ST_Fp64m; break; } - + addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)), AM).addReg(Val); return true; @@ -295,7 +261,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val, // Handle 'null' like i32/i64 0. if (isa<ConstantPointerNull>(Val)) Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext())); - + // If this is a store of a simple constant, fold the constant into the store. if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { unsigned Opc = 0; @@ -312,7 +278,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val, Opc = X86::MOV64mi32; break; } - + if (Opc) { addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)), AM) @@ -321,11 +287,11 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val, return true; } } - + unsigned ValReg = getRegForValue(Val); if (ValReg == 0) - return false; - + return false; + return X86FastEmitStore(VT, ValReg, AM); } @@ -337,7 +303,7 @@ bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned &ResultReg) { unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src, /*TODO: Kill=*/false); - + if (RR != 0) { ResultReg = RR; return true; @@ -354,11 +320,11 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { // Don't walk into other basic blocks; it's possible we haven't // visited them yet, so the instructions may not yet be assigned // virtual registers. 
- if (FuncInfo.MBBMap[I->getParent()] != FuncInfo.MBB) - return false; - - Opcode = I->getOpcode(); - U = I; + if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) || + FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { + Opcode = I->getOpcode(); + U = I; + } } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) { Opcode = C->getOpcode(); U = C; @@ -472,7 +438,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { AM.Disp = (uint32_t)Disp; if (X86SelectAddress(U->getOperand(0), AM)) return true; - + // If we couldn't merge the sub value into this addr mode, revert back to // our address and just match the value instead of completely failing. AM = SavedAM; @@ -501,7 +467,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { // Okay, we've committed to selecting this global. Set up the basic address. AM.GV = GV; - + // Allow the subtarget to classify the global. unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM); @@ -510,7 +476,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { // FIXME: How do we know Base.Reg is free?? AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); } - + // Unless the ABI requires an extra load, return a direct reference to // the global. if (!isGlobalStubReference(GVFlags)) { @@ -523,7 +489,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { AM.GVOpFlags = GVFlags; return true; } - + // Ok, we need to do a load from a stub. If we've already loaded from this // stub, reuse the loaded pointer, otherwise emit the load now. DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V); @@ -545,14 +511,14 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { if (TLI.getPointerTy() == MVT::i64) { Opc = X86::MOV64rm; RC = X86::GR64RegisterClass; - + if (Subtarget->isPICStyleRIPRel()) StubAM.Base.Reg = X86::RIP; } else { Opc = X86::MOV32rm; RC = X86::GR32RegisterClass; } - + LoadReg = createResultReg(RC); MachineInstrBuilder LoadMI = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg); @@ -564,7 +530,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { // Prevent loading GV stub multiple times in same MBB. LocalValueMap[V] = LoadReg; } - + // Now construct the final address. Note that the Disp, Scale, // and Index values may already be set here. AM.Base.Reg = LoadReg; @@ -638,7 +604,7 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) { // Okay, we've committed to selecting this global. Set up the basic address. AM.GV = GV; - + // No ABI requires an extra load for anything other than DLLImport, which // we rejected above. Return a direct reference to the global. if (Subtarget->isPICStyleRIPRel()) { @@ -651,7 +617,7 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) { } else if (Subtarget->isPICStyleGOT()) { AM.GVOpFlags = X86II::MO_GOTOFF; } - + return true; } @@ -674,7 +640,7 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) { /// X86SelectStore - Select and emit code to implement store instructions. bool X86FastISel::X86SelectStore(const Instruction *I) { - EVT VT; + MVT VT; if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true)) return false; @@ -724,7 +690,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { // Analyze operands of the call, assigning locations to each operand. 
SmallVector<CCValAssign, 16> ValLocs; CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext()); - CCInfo.AnalyzeReturn(Outs, CCAssignFnForRet(CC)); + CCInfo.AnalyzeReturn(Outs, RetCC_X86); const Value *RV = Ret->getOperand(0); unsigned Reg = getRegForValue(RV); @@ -736,7 +702,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { return false; CCValAssign &VA = ValLocs[0]; - + // Don't bother handling odd stuff for now. if (VA.getLocInfo() != CCValAssign::Full) return false; @@ -745,7 +711,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { return false; // TODO: For now, don't try to handle cases where getLocInfo() // says Full but the types don't match. - if (VA.getValVT() != TLI.getValueType(RV->getType())) + if (TLI.getValueType(RV->getType()) != VA.getValVT()) return false; // The calling-convention tables for x87 returns don't tell @@ -775,7 +741,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { /// X86SelectLoad - Select and emit code to implement load instructions. /// bool X86FastISel::X86SelectLoad(const Instruction *I) { - EVT VT; + MVT VT; if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true)) return false; @@ -826,11 +792,11 @@ bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, EVT VT) { unsigned Op0Reg = getRegForValue(Op0); if (Op0Reg == 0) return false; - + // Handle 'null' like i32/i64 0. if (isa<ConstantPointerNull>(Op1)) Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getContext())); - + // We have two options: compare with register or immediate. If the RHS of // the compare is an immediate that we can fold into this compare, use // CMPri, otherwise use CMPrr. @@ -842,23 +808,23 @@ bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, return true; } } - + unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget); if (CompareOpc == 0) return false; - + unsigned Op1Reg = getRegForValue(Op1); if (Op1Reg == 0) return false; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareOpc)) .addReg(Op0Reg) .addReg(Op1Reg); - + return true; } bool X86FastISel::X86SelectCmp(const Instruction *I) { const CmpInst *CI = cast<CmpInst>(I); - EVT VT; + MVT VT; if (!isTypeLegal(I->getOperand(0)->getType(), VT)) return false; @@ -869,13 +835,13 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { case CmpInst::FCMP_OEQ: { if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT)) return false; - + unsigned EReg = createResultReg(&X86::GR8RegClass); unsigned NPReg = createResultReg(&X86::GR8RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETEr), EReg); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETNPr), NPReg); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg); UpdateValueMap(I, ResultReg); return true; @@ -908,7 +874,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { case CmpInst::FCMP_UGE: SwapArgs = true; SetCCOpc = X86::SETBEr; break; case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break; case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break; - + case CmpInst::ICMP_EQ: SwapArgs = false; SetCCOpc = X86::SETEr; break; case CmpInst::ICMP_NE: SwapArgs = false; SetCCOpc = X86::SETNEr; break; case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr; break; @@ -930,7 +896,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { // Emit a compare of Op0/Op1. 
if (!X86FastEmitCompare(Op0, Op1, VT)) return false; - + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(SetCCOpc), ResultReg); UpdateValueMap(I, ResultReg); return true; @@ -995,7 +961,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE_4; break; case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break; case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break; - + case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE_4; break; case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE_4; break; case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4; break; @@ -1009,7 +975,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { default: return false; } - + const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1); if (SwapArgs) std::swap(Op0, Op1); @@ -1017,7 +983,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { // Emit a compare of the LHS and RHS, setting the flags. if (!X86FastEmitCompare(Op0, Op1, VT)) return false; - + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BranchOpc)) .addMBB(TrueMBB); @@ -1070,8 +1036,8 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { } const TargetInstrDesc &TID = MI.getDesc(); - if (TID.hasUnmodeledSideEffects() || - TID.hasImplicitDefOfPhysReg(X86::EFLAGS)) + if (TID.hasImplicitDefOfPhysReg(X86::EFLAGS) || + MI.hasUnmodeledSideEffects()) break; } @@ -1147,22 +1113,22 @@ bool X86FastISel::X86SelectShift(const Instruction *I) { return false; } - EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true); - if (VT == MVT::Other || !isTypeLegal(I->getType(), VT)) + MVT VT; + if (!isTypeLegal(I->getType(), VT)) return false; unsigned Op0Reg = getRegForValue(I->getOperand(0)); if (Op0Reg == 0) return false; - + // Fold immediate in shl(x,3). if (const ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpImm), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpImm), ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff); UpdateValueMap(I, ResultReg); return true; } - + unsigned Op1Reg = getRegForValue(I->getOperand(1)); if (Op1Reg == 0) return false; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), @@ -1183,23 +1149,26 @@ bool X86FastISel::X86SelectShift(const Instruction *I) { } bool X86FastISel::X86SelectSelect(const Instruction *I) { - EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true); - if (VT == MVT::Other || !isTypeLegal(I->getType(), VT)) + MVT VT; + if (!isTypeLegal(I->getType(), VT)) return false; - + + // We only use cmov here, if we don't have a cmov instruction bail. 
+ if (!Subtarget->hasCMov()) return false; + unsigned Opc = 0; const TargetRegisterClass *RC = NULL; - if (VT.getSimpleVT() == MVT::i16) { + if (VT == MVT::i16) { Opc = X86::CMOVE16rr; RC = &X86::GR16RegClass; - } else if (VT.getSimpleVT() == MVT::i32) { + } else if (VT == MVT::i32) { Opc = X86::CMOVE32rr; RC = &X86::GR32RegClass; - } else if (VT.getSimpleVT() == MVT::i64) { + } else if (VT == MVT::i64) { Opc = X86::CMOVE64rr; RC = &X86::GR64RegClass; } else { - return false; + return false; } unsigned Op0Reg = getRegForValue(I->getOperand(0)); @@ -1264,7 +1233,7 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) { return false; EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); EVT DstVT = TLI.getValueType(I->getType()); - + // This code only handles truncation to byte right now. if (DstVT != MVT::i8 && DstVT != MVT::i1) // All other cases should be handled by the tblgen generated code. @@ -1335,21 +1304,21 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { // Grab the frame index. X86AddressMode AM; if (!X86SelectAddress(Slot, AM)) return false; - + if (!X86FastEmitStore(PtrTy, Op1, AM)) return false; - + return true; } case Intrinsic::objectsize: { ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1)); const Type *Ty = I.getCalledFunction()->getReturnType(); - + assert(CI && "Non-constant type in Intrinsic::objectsize?"); - - EVT VT; + + MVT VT; if (!isTypeLegal(Ty, VT)) return false; - + unsigned OpC = 0; if (VT == MVT::i32) OpC = X86::MOV32ri; @@ -1357,7 +1326,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { OpC = X86::MOV64ri; else return false; - + unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg). addImm(CI->isZero() ? -1ULL : 0); @@ -1392,7 +1361,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { const Type *RetTy = cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0)); - EVT VT; + MVT VT; if (!isTypeLegal(RetTy, VT)) return false; @@ -1429,7 +1398,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { ResultReg = DestReg1+1; else ResultReg = createResultReg(TLI.getRegClassFor(MVT::i8)); - + unsigned Opc = X86::SETBr; if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow) Opc = X86::SETOr; @@ -1476,7 +1445,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { // Handle *simple* calls for now. const Type *RetTy = CS.getType(); - EVT RetVT; + MVT RetVT; if (RetTy->isVoidTy()) RetVT = MVT::isVoid; else if (!isTypeLegal(RetTy, RetVT, true)) @@ -1506,7 +1475,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { // Deal with call operands first. SmallVector<const Value *, 8> ArgVals; SmallVector<unsigned, 8> Args; - SmallVector<EVT, 8> ArgVTs; + SmallVector<MVT, 8> ArgVTs; SmallVector<ISD::ArgFlagsTy, 8> ArgFlags; Args.reserve(CS.arg_size()); ArgVals.reserve(CS.arg_size()); @@ -1532,7 +1501,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { return false; const Type *ArgTy = (*i)->getType(); - EVT ArgVT; + MVT ArgVT; if (!isTypeLegal(ArgTy, ArgVT)) return false; unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy); @@ -1547,13 +1516,13 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { // Analyze operands of the call, assigning locations to each operand. 
SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CC, false, TM, ArgLocs, I->getParent()->getContext()); - + // Allocate shadow area for Win64 - if (Subtarget->isTargetWin64()) { - CCInfo.AllocateStack(32, 8); + if (Subtarget->isTargetWin64()) { + CCInfo.AllocateStack(32, 8); } - CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC)); + CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_X86); // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); @@ -1570,7 +1539,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { CCValAssign &VA = ArgLocs[i]; unsigned Arg = Args[VA.getValNo()]; EVT ArgVT = ArgVTs[VA.getValNo()]; - + // Promote the value if needed. switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); @@ -1578,20 +1547,21 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { case CCValAssign::SExt: { bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), Arg, ArgVT, Arg); - assert(Emitted && "Failed to emit a sext!"); Emitted=Emitted; - Emitted = true; + assert(Emitted && "Failed to emit a sext!"); (void)Emitted; ArgVT = VA.getLocVT(); break; } case CCValAssign::ZExt: { bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), Arg, ArgVT, Arg); - assert(Emitted && "Failed to emit a zext!"); Emitted=Emitted; - Emitted = true; + assert(Emitted && "Failed to emit a zext!"); (void)Emitted; ArgVT = VA.getLocVT(); break; } case CCValAssign::AExt: { + // We don't handle MMX parameters yet. + if (VA.getLocVT().isVector() && VA.getLocVT().getSizeInBits() == 128) + return false; bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), Arg, ArgVT, Arg); if (!Emitted) @@ -1600,21 +1570,21 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { if (!Emitted) Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), Arg, ArgVT, Arg); - - assert(Emitted && "Failed to emit a aext!"); Emitted=Emitted; + + assert(Emitted && "Failed to emit a aext!"); (void)Emitted; ArgVT = VA.getLocVT(); break; } case CCValAssign::BCvt: { - unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT().getSimpleVT(), - ISD::BIT_CONVERT, Arg, /*TODO: Kill=*/false); + unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT(), + ISD::BITCAST, Arg, /*TODO: Kill=*/false); assert(BC != 0 && "Failed to emit a bitcast!"); Arg = BC; ArgVT = VA.getLocVT(); break; } } - + if (VA.isRegLoc()) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg); @@ -1625,7 +1595,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { AM.Base.Reg = StackPtr; AM.Disp = LocMemOffset; const Value *ArgVal = ArgVals[VA.getValNo()]; - + // If this is a really simple value, emit this with the Value* version of // X86FastEmitStore. If it isn't simple, we don't want to do this, as it // can cause us to reevaluate the argument. @@ -1637,13 +1607,13 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { } // ELF / PIC requires GOT in the EBX register before function calls via PLT - // GOT pointer. + // GOT pointer. if (Subtarget->isPICStyleGOT()) { unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base); } - + // Issue the call. 
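  // (Two shapes are emitted here: indirect calls through a register
  // (CALL32r/CALL64r) and direct pc-relative calls (CALLpcrel32 /
  // CALL64pcrel32) carrying whatever target flag the platform needs --
  // PLT on ELF/PIC, a $stub reference on older Darwin. Roughly:
  //   call  foo@PLT             ; 32-bit ELF PIC direct call
  //   callq *%rax               ; 64-bit indirect call)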
MachineInstrBuilder MIB; if (CalleeOp) { @@ -1657,7 +1627,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { CallOpc = X86::CALL32r; MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)) .addReg(CalleeOp); - + } else { // Direct call. assert(GV && "Not a direct call"); @@ -1668,10 +1638,10 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { CallOpc = X86::CALL64pcrel32; else CallOpc = X86::CALLpcrel32; - + // See if we need any target-specific flags on the GV operand. unsigned char OpFlags = 0; - + // On ELF targets, in both X86-64 and X86-32 mode, direct calls to // external symbols most go through the PLT in PIC mode. If the symbol // has hidden or protected visibility, or if it is static or local, then @@ -1688,8 +1658,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { // automatically synthesizes these stubs. OpFlags = X86II::MO_DARWIN_STUB; } - - + + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)) .addGlobalAddress(GV, 0, OpFlags); } @@ -1709,7 +1679,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { // Now handle call return value (if any). SmallVector<unsigned, 4> UsedRegs; - if (RetVT.getSimpleVT().SimpleTy != MVT::isVoid) { + if (RetVT != MVT::isVoid) { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CC, false, TM, RVLocs, I->getParent()->getContext()); CCInfo.AnalyzeCallResult(RetVT, RetCC_X86); @@ -1718,7 +1688,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { assert(RVLocs.size() == 1 && "Can't handle multi-value calls!"); EVT CopyVT = RVLocs[0].getValVT(); TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT); - + // If this is a call to a function that returns an fp value on the x87 fp // stack, but where we prefer to use the value in xmm registers, copy it // out as F80 and use a truncate to move it from fp stack reg to xmm reg. @@ -1756,7 +1726,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { if (AndToI1) { // Mask out all but lowest bit for some call which produces an i1. unsigned AndResult = createResultReg(X86::GR8RegisterClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::AND8ri), AndResult).addReg(ResultReg).addImm(1); ResultReg = AndResult; } @@ -1823,14 +1793,14 @@ X86FastISel::TargetSelectInstruction(const Instruction *I) { } unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { - EVT VT; + MVT VT; if (!isTypeLegal(C->getType(), VT)) return false; - + // Get opcode and regclass of the output for the given load instruction. unsigned Opc = 0; const TargetRegisterClass *RC = NULL; - switch (VT.getSimpleVT().SimpleTy) { + switch (VT.SimpleTy) { default: return false; case MVT::i8: Opc = X86::MOV8rm; @@ -1871,7 +1841,7 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { // No f80 support yet. return false; } - + // Materialize addresses with LEA instructions. if (isa<GlobalValue>(C)) { X86AddressMode AM; @@ -1887,14 +1857,14 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { } return 0; } - + // MachineConstantPool wants an explicit alignment. unsigned Align = TD.getPrefTypeAlignment(C->getType()); if (Align == 0) { // Alignment of vector types. FIXME! Align = TD.getTypeAllocSize(C->getType()); } - + // x86-32 PIC requires a PIC base register for constant pools. 
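  // (The pool entry's address is formed relative to that base rather than as
  // an absolute 32-bit immediate. A rough, assumed sketch of the resulting
  // load, with %esi standing in for the PIC base register:
  //   movss  .LCPI0_0@GOTOFF(%esi), %xmm0   ; ELF PIC flavor
  // Darwin instead uses a picbase-label difference for the same purpose.)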
unsigned PICBase = 0; unsigned char OpFlag = 0; @@ -1941,6 +1911,34 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) { return ResultReg; } +/// TryToFoldLoad - The specified machine instr operand is a vreg, and that +/// vreg is being provided by the specified load instruction. If possible, +/// try to fold the load as an operand to the instruction, returning true if +/// possible. +bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI) { + X86AddressMode AM; + if (!X86SelectAddress(LI->getOperand(0), AM)) + return false; + + X86InstrInfo &XII = (X86InstrInfo&)TII; + + unsigned Size = TD.getTypeAllocSize(LI->getType()); + unsigned Alignment = LI->getAlignment(); + + SmallVector<MachineOperand, 8> AddrOps; + AM.getFullAddress(AddrOps); + + MachineInstr *Result = + XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment); + if (Result == 0) return false; + + FuncInfo.MBB->insert(FuncInfo.InsertPt, Result); + MI->eraseFromParent(); + return true; +} + + namespace llvm { llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) { return new X86FastISel(funcInfo); diff --git a/lib/Target/X86/X86FixupKinds.h b/lib/Target/X86/X86FixupKinds.h index 96e0aae..17d242a 100644 --- a/lib/Target/X86/X86FixupKinds.h +++ b/lib/Target/X86/X86FixupKinds.h @@ -15,11 +15,17 @@ namespace llvm { namespace X86 { enum Fixups { - reloc_pcrel_4byte = FirstTargetFixupKind, // 32-bit pcrel, e.g. a branch. - reloc_pcrel_1byte, // 8-bit pcrel, e.g. branch_1 - reloc_pcrel_2byte, // 16-bit pcrel, e.g. callw - reloc_riprel_4byte, // 32-bit rip-relative - reloc_riprel_4byte_movq_load // 32-bit rip-relative in movq + reloc_riprel_4byte = FirstTargetFixupKind, // 32-bit rip-relative + reloc_riprel_4byte_movq_load, // 32-bit rip-relative in movq + reloc_signed_4byte, // 32-bit signed. Unlike FK_Data_4 + // this will be sign extended at + // runtime. + reloc_global_offset_table, // 32-bit, relative to the start + // of the instruction. Used only + // for _GLOBAL_OFFSET_TABLE_. + // Marker + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind }; } } diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index e6ebf66..3aaa693 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -32,6 +32,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/EdgeBundles.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -51,6 +52,7 @@ namespace { struct FPS : public MachineFunctionPass { static char ID; FPS() : MachineFunctionPass(ID) { + initializeEdgeBundlesPass(*PassRegistry::getPassRegistry()); // This is really only to keep valgrind quiet. // The logic in isLive() is too much for it. memset(Stack, 0, sizeof(Stack)); @@ -59,6 +61,7 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); + AU.addRequired<EdgeBundles>(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); @@ -94,7 +97,7 @@ namespace { // FixStack[i] == getStackEntry(i) for all i < FixCount. unsigned char FixStack[8]; - LiveBundle(unsigned m = 0) : Mask(m), FixCount(0) {} + LiveBundle() : Mask(0), FixCount(0) {} // Have the live registers been assigned a stack order yet? 
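    // (Mask is the set of FP registers live across the bundle's edges;
    // FixCount/FixStack record the st(0)..st(n) order once the first block
    // using the bundle has fixed it. A bundle with no live registers is
    // trivially "fixed", which is what the !Mask test below expresses.)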
bool isFixed() const { return !Mask || FixCount; } @@ -104,10 +107,8 @@ namespace { // with no live FP registers. SmallVector<LiveBundle, 8> LiveBundles; - // Map each MBB in the current function to an (ingoing, outgoing) index into - // LiveBundles. Blocks with no FP registers live in or out map to (0, 0) - // and are not actually stored in the map. - DenseMap<MachineBasicBlock*, std::pair<unsigned, unsigned> > BlockBundle; + // The edge bundle analysis provides indices into the LiveBundles vector. + EdgeBundles *Bundles; // Return a bitmask of FP registers in block's live-in list. unsigned calcLiveInMask(MachineBasicBlock *MBB) { @@ -167,7 +168,8 @@ namespace { /// getStackEntry - Return the X86::FP<n> register in register ST(i). unsigned getStackEntry(unsigned STi) const { - assert(STi < StackTop && "Access past stack top!"); + if (STi >= StackTop) + report_fatal_error("Access past stack top!"); return Stack[StackTop-1-STi]; } @@ -180,7 +182,8 @@ namespace { // pushReg - Push the specified FP<n> register onto the stack. void pushReg(unsigned Reg) { assert(Reg < 8 && "Register number out of range!"); - assert(StackTop < 8 && "Stack overflow!"); + if (StackTop >= 8) + report_fatal_error("Stack overflow!"); Stack[StackTop] = Reg; RegMap[Reg] = StackTop++; } @@ -197,7 +200,8 @@ namespace { std::swap(RegMap[RegNo], RegMap[RegOnTop]); // Swap stack slot contents. - assert(RegMap[RegOnTop] < StackTop); + if (RegMap[RegOnTop] >= StackTop) + report_fatal_error("Access past stack top!"); std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]); // Emit an fxch to update the runtime processors version of the state. @@ -281,6 +285,7 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) { // Early exit. if (!FPIsUsed) return false; + Bundles = &getAnalysis<EdgeBundles>(); TII = MF.getTarget().getInstrInfo(); // Prepare cross-MBB liveness. @@ -305,7 +310,6 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) { if (Processed.insert(BB)) Changed |= processBasicBlock(MF, *BB); - BlockBundle.clear(); LiveBundles.clear(); return Changed; @@ -318,90 +322,16 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) { /// registers may be implicitly defined, or not used by all successors. void FPS::bundleCFG(MachineFunction &MF) { assert(LiveBundles.empty() && "Stale data in LiveBundles"); - assert(BlockBundle.empty() && "Stale data in BlockBundle"); - SmallPtrSet<MachineBasicBlock*, 8> PropDown, PropUp; + LiveBundles.resize(Bundles->getNumBundles()); - // LiveBundle[0] is the empty live-in set. - LiveBundles.resize(1); - - // First gather the actual live-in masks for all MBBs. + // Gather the actual live-in masks for all MBBs. for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { MachineBasicBlock *MBB = I; const unsigned Mask = calcLiveInMask(MBB); if (!Mask) continue; - // Ingoing bundle index. - unsigned &Idx = BlockBundle[MBB].first; - // Already assigned an ingoing bundle? - if (Idx) - continue; - // Allocate a new LiveBundle struct for this block's live-ins. - const unsigned BundleIdx = Idx = LiveBundles.size(); - DEBUG(dbgs() << "Creating LB#" << BundleIdx << ": in:BB#" - << MBB->getNumber()); - LiveBundles.push_back(Mask); - LiveBundle &Bundle = LiveBundles.back(); - - // Make sure all predecessors have the same live-out set. - PropUp.insert(MBB); - - // Keep pushing liveness up and down the CFG until convergence. - // Only critical edges cause iteration here, but when they do, multiple - // blocks can be assigned to the same LiveBundle index. 
- do { - // Assign BundleIdx as liveout from predecessors in PropUp. - for (SmallPtrSet<MachineBasicBlock*, 16>::iterator I = PropUp.begin(), - E = PropUp.end(); I != E; ++I) { - MachineBasicBlock *MBB = *I; - for (MachineBasicBlock::const_pred_iterator LinkI = MBB->pred_begin(), - LinkE = MBB->pred_end(); LinkI != LinkE; ++LinkI) { - MachineBasicBlock *PredMBB = *LinkI; - // PredMBB's liveout bundle should be set to LIIdx. - unsigned &Idx = BlockBundle[PredMBB].second; - if (Idx) { - assert(Idx == BundleIdx && "Inconsistent CFG"); - continue; - } - Idx = BundleIdx; - DEBUG(dbgs() << " out:BB#" << PredMBB->getNumber()); - // Propagate to siblings. - if (PredMBB->succ_size() > 1) - PropDown.insert(PredMBB); - } - } - PropUp.clear(); - - // Assign BundleIdx as livein to successors in PropDown. - for (SmallPtrSet<MachineBasicBlock*, 16>::iterator I = PropDown.begin(), - E = PropDown.end(); I != E; ++I) { - MachineBasicBlock *MBB = *I; - for (MachineBasicBlock::const_succ_iterator LinkI = MBB->succ_begin(), - LinkE = MBB->succ_end(); LinkI != LinkE; ++LinkI) { - MachineBasicBlock *SuccMBB = *LinkI; - // LinkMBB's livein bundle should be set to BundleIdx. - unsigned &Idx = BlockBundle[SuccMBB].first; - if (Idx) { - assert(Idx == BundleIdx && "Inconsistent CFG"); - continue; - } - Idx = BundleIdx; - DEBUG(dbgs() << " in:BB#" << SuccMBB->getNumber()); - // Propagate to siblings. - if (SuccMBB->pred_size() > 1) - PropUp.insert(SuccMBB); - // Also accumulate the bundle liveness mask from the liveins here. - Bundle.Mask |= calcLiveInMask(SuccMBB); - } - } - PropDown.clear(); - } while (!PropUp.empty()); - DEBUG({ - dbgs() << " live:"; - for (unsigned i = 0; i < 8; ++i) - if (Bundle.Mask & (1<<i)) - dbgs() << " %FP" << i; - dbgs() << '\n'; - }); + // Update MBB ingoing bundle mask. + LiveBundles[Bundles->getBundle(MBB->getNumber(), false)].Mask |= Mask; } } @@ -489,13 +419,15 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { return Changed; } -/// setupBlockStack - Use the BlockBundle map to set up our model of the stack +/// setupBlockStack - Use the live bundles to set up our model of the stack /// to match predecessors' live out stack. void FPS::setupBlockStack() { DEBUG(dbgs() << "\nSetting up live-ins for BB#" << MBB->getNumber() << " derived from " << MBB->getName() << ".\n"); StackTop = 0; - const LiveBundle &Bundle = LiveBundles[BlockBundle.lookup(MBB).first]; + // Get the live-in bundle for MBB. + const LiveBundle &Bundle = + LiveBundles[Bundles->getBundle(MBB->getNumber(), false)]; if (!Bundle.Mask) { DEBUG(dbgs() << "Block has no FP live-ins.\n"); @@ -532,7 +464,8 @@ void FPS::finishBlockStack() { DEBUG(dbgs() << "Setting up live-outs for BB#" << MBB->getNumber() << " derived from " << MBB->getName() << ".\n"); - unsigned BundleIdx = BlockBundle.lookup(MBB).second; + // Get MBB's live-out bundle. + unsigned BundleIdx = Bundles->getBundle(MBB->getNumber(), true); LiveBundle &Bundle = LiveBundles[BundleIdx]; // We may need to kill and define some registers to match successors. 
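The EdgeBundles-based bookkeeping above replaces the hand-rolled BlockBundle
propagation: every (basic block, ingoing/outgoing) pair maps to a bundle
number, and all CFG edges in the same bundle share one LiveBundle entry. A
minimal sketch of the lookup pattern the pass now relies on (member names as
used above; illustrative only):

   // Live-in bundle: the FP stack this block must assume on entry.
   const LiveBundle &In =
     LiveBundles[Bundles->getBundle(MBB->getNumber(), false)];
   // Live-out bundle: the FP stack every successor edge expects on exit.
   LiveBundle &Out =
     LiveBundles[Bundles->getBundle(MBB->getNumber(), true)];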
@@ -572,7 +505,8 @@ namespace { friend bool operator<(const TableEntry &TE, unsigned V) { return TE.from < V; } - friend bool ATTRIBUTE_USED operator<(unsigned V, const TableEntry &TE) { + friend bool LLVM_ATTRIBUTE_USED operator<(unsigned V, + const TableEntry &TE) { return V < TE.from; } }; @@ -824,7 +758,8 @@ void FPS::popStackAfter(MachineBasicBlock::iterator &I) { MachineInstr* MI = I; DebugLoc dl = MI->getDebugLoc(); ASSERT_SORTED(PopTable); - assert(StackTop > 0 && "Cannot pop empty stack!"); + if (StackTop == 0) + report_fatal_error("Cannot pop empty stack!"); RegMap[Stack[--StackTop]] = ~0; // Update state // Check to see if there is a popping version of this instruction... @@ -1016,7 +951,8 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) { MI->getOpcode() == X86::ISTT_FP32m || MI->getOpcode() == X86::ISTT_FP64m || MI->getOpcode() == X86::ST_FP80m) { - assert(StackTop > 0 && "Stack empty??"); + if (StackTop == 0) + report_fatal_error("Stack empty??"); --StackTop; } else if (KillsSrc) { // Last use of operand? popStackAfter(I); @@ -1047,7 +983,8 @@ void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) { // If this is the last use of the source register, just make sure it's on // the top of the stack. moveToTop(Reg, I); - assert(StackTop > 0 && "Stack cannot be empty!"); + if (StackTop == 0) + report_fatal_error("Stack cannot be empty!"); --StackTop; pushReg(getFPReg(MI->getOperand(0))); } else { @@ -1300,7 +1237,6 @@ void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) { /// void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { MachineInstr *MI = I; - DebugLoc dl = MI->getDebugLoc(); switch (MI->getOpcode()) { default: llvm_unreachable("Unknown SpecialFP instruction!"); case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type! @@ -1341,7 +1277,8 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { std::swap(RegMap[RegNo], RegMap[RegOnTop]); // Swap stack slot contents. - assert(RegMap[RegOnTop] < StackTop); + if (RegMap[RegOnTop] >= StackTop) + report_fatal_error("Access past stack top!"); std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]); break; } diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp new file mode 100644 index 0000000..0a3f931 --- /dev/null +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -0,0 +1,994 @@ +//=======- X86FrameLowering.cpp - X86 Frame Information ------------*- C++ -*-====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the X86 implementation of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#include "X86FrameLowering.h" +#include "X86InstrBuilder.h" +#include "X86InstrInfo.h" +#include "X86MachineFunctionInfo.h" +#include "X86TargetMachine.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/ADT/SmallSet.h" + +using namespace llvm; + +// FIXME: completely move here. 
+extern cl::opt<bool> ForceStackAlign; + +bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { + return !MF.getFrameInfo()->hasVarSizedObjects(); +} + +/// hasFP - Return true if the specified function should have a dedicated frame +/// pointer register. This is true if the function has variable sized allocas +/// or if frame pointer elimination is disabled. +bool X86FrameLowering::hasFP(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const MachineModuleInfo &MMI = MF.getMMI(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + + return (DisableFramePointerElim(MF) || + RI->needsStackRealignment(MF) || + MFI->hasVarSizedObjects() || + MFI->isFrameAddressTaken() || + MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() || + MMI.callsUnwindInit()); +} + +static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) { + if (is64Bit) { + if (isInt<8>(Imm)) + return X86::SUB64ri8; + return X86::SUB64ri32; + } else { + if (isInt<8>(Imm)) + return X86::SUB32ri8; + return X86::SUB32ri; + } +} + +static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) { + if (is64Bit) { + if (isInt<8>(Imm)) + return X86::ADD64ri8; + return X86::ADD64ri32; + } else { + if (isInt<8>(Imm)) + return X86::ADD32ri8; + return X86::ADD32ri; + } +} + +/// findDeadCallerSavedReg - Return a caller-saved register that isn't live +/// when it reaches the "return" instruction. We can then pop a stack object +/// to this register without worry about clobbering it. +static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + const TargetRegisterInfo &TRI, + bool Is64Bit) { + const MachineFunction *MF = MBB.getParent(); + const Function *F = MF->getFunction(); + if (!F || MF->getMMI().callsEHReturn()) + return 0; + + static const unsigned CallerSavedRegs32Bit[] = { + X86::EAX, X86::EDX, X86::ECX + }; + + static const unsigned CallerSavedRegs64Bit[] = { + X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI, + X86::R8, X86::R9, X86::R10, X86::R11 + }; + + unsigned Opc = MBBI->getOpcode(); + switch (Opc) { + default: return 0; + case X86::RET: + case X86::RETI: + case X86::TCRETURNdi: + case X86::TCRETURNri: + case X86::TCRETURNmi: + case X86::TCRETURNdi64: + case X86::TCRETURNri64: + case X86::TCRETURNmi64: + case X86::EH_RETURN: + case X86::EH_RETURN64: { + SmallSet<unsigned, 8> Uses; + for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MBBI->getOperand(i); + if (!MO.isReg() || MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + for (const unsigned *AsI = TRI.getOverlaps(Reg); *AsI; ++AsI) + Uses.insert(*AsI); + } + + const unsigned *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit; + for (; *CS; ++CS) + if (!Uses.count(*CS)) + return *CS; + } + } + + return 0; +} + + +/// emitSPUpdate - Emit a series of instructions to increment / decrement the +/// stack pointer by a constant value. +static +void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, + unsigned StackPtr, int64_t NumBytes, + bool Is64Bit, const TargetInstrInfo &TII, + const TargetRegisterInfo &TRI) { + bool isSub = NumBytes < 0; + uint64_t Offset = isSub ? -NumBytes : NumBytes; + unsigned Opc = isSub ? + getSUBriOpcode(Is64Bit, Offset) : + getADDriOpcode(Is64Bit, Offset); + uint64_t Chunk = (1LL << 31) - 1; + DebugLoc DL = MBB.findDebugLoc(MBBI); + + while (Offset) { + uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset; + if (ThisVal == (Is64Bit ? 
8 : 4)) { + // Use push / pop instead. + unsigned Reg = isSub + ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX) + : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit); + if (Reg) { + Opc = isSub + ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r) + : (Is64Bit ? X86::POP64r : X86::POP32r); + BuildMI(MBB, MBBI, DL, TII.get(Opc)) + .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub)); + Offset -= ThisVal; + continue; + } + } + + MachineInstr *MI = + BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) + .addReg(StackPtr) + .addImm(ThisVal); + MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. + Offset -= ThisVal; + } +} + +/// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator. +static +void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, + unsigned StackPtr, uint64_t *NumBytes = NULL) { + if (MBBI == MBB.begin()) return; + + MachineBasicBlock::iterator PI = prior(MBBI); + unsigned Opc = PI->getOpcode(); + if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || + Opc == X86::ADD32ri || Opc == X86::ADD32ri8) && + PI->getOperand(0).getReg() == StackPtr) { + if (NumBytes) + *NumBytes += PI->getOperand(2).getImm(); + MBB.erase(PI); + } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || + Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && + PI->getOperand(0).getReg() == StackPtr) { + if (NumBytes) + *NumBytes -= PI->getOperand(2).getImm(); + MBB.erase(PI); + } +} + +/// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower iterator. +static +void mergeSPUpdatesDown(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned StackPtr, uint64_t *NumBytes = NULL) { + // FIXME: THIS ISN'T RUN!!! + return; + + if (MBBI == MBB.end()) return; + + MachineBasicBlock::iterator NI = llvm::next(MBBI); + if (NI == MBB.end()) return; + + unsigned Opc = NI->getOpcode(); + if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || + Opc == X86::ADD32ri || Opc == X86::ADD32ri8) && + NI->getOperand(0).getReg() == StackPtr) { + if (NumBytes) + *NumBytes -= NI->getOperand(2).getImm(); + MBB.erase(NI); + MBBI = NI; + } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || + Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && + NI->getOperand(0).getReg() == StackPtr) { + if (NumBytes) + *NumBytes += NI->getOperand(2).getImm(); + MBB.erase(NI); + MBBI = NI; + } +} + +/// mergeSPUpdates - Checks the instruction before/after the passed +/// instruction. If it is an ADD/SUB instruction it is deleted argument and the +/// stack adjustment is returned as a positive value for ADD and a negative for +/// SUB. +static int mergeSPUpdates(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned StackPtr, + bool doMergeWithPrevious) { + if ((doMergeWithPrevious && MBBI == MBB.begin()) || + (!doMergeWithPrevious && MBBI == MBB.end())) + return 0; + + MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI; + MachineBasicBlock::iterator NI = doMergeWithPrevious ? 
0 : llvm::next(MBBI); + unsigned Opc = PI->getOpcode(); + int Offset = 0; + + if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || + Opc == X86::ADD32ri || Opc == X86::ADD32ri8) && + PI->getOperand(0).getReg() == StackPtr){ + Offset += PI->getOperand(2).getImm(); + MBB.erase(PI); + if (!doMergeWithPrevious) MBBI = NI; + } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || + Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && + PI->getOperand(0).getReg() == StackPtr) { + Offset -= PI->getOperand(2).getImm(); + MBB.erase(PI); + if (!doMergeWithPrevious) MBBI = NI; + } + + return Offset; +} + +static bool isEAXLiveIn(MachineFunction &MF) { + for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(), + EE = MF.getRegInfo().livein_end(); II != EE; ++II) { + unsigned Reg = II->first; + + if (Reg == X86::EAX || Reg == X86::AX || + Reg == X86::AH || Reg == X86::AL) + return true; + } + + return false; +} + +void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF, + MCSymbol *Label, + unsigned FramePtr) const { + MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineModuleInfo &MMI = MF.getMMI(); + + // Add callee saved registers to move list. + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + if (CSI.empty()) return; + + std::vector<MachineMove> &Moves = MMI.getFrameMoves(); + const TargetData *TD = TM.getTargetData(); + bool HasFP = hasFP(MF); + + // Calculate amount of bytes used for return address storing. + int stackGrowth = -TD->getPointerSize(); + + // FIXME: This is dirty hack. The code itself is pretty mess right now. + // It should be rewritten from scratch and generalized sometimes. + + // Determine maximum offset (minumum due to stack growth). + int64_t MaxOffset = 0; + for (std::vector<CalleeSavedInfo>::const_iterator + I = CSI.begin(), E = CSI.end(); I != E; ++I) + MaxOffset = std::min(MaxOffset, + MFI->getObjectOffset(I->getFrameIdx())); + + // Calculate offsets. + int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth; + for (std::vector<CalleeSavedInfo>::const_iterator + I = CSI.begin(), E = CSI.end(); I != E; ++I) { + int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); + unsigned Reg = I->getReg(); + Offset = MaxOffset - Offset + saveAreaOffset; + + // Don't output a new machine move if we're re-saving the frame + // pointer. This happens when the PrologEpilogInserter has inserted an extra + // "PUSH" of the frame pointer -- the "emitPrologue" method automatically + // generates one when frame pointers are used. If we generate a "machine + // move" for this extra "PUSH", the linker will lose track of the fact that + // the frame pointer should have the value of the first "PUSH" when it's + // trying to unwind. + // + // FIXME: This looks inelegant. It's possibly correct, but it's covering up + // another bug. I.e., one where we generate a prolog like this: + // + // pushl %ebp + // movl %esp, %ebp + // pushl %ebp + // pushl %esi + // ... + // + // The immediate re-push of EBP is unnecessary. At the least, it's an + // optimization bug. EBP can be used as a scratch register in certain + // cases, but probably not when we have a frame pointer. + if (HasFP && FramePtr == Reg) + continue; + + MachineLocation CSDst(MachineLocation::VirtualFP, Offset); + MachineLocation CSSrc(Reg); + Moves.push_back(MachineMove(Label, CSDst, CSSrc)); + } +} + +/// emitPrologue - Push callee-saved registers onto the stack, which +/// automatically adjust the stack pointer. Adjust the stack pointer to allocate +/// space for local variables. 
Also emit labels used by the exception handler to +/// generate the exception handling frames. +void X86FrameLowering::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB. + MachineBasicBlock::iterator MBBI = MBB.begin(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const Function *Fn = MF.getFunction(); + const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); + const X86InstrInfo &TII = *TM.getInstrInfo(); + MachineModuleInfo &MMI = MF.getMMI(); + X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); + bool needsFrameMoves = MMI.hasDebugInfo() || + !Fn->doesNotThrow() || UnwindTablesMandatory; + uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment. + uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate. + bool HasFP = hasFP(MF); + bool Is64Bit = STI.is64Bit(); + bool IsWin64 = STI.isTargetWin64(); + unsigned StackAlign = getStackAlignment(); + unsigned SlotSize = RegInfo->getSlotSize(); + unsigned FramePtr = RegInfo->getFrameRegister(MF); + unsigned StackPtr = RegInfo->getStackRegister(); + + DebugLoc DL; + + // If we're forcing a stack realignment we can't rely on just the frame + // info, we need to know the ABI stack alignment as well in case we + // have a call out. Otherwise just make sure we have some alignment - we'll + // go with the minimum SlotSize. + if (ForceStackAlign) { + if (MFI->hasCalls()) + MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; + else if (MaxAlign < SlotSize) + MaxAlign = SlotSize; + } + + // Add RETADDR move area to callee saved frame size. + int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); + if (TailCallReturnAddrDelta < 0) + X86FI->setCalleeSavedFrameSize( + X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta); + + // If this is x86-64 and the Red Zone is not disabled, if we are a leaf + // function, and use up to 128 bytes of stack space, don't have a frame + // pointer, calls, or dynamic alloca then we do not need to adjust the + // stack pointer (we fit in the Red Zone). + if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) && + !RegInfo->needsStackRealignment(MF) && + !MFI->hasVarSizedObjects() && // No dynamic alloca. + !MFI->adjustsStack() && // No calls. + !IsWin64) { // Win64 has no Red Zone + uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); + if (HasFP) MinSize += SlotSize; + StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); + MFI->setStackSize(StackSize); + } + + // Insert stack pointer adjustment for later moving of return addr. Only + // applies to tail call optimized functions where the callee argument stack + // size is bigger than the callers. + if (TailCallReturnAddrDelta < 0) { + MachineInstr *MI = + BuildMI(MBB, MBBI, DL, + TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)), + StackPtr) + .addReg(StackPtr) + .addImm(-TailCallReturnAddrDelta); + MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. 
+ } + + // Mapping for machine moves: + // + // DST: VirtualFP AND + // SRC: VirtualFP => DW_CFA_def_cfa_offset + // ELSE => DW_CFA_def_cfa + // + // SRC: VirtualFP AND + // DST: Register => DW_CFA_def_cfa_register + // + // ELSE + // OFFSET < 0 => DW_CFA_offset_extended_sf + // REG < 64 => DW_CFA_offset + Reg + // ELSE => DW_CFA_offset_extended + + std::vector<MachineMove> &Moves = MMI.getFrameMoves(); + const TargetData *TD = MF.getTarget().getTargetData(); + uint64_t NumBytes = 0; + int stackGrowth = -TD->getPointerSize(); + + if (HasFP) { + // Calculate required stack adjustment. + uint64_t FrameSize = StackSize - SlotSize; + if (RegInfo->needsStackRealignment(MF)) + FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign; + + NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize(); + + // Get the offset of the stack slot for the EBP register, which is + // guaranteed to be the last slot by processFunctionBeforeFrameFinalized. + // Update the frame offset adjustment. + MFI->setOffsetAdjustment(-NumBytes); + + // Save EBP/RBP into the appropriate stack slot. + BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) + .addReg(FramePtr, RegState::Kill); + + if (needsFrameMoves) { + // Mark the place where EBP/RBP was saved. + MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel); + + // Define the current CFA rule to use the provided offset. + if (StackSize) { + MachineLocation SPDst(MachineLocation::VirtualFP); + MachineLocation SPSrc(MachineLocation::VirtualFP, 2 * stackGrowth); + Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc)); + } else { + MachineLocation SPDst(StackPtr); + MachineLocation SPSrc(StackPtr, stackGrowth); + Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc)); + } + + // Change the rule for the FramePtr to be an "offset" rule. + MachineLocation FPDst(MachineLocation::VirtualFP, 2 * stackGrowth); + MachineLocation FPSrc(FramePtr); + Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc)); + } + + // Update EBP with the new base value... + BuildMI(MBB, MBBI, DL, + TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr) + .addReg(StackPtr); + + if (needsFrameMoves) { + // Mark effective beginning of when frame pointer becomes valid. + MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel); + + // Define the current CFA to use the EBP/RBP register. + MachineLocation FPDst(FramePtr); + MachineLocation FPSrc(MachineLocation::VirtualFP); + Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc)); + } + + // Mark the FramePtr as live-in in every block except the entry. + for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); + I != E; ++I) + I->addLiveIn(FramePtr); + + // Realign stack + if (RegInfo->needsStackRealignment(MF)) { + MachineInstr *MI = + BuildMI(MBB, MBBI, DL, + TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), + StackPtr).addReg(StackPtr).addImm(-MaxAlign); + + // The EFLAGS implicit def is dead. + MI->getOperand(3).setIsDead(); + } + } else { + NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); + } + + // Skip the callee-saved push instructions. + bool PushedRegs = false; + int StackOffset = 2 * stackGrowth; + + while (MBBI != MBB.end() && + (MBBI->getOpcode() == X86::PUSH32r || + MBBI->getOpcode() == X86::PUSH64r)) { + PushedRegs = true; + ++MBBI; + + if (!HasFP && needsFrameMoves) { + // Mark callee-saved push instruction. 
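      // (Each PROLOG_LABEL plus MachineMove pair recorded here is what later
      // becomes a DWARF call-frame directive in the output, on the order of
      //   .cfi_def_cfa_offset 16
      //   .cfi_offset %ebp, -8
      // -- exact numbers depend on the slot size and how many registers
      // have been pushed so far.)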
+ MCSymbol *Label = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label); + + // Define the current CFA rule to use the provided offset. + unsigned Ptr = StackSize ? + MachineLocation::VirtualFP : StackPtr; + MachineLocation SPDst(Ptr); + MachineLocation SPSrc(Ptr, StackOffset); + Moves.push_back(MachineMove(Label, SPDst, SPSrc)); + StackOffset += stackGrowth; + } + } + + DL = MBB.findDebugLoc(MBBI); + + // If there is an SUB32ri of ESP immediately before this instruction, merge + // the two. This can be the case when tail call elimination is enabled and + // the callee has more arguments then the caller. + NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true); + + // If there is an ADD32ri or SUB32ri of ESP immediately after this + // instruction, merge the two instructions. + mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes); + + // Adjust stack pointer: ESP -= numbytes. + + // Windows and cygwin/mingw require a prologue helper routine when allocating + // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw + // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the + // stack and adjust the stack pointer in one go. The 64-bit version of + // __chkstk is only responsible for probing the stack. The 64-bit prologue is + // responsible for adjusting the stack pointer. Touching the stack at 4K + // increments is necessary to ensure that the guard pages used by the OS + // virtual memory manager are allocated in correct sequence. + if (NumBytes >= 4096 && + (STI.isTargetCygMing() || STI.isTargetWin32()) && + !STI.isTargetEnvMacho()) { + // Check whether EAX is livein for this function. + bool isEAXAlive = isEAXLiveIn(MF); + + const char *StackProbeSymbol = + STI.isTargetWindows() ? "_chkstk" : "_alloca"; + if (Is64Bit && STI.isTargetCygMing()) + StackProbeSymbol = "__chkstk"; + unsigned CallOp = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32; + if (!isEAXAlive) { + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) + .addImm(NumBytes); + BuildMI(MBB, MBBI, DL, TII.get(CallOp)) + .addExternalSymbol(StackProbeSymbol) + .addReg(StackPtr, RegState::Define | RegState::Implicit) + .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); + } else { + // Save EAX + BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) + .addReg(X86::EAX, RegState::Kill); + + // Allocate NumBytes-4 bytes on stack. We'll also use 4 already + // allocated bytes for EAX. + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) + .addImm(NumBytes - 4); + BuildMI(MBB, MBBI, DL, TII.get(CallOp)) + .addExternalSymbol(StackProbeSymbol) + .addReg(StackPtr, RegState::Define | RegState::Implicit) + .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); + + // Restore EAX + MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), + X86::EAX), + StackPtr, false, NumBytes - 4); + MBB.insert(MBBI, MI); + } + } else if (NumBytes >= 4096 && + STI.isTargetWin64() && + !STI.isTargetEnvMacho()) { + // Sanity check that EAX is not livein for this function. It should + // not be, so throw an assert. + assert(!isEAXLiveIn(MF) && "EAX is livein in the Win64 case!"); + + // Handle the 64-bit Windows ABI case where we need to call __chkstk. + // Function prologue is responsible for adjusting the stack pointer. 
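    // (Sketch of the sequence this produces, for illustration:
    //    movl  $NumBytes, %eax
    //    call  __chkstk            ; only probes the guard pages on Win64
    //    subq  $NumBytes, %rsp     ; done by the emitSPUpdate call right after)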
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) + .addImm(NumBytes); + BuildMI(MBB, MBBI, DL, TII.get(X86::WINCALL64pcrel32)) + .addExternalSymbol("__chkstk") + .addReg(StackPtr, RegState::Define | RegState::Implicit); + emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, + TII, *RegInfo); + } else if (NumBytes) + emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, + TII, *RegInfo); + + if ((NumBytes || PushedRegs) && needsFrameMoves) { + // Mark end of stack pointer adjustment. + MCSymbol *Label = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label); + + if (!HasFP && NumBytes) { + // Define the current CFA rule to use the provided offset. + if (StackSize) { + MachineLocation SPDst(MachineLocation::VirtualFP); + MachineLocation SPSrc(MachineLocation::VirtualFP, + -StackSize + stackGrowth); + Moves.push_back(MachineMove(Label, SPDst, SPSrc)); + } else { + MachineLocation SPDst(StackPtr); + MachineLocation SPSrc(StackPtr, stackGrowth); + Moves.push_back(MachineMove(Label, SPDst, SPSrc)); + } + } + + // Emit DWARF info specifying the offsets of the callee-saved registers. + if (PushedRegs) + emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr); + } +} + +void X86FrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); + const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); + const X86InstrInfo &TII = *TM.getInstrInfo(); + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + assert(MBBI != MBB.end() && "Returning block has no instructions"); + unsigned RetOpcode = MBBI->getOpcode(); + DebugLoc DL = MBBI->getDebugLoc(); + bool Is64Bit = STI.is64Bit(); + unsigned StackAlign = getStackAlignment(); + unsigned SlotSize = RegInfo->getSlotSize(); + unsigned FramePtr = RegInfo->getFrameRegister(MF); + unsigned StackPtr = RegInfo->getStackRegister(); + + switch (RetOpcode) { + default: + llvm_unreachable("Can only insert epilog into returning blocks"); + case X86::RET: + case X86::RETI: + case X86::TCRETURNdi: + case X86::TCRETURNri: + case X86::TCRETURNmi: + case X86::TCRETURNdi64: + case X86::TCRETURNri64: + case X86::TCRETURNmi64: + case X86::EH_RETURN: + case X86::EH_RETURN64: + break; // These are ok + } + + // Get the number of bytes to allocate from the FrameInfo. + uint64_t StackSize = MFI->getStackSize(); + uint64_t MaxAlign = MFI->getMaxAlignment(); + unsigned CSSize = X86FI->getCalleeSavedFrameSize(); + uint64_t NumBytes = 0; + + // If we're forcing a stack realignment we can't rely on just the frame + // info, we need to know the ABI stack alignment as well in case we + // have a call out. Otherwise just make sure we have some alignment - we'll + // go with the minimum. + if (ForceStackAlign) { + if (MFI->hasCalls()) + MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; + else + MaxAlign = MaxAlign ? MaxAlign : 4; + } + + if (hasFP(MF)) { + // Calculate required stack adjustment. + uint64_t FrameSize = StackSize - SlotSize; + if (RegInfo->needsStackRealignment(MF)) + FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign; + + NumBytes = FrameSize - CSSize; + + // Pop EBP. + BuildMI(MBB, MBBI, DL, + TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr); + } else { + NumBytes = StackSize - CSSize; + } + + // Skip the callee-saved pop instructions. 
+ MachineBasicBlock::iterator LastCSPop = MBBI; + while (MBBI != MBB.begin()) { + MachineBasicBlock::iterator PI = prior(MBBI); + unsigned Opc = PI->getOpcode(); + + if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE && + !PI->getDesc().isTerminator()) + break; + + --MBBI; + } + + DL = MBBI->getDebugLoc(); + + // If there is an ADD32ri or SUB32ri of ESP immediately before this + // instruction, merge the two instructions. + if (NumBytes || MFI->hasVarSizedObjects()) + mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes); + + // If dynamic alloca is used, then reset esp to point to the last callee-saved + // slot before popping them off! Same applies for the case, when stack was + // realigned. + if (RegInfo->needsStackRealignment(MF)) { + // We cannot use LEA here, because stack pointer was realigned. We need to + // deallocate local frame back. + if (CSSize) { + emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo); + MBBI = prior(LastCSPop); + } + + BuildMI(MBB, MBBI, DL, + TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), + StackPtr).addReg(FramePtr); + } else if (MFI->hasVarSizedObjects()) { + if (CSSize) { + unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r; + MachineInstr *MI = + addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr), + FramePtr, false, -CSSize); + MBB.insert(MBBI, MI); + } else { + BuildMI(MBB, MBBI, DL, + TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr) + .addReg(FramePtr); + } + } else if (NumBytes) { + // Adjust stack pointer back: ESP += numbytes. + emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo); + } + + // We're returning from function via eh_return. + if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) { + MBBI = MBB.getLastNonDebugInstr(); + MachineOperand &DestAddr = MBBI->getOperand(0); + assert(DestAddr.isReg() && "Offset should be in register!"); + BuildMI(MBB, MBBI, DL, + TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), + StackPtr).addReg(DestAddr.getReg()); + } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi || + RetOpcode == X86::TCRETURNmi || + RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 || + RetOpcode == X86::TCRETURNmi64) { + bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64; + // Tail call return: adjust the stack pointer and jump to callee. + MBBI = MBB.getLastNonDebugInstr(); + MachineOperand &JumpTarget = MBBI->getOperand(0); + MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1); + assert(StackAdjust.isImm() && "Expecting immediate value."); + + // Adjust stack pointer. + int StackAdj = StackAdjust.getImm(); + int MaxTCDelta = X86FI->getTCReturnAddrDelta(); + int Offset = 0; + assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive"); + + // Incoporate the retaddr area. + Offset = StackAdj-MaxTCDelta; + assert(Offset >= 0 && "Offset should never be negative"); + + if (Offset) { + // Check for possible merge with preceeding ADD instruction. + Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true); + emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII, *RegInfo); + } + + // Jump to label or value in register. + if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) { + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi) + ? 
X86::TAILJMPd : X86::TAILJMPd64)); + if (JumpTarget.isGlobal()) + MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), + JumpTarget.getTargetFlags()); + else { + assert(JumpTarget.isSymbol()); + MIB.addExternalSymbol(JumpTarget.getSymbolName(), + JumpTarget.getTargetFlags()); + } + } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) { + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi) + ? X86::TAILJMPm : X86::TAILJMPm64)); + for (unsigned i = 0; i != 5; ++i) + MIB.addOperand(MBBI->getOperand(i)); + } else if (RetOpcode == X86::TCRETURNri64) { + BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)). + addReg(JumpTarget.getReg(), RegState::Kill); + } else { + BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)). + addReg(JumpTarget.getReg(), RegState::Kill); + } + + MachineInstr *NewMI = prior(MBBI); + for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i) + NewMI->addOperand(MBBI->getOperand(i)); + + // Delete the pseudo instruction TCRETURN. + MBB.erase(MBBI); + } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) && + (X86FI->getTCReturnAddrDelta() < 0)) { + // Add the return addr area delta back since we are not tail calling. + int delta = -1*X86FI->getTCReturnAddrDelta(); + MBBI = MBB.getLastNonDebugInstr(); + + // Check for possible merge with preceeding ADD instruction. + delta += mergeSPUpdates(MBB, MBBI, StackPtr, true); + emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII, *RegInfo); + } +} + +void +X86FrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) const { + // Calculate amount of bytes used for return address storing + int stackGrowth = (STI.is64Bit() ? -8 : -4); + const X86RegisterInfo *RI = TM.getRegisterInfo(); + + // Initial state of the frame pointer is esp+stackGrowth. + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(RI->getStackRegister(), stackGrowth); + Moves.push_back(MachineMove(0, Dst, Src)); + + // Add return address to move list + MachineLocation CSDst(RI->getStackRegister(), stackGrowth); + MachineLocation CSSrc(RI->getRARegister()); + Moves.push_back(MachineMove(0, CSDst, CSSrc)); +} + +int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) const { + const X86RegisterInfo *RI = + static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo()); + const MachineFrameInfo *MFI = MF.getFrameInfo(); + int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea(); + uint64_t StackSize = MFI->getStackSize(); + + if (RI->needsStackRealignment(MF)) { + if (FI < 0) { + // Skip the saved EBP. + Offset += RI->getSlotSize(); + } else { + unsigned Align = MFI->getObjectAlignment(FI); + assert((-(Offset + StackSize)) % Align == 0); + Align = 0; + return Offset + StackSize; + } + // FIXME: Support tail calls + } else { + if (!hasFP(MF)) + return Offset + StackSize; + + // Skip the saved EBP. 
+ Offset += RI->getSlotSize(); + + // Skip the RETADDR move area + const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); + int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); + if (TailCallReturnAddrDelta < 0) + Offset -= TailCallReturnAddrDelta; + } + + return Offset; +} + +bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return false; + + DebugLoc DL = MBB.findDebugLoc(MI); + + MachineFunction &MF = *MBB.getParent(); + + bool isWin64 = STI.isTargetWin64(); + unsigned SlotSize = STI.is64Bit() ? 8 : 4; + unsigned FPReg = TRI->getFrameRegister(MF); + unsigned CalleeFrameSize = 0; + + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); + + unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r; + for (unsigned i = CSI.size(); i != 0; --i) { + unsigned Reg = CSI[i-1].getReg(); + // Add the callee-saved register as live-in. It's killed at the spill. + MBB.addLiveIn(Reg); + if (Reg == FPReg) + // X86RegisterInfo::emitPrologue will handle spilling of frame register. + continue; + if (!X86::VR128RegClass.contains(Reg) && !isWin64) { + CalleeFrameSize += SlotSize; + BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill); + } else { + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), + RC, TRI); + } + } + + X86FI->setCalleeSavedFrameSize(CalleeFrameSize); + return true; +} + +bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return false; + + DebugLoc DL = MBB.findDebugLoc(MI); + + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + unsigned FPReg = TRI->getFrameRegister(MF); + bool isWin64 = STI.isTargetWin64(); + unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r; + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + if (Reg == FPReg) + // X86RegisterInfo::emitEpilogue will handle restoring of frame register. + continue; + if (!X86::VR128RegClass.contains(Reg) && !isWin64) { + BuildMI(MBB, MI, DL, TII.get(Opc), Reg); + } else { + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), + RC, TRI); + } + } + return true; +} + +void +X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const { + MachineFrameInfo *MFI = MF.getFrameInfo(); + const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); + unsigned SlotSize = RegInfo->getSlotSize(); + + X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); + int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); + + if (TailCallReturnAddrDelta < 0) { + // create RETURNADDR area + // arg + // arg + // RETADDR + // { ... + // RETADDR area + // ... 
+ // } + // [EBP] + MFI->CreateFixedObject(-TailCallReturnAddrDelta, + (-1U*SlotSize)+TailCallReturnAddrDelta, true); + } + + if (hasFP(MF)) { + assert((TailCallReturnAddrDelta <= 0) && + "The Delta should always be zero or negative"); + const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering(); + + // Create a frame entry for the EBP register that must be saved. + int FrameIdx = MFI->CreateFixedObject(SlotSize, + -(int)SlotSize + + TFI.getOffsetOfLocalArea() + + TailCallReturnAddrDelta, + true); + assert(FrameIdx == MFI->getObjectIndexBegin() && + "Slot for EBP register must be last in order to be found!"); + FrameIdx = 0; + } +} diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h new file mode 100644 index 0000000..d71108c --- /dev/null +++ b/lib/Target/X86/X86FrameLowering.h @@ -0,0 +1,65 @@ +//=-- X86TargetFrameLowering.h - Define frame lowering for X86 ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class implements X86-specific bits of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#ifndef X86_FRAMELOWERING_H +#define X86_FRAMELOWERING_H + +#include "X86Subtarget.h" +#include "llvm/Target/TargetFrameLowering.h" + +namespace llvm { + class MCSymbol; + class X86TargetMachine; + +class X86FrameLowering : public TargetFrameLowering { + const X86TargetMachine &TM; + const X86Subtarget &STI; +public: + explicit X86FrameLowering(const X86TargetMachine &tm, const X86Subtarget &sti) + : TargetFrameLowering(StackGrowsDown, + sti.getStackAlignment(), + (sti.is64Bit() ? -8 : -4)), + TM(tm), STI(sti) { + } + + void emitCalleeSavedFrameMoves(MachineFunction &MF, MCSymbol *Label, + unsigned FramePtr) const; + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. 
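  /// (For orientation, a typical frame-pointer prologue these methods emit is
  ///    pushq %rbp
  ///    movq  %rsp, %rbp
  ///    subq  $N, %rsp        ; N covers locals plus the callee-saved area
  /// with the epilogue undoing it in reverse; see X86FrameLowering.cpp above.)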
+ void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS = NULL) const; + + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + + bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + + bool hasFP(const MachineFunction &MF) const; + bool hasReservedCallFrame(const MachineFunction &MF) const; + + void getInitialFrameState(std::vector<MachineMove> &Moves) const; + int getFrameIndexOffset(const MachineFunction &MF, int FI) const; +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index c523441..9b0ec6e 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -190,20 +190,19 @@ namespace { SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT); - bool MatchSegmentBaseAddress(SDValue N, X86ISelAddressMode &AM); - bool MatchLoad(SDValue N, X86ISelAddressMode &AM); + bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM); bool MatchWrapper(SDValue N, X86ISelAddressMode &AM); bool MatchAddress(SDValue N, X86ISelAddressMode &AM); bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, unsigned Depth); bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM); - bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base, + bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); - bool SelectLEAAddr(SDNode *Op, SDValue N, SDValue &Base, + bool SelectLEAAddr(SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); - bool SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, + bool SelectTLSADDRAddr(SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); bool SelectScalarSSELoad(SDNode *Root, SDValue N, @@ -264,12 +263,6 @@ namespace { return CurDAG->getTargetConstant(Imm, MVT::i8); } - /// getI16Imm - Return a target constant with the specified value, of type - /// i16. - inline SDValue getI16Imm(unsigned Imm) { - return CurDAG->getTargetConstant(Imm, MVT::i16); - } - /// getI32Imm - Return a target constant with the specified value, of type /// i32. inline SDValue getI32Imm(unsigned Imm) { @@ -511,10 +504,11 @@ void X86DAGToDAGISel::PreprocessISelDAG() { // FIXME: optimize the case where the src/dest is a load or store? SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0), - MemTmp, NULL, 0, MemVT, + MemTmp, MachinePointerInfo(), MemVT, false, false, 0); - SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, DstVT, dl, Store, MemTmp, - NULL, 0, MemVT, false, false, 0); + SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp, + MachinePointerInfo(), + MemVT, false, false, 0); // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the // extload we created. 
This will cause general havok on the dag because @@ -536,9 +530,12 @@ void X86DAGToDAGISel::PreprocessISelDAG() { void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI) { const TargetInstrInfo *TII = TM.getInstrInfo(); - if (Subtarget->isTargetCygMing()) + if (Subtarget->isTargetCygMing()) { + unsigned CallOp = + Subtarget->is64Bit() ? X86::WINCALL64pcrel32 : X86::CALLpcrel32; BuildMI(BB, DebugLoc(), - TII->get(X86::CALLpcrel32)).addExternalSymbol("__main"); + TII->get(CallOp)).addExternalSymbol("__main"); + } } void X86DAGToDAGISel::EmitFunctionEntryCode() { @@ -549,29 +546,27 @@ void X86DAGToDAGISel::EmitFunctionEntryCode() { } -bool X86DAGToDAGISel::MatchSegmentBaseAddress(SDValue N, - X86ISelAddressMode &AM) { - assert(N.getOpcode() == X86ISD::SegmentBaseAddress); - SDValue Segment = N.getOperand(0); - - if (AM.Segment.getNode() == 0) { - AM.Segment = Segment; - return false; - } - - return true; -} - -bool X86DAGToDAGISel::MatchLoad(SDValue N, X86ISelAddressMode &AM) { +bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ + SDValue Address = N->getOperand(1); + + // load gs:0 -> GS segment register. + // load fs:0 -> FS segment register. + // // This optimization is valid because the GNU TLS model defines that // gs:0 (or fs:0 on X86-64) contains its own address. // For more information see http://people.redhat.com/drepper/tls.pdf - - SDValue Address = N.getOperand(1); - if (Address.getOpcode() == X86ISD::SegmentBaseAddress && - !MatchSegmentBaseAddress (Address, AM)) - return false; - + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address)) + if (C->getSExtValue() == 0 && AM.Segment.getNode() == 0 && + Subtarget->isTargetELF()) + switch (N->getPointerInfo().getAddrSpace()) { + case 256: + AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16); + return false; + case 257: + AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16); + return false; + } + return true; } @@ -690,25 +685,6 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { return false; } -/// isLogicallyAddWithConstant - Return true if this node is semantically an -/// add of a value with a constantint. -static bool isLogicallyAddWithConstant(SDValue V, SelectionDAG *CurDAG) { - // Check for (add x, Cst) - if (V->getOpcode() == ISD::ADD) - return isa<ConstantSDNode>(V->getOperand(1)); - - // Check for (or x, Cst), where Cst & x == 0. - if (V->getOpcode() != ISD::OR || - !isa<ConstantSDNode>(V->getOperand(1))) - return false; - - // Handle "X | C" as "X + C" iff X is known to have C bits clear. - ConstantSDNode *CN = cast<ConstantSDNode>(V->getOperand(1)); - - // Check to see if the LHS & C is zero. 
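MatchLoadInAddress above folds a load of constant address 0 through x86 address space 256 or 257 into a bare segment-register operand, relying on the GNU TLS layout in which gs:0 (fs:0 on x86-64) holds its own address. A stand-alone restatement of that rule, purely illustrative (the helper name and types are not LLVM's):

#include <cstdint>
#include <optional>
#include <string>

// Address space 256 selects %gs, 257 selects %fs; everything else keeps the
// default segment. Only a load of constant 0 on an ELF target qualifies.
std::optional<std::string> foldTlsSelfPointer(int64_t Address, unsigned AddrSpace,
                                              bool IsELF, bool SegmentAlreadySet) {
  if (Address != 0 || SegmentAlreadySet || !IsELF)
    return std::nullopt;
  switch (AddrSpace) {
  case 256: return "GS";   // 32-bit GNU TLS: gs:0 contains its own address
  case 257: return "FS";   // x86-64 GNU TLS: fs:0 contains its own address
  default:  return std::nullopt;
  }
}

int main() {
  return foldTlsSelfPointer(0, 257, true, false) ? 0 : 1;  // "FS" on x86-64 ELF
}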
- return CurDAG->MaskedValueIsZero(V->getOperand(0), CN->getAPIntValue()); -} - bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, unsigned Depth) { bool is64Bit = Subtarget->is64Bit(); @@ -756,11 +732,6 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, break; } - case X86ISD::SegmentBaseAddress: - if (!MatchSegmentBaseAddress(N, AM)) - return false; - break; - case X86ISD::Wrapper: case X86ISD::WrapperRIP: if (!MatchWrapper(N, AM)) @@ -768,7 +739,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, break; case ISD::LOAD: - if (!MatchLoad(N, AM)) + if (!MatchLoadInAddress(cast<LoadSDNode>(N), AM)) return false; break; @@ -799,7 +770,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // Okay, we know that we have a scale by now. However, if the scaled // value is an add of something and a constant, we can fold the // constant into the disp field here. - if (isLogicallyAddWithConstant(ShVal, CurDAG)) { + if (CurDAG->isBaseWithConstantOffset(ShVal)) { AM.IndexReg = ShVal.getNode()->getOperand(0); ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getNode()->getOperand(1)); @@ -943,24 +914,18 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // Add an artificial use to this node so that we can keep track of // it if it gets CSE'd with a different node. HandleSDNode Handle(N); - SDValue LHS = Handle.getValue().getNode()->getOperand(0); - SDValue RHS = Handle.getValue().getNode()->getOperand(1); X86ISelAddressMode Backup = AM; - if (!MatchAddressRecursively(LHS, AM, Depth+1) && - !MatchAddressRecursively(RHS, AM, Depth+1)) + if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) && + !MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)) return false; AM = Backup; - LHS = Handle.getValue().getNode()->getOperand(0); - RHS = Handle.getValue().getNode()->getOperand(1); - + // Try again after commuting the operands. - if (!MatchAddressRecursively(RHS, AM, Depth+1) && - !MatchAddressRecursively(LHS, AM, Depth+1)) + if (!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)&& + !MatchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1)) return false; AM = Backup; - LHS = Handle.getValue().getNode()->getOperand(0); - RHS = Handle.getValue().getNode()->getOperand(1); // If we couldn't fold both operands into the address at the same time, // see if we can just put each operand into a register and fold at least @@ -968,17 +933,19 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, if (AM.BaseType == X86ISelAddressMode::RegBase && !AM.Base_Reg.getNode() && !AM.IndexReg.getNode()) { - AM.Base_Reg = LHS; - AM.IndexReg = RHS; + N = Handle.getValue(); + AM.Base_Reg = N.getOperand(0); + AM.IndexReg = N.getOperand(1); AM.Scale = 1; return false; } + N = Handle.getValue(); break; } case ISD::OR: // Handle "X | C" as "X + C" iff X is known to have C bits clear. - if (isLogicallyAddWithConstant(N, CurDAG)) { + if (CurDAG->isBaseWithConstantOffset(N)) { X86ISelAddressMode Backup = AM; ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1)); uint64_t Offset = CN->getSExtValue(); @@ -1148,10 +1115,30 @@ bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) { /// SelectAddr - returns true if it is able pattern match an addressing mode. /// It returns the operands which make up the maximal addressing mode it can /// match by reference. 
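The OR handling that replaces isLogicallyAddWithConstant rests on the identity that an OR with a constant behaves exactly like an add whenever the two operands share no set bits, because no carries can occur; isBaseWithConstantOffset checks that same precondition. A quick self-contained check of the identity (arbitrary values):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t x = 0xABCD0000u;   // low 16 bits known to be clear
  uint64_t c = 0x00001234u;   // constant lives entirely in those clear bits
  assert((x & c) == 0);       // the condition MaskedValueIsZero establishes
  assert((x | c) == (x + c)); // so the OR folds into the displacement as an add
  return 0;
}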
-bool X86DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base, +/// +/// Parent is the parent node of the addr operand that is being matched. It +/// is always a load, store, atomic node, or null. It is only null when +/// checking memory operands for inline asm nodes. +bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { X86ISelAddressMode AM; + + if (Parent && + // This list of opcodes are all the nodes that have an "addr:$ptr" operand + // that are not a MemSDNode, and thus don't have proper addrspace info. + Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme + Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores + Parent->getOpcode() != X86ISD::TLSCALL) { // Fixme + unsigned AddrSpace = + cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace(); + // AddrSpace 256 -> GS, 257 -> FS. + if (AddrSpace == 256) + AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16); + if (AddrSpace == 257) + AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16); + } + if (MatchAddress(N, AM)) return false; @@ -1187,7 +1174,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, IsProfitableToFold(N.getOperand(0), N.getNode(), Root) && IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) { LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain); - if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp,Segment)) + if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) return false; return true; } @@ -1205,7 +1192,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) { // Okay, this is a zero extending load. Fold it. LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0)); - if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) + if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) return false; PatternNodeWithChain = SDValue(LD, 0); return true; @@ -1216,7 +1203,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, /// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing /// mode it matches can be cost effectively emitted as an LEA instruction. -bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N, +bool X86DAGToDAGISel::SelectLEAAddr(SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { @@ -1278,7 +1265,7 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N, } /// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes. 
-bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, +bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); @@ -1311,7 +1298,8 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, !IsLegalToFold(N, P, P, OptLevel)) return false; - return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment); + return SelectAddr(N.getNode(), + N.getOperand(1), Base, Scale, Index, Disp, Segment); } /// getGlobalBaseReg - Return an SDNode that returns the value of @@ -1329,7 +1317,7 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { SDValue In2L = Node->getOperand(2); SDValue In2H = Node->getOperand(3); SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; - if (!SelectAddr(In1.getNode(), In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) + if (!SelectAddr(Node, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) return NULL; MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); @@ -1355,7 +1343,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { SDValue Ptr = Node->getOperand(1); SDValue Val = Node->getOperand(2); SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; - if (!SelectAddr(Ptr.getNode(), Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) + if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) return 0; bool isInc = false, isDec = false, isSub = false, isCN = false; @@ -1592,7 +1580,32 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { return RetVal; break; } - + case X86ISD::UMUL: { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + + unsigned LoReg; + switch (NVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unsupported VT!"); + case MVT::i8: LoReg = X86::AL; Opc = X86::MUL8r; break; + case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break; + case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break; + case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break; + } + + SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, + N0, SDValue()).getValue(1); + + SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32); + SDValue Ops[] = {N1, InFlag}; + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2); + + ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); + ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1)); + ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2)); + return NULL; + } + case ISD::SMUL_LOHI: case ISD::UMUL_LOHI: { SDValue N0 = Node->getOperand(0); @@ -1642,14 +1655,15 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), InFlag }; SDNode *CNode = - CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops, + CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops, array_lengthof(Ops)); InFlag = SDValue(CNode, 1); + // Update the chain. ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); } else { - InFlag = - SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag); + InFlag = SDValue(CNode, 0); } // Prevent use of AH in a REX instruction by referencing AX instead. 
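The X86ISD::UMUL selection above works around the one-operand form of the x86 MUL instruction, which takes its left operand implicitly in AL/AX/EAX/RAX; that is why the code first copies the LHS into the fixed low register and only then emits the multiply. A small illustrative table of that choice, mirroring the switch in the selection code (the strings stand in for the real X86::* enum values):

#include <cstdio>

struct MulSelection { const char *LoReg; const char *Opcode; };

static MulSelection selectUMul(unsigned Bits) {
  switch (Bits) {
  case 8:  return {"AL",  "MUL8r"};
  case 16: return {"AX",  "MUL16r"};
  case 32: return {"EAX", "MUL32r"};
  case 64: return {"RAX", "MUL64r"};
  default: return {"<unsupported>", "<unsupported>"};
  }
}

int main() {
  const unsigned Widths[] = {8, 16, 32, 64};
  for (unsigned Bits : Widths) {
    MulSelection S = selectUMul(Bits);
    std::printf("i%-2u: copy LHS into %s, then emit %s\n", Bits, S.LoReg, S.Opcode);
  }
  return 0;
}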
@@ -1688,7 +1702,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { ReplaceUses(SDValue(Node, 1), Result); DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } - + return NULL; } @@ -1773,7 +1787,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { if (isSigned && !signBitIsZero) { // Sign extend the low part into the high part. InFlag = - SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Flag, InFlag),0); + SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0); } else { // Zero out the high part, effectively zero extending the input. SDValue ClrNode = @@ -1787,14 +1801,14 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), InFlag }; SDNode *CNode = - CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops, + CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops, array_lengthof(Ops)); InFlag = SDValue(CNode, 1); // Update the chain. ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); } else { InFlag = - SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0); + SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0); } // Prevent use of AH in a REX instruction by referencing AX instead. @@ -1971,7 +1985,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, case 'v': // not offsetable ?? default: return true; case 'm': // memory - if (!SelectAddr(Op.getNode(), Op, Op0, Op1, Op2, Op3, Op4)) + if (!SelectAddr(0, Op, Op0, Op1, Op2, Op3, Op4)) return true; break; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a6db979..27024b4 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16,9 +16,9 @@ #include "X86.h" #include "X86InstrBuilder.h" #include "X86ISelLowering.h" -#include "X86ShuffleDecode.h" #include "X86TargetMachine.h" #include "X86TargetObjectFile.h" +#include "Utils/X86ShuffleDecode.h" #include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" @@ -28,6 +28,7 @@ #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" #include "llvm/LLVMContext.h" +#include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -56,39 +57,172 @@ using namespace dwarf; STATISTIC(NumTailCalls, "Number of tail calls"); static cl::opt<bool> -DisableMMX("disable-mmx", cl::Hidden, cl::desc("Disable use of MMX")); +Disable256Bit("disable-256bit", cl::Hidden, + cl::desc("Disable use of 256-bit vectors")); // Forward declarations. static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, SDValue V2); +static SDValue Insert128BitVector(SDValue Result, + SDValue Vec, + SDValue Idx, + SelectionDAG &DAG, + DebugLoc dl); + +static SDValue Extract128BitVector(SDValue Vec, + SDValue Idx, + SelectionDAG &DAG, + DebugLoc dl); + +static SDValue ConcatVectors(SDValue Lower, SDValue Upper, SelectionDAG &DAG); + + +/// Generate a DAG to grab 128-bits from a vector > 128 bits. This +/// sets things up to match to an AVX VEXTRACTF128 instruction or a +/// simple subregister reference. Idx is an index in the 128 bits we +/// want. It need not be aligned to a 128-bit bounday. That makes +/// lowering EXTRACT_VECTOR_ELT operations easier. 
+static SDValue Extract128BitVector(SDValue Vec, + SDValue Idx, + SelectionDAG &DAG, + DebugLoc dl) { + EVT VT = Vec.getValueType(); + assert(VT.getSizeInBits() == 256 && "Unexpected vector size!"); + + EVT ElVT = VT.getVectorElementType(); + + int Factor = VT.getSizeInBits() / 128; + + EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), + ElVT, + VT.getVectorNumElements() / Factor); + + // Extract from UNDEF is UNDEF. + if (Vec.getOpcode() == ISD::UNDEF) + return DAG.getNode(ISD::UNDEF, dl, ResultVT); + + if (isa<ConstantSDNode>(Idx)) { + unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + + // Extract the relevant 128 bits. Generate an EXTRACT_SUBVECTOR + // we can match to VEXTRACTF128. + unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits(); + + // This is the index of the first element of the 128-bit chunk + // we want. + unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128) + * ElemsPerChunk); + + SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32); + + SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, + VecIdx); + + return Result; + } + + return SDValue(); +} + +/// Generate a DAG to put 128-bits into a vector > 128 bits. This +/// sets things up to match to an AVX VINSERTF128 instruction or a +/// simple superregister reference. Idx is an index in the 128 bits +/// we want. It need not be aligned to a 128-bit bounday. That makes +/// lowering INSERT_VECTOR_ELT operations easier. +static SDValue Insert128BitVector(SDValue Result, + SDValue Vec, + SDValue Idx, + SelectionDAG &DAG, + DebugLoc dl) { + if (isa<ConstantSDNode>(Idx)) { + EVT VT = Vec.getValueType(); + assert(VT.getSizeInBits() == 128 && "Unexpected vector size!"); + + EVT ElVT = VT.getVectorElementType(); + + unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + + EVT ResultVT = Result.getValueType(); + + // Insert the relevant 128 bits. + unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits(); + + // This is the index of the first element of the 128-bit chunk + // we want. + unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128) + * ElemsPerChunk); + + SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32); + + Result = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, + VecIdx); + return Result; + } + + return SDValue(); +} + +/// Given two vectors, concat them. +static SDValue ConcatVectors(SDValue Lower, SDValue Upper, SelectionDAG &DAG) { + DebugLoc dl = Lower.getDebugLoc(); + + assert(Lower.getValueType() == Upper.getValueType() && "Mismatched vectors!"); + + EVT VT = EVT::getVectorVT(*DAG.getContext(), + Lower.getValueType().getVectorElementType(), + Lower.getValueType().getVectorNumElements() * 2); + + // TODO: Generalize to arbitrary vector length (this assumes 256-bit vectors). + assert(VT.getSizeInBits() == 256 && "Unsupported vector concat!"); + + // Insert the upper subvector. + SDValue Vec = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Upper, + DAG.getConstant( + // This is half the length of the result + // vector. Start inserting the upper 128 + // bits here. + Lower.getValueType().getVectorNumElements(), + MVT::i32), + DAG, dl); + + // Insert the lower subvector. 
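The index normalization used by Extract128BitVector and Insert128BitVector above simply rounds an element index down to the first element of the 128-bit half that contains it, so an unaligned index still produces a legal VEXTRACTF128/VINSERTF128 chunk. A worked version of that arithmetic, plain integers only, with illustrative values:

#include <cstdio>

static unsigned normalizedIdx(unsigned IdxVal, unsigned ElBits) {
  unsigned ElemsPerChunk = 128 / ElBits;
  return ((IdxVal * ElBits) / 128) * ElemsPerChunk;
}

int main() {
  // v8f32 (8 x 32-bit): elements 0-3 form the low half, 4-7 the high half.
  std::printf("f32 idx 2 -> chunk starts at element %u\n", normalizedIdx(2, 32)); // 0
  std::printf("f32 idx 5 -> chunk starts at element %u\n", normalizedIdx(5, 32)); // 4
  // v4i64 (4 x 64-bit): elements 0-1 low half, 2-3 high half.
  std::printf("i64 idx 3 -> chunk starts at element %u\n", normalizedIdx(3, 64)); // 2
  return 0;
}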
+ Vec = Insert128BitVector(Vec, Lower, DAG.getConstant(0, MVT::i32), DAG, dl); + return Vec; +} + static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { - - bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit(); - - if (TM.getSubtarget<X86Subtarget>().isTargetDarwin()) { - if (is64Bit) return new X8664_MachoTargetObjectFile(); + const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>(); + bool is64Bit = Subtarget->is64Bit(); + + if (Subtarget->isTargetEnvMacho()) { + if (is64Bit) + return new X8664_MachoTargetObjectFile(); return new TargetLoweringObjectFileMachO(); - } else if (TM.getSubtarget<X86Subtarget>().isTargetELF() ){ - if (is64Bit) return new X8664_ELFTargetObjectFile(TM); + } + + if (Subtarget->isTargetELF()) { + if (is64Bit) + return new X8664_ELFTargetObjectFile(TM); return new X8632_ELFTargetObjectFile(TM); - } else if (TM.getSubtarget<X86Subtarget>().isTargetCOFF()) { + } + if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) return new TargetLoweringObjectFileCOFF(); - } llvm_unreachable("unknown subtarget type"); } X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) : TargetLowering(TM, createTLOF(TM)) { Subtarget = &TM.getSubtarget<X86Subtarget>(); - X86ScalarSSEf64 = Subtarget->hasSSE2(); - X86ScalarSSEf32 = Subtarget->hasSSE1(); + X86ScalarSSEf64 = Subtarget->hasXMMInt(); + X86ScalarSSEf32 = Subtarget->hasXMM(); X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP; RegInfo = TM.getRegisterInfo(); TD = getTargetData(); // Set up the TargetLowering object. + static MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }; // X86 is weird, it always uses i8 for shift amounts and setcc results. setShiftAmountType(MVT::i8); @@ -96,6 +230,18 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setSchedulingPreference(Sched::RegPressure); setStackPointerRegisterToSaveRestore(X86StackPtr); + if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) { + // Setup Windows compiler runtime calls. + setLibcallName(RTLIB::SDIV_I64, "_alldiv"); + setLibcallName(RTLIB::UDIV_I64, "_aulldiv"); + setLibcallName(RTLIB::FPTOUINT_F64_I64, "_ftol2"); + setLibcallName(RTLIB::FPTOUINT_F32_I64, "_ftol2"); + setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall); + setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall); + setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::C); + setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::C); + } + if (Subtarget->isTargetDarwin()) { // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp. setUseUnderscoreSetJmp(false); @@ -213,16 +359,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } // TODO: when we have SSE, these could be more efficient, by using movd/movq. - if (!X86ScalarSSEf64) { - setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand); - setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand); + if (!X86ScalarSSEf64) { + setOperationAction(ISD::BITCAST , MVT::f32 , Expand); + setOperationAction(ISD::BITCAST , MVT::i32 , Expand); if (Subtarget->is64Bit()) { - setOperationAction(ISD::BIT_CONVERT , MVT::f64 , Expand); - // Without SSE, i64->f64 goes through memory; i64->MMX is Legal. - if (Subtarget->hasMMX() && !DisableMMX) - setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Custom); - else - setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Expand); + setOperationAction(ISD::BITCAST , MVT::f64 , Expand); + // Without SSE, i64->f64 goes through memory. 
+ setOperationAction(ISD::BITCAST , MVT::i64 , Expand); } } @@ -236,30 +379,21 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // (low) operations are left as Legal, as there are single-result // instructions for this in x86. Using the two-result multiply instructions // when both high and low results are needed must be arranged by dagcombine. - setOperationAction(ISD::MULHS , MVT::i8 , Expand); - setOperationAction(ISD::MULHU , MVT::i8 , Expand); - setOperationAction(ISD::SDIV , MVT::i8 , Expand); - setOperationAction(ISD::UDIV , MVT::i8 , Expand); - setOperationAction(ISD::SREM , MVT::i8 , Expand); - setOperationAction(ISD::UREM , MVT::i8 , Expand); - setOperationAction(ISD::MULHS , MVT::i16 , Expand); - setOperationAction(ISD::MULHU , MVT::i16 , Expand); - setOperationAction(ISD::SDIV , MVT::i16 , Expand); - setOperationAction(ISD::UDIV , MVT::i16 , Expand); - setOperationAction(ISD::SREM , MVT::i16 , Expand); - setOperationAction(ISD::UREM , MVT::i16 , Expand); - setOperationAction(ISD::MULHS , MVT::i32 , Expand); - setOperationAction(ISD::MULHU , MVT::i32 , Expand); - setOperationAction(ISD::SDIV , MVT::i32 , Expand); - setOperationAction(ISD::UDIV , MVT::i32 , Expand); - setOperationAction(ISD::SREM , MVT::i32 , Expand); - setOperationAction(ISD::UREM , MVT::i32 , Expand); - setOperationAction(ISD::MULHS , MVT::i64 , Expand); - setOperationAction(ISD::MULHU , MVT::i64 , Expand); - setOperationAction(ISD::SDIV , MVT::i64 , Expand); - setOperationAction(ISD::UDIV , MVT::i64 , Expand); - setOperationAction(ISD::SREM , MVT::i64 , Expand); - setOperationAction(ISD::UREM , MVT::i64 , Expand); + for (unsigned i = 0, e = 4; i != e; ++i) { + MVT VT = IntVTs[i]; + setOperationAction(ISD::MULHS, VT, Expand); + setOperationAction(ISD::MULHU, VT, Expand); + setOperationAction(ISD::SDIV, VT, Expand); + setOperationAction(ISD::UDIV, VT, Expand); + setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::UREM, VT, Expand); + + // Add/Sub overflow ops with MVT::Glues are lowered to EFLAGS dependences. 
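The ADDC/ADDE handling referenced in the comment above (and set up just below) models multi-word addition: the low-word add produces a carry that the high-word add consumes, which on x86 maps onto ADD followed by ADC through EFLAGS. A plain C++ picture of what those nodes compute, not LLVM code:

#include <cstdint>
#include <cstdio>

int main() {
  // 128-bit addition built from two 64-bit words, the shape ADDC/ADDE describe.
  uint64_t aLo = ~0ull, aHi = 1, bLo = 1, bHi = 2;
  uint64_t lo = aLo + bLo;              // ADDC: low add, produces a carry out
  uint64_t carry = (lo < aLo) ? 1 : 0;
  uint64_t hi = aHi + bHi + carry;      // ADDE: high add, consumes the carry (ADC)
  std::printf("hi=%llu lo=%llu\n", (unsigned long long)hi, (unsigned long long)lo);
  return 0;
}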
+ setOperationAction(ISD::ADDC, VT, Custom); + setOperationAction(ISD::ADDE, VT, Custom); + setOperationAction(ISD::SUBC, VT, Custom); + setOperationAction(ISD::SUBE, VT, Custom); + } setOperationAction(ISD::BR_JT , MVT::Other, Expand); setOperationAction(ISD::BRCOND , MVT::Other, Custom); @@ -276,21 +410,27 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FREM , MVT::f80 , Expand); setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom); - setOperationAction(ISD::CTPOP , MVT::i8 , Expand); setOperationAction(ISD::CTTZ , MVT::i8 , Custom); setOperationAction(ISD::CTLZ , MVT::i8 , Custom); - setOperationAction(ISD::CTPOP , MVT::i16 , Expand); setOperationAction(ISD::CTTZ , MVT::i16 , Custom); setOperationAction(ISD::CTLZ , MVT::i16 , Custom); - setOperationAction(ISD::CTPOP , MVT::i32 , Expand); setOperationAction(ISD::CTTZ , MVT::i32 , Custom); setOperationAction(ISD::CTLZ , MVT::i32 , Custom); if (Subtarget->is64Bit()) { - setOperationAction(ISD::CTPOP , MVT::i64 , Expand); setOperationAction(ISD::CTTZ , MVT::i64 , Custom); setOperationAction(ISD::CTLZ , MVT::i64 , Custom); } + if (Subtarget->hasPOPCNT()) { + setOperationAction(ISD::CTPOP , MVT::i8 , Promote); + } else { + setOperationAction(ISD::CTPOP , MVT::i8 , Expand); + setOperationAction(ISD::CTPOP , MVT::i16 , Expand); + setOperationAction(ISD::CTPOP , MVT::i32 , Expand); + if (Subtarget->is64Bit()) + setOperationAction(ISD::CTPOP , MVT::i64 , Expand); + } + setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom); setOperationAction(ISD::BSWAP , MVT::i16 , Expand); @@ -298,7 +438,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SELECT , MVT::i1 , Promote); // X86 wants to expand cmov itself. setOperationAction(ISD::SELECT , MVT::i8 , Custom); - setOperationAction(ISD::SELECT , MVT::i16 , Custom); + setOperationAction(ISD::SELECT , MVT::i16 , Custom); setOperationAction(ISD::SELECT , MVT::i32 , Custom); setOperationAction(ISD::SELECT , MVT::f32 , Custom); setOperationAction(ISD::SELECT , MVT::f64 , Custom); @@ -341,12 +481,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom); } - if (Subtarget->hasSSE1()) + if (Subtarget->hasXMM()) setOperationAction(ISD::PREFETCH , MVT::Other, Legal); // We may not have a libcall for MEMBARRIER so we should lower this. setOperationAction(ISD::MEMBARRIER , MVT::Other, Custom); - + // On X86 and X86-64, atomic operations are lowered to locked instructions. 
// Locked instructions, in turn, have implicit fence semantics (all memory // operations are flushed before issuing the locked instruction, and they @@ -355,15 +495,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setShouldFoldAtomicFences(true); // Expand certain atomics - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Custom); - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Custom); - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom); - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom); - - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Custom); - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Custom); - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom); - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom); + for (unsigned i = 0, e = 4; i != e; ++i) { + MVT VT = IntVTs[i]; + setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Custom); + setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); + } if (!Subtarget->is64Bit()) { setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom); @@ -415,7 +551,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); if (Subtarget->is64Bit()) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); - if (Subtarget->isTargetCygMing()) + if (Subtarget->isTargetCygMing() || Subtarget->isTargetWindows()) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); else setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); @@ -512,13 +648,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::UNDEF, MVT::f80, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand); { - bool ignored; - APFloat TmpFlt(+0.0); - TmpFlt.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven, - &ignored); + APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended); addLegalFPImmediate(TmpFlt); // FLD0 TmpFlt.changeSign(); addLegalFPImmediate(TmpFlt); // FLD0/FCHS + + bool ignored; APFloat TmpFlt2(+1.0); TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven, &ignored); @@ -564,8 +699,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::EXTRACT_VECTOR_ELT,(MVT::SimpleValueType)VT,Expand); - setOperationAction(ISD::EXTRACT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand); setOperationAction(ISD::INSERT_VECTOR_ELT,(MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::EXTRACT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand); + setOperationAction(ISD::INSERT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand); setOperationAction(ISD::FABS, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand); @@ -613,91 +749,44 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // FIXME: In order to prevent SSE instructions being expanded to MMX ones // with -msoft-float, disable use of MMX as well. 
- if (!UseSoftFloat && !DisableMMX && Subtarget->hasMMX()) { - addRegisterClass(MVT::v8i8, X86::VR64RegisterClass, false); - addRegisterClass(MVT::v4i16, X86::VR64RegisterClass, false); - addRegisterClass(MVT::v2i32, X86::VR64RegisterClass, false); - - addRegisterClass(MVT::v1i64, X86::VR64RegisterClass, false); - - setOperationAction(ISD::ADD, MVT::v8i8, Legal); - setOperationAction(ISD::ADD, MVT::v4i16, Legal); - setOperationAction(ISD::ADD, MVT::v2i32, Legal); - setOperationAction(ISD::ADD, MVT::v1i64, Legal); - - setOperationAction(ISD::SUB, MVT::v8i8, Legal); - setOperationAction(ISD::SUB, MVT::v4i16, Legal); - setOperationAction(ISD::SUB, MVT::v2i32, Legal); - setOperationAction(ISD::SUB, MVT::v1i64, Legal); - - setOperationAction(ISD::MULHS, MVT::v4i16, Legal); - setOperationAction(ISD::MUL, MVT::v4i16, Legal); - - setOperationAction(ISD::AND, MVT::v8i8, Promote); - AddPromotedToType (ISD::AND, MVT::v8i8, MVT::v1i64); - setOperationAction(ISD::AND, MVT::v4i16, Promote); - AddPromotedToType (ISD::AND, MVT::v4i16, MVT::v1i64); - setOperationAction(ISD::AND, MVT::v2i32, Promote); - AddPromotedToType (ISD::AND, MVT::v2i32, MVT::v1i64); - setOperationAction(ISD::AND, MVT::v1i64, Legal); - - setOperationAction(ISD::OR, MVT::v8i8, Promote); - AddPromotedToType (ISD::OR, MVT::v8i8, MVT::v1i64); - setOperationAction(ISD::OR, MVT::v4i16, Promote); - AddPromotedToType (ISD::OR, MVT::v4i16, MVT::v1i64); - setOperationAction(ISD::OR, MVT::v2i32, Promote); - AddPromotedToType (ISD::OR, MVT::v2i32, MVT::v1i64); - setOperationAction(ISD::OR, MVT::v1i64, Legal); - - setOperationAction(ISD::XOR, MVT::v8i8, Promote); - AddPromotedToType (ISD::XOR, MVT::v8i8, MVT::v1i64); - setOperationAction(ISD::XOR, MVT::v4i16, Promote); - AddPromotedToType (ISD::XOR, MVT::v4i16, MVT::v1i64); - setOperationAction(ISD::XOR, MVT::v2i32, Promote); - AddPromotedToType (ISD::XOR, MVT::v2i32, MVT::v1i64); - setOperationAction(ISD::XOR, MVT::v1i64, Legal); - - setOperationAction(ISD::LOAD, MVT::v8i8, Promote); - AddPromotedToType (ISD::LOAD, MVT::v8i8, MVT::v1i64); - setOperationAction(ISD::LOAD, MVT::v4i16, Promote); - AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64); - setOperationAction(ISD::LOAD, MVT::v2i32, Promote); - AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64); - setOperationAction(ISD::LOAD, MVT::v1i64, Legal); - - setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom); - - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom); - - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom); - - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i16, Custom); - - setOperationAction(ISD::SELECT, MVT::v8i8, Promote); - setOperationAction(ISD::SELECT, MVT::v4i16, Promote); - setOperationAction(ISD::SELECT, MVT::v2i32, Promote); - setOperationAction(ISD::SELECT, MVT::v1i64, Custom); - setOperationAction(ISD::VSETCC, MVT::v8i8, Custom); - setOperationAction(ISD::VSETCC, MVT::v4i16, Custom); - setOperationAction(ISD::VSETCC, MVT::v2i32, Custom); - - if (!X86ScalarSSEf64 && Subtarget->is64Bit()) { - 
setOperationAction(ISD::BIT_CONVERT, MVT::v8i8, Custom); - setOperationAction(ISD::BIT_CONVERT, MVT::v4i16, Custom); - setOperationAction(ISD::BIT_CONVERT, MVT::v2i32, Custom); - setOperationAction(ISD::BIT_CONVERT, MVT::v1i64, Custom); - } - } - - if (!UseSoftFloat && Subtarget->hasSSE1()) { + if (!UseSoftFloat && Subtarget->hasMMX()) { + addRegisterClass(MVT::x86mmx, X86::VR64RegisterClass); + // No operations on x86mmx supported, everything uses intrinsics. + } + + // MMX-sized vectors (other than x86mmx) are expected to be expanded + // into smaller operations. + setOperationAction(ISD::MULHS, MVT::v8i8, Expand); + setOperationAction(ISD::MULHS, MVT::v4i16, Expand); + setOperationAction(ISD::MULHS, MVT::v2i32, Expand); + setOperationAction(ISD::MULHS, MVT::v1i64, Expand); + setOperationAction(ISD::AND, MVT::v8i8, Expand); + setOperationAction(ISD::AND, MVT::v4i16, Expand); + setOperationAction(ISD::AND, MVT::v2i32, Expand); + setOperationAction(ISD::AND, MVT::v1i64, Expand); + setOperationAction(ISD::OR, MVT::v8i8, Expand); + setOperationAction(ISD::OR, MVT::v4i16, Expand); + setOperationAction(ISD::OR, MVT::v2i32, Expand); + setOperationAction(ISD::OR, MVT::v1i64, Expand); + setOperationAction(ISD::XOR, MVT::v8i8, Expand); + setOperationAction(ISD::XOR, MVT::v4i16, Expand); + setOperationAction(ISD::XOR, MVT::v2i32, Expand); + setOperationAction(ISD::XOR, MVT::v1i64, Expand); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Expand); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Expand); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Expand); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Expand); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v1i64, Expand); + setOperationAction(ISD::SELECT, MVT::v8i8, Expand); + setOperationAction(ISD::SELECT, MVT::v4i16, Expand); + setOperationAction(ISD::SELECT, MVT::v2i32, Expand); + setOperationAction(ISD::SELECT, MVT::v1i64, Expand); + setOperationAction(ISD::BITCAST, MVT::v8i8, Expand); + setOperationAction(ISD::BITCAST, MVT::v4i16, Expand); + setOperationAction(ISD::BITCAST, MVT::v2i32, Expand); + setOperationAction(ISD::BITCAST, MVT::v1i64, Expand); + + if (!UseSoftFloat && Subtarget->hasXMM()) { addRegisterClass(MVT::v4f32, X86::VR128RegisterClass); setOperationAction(ISD::FADD, MVT::v4f32, Legal); @@ -714,7 +803,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VSETCC, MVT::v4f32, Custom); } - if (!UseSoftFloat && Subtarget->hasSSE2()) { + if (!UseSoftFloat && Subtarget->hasXMMInt()) { addRegisterClass(MVT::v2f64, X86::VR128RegisterClass); // FIXME: Unfortunately -soft-float and -no-implicit-float means XMM @@ -795,7 +884,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Do not attempt to promote non-128-bit vectors if (!VT.is128BitVector()) continue; - + setOperationAction(ISD::AND, SVT, Promote); AddPromotedToType (ISD::AND, SVT, MVT::v2i64); setOperationAction(ISD::OR, SVT, Promote); @@ -818,10 +907,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); - if (!DisableMMX && Subtarget->hasMMX()) { - setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); - } } if (Subtarget->hasSSE41()) { @@ -863,9 +948,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } } - if (Subtarget->hasSSE42()) { + if (Subtarget->hasSSE42()) setOperationAction(ISD::VSETCC, 
MVT::v2i64, Custom); - } if (!UseSoftFloat && Subtarget->hasAVX()) { addRegisterClass(MVT::v8f32, X86::VR256RegisterClass); @@ -878,27 +962,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::LOAD, MVT::v8i32, Legal); setOperationAction(ISD::LOAD, MVT::v4f64, Legal); setOperationAction(ISD::LOAD, MVT::v4i64, Legal); + setOperationAction(ISD::FADD, MVT::v8f32, Legal); setOperationAction(ISD::FSUB, MVT::v8f32, Legal); setOperationAction(ISD::FMUL, MVT::v8f32, Legal); setOperationAction(ISD::FDIV, MVT::v8f32, Legal); setOperationAction(ISD::FSQRT, MVT::v8f32, Legal); setOperationAction(ISD::FNEG, MVT::v8f32, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v8f32, Custom); - //setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Custom); - //setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8f32, Custom); - //setOperationAction(ISD::SELECT, MVT::v8f32, Custom); - //setOperationAction(ISD::VSETCC, MVT::v8f32, Custom); - - // Operations to consider commented out -v16i16 v32i8 - //setOperationAction(ISD::ADD, MVT::v16i16, Legal); - setOperationAction(ISD::ADD, MVT::v8i32, Custom); - setOperationAction(ISD::ADD, MVT::v4i64, Custom); - //setOperationAction(ISD::SUB, MVT::v32i8, Legal); - //setOperationAction(ISD::SUB, MVT::v16i16, Legal); - setOperationAction(ISD::SUB, MVT::v8i32, Custom); - setOperationAction(ISD::SUB, MVT::v4i64, Custom); - //setOperationAction(ISD::MUL, MVT::v16i16, Legal); + setOperationAction(ISD::FADD, MVT::v4f64, Legal); setOperationAction(ISD::FSUB, MVT::v4f64, Legal); setOperationAction(ISD::FMUL, MVT::v4f64, Legal); @@ -906,85 +977,66 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FSQRT, MVT::v4f64, Legal); setOperationAction(ISD::FNEG, MVT::v4f64, Custom); - setOperationAction(ISD::VSETCC, MVT::v4f64, Custom); - // setOperationAction(ISD::VSETCC, MVT::v32i8, Custom); - // setOperationAction(ISD::VSETCC, MVT::v16i16, Custom); - setOperationAction(ISD::VSETCC, MVT::v8i32, Custom); - - // setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i8, Custom); - // setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i16, Custom); - // setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i16, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i32, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f32, Custom); - - setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4i64, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f64, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i64, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f64, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f64, Custom); - -#if 0 - // Not sure we want to do this since there are no 256-bit integer - // operations in AVX - - // Custom lower build_vector, vector_shuffle, and extract_vector_elt. - // This includes 256-bit vectors - for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; ++i) { - EVT VT = (MVT::SimpleValueType)i; - - // Do not attempt to custom lower non-power-of-2 vectors - if (!isPowerOf2_32(VT.getVectorNumElements())) + // Custom lower build_vector, vector_shuffle, scalar_to_vector, + // insert_vector_elt extract_subvector and extract_vector_elt for + // 256-bit types. 
+ for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; + ++i) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)i; + // Do not attempt to custom lower non-256-bit vectors + if (!isPowerOf2_32(MVT(VT).getVectorNumElements()) + || (MVT(VT).getSizeInBits() < 256)) continue; - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); - } + setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); + } + // Custom-lower insert_subvector and extract_subvector based on + // the result type. + for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; + ++i) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)i; + // Do not attempt to custom lower non-256-bit vectors + if (!isPowerOf2_32(MVT(VT).getVectorNumElements())) + continue; - if (Subtarget->is64Bit()) { - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i64, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i64, Custom); + if (MVT(VT).getSizeInBits() == 128) { + setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); + } + else if (MVT(VT).getSizeInBits() == 256) { + setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); + } } -#endif -#if 0 - // Not sure we want to do this since there are no 256-bit integer - // operations in AVX - - // Promote v32i8, v16i16, v8i32 load, select, and, or, xor to v4i64. - // Including 256-bit vectors - for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; i++) { - EVT VT = (MVT::SimpleValueType)i; + // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64. + // Don't promote loads because we need them for VPERM vector index versions. - if (!VT.is256BitVector()) { + for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; + VT++) { + if (!isPowerOf2_32(MVT((MVT::SimpleValueType)VT).getVectorNumElements()) + || (MVT((MVT::SimpleValueType)VT).getSizeInBits() < 256)) continue; - } - setOperationAction(ISD::AND, VT, Promote); - AddPromotedToType (ISD::AND, VT, MVT::v4i64); - setOperationAction(ISD::OR, VT, Promote); - AddPromotedToType (ISD::OR, VT, MVT::v4i64); - setOperationAction(ISD::XOR, VT, Promote); - AddPromotedToType (ISD::XOR, VT, MVT::v4i64); - setOperationAction(ISD::LOAD, VT, Promote); - AddPromotedToType (ISD::LOAD, VT, MVT::v4i64); - setOperationAction(ISD::SELECT, VT, Promote); - AddPromotedToType (ISD::SELECT, VT, MVT::v4i64); + setOperationAction(ISD::AND, (MVT::SimpleValueType)VT, Promote); + AddPromotedToType (ISD::AND, (MVT::SimpleValueType)VT, MVT::v4i64); + setOperationAction(ISD::OR, (MVT::SimpleValueType)VT, Promote); + AddPromotedToType (ISD::OR, (MVT::SimpleValueType)VT, MVT::v4i64); + setOperationAction(ISD::XOR, (MVT::SimpleValueType)VT, Promote); + AddPromotedToType (ISD::XOR, (MVT::SimpleValueType)VT, MVT::v4i64); + //setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Promote); + //AddPromotedToType (ISD::LOAD, (MVT::SimpleValueType)VT, MVT::v4i64); + setOperationAction(ISD::SELECT, (MVT::SimpleValueType)VT, Promote); + AddPromotedToType (ISD::SELECT, (MVT::SimpleValueType)VT, MVT::v4i64); } - - setTruncStoreAction(MVT::f64, MVT::f32, Expand); -#endif } // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); - // Add/Sub/Mul with overflow operations are custom lowered. 
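The overflow-arithmetic setup reworked just below makes SADDO, UADDO, SSUBO, USUBO, SMULO and the newly added UMULO Custom for each integer width, expanding only the i8 multiplies for lack of an 8-bit 3-address imul. What these nodes compute is an ordinary add/sub/mul plus an overflow bit, which the X86 lowering reads out of EFLAGS; in source-level terms it is the same contract as the GCC/Clang checked-arithmetic builtins, illustrated here (not LLVM code):

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t a = 0xFFFFFFF0u, b = 0x20u, sum;
  // Same contract as ISD::UADDO: the truncated result plus an overflow flag.
  bool overflowed = __builtin_add_overflow(a, b, &sum);
  std::printf("sum=0x%x overflowed=%d\n", (unsigned)sum, (int)overflowed);
  return 0;
}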
- setOperationAction(ISD::SADDO, MVT::i32, Custom); - setOperationAction(ISD::UADDO, MVT::i32, Custom); - setOperationAction(ISD::SSUBO, MVT::i32, Custom); - setOperationAction(ISD::USUBO, MVT::i32, Custom); - setOperationAction(ISD::SMULO, MVT::i32, Custom); // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't // handle type legalization for these operations here. @@ -992,14 +1044,21 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // FIXME: We really should do custom legalization for addition and // subtraction on x86-32 once PR3203 is fixed. We really can't do much better // than generic legalization for 64-bit multiplication-with-overflow, though. - if (Subtarget->is64Bit()) { - setOperationAction(ISD::SADDO, MVT::i64, Custom); - setOperationAction(ISD::UADDO, MVT::i64, Custom); - setOperationAction(ISD::SSUBO, MVT::i64, Custom); - setOperationAction(ISD::USUBO, MVT::i64, Custom); - setOperationAction(ISD::SMULO, MVT::i64, Custom); + for (unsigned i = 0, e = 3+Subtarget->is64Bit(); i != e; ++i) { + // Add/Sub/Mul with overflow operations are custom lowered. + MVT VT = IntVTs[i]; + setOperationAction(ISD::SADDO, VT, Custom); + setOperationAction(ISD::UADDO, VT, Custom); + setOperationAction(ISD::SSUBO, VT, Custom); + setOperationAction(ISD::USUBO, VT, Custom); + setOperationAction(ISD::SMULO, VT, Custom); + setOperationAction(ISD::UMULO, VT, Custom); } + // There are no 8-bit 3-address imul/mul instructions + setOperationAction(ISD::SMULO, MVT::i8, Expand); + setOperationAction(ISD::UMULO, MVT::i8, Expand); + if (!Subtarget->is64Bit()) { // These libcalls are not available in 32-bit. setLibcallName(RTLIB::SHL_I128, 0); @@ -1016,6 +1075,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::SRA); setTargetDAGCombine(ISD::SRL); setTargetDAGCombine(ISD::OR); + setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::ADD); + setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::ZERO_EXTEND); if (Subtarget->is64Bit()) @@ -1023,11 +1085,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) computeRegisterProperties(); - // FIXME: These should be based on subtarget info. Plus, the values should - // be smaller when we are in optimizing for size mode. + // On Darwin, -Os means optimize for size without hurting performance, + // do not reduce the limit. maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores + maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8; maxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores - maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores + maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4; + maxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores + maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4; setPrefLoopAlignment(16); benefitFromCodePlacementOpt = true; } @@ -1078,7 +1143,7 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const { } unsigned Align = 4; - if (Subtarget->hasSSE1()) + if (Subtarget->hasXMM()) getMaxByValAlign(Ty, Align); return Align; } @@ -1119,7 +1184,7 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size, } else if (!MemcpyStrSrc && Size >= 8 && !Subtarget->is64Bit() && Subtarget->getStackAlignment() >= 8 && - Subtarget->hasSSE2()) { + Subtarget->hasXMMInt()) { // Do not use f64 to lower memcpy if source is string constant. It's // better to use i32 to avoid the loads. 
return MVT::f64; @@ -1139,21 +1204,11 @@ unsigned X86TargetLowering::getJumpTableEncoding() const { if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && Subtarget->isPICStyleGOT()) return MachineJumpTableInfo::EK_Custom32; - + // Otherwise, use the normal jump table encoding heuristics. return TargetLowering::getJumpTableEncoding(); } -/// getPICBaseSymbol - Return the X86-32 PIC base. -MCSymbol * -X86TargetLowering::getPICBaseSymbol(const MachineFunction *MF, - MCContext &Ctx) const { - const MCAsmInfo &MAI = *getTargetMachine().getMCAsmInfo(); - return Ctx.GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix())+ - Twine(MF->getFunctionNumber())+"$pb"); -} - - const MCExpr * X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, @@ -1188,7 +1243,7 @@ getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); // Otherwise, the reference is relative to the PIC base. - return MCSymbolRefExpr::Create(getPICBaseSymbol(MF, Ctx), Ctx); + return MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx); } /// getFunctionAlignment - Return the Log2 alignment of this function. @@ -1196,6 +1251,7 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const { return F->hasFnAttr(Attribute::OptimizeForSize) ? 0 : 4; } +// FIXME: Why this routine is here? Move to RegInfo! std::pair<const TargetRegisterClass*, uint8_t> X86TargetLowering::findRepresentativeClass(EVT VT) const{ const TargetRegisterClass *RRC = 0; @@ -1207,8 +1263,7 @@ X86TargetLowering::findRepresentativeClass(EVT VT) const{ RRC = (Subtarget->is64Bit() ? X86::GR64RegisterClass : X86::GR32RegisterClass); break; - case MVT::v8i8: case MVT::v4i16: - case MVT::v2i32: case MVT::v1i64: + case MVT::x86mmx: RRC = X86::VR64RegisterClass; break; case MVT::f32: case MVT::f64: @@ -1222,10 +1277,13 @@ X86TargetLowering::findRepresentativeClass(EVT VT) const{ return std::make_pair(RRC, Cost); } +// FIXME: Why this routine is here? Move to RegInfo! unsigned X86TargetLowering::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { - unsigned FPDiff = RegInfo->hasFP(MF) ? 1 : 0; + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0; switch (RC->getID()) { default: return 0; @@ -1267,7 +1325,7 @@ bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace, #include "X86GenCallingConv.inc" -bool +bool X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { @@ -1312,16 +1370,18 @@ X86TargetLowering::LowerReturn(SDValue Chain, SDValue ValToCopy = OutVals[i]; EVT ValVT = ValToCopy.getValueType(); - // If this is x86-64, and we disabled SSE, we can't return FP values - if ((ValVT == MVT::f32 || ValVT == MVT::f64) && - (Subtarget->is64Bit() && !Subtarget->hasSSE1())) { + // If this is x86-64, and we disabled SSE, we can't return FP values, + // or SSE or MMX vectors. + if ((ValVT == MVT::f32 || ValVT == MVT::f64 || + VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) && + (Subtarget->is64Bit() && !Subtarget->hasXMM())) { report_fatal_error("SSE register return with SSE disabled"); } // Likewise we can't return F64 values with SSE1 only. gcc does so, but // llvm-gcc has never done it right and no one has noticed, so this // should be OK for now. 
if (ValVT == MVT::f64 && - (Subtarget->is64Bit() && !Subtarget->hasSSE2())) + (Subtarget->is64Bit() && !Subtarget->hasXMMInt())) report_fatal_error("SSE2 register return with SSE2 disabled"); // Returns in ST0/ST1 are handled specially: these are pushed as operands to @@ -1340,20 +1400,19 @@ X86TargetLowering::LowerReturn(SDValue Chain, // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64 // which is returned in RAX / RDX. if (Subtarget->is64Bit()) { - if (ValVT.isVector() && ValVT.getSizeInBits() == 64) { - ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy); + if (ValVT == MVT::x86mmx) { if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) { + ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ValToCopy); ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, ValToCopy); - // If we don't have SSE2 available, convert to v4f32 so the generated // register is legal. if (!Subtarget->hasSSE2()) - ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,ValToCopy); + ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,ValToCopy); } } } - + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag); Flag = Chain.getValue(1); } @@ -1367,7 +1426,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); unsigned Reg = FuncInfo->getSRetReturnReg(); - assert(Reg && + assert(Reg && "SRetReturnReg should have been set in LowerFormalArguments()."); SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy()); @@ -1388,6 +1447,28 @@ X86TargetLowering::LowerReturn(SDValue Chain, MVT::Other, &RetOps[0], RetOps.size()); } +bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const { + if (N->getNumValues() != 1) + return false; + if (!N->hasNUsesOfValue(1, 0)) + return false; + + SDNode *Copy = *N->use_begin(); + if (Copy->getOpcode() != ISD::CopyToReg && + Copy->getOpcode() != ISD::FP_EXTEND) + return false; + + bool HasRet = false; + for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); + UI != UE; ++UI) { + if (UI->getOpcode() != X86ISD::RET_FLAG) + return false; + HasRet = true; + } + + return HasRet; +} + /// LowerCallResult - Lower the result values of a call into the /// appropriate copies out of appropriate physical registers. /// @@ -1412,7 +1493,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // If this is x86-64, and we disabled SSE, we can't return FP values if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) && - ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) { + ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasXMM())) { report_fatal_error("SSE register return with SSE disabled"); } @@ -1433,7 +1514,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, if (CopyVT == MVT::f64) Opc = isST0 ? X86::FpGET_ST0_64:X86::FpGET_ST1_64; if (CopyVT == MVT::f80) Opc = isST0 ? 
X86::FpGET_ST0_80:X86::FpGET_ST1_80; SDValue Ops[] = { Chain, InFlag }; - Chain = SDValue(DAG.getMachineNode(Opc, dl, CopyVT, MVT::Other, MVT::Flag, + Chain = SDValue(DAG.getMachineNode(Opc, dl, CopyVT, MVT::Other, MVT::Glue, Ops, 2), 1); Val = Chain.getValue(0); @@ -1456,7 +1537,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, MVT::i64, InFlag).getValue(1); Val = Chain.getValue(0); } - Val = DAG.getNode(ISD::BIT_CONVERT, dl, CopyVT, Val); + Val = DAG.getNode(ISD::BITCAST, dl, CopyVT, Val); } else { Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag).getValue(1); @@ -1499,30 +1580,6 @@ ArgsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) { return Ins[0].Flags.isSRet(); } -/// CCAssignFnForNode - Selects the correct CCAssignFn for a the -/// given CallingConvention value. -CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const { - if (Subtarget->is64Bit()) { - if (CC == CallingConv::GHC) - return CC_X86_64_GHC; - else if (Subtarget->isTargetWin64()) - return CC_X86_Win64_C; - else - return CC_X86_64_C; - } - - if (CC == CallingConv::X86_FastCall) - return CC_X86_32_FastCall; - else if (CC == CallingConv::X86_ThisCall) - return CC_X86_32_ThisCall; - else if (CC == CallingConv::Fast) - return CC_X86_32_FastCC; - else if (CC == CallingConv::GHC) - return CC_X86_32_GHC; - else - return CC_X86_32_C; -} - /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified /// by "Src" to address "Dst" with size and alignment information specified by /// the specific parameter attribute. The copy will be passed as a byval @@ -1531,10 +1588,11 @@ static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, DebugLoc dl) { - SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); + SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); + return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), /*isVolatile*/false, /*AlwaysInline=*/true, - NULL, 0, NULL, 0); + MachinePointerInfo(), MachinePointerInfo()); } /// IsTailCallConvention - Return true if the calling convention is one that @@ -1583,7 +1641,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, VA.getLocMemOffset(), isImmutable); SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); return DAG.getLoad(ValVT, dl, Chain, FIN, - PseudoSourceValue::getFixedStack(FI), 0, + MachinePointerInfo::getFixedStack(FI), false, false, 0); } } @@ -1617,7 +1675,13 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, *DAG.getContext()); - CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv)); + + // Allocate shadow area for Win64 + if (IsWin64) { + CCInfo.AllocateStack(32, 8); + } + + CCInfo.AnalyzeFormalArguments(Ins, CC_X86); unsigned LastVal = ~0U; SDValue ArgValue; @@ -1644,12 +1708,12 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, RC = X86::VR256RegisterClass; else if (RegVT.isVector() && RegVT.getSizeInBits() == 128) RC = X86::VR128RegisterClass; - else if (RegVT.isVector() && RegVT.getSizeInBits() == 64) + else if (RegVT == MVT::x86mmx) RC = X86::VR64RegisterClass; else llvm_unreachable("Unknown argument type!"); - unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl); ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); // If this is an 8 or 16-bit value, it is really passed promoted to 32 @@ 
-1662,14 +1726,13 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, DAG.getValueType(VA.getValVT())); else if (VA.getLocInfo() == CCValAssign::BCvt) - ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue); + ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue); if (VA.isExtInLoc()) { // Handle MMX values passed in XMM regs. if (RegVT.isVector()) { - ArgValue = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, - ArgValue, DAG.getConstant(0, MVT::i64)); - ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue); + ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), + ArgValue); } else ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); } @@ -1680,8 +1743,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // If value is passed via pointer - do a load. if (VA.getLocInfo() == CCValAssign::Indirect) - ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, NULL, 0, - false, false, 0); + ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, + MachinePointerInfo(), false, false, 0); InVals.push_back(ArgValue); } @@ -1708,8 +1771,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. if (isVarArg) { - if (Is64Bit || (CallConv != CallingConv::X86_FastCall && - CallConv != CallingConv::X86_ThisCall)) { + if (!IsWin64 && (Is64Bit || (CallConv != CallingConv::X86_FastCall && + CallConv != CallingConv::X86_ThisCall))) { FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,true)); } if (Is64Bit) { @@ -1719,9 +1782,6 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, static const unsigned GPR64ArgRegsWin64[] = { X86::RCX, X86::RDX, X86::R8, X86::R9 }; - static const unsigned XMMArgRegsWin64[] = { - X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3 - }; static const unsigned GPR64ArgRegs64Bit[] = { X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 }; @@ -1729,40 +1789,52 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 }; - const unsigned *GPR64ArgRegs, *XMMArgRegs; + const unsigned *GPR64ArgRegs; + unsigned NumXMMRegs = 0; if (IsWin64) { - TotalNumIntRegs = 4; TotalNumXMMRegs = 4; + // The XMM registers which might contain var arg parameters are shadowed + // in their paired GPR. So we only need to save the GPR to their home + // slots. 
+ TotalNumIntRegs = 4; GPR64ArgRegs = GPR64ArgRegsWin64; - XMMArgRegs = XMMArgRegsWin64; } else { TotalNumIntRegs = 6; TotalNumXMMRegs = 8; GPR64ArgRegs = GPR64ArgRegs64Bit; - XMMArgRegs = XMMArgRegs64Bit; + + NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs64Bit, TotalNumXMMRegs); } unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, TotalNumIntRegs); - unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, - TotalNumXMMRegs); bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat); - assert(!(NumXMMRegs && !Subtarget->hasSSE1()) && + assert(!(NumXMMRegs && !Subtarget->hasXMM()) && "SSE register cannot be used when SSE is disabled!"); assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"); - if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1()) + if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasXMM()) // Kernel mode asks for SSE to be disabled, so don't push them // on the stack. TotalNumXMMRegs = 0; - // For X86-64, if there are vararg parameters that are passed via - // registers, then we must store them to their spots on the stack so they - // may be loaded by deferencing the result of va_next. - FuncInfo->setVarArgsGPOffset(NumIntRegs * 8); - FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16); - FuncInfo->setRegSaveFrameIndex( - MFI->CreateStackObject(TotalNumIntRegs * 8 + TotalNumXMMRegs * 16, 16, + if (IsWin64) { + const TargetFrameLowering &TFI = *getTargetMachine().getFrameLowering(); + // Get to the caller-allocated home save location. Add 8 to account + // for the return address. + int HomeOffset = TFI.getOffsetOfLocalArea() + 8; + FuncInfo->setRegSaveFrameIndex( + MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false)); + FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex()); + } else { + // For X86-64, if there are vararg parameters that are passed via + // registers, then we must store them to their spots on the stack so they + // may be loaded by deferencing the result of va_next. + FuncInfo->setVarArgsGPOffset(NumIntRegs * 8); + FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16); + FuncInfo->setRegSaveFrameIndex( + MFI->CreateStackObject(TotalNumIntRegs * 8 + TotalNumXMMRegs * 16, 16, false)); + } // Store the integer parameter registers. 
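Editorial note: for Win64 varargs the new code spills only the four GPRs, into the caller-allocated home slots sitting just above the return address, while the SysV x86-64 path keeps its callee-allocated register save area (6 GPR slots followed by 8 XMM slots). The sketch below models only the offset arithmetic; the names are made up and a local-area offset of 0 is assumed purely for the numbers, which is not necessarily what X86FrameLowering reports.

#include <cstdio>

// Standalone model of the two vararg register-save layouts in this hunk.
int main() {
  // Win64: four 8-byte home slots live in the caller's frame, just above the
  // return address, so home slot N sits at 8 + 8*N (assuming localAreaOffset 0).
  int LocalAreaOffset = 0;                       // illustrative assumption
  int HomeOffset = LocalAreaOffset + 8;          // skip the return address
  for (int N = 0; N < 4; ++N)
    std::printf("win64 home slot for reg %d: +%d\n", N, HomeOffset + N * 8);

  // SysV x86-64: a register save area holds 6 GPRs then 8 XMMs; va_arg walks
  // it using the gp/fp offsets computed from how many registers fixed
  // arguments already consumed.
  int TotalNumIntRegs = 6, TotalNumXMMRegs = 8;
  int NumIntRegs = 2, NumXMMRegs = 1;            // example: used by fixed args
  int GPOffset = NumIntRegs * 8;
  int FPOffset = TotalNumIntRegs * 8 + NumXMMRegs * 16;
  int AreaSize = TotalNumIntRegs * 8 + TotalNumXMMRegs * 16;
  std::printf("sysv gp_offset=%d fp_offset=%d save area=%d bytes\n",
              GPOffset, FPOffset, AreaSize);
  return 0;
}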
SmallVector<SDValue, 8> MemOps; @@ -1773,13 +1845,13 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN, DAG.getIntPtrConstant(Offset)); unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs], - X86::GR64RegisterClass); + X86::GR64RegisterClass, dl); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - PseudoSourceValue::getFixedStack( - FuncInfo->getRegSaveFrameIndex()), - Offset, false, false, 0); + MachinePointerInfo::getFixedStack( + FuncInfo->getRegSaveFrameIndex(), Offset), + false, false, 0); MemOps.push_back(Store); Offset += 8; } @@ -1789,7 +1861,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SmallVector<SDValue, 11> SaveXMMOps; SaveXMMOps.push_back(Chain); - unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass); + unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass, dl); SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8); SaveXMMOps.push_back(ALVal); @@ -1799,8 +1871,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, FuncInfo->getVarArgsFPOffset())); for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) { - unsigned VReg = MF.addLiveIn(XMMArgRegs[NumXMMRegs], - X86::VR128RegisterClass); + unsigned VReg = MF.addLiveIn(XMMArgRegs64Bit[NumXMMRegs], + X86::VR128RegisterClass, dl); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32); SaveXMMOps.push_back(Val); } @@ -1843,15 +1915,14 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain, DebugLoc dl, SelectionDAG &DAG, const CCValAssign &VA, ISD::ArgFlagsTy Flags) const { - const unsigned FirstStackArgOffset = (Subtarget->isTargetWin64() ? 32 : 0); - unsigned LocMemOffset = FirstStackArgOffset + VA.getLocMemOffset(); + unsigned LocMemOffset = VA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); - if (Flags.isByVal()) { + if (Flags.isByVal()) return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); - } + return DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), LocMemOffset, + MachinePointerInfo::getStack(LocMemOffset), false, false, 0); } @@ -1867,7 +1938,8 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG, OutRetAddr = getReturnAddressFrameIndex(DAG); // Load the "old" Return address. - OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, NULL, 0, false, false, 0); + OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(), + false, false, 0); return SDValue(OutRetAddr.getNode(), 1); } @@ -1886,7 +1958,7 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF, EVT VT = Is64Bit ? 
MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT); Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, - PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0, + MachinePointerInfo::getFixedStack(NewReturnAddrFI), false, false, 0); return Chain; } @@ -1902,6 +1974,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); bool Is64Bit = Subtarget->is64Bit(); + bool IsWin64 = Subtarget->isTargetWin64(); bool IsStructRet = CallIsStructReturn(Outs); bool IsSibcall = false; @@ -1927,7 +2000,13 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, *DAG.getContext()); - CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv)); + + // Allocate shadow area for Win64 + if (IsWin64) { + CCInfo.AllocateStack(32, 8); + } + + CCInfo.AnalyzeCallOperands(Outs, CC_X86); // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); @@ -1986,21 +2065,21 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, case CCValAssign::AExt: if (RegVT.isVector() && RegVT.getSizeInBits() == 128) { // Special case: passing MMX values in XMM registers. - Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg); + Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg); Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg); Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg); } else Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg); break; case CCValAssign::BCvt: - Arg = DAG.getNode(ISD::BIT_CONVERT, dl, RegVT, Arg); + Arg = DAG.getNode(ISD::BITCAST, dl, RegVT, Arg); break; case CCValAssign::Indirect: { // Store the argument. SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT()); int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); Chain = DAG.getStore(Chain, dl, Arg, SpillSlot, - PseudoSourceValue::getFixedStack(FI), 0, + MachinePointerInfo::getFixedStack(FI), false, false, 0); Arg = SpillSlot; break; @@ -2009,7 +2088,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); - if (isVarArg && Subtarget->isTargetWin64()) { + if (isVarArg && IsWin64) { // Win64 ABI requires argument XMM reg to be copied to the corresponding // shadow reg if callee is a varargs function. unsigned ShadowReg = 0; @@ -2075,7 +2154,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, } } - if (Is64Bit && isVarArg && !Subtarget->isTargetWin64()) { + if (Is64Bit && isVarArg && !IsWin64) { // From AMD64 ABI document: // For calls that may call functions that use varargs or stdargs // (prototype-less calls or calls to functions containing ellipsis (...) in @@ -2090,7 +2169,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 }; unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8); - assert((Subtarget->hasSSE1() || !NumXMMRegs) + assert((Subtarget->hasXMM() || !NumXMMRegs) && "SSE registers cannot be used when SSE is disabled"); Chain = DAG.getCopyToReg(Chain, dl, X86::AL, @@ -2143,7 +2222,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Store relative to framepointer. 
MemOpChains2.push_back( DAG.getStore(ArgChain, dl, Arg, FIN, - PseudoSourceValue::getFixedStack(FI), 0, + MachinePointerInfo::getFixedStack(FI), false, false, 0)); } } @@ -2192,8 +2271,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) { OpFlags = X86II::MO_PLT; } else if (Subtarget->isPICStyleStubAny() && - (GV->isDeclaration() || GV->isWeakForLinker()) && - Subtarget->getDarwinVers() < 9) { + (GV->isDeclaration() || GV->isWeakForLinker()) && + Subtarget->getDarwinVers() < 9) { // PC-relative references to external symbols should go through $stub, // unless we're building with the leopard linker or later, which // automatically synthesizes these stubs. @@ -2206,13 +2285,13 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { unsigned char OpFlags = 0; - // On ELF targets, in either X86-64 or X86-32 mode, direct calls to external - // symbols should go through the PLT. + // On ELF targets, in either X86-64 or X86-32 mode, direct calls to + // external symbols should go through the PLT. if (Subtarget->isTargetELF() && getTargetMachine().getRelocationModel() == Reloc::PIC_) { OpFlags = X86II::MO_PLT; } else if (Subtarget->isPICStyleStubAny() && - Subtarget->getDarwinVers() < 9) { + Subtarget->getDarwinVers() < 9) { // PC-relative references to external symbols should go through $stub, // unless we're building with the leopard linker or later, which // automatically synthesizes these stubs. @@ -2224,7 +2303,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, } // Returns a chain & a flag for retval copy to use. - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SmallVector<SDValue, 8> Ops; if (!IsSibcall && isTailCall) { @@ -2250,7 +2329,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy())); // Add an implicit use of AL for non-Windows x86 64-bit vararg functions. 
- if (Is64Bit && isVarArg && !Subtarget->isTargetWin64()) + if (Is64Bit && isVarArg && !IsWin64) Ops.push_back(DAG.getRegister(X86::AL, MVT::i8)); if (InFlag.getNode()) @@ -2337,7 +2416,7 @@ X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG& DAG) const { MachineFunction &MF = DAG.getMachineFunction(); const TargetMachine &TM = MF.getTarget(); - const TargetFrameInfo &TFI = *TM.getFrameInfo(); + const TargetFrameLowering &TFI = *TM.getFrameLowering(); unsigned StackAlignment = TFI.getStackAlignment(); uint64_t AlignMask = StackAlignment - 1; int64_t Offset = StackSize; @@ -2364,7 +2443,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, int FI = INT_MAX; if (Arg.getOpcode() == ISD::CopyFromReg) { unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); - if (!VR || TargetRegisterInfo::isPhysicalRegister(VR)) + if (!TargetRegisterInfo::isVirtualRegister(VR)) return false; MachineInstr *Def = MRI->getVRegDef(VR); if (!Def) @@ -2510,14 +2589,17 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(), ArgLocs, *DAG.getContext()); - CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC)); + + // Allocate shadow area for Win64 + if (Subtarget->isTargetWin64()) { + CCInfo.AllocateStack(32, 8); + } + + CCInfo.AnalyzeCallOperands(Outs, CC_X86); if (CCInfo.getNextStackOffset()) { MachineFunction &MF = DAG.getMachineFunction(); if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) return false; - if (Subtarget->isTargetWin64()) - // Win64 ABI has additional complications. - return false; // Check if the arguments are already laid out in the right way as // the caller's fixed stack objects. @@ -2564,6 +2646,11 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, } } + // An stdcall caller is expected to clean up its arguments; the callee + // isn't going to do that. + if (!CCMatch && CallerCC==CallingConv::X86_StdCall) + return false; + return true; } @@ -2592,6 +2679,7 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::PSHUFHW: case X86ISD::PSHUFLW: case X86ISD::SHUFPD: + case X86ISD::PALIGN: case X86ISD::SHUFPS: case X86ISD::MOVLHPS: case X86ISD::MOVLHPD: @@ -2600,6 +2688,7 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::MOVLPD: case X86ISD::MOVSHDUP: case X86ISD::MOVSLDUP: + case X86ISD::MOVDDUP: case X86ISD::MOVSS: case X86ISD::MOVSD: case X86ISD::UNPCKLPS: @@ -2625,6 +2714,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, default: llvm_unreachable("Unknown x86 shuffle node"); case X86ISD::MOVSHDUP: case X86ISD::MOVSLDUP: + case X86ISD::MOVDDUP: return DAG.getNode(Opc, dl, VT, V1); } @@ -2648,6 +2738,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, SDValue V1, SDValue V2, unsigned TargetMask, SelectionDAG &DAG) { switch(Opc) { default: llvm_unreachable("Unknown x86 shuffle node"); + case X86ISD::PALIGN: case X86ISD::SHUFPD: case X86ISD::SHUFPS: return DAG.getNode(Opc, dl, VT, V1, V2, @@ -2770,8 +2861,8 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP, // First determine if it is required or is profitable to flip the operands. // If LHS is a foldable load, but RHS is not, flip the condition. 
- if ((ISD::isNON_EXTLoad(LHS.getNode()) && LHS.hasOneUse()) && - !(ISD::isNON_EXTLoad(RHS.getNode()) && RHS.hasOneUse())) { + if (ISD::isNON_EXTLoad(LHS.getNode()) && + !ISD::isNON_EXTLoad(RHS.getNode())) { SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode); std::swap(LHS, RHS); } @@ -2865,7 +2956,7 @@ static bool isUndefOrEqual(int Val, int CmpVal) { /// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference /// the second operand. static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, EVT VT) { - if (VT == MVT::v4f32 || VT == MVT::v4i32 || VT == MVT::v4i16) + if (VT == MVT::v4f32 || VT == MVT::v4i32 ) return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4); if (VT == MVT::v2f64 || VT == MVT::v2i64) return (Mask[0] < 2 && Mask[1] < 2); @@ -2933,15 +3024,15 @@ bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) { static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT, bool hasSSSE3) { int i, e = VT.getVectorNumElements(); - + // Do not handle v2i64 / v2f64 shuffles with palignr. if (e < 4 || !hasSSSE3) return false; - + for (i = 0; i != e; ++i) if (Mask[i] >= 0) break; - + // All undef, not a palignr. if (i == e) return false; @@ -2952,13 +3043,13 @@ static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT, bool NeedsUnary = false; int s = Mask[i] - i; - + // Check the rest of the elements to see if they are consecutive. for (++i; i != e; ++i) { int m = Mask[i]; - if (m < 0) + if (m < 0) continue; - + Unary = Unary && (m < (int)e); NeedsUnary = NeedsUnary || (m < s); @@ -3046,10 +3137,10 @@ bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) { /// <2, 3, 2, 3> bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) { unsigned NumElems = N->getValueType(0).getVectorNumElements(); - + if (NumElems != 4) return false; - + return isUndefOrEqual(N->getMaskElt(0), 2) && isUndefOrEqual(N->getMaskElt(1), 3) && isUndefOrEqual(N->getMaskElt(2), 2) && @@ -3320,6 +3411,44 @@ bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) { return true; } +/// isVEXTRACTF128Index - Return true if the specified +/// EXTRACT_SUBVECTOR operand specifies a vector extract that is +/// suitable for input to VEXTRACTF128. +bool X86::isVEXTRACTF128Index(SDNode *N) { + if (!isa<ConstantSDNode>(N->getOperand(1).getNode())) + return false; + + // The index should be aligned on a 128-bit boundary. + uint64_t Index = + cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); + + unsigned VL = N->getValueType(0).getVectorNumElements(); + unsigned VBits = N->getValueType(0).getSizeInBits(); + unsigned ElSize = VBits / VL; + bool Result = (Index * ElSize) % 128 == 0; + + return Result; +} + +/// isVINSERTF128Index - Return true if the specified INSERT_SUBVECTOR +/// operand specifies a subvector insert that is suitable for input to +/// VINSERTF128. +bool X86::isVINSERTF128Index(SDNode *N) { + if (!isa<ConstantSDNode>(N->getOperand(2).getNode())) + return false; + + // The index should be aligned on a 128-bit boundary. + uint64_t Index = + cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); + + unsigned VL = N->getValueType(0).getVectorNumElements(); + unsigned VBits = N->getValueType(0).getSizeInBits(); + unsigned ElSize = VBits / VL; + bool Result = (Index * ElSize) % 128 == 0; + + return Result; +} + /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle /// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions. 
unsigned X86::getShuffleSHUFImmediate(SDNode *N) { @@ -3388,6 +3517,42 @@ unsigned X86::getShufflePALIGNRImmediate(SDNode *N) { return (Val - i) * EltSize; } +/// getExtractVEXTRACTF128Immediate - Return the appropriate immediate +/// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF128 +/// instructions. +unsigned X86::getExtractVEXTRACTF128Immediate(SDNode *N) { + if (!isa<ConstantSDNode>(N->getOperand(1).getNode())) + llvm_unreachable("Illegal extract subvector for VEXTRACTF128"); + + uint64_t Index = + cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); + + EVT VecVT = N->getOperand(0).getValueType(); + EVT ElVT = VecVT.getVectorElementType(); + + unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits(); + + return Index / NumElemsPerChunk; +} + +/// getInsertVINSERTF128Immediate - Return the appropriate immediate +/// to insert at the specified INSERT_SUBVECTOR index with VINSERTF128 +/// instructions. +unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) { + if (!isa<ConstantSDNode>(N->getOperand(2).getNode())) + llvm_unreachable("Illegal insert subvector for VINSERTF128"); + + uint64_t Index = + cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); + + EVT VecVT = N->getValueType(0); + EVT ElVT = VecVT.getVectorElementType(); + + unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits(); + + return Index / NumElemsPerChunk; +} + /// isZeroNode - Returns true if Elt is a constant zero or a floating point /// constant +0.0. bool X86::isZeroNode(SDValue Elt) { @@ -3537,13 +3702,10 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); - // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted + // Always build SSE zero vectors as <4 x i32> bitcasted // to their dest type. This ensures they get CSE'd. SDValue Vec; - if (VT.getSizeInBits() == 64) { // MMX - SDValue Cst = DAG.getTargetConstant(0, MVT::i32); - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst); - } else if (VT.getSizeInBits() == 128) { + if (VT.getSizeInBits() == 128) { // SSE if (HasSSE2) { // SSE2 SDValue Cst = DAG.getTargetConstant(0, MVT::i32); Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); @@ -3559,7 +3721,7 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG, SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8); } - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec); + return DAG.getNode(ISD::BITCAST, dl, VT, Vec); } /// getOnesVector - Returns a vector of specified type with all bits set. @@ -3571,11 +3733,8 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { // type. This ensures they get CSE'd. SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32); SDValue Vec; - if (VT.getSizeInBits() == 64) // MMX - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst); - else // SSE - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec); + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); + return DAG.getNode(ISD::BITCAST, dl, VT, Vec); } @@ -3640,9 +3799,6 @@ static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, /// PromoteSplat - Promote a splat of v4i32, v8i16 or v16i8 to v4f32. 
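Editorial note: the isVEXTRACTF128Index/isVINSERTF128Index predicates added just above accept a subvector index only if it starts a 128-bit lane, and the two immediate helpers here divide the element index by the number of elements per 128-bit chunk. A standalone model of that arithmetic, with hypothetical names:

#include <cassert>

// Standalone model of the VEXTRACTF128/VINSERTF128 index rules: the element
// index must begin a 128-bit lane, and the instruction immediate is simply
// the lane number.
static bool isLaneAlignedIndex(unsigned Index, unsigned EltBits) {
  return (Index * EltBits) % 128 == 0;
}
static unsigned laneImmediate(unsigned Index, unsigned EltBits) {
  unsigned ElemsPerLane = 128 / EltBits;
  return Index / ElemsPerLane;
}

int main() {
  // v8f32 (8 x 32-bit): indices 0 and 4 are lane starts, 2 is not.
  assert(isLaneAlignedIndex(0, 32) && laneImmediate(0, 32) == 0);
  assert(isLaneAlignedIndex(4, 32) && laneImmediate(4, 32) == 1);
  assert(!isLaneAlignedIndex(2, 32));
  // v4i64 (4 x 64-bit): index 2 selects the upper lane.
  assert(laneImmediate(2, 64) == 1);
  return 0;
}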
static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) { - if (SV->getValueType(0).getVectorNumElements() <= 4) - return SDValue(SV, 0); - EVT PVT = MVT::v4f32; EVT VT = SV->getValueType(0); DebugLoc dl = SV->getDebugLoc(); @@ -3663,9 +3819,9 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) { // Perform the splat. int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo }; - V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1); + V1 = DAG.getNode(ISD::BITCAST, dl, PVT, V1); V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), &SplatMask[0]); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, V1); + return DAG.getNode(ISD::BITCAST, dl, VT, V1); } /// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified @@ -3789,7 +3945,7 @@ SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, } // Actual nodes that may contain scalar elements - if (Opcode == ISD::BIT_CONVERT) { + if (Opcode == ISD::BITCAST) { V = V.getOperand(0); EVT SrcVT = V.getValueType(); unsigned NumElems = VT.getVectorNumElements(); @@ -3978,7 +4134,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, } } - return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V); } /// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16. @@ -4017,11 +4173,10 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits, SelectionDAG &DAG, const TargetLowering &TLI, DebugLoc dl) { - bool isMMX = VT.getSizeInBits() == 64; - EVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64; + EVT ShVT = MVT::v2i64; unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL; - SrcOp = DAG.getNode(ISD::BIT_CONVERT, dl, ShVT, SrcOp); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, + SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp); + return DAG.getNode(ISD::BITCAST, dl, VT, DAG.getNode(Opc, dl, ShVT, SrcOp, DAG.getConstant(NumBits, TLI.getShiftAmountTy()))); } @@ -4029,7 +4184,7 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, SDValue X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, SelectionDAG &DAG) const { - + // Check if the scalar load can be widened into a vector load. And if // the address is "base + cst" see if the cst can be "absorbed" into // the shuffle mask. @@ -4046,8 +4201,7 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) { FI = FINode->getIndex(); Offset = 0; - } else if (Ptr.getOpcode() == ISD::ADD && - isa<ConstantSDNode>(Ptr.getOperand(1)) && + } else if (DAG.isBaseWithConstantOffset(Ptr) && isa<FrameIndexSDNode>(Ptr.getOperand(0))) { FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); Offset = Ptr.getConstantOperandVal(1); @@ -4084,41 +4238,42 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, int EltNo = (Offset - StartOffset) >> 2; int Mask[4] = { EltNo, EltNo, EltNo, EltNo }; EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32; - SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0, + SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr, + LD->getPointerInfo().getWithOffset(StartOffset), false, false, 0); // Canonicalize it to a v4i32 shuffle. 
- V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, V1); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, + V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); + return DAG.getNode(ISD::BITCAST, dl, VT, DAG.getVectorShuffle(MVT::v4i32, dl, V1, - DAG.getUNDEF(MVT::v4i32), &Mask[0])); + DAG.getUNDEF(MVT::v4i32),&Mask[0])); } return SDValue(); } -/// EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a -/// vector of type 'VT', see if the elements can be replaced by a single large +/// EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a +/// vector of type 'VT', see if the elements can be replaced by a single large /// load which has the same value as a build_vector whose operands are 'elts'. /// /// Example: <load i32 *a, load i32 *a+4, undef, undef> -> zextload a -/// +/// /// FIXME: we'd also like to handle the case where the last elements are zero /// rather than undef via VZEXT_LOAD, but we do not detect that case today. /// There's even a handy isZeroNode for that purpose. static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts, - DebugLoc &dl, SelectionDAG &DAG) { + DebugLoc &DL, SelectionDAG &DAG) { EVT EltVT = VT.getVectorElementType(); unsigned NumElems = Elts.size(); - + LoadSDNode *LDBase = NULL; unsigned LastLoadedElt = -1U; - + // For each element in the initializer, see if we've found a load or an undef. - // If we don't find an initial load element, or later load elements are + // If we don't find an initial load element, or later load elements are // non-consecutive, bail out. for (unsigned i = 0; i < NumElems; ++i) { SDValue Elt = Elts[i]; - + if (!Elt.getNode() || (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode()))) return SDValue(); @@ -4143,18 +4298,20 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts, // consecutive loads for the low half, generate a vzext_load node. if (LastLoadedElt == NumElems - 1) { if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16) - return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(), - LDBase->getSrcValue(), LDBase->getSrcValueOffset(), + return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(), + LDBase->getPointerInfo(), LDBase->isVolatile(), LDBase->isNonTemporal(), 0); - return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(), - LDBase->getSrcValue(), LDBase->getSrcValueOffset(), + return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(), + LDBase->getPointerInfo(), LDBase->isVolatile(), LDBase->isNonTemporal(), LDBase->getAlignment()); } else if (NumElems == 4 && LastLoadedElt == 1) { SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other); SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() }; - SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode); + SDValue ResNode = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, + Ops, 2, MVT::i32, + LDBase->getMemOperand()); + return DAG.getNode(ISD::BITCAST, DL, VT, ResNode); } return SDValue(); } @@ -4162,6 +4319,35 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts, SDValue X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); + + EVT VT = Op.getValueType(); + EVT ExtVT = VT.getVectorElementType(); + + unsigned NumElems = Op.getNumOperands(); + + // For AVX-length vectors, build the individual 128-bit pieces and + // use shuffles to put them in place. 
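Editorial note: EltsFromConsecutiveLoads above now carries MachinePointerInfo and emits the VZEXT_LOAD case as a memory intrinsic node, but the core test is unchanged: each defined element must read element i of one base load, and a usable run must reach either the last element (full wide load) or element 1 of a 4-element vector (zero-extending half load). The sketch below models only the address arithmetic of that check, not everything the real code verifies about the load nodes.

#include <cassert>
#include <cstddef>
#include <vector>

// Standalone model of the consecutive-load test: undef gaps are allowed, but
// every defined element must sit at (base + i * EltSize) of the first load.
struct Elt { bool IsUndef; const char *Base; size_t Offset; };

static int lastConsecutiveElt(const std::vector<Elt> &Elts, size_t EltSize) {
  int Last = -1;
  bool HaveBase = false;
  const char *Base = nullptr;
  size_t BaseOff = 0;
  for (size_t i = 0; i < Elts.size(); ++i) {
    if (Elts[i].IsUndef)
      continue;
    if (!HaveBase) {
      if (i != 0)
        return -1;                       // the run must start at element 0
      HaveBase = true;
      Base = Elts[i].Base;
      BaseOff = Elts[i].Offset;
    } else if (Elts[i].Base != Base ||
               Elts[i].Offset != BaseOff + i * EltSize) {
      return -1;                         // not consecutive with the base load
    }
    Last = (int)i;
  }
  return Last;
}

int main() {
  const char Buf[16] = {};
  std::vector<Elt> V = {{false, Buf, 0}, {false, Buf, 4},
                        {true, nullptr, 0}, {true, nullptr, 0}};
  assert(lastConsecutiveElt(V, 4) == 1);   // low half loaded -> vzext_load case
  return 0;
}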
+ if (VT.getSizeInBits() > 256 && + Subtarget->hasAVX() && + !Disable256Bit && + !ISD::isBuildVectorAllZeros(Op.getNode())) { + SmallVector<SDValue, 8> V; + V.resize(NumElems); + for (unsigned i = 0; i < NumElems; ++i) { + V[i] = Op.getOperand(i); + } + + EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2); + + // Build the lower subvector. + SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[0], NumElems/2); + // Build the upper subvector. + SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[NumElems / 2], + NumElems/2); + + return ConcatVectors(Lower, Upper, DAG); + } + // All zero's are handled with pxor in SSE2 and above, xorps in SSE1. // All one's are handled with pcmpeqd. In AVX, zero's are handled with // vpxor in 128-bit and xor{pd,ps} in 256-bit, but no 256 version of pcmpeqd @@ -4169,10 +4355,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (ISD::isBuildVectorAllZeros(Op.getNode()) || (Op.getValueType().getSizeInBits() != 256 && ISD::isBuildVectorAllOnes(Op.getNode()))) { - // Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to + // Canonicalize this to <4 x i32> (SSE) to // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are // eliminated on x86-32 hosts. - if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v2i32) + if (Op.getValueType() == MVT::v4i32) return Op; if (ISD::isBuildVectorAllOnes(Op.getNode())) @@ -4180,11 +4366,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG, dl); } - EVT VT = Op.getValueType(); - EVT ExtVT = VT.getVectorElementType(); unsigned EVTBits = ExtVT.getSizeInBits(); - unsigned NumElems = Op.getNumOperands(); unsigned NumZero = 0; unsigned NumNonZero = 0; unsigned NonZeros = 0; @@ -4223,9 +4406,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (ExtVT == MVT::i64 && !Subtarget->is64Bit() && (!IsAllConstants || Idx == 0)) { if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) { - // Handle MMX and SSE both. - EVT VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32; - unsigned VecElts = VT == MVT::v2i64 ? 4 : 2; + // Handle SSE only. + assert(VT == MVT::v2i64 && "Expected an SSE value type!"); + EVT VecVT = MVT::v4i32; + unsigned VecElts = 4; // Truncate the value (which may itself be a constant) to i32, and // convert it to a vector with movd (S2V+shuffle to zero extend). @@ -4245,7 +4429,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { DAG.getUNDEF(Item.getValueType()), &Mask[0]); } - return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Item); + return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Item); } } @@ -4264,11 +4448,12 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { DAG); } else if (ExtVT == MVT::i16 || ExtVT == MVT::i8) { Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); - EVT MiddleVT = VT.getSizeInBits() == 64 ? 
MVT::v2i32 : MVT::v4i32; + assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!"); + EVT MiddleVT = MVT::v4i32; Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item); Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget->hasSSE2(), DAG); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Item); + return DAG.getNode(ISD::BITCAST, dl, VT, Item); } } @@ -4394,20 +4579,20 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // Check for a build vector of consecutive loads. for (unsigned i = 0; i < NumElems; ++i) V[i] = Op.getOperand(i); - + // Check for elements which are consecutive loads. SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG); if (LD.getNode()) return LD; - - // For SSE 4.1, use insertps to put the high elements into the low element. + + // For SSE 4.1, use insertps to put the high elements into the low element. if (getSubtarget()->hasSSE41()) { SDValue Result; if (Op.getOperand(0).getOpcode() != ISD::UNDEF) Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0)); else Result = DAG.getUNDEF(VT); - + for (unsigned i = 1; i < NumElems; ++i) { if (Op.getOperand(i).getOpcode() == ISD::UNDEF) continue; Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result, @@ -4415,7 +4600,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { } return Result; } - + // Otherwise, expand into a number of unpckl*, start by extending each of // our (non-undef) elements to the full vector width with the element in the // bottom slot of the vector (which generates no code for SSE). @@ -4441,7 +4626,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (V[i+EltStride].getOpcode() == ISD::UNDEF && EltStride == NumElems/2) continue; - + V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + EltStride]); } EltStride >>= 1; @@ -4461,21 +4646,21 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { assert(ResVT == MVT::v2i64 || ResVT == MVT::v4i32 || ResVT == MVT::v8i16 || ResVT == MVT::v16i8); int Mask[2]; - SDValue InVec = DAG.getNode(ISD::BIT_CONVERT,dl, MVT::v1i64, Op.getOperand(0)); + SDValue InVec = DAG.getNode(ISD::BITCAST,dl, MVT::v1i64, Op.getOperand(0)); SDValue VecOp = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec); InVec = Op.getOperand(1); if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { unsigned NumElts = ResVT.getVectorNumElements(); - VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, ResVT, VecOp); + VecOp = DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp); VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ResVT, VecOp, InVec.getOperand(0), DAG.getIntPtrConstant(NumElts/2+1)); } else { - InVec = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v1i64, InVec); + InVec = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, InVec); SDValue VecOp2 = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec); Mask[0] = 0; Mask[1] = 2; VecOp = DAG.getVectorShuffle(MVT::v2i64, dl, VecOp, VecOp2, Mask); } - return DAG.getNode(ISD::BIT_CONVERT, dl, ResVT, VecOp); + return DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp); } // v8i16 shuffles - Prefer shuffles in the following order: @@ -4557,9 +4742,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op, MaskV.push_back(BestLoQuad < 0 ? 0 : BestLoQuad); MaskV.push_back(BestHiQuad < 0 ? 
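Editorial note: the unpckl expansion loop above spreads each scalar into the bottom lane of its own vector and then merges vectors pairwise, halving EltStride each round, so four elements take two rounds. The same ladder with SSE intrinsics, as an illustration of the merge order:

#include <xmmintrin.h>
#include <cstdio>

// unpckl merge ladder: scalars start in lane 0, vectors merge pairwise.
int main() {
  __m128 V[4] = { _mm_set_ss(1.0f), _mm_set_ss(2.0f),
                  _mm_set_ss(3.0f), _mm_set_ss(4.0f) };

  for (int Stride = 2; Stride != 0; Stride >>= 1)   // 4 elements: 2 rounds
    for (int i = 0; i < Stride; ++i)
      V[i] = _mm_unpacklo_ps(V[i], V[i + Stride]);

  float Out[4];
  _mm_storeu_ps(Out, V[0]);
  std::printf("%g %g %g %g\n", Out[0], Out[1], Out[2], Out[3]);  // 1 2 3 4
  return 0;
}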
1 : BestHiQuad); NewV = DAG.getVectorShuffle(MVT::v2i64, dl, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V1), - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V2), &MaskV[0]); - NewV = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, NewV); + DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1), + DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2), &MaskV[0]); + NewV = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, NewV); // Rewrite the MaskVals and assign NewV to V1 if NewV now contains all the // source words for the shuffle, to aid later transformations. @@ -4628,12 +4813,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op, pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8)); pshufbMask.push_back(DAG.getConstant(EltIdx+1, MVT::i8)); } - V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V1); + V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V1); V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1, DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, &pshufbMask[0], 16)); if (!TwoInputs) - return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1); + return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1); // Calculate the shuffle mask for the second input, shuffle it, and // OR it with the first shuffled input. @@ -4648,12 +4833,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op, pshufbMask.push_back(DAG.getConstant(EltIdx - 16, MVT::i8)); pshufbMask.push_back(DAG.getConstant(EltIdx - 15, MVT::i8)); } - V2 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V2); + V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V2); V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2, DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, &pshufbMask[0], 16)); V1 = DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2); - return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1); + return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1); } // If BestLoQuad >= 0, generate a pshuflw to put the low elements in order, @@ -4820,8 +5005,8 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, // No SSSE3 - Calculate in place words and then fix all out of place words // With 0-16 extracts & inserts. Worst case is 16 bytes out of order from // the 16 different words that comprise the two doublequadword input vectors. - V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1); - V2 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V2); + V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1); + V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2); SDValue NewV = V2Only ? V2 : V1; for (int i = 0; i != 8; ++i) { int Elt0 = MaskVals[i*2]; @@ -4883,25 +5068,23 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, InsElt, DAG.getIntPtrConstant(i)); } - return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, NewV); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, NewV); } /// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide -/// ones, or rewriting v4i32 / v2i32 as 2 wide ones if possible. This can be +/// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be /// done when every pair / quad of shuffle mask elements point to elements in /// the right sequence. e.g. 
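Editorial note: the SSSE3 path above builds one pshufb mask per input (for v8i16 it pushes two byte indices per word), relies on mask bytes with the high bit set producing zero, and ORs the two shuffled results. A byte-level illustration of that two-input trick with intrinsics; compile with -mssse3.

#include <tmmintrin.h>   // SSSE3: _mm_shuffle_epi8
#include <cstdio>

// Two-input shuffle via pshufb: indices that belong to the other operand
// become 0x80 (pshufb then writes zero), and the results are OR'd together.
int main() {
  alignas(16) unsigned char A[16], B[16], MaskIdx[16];
  for (int i = 0; i < 16; ++i) { A[i] = i; B[i] = 100 + i; }
  // A mask in the 0..31 range: even result bytes from A, odd bytes from B.
  for (int i = 0; i < 16; ++i) MaskIdx[i] = (i % 2 == 0) ? i : 16 + i;

  alignas(16) unsigned char M1[16], M2[16];
  for (int i = 0; i < 16; ++i) {
    M1[i] = MaskIdx[i] < 16 ? MaskIdx[i] : 0x80;        // selects from A
    M2[i] = MaskIdx[i] < 16 ? 0x80 : MaskIdx[i] - 16;   // selects from B
  }

  __m128i VA = _mm_load_si128((const __m128i *)A);
  __m128i VB = _mm_load_si128((const __m128i *)B);
  __m128i R  = _mm_or_si128(
      _mm_shuffle_epi8(VA, _mm_load_si128((const __m128i *)M1)),
      _mm_shuffle_epi8(VB, _mm_load_si128((const __m128i *)M2)));

  alignas(16) unsigned char Out[16];
  _mm_store_si128((__m128i *)Out, R);
  for (int i = 0; i < 16; ++i) std::printf("%d ", Out[i]);
  std::printf("\n");
  return 0;
}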
-/// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15> +/// vector_shuffle X, Y, <2, 3, | 10, 11, | 0, 1, | 14, 15> static SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, - SelectionDAG &DAG, - const TargetLowering &TLI, DebugLoc dl) { + SelectionDAG &DAG, DebugLoc dl) { EVT VT = SVOp->getValueType(0); SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); unsigned NumElems = VT.getVectorNumElements(); unsigned NewWidth = (NumElems == 4) ? 2 : 4; - EVT MaskVT = (NewWidth == 4) ? MVT::v4i16 : MVT::v2i32; - EVT NewVT = MaskVT; + EVT NewVT; switch (VT.getSimpleVT().SimpleTy) { default: assert(false && "Unexpected!"); case MVT::v4f32: NewVT = MVT::v2f64; break; @@ -4910,12 +5093,6 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, case MVT::v16i8: NewVT = MVT::v4i32; break; } - if (NewWidth == 2) { - if (VT.isInteger()) - NewVT = MVT::v2i64; - else - NewVT = MVT::v2f64; - } int Scale = NumElems / NewWidth; SmallVector<int, 8> MaskVec; for (unsigned i = 0; i < NumElems; i += Scale) { @@ -4935,8 +5112,8 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, MaskVec.push_back(StartIdx / Scale); } - V1 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V1); - V2 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V2); + V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, V1); + V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, V2); return DAG.getVectorShuffle(NewVT, dl, V1, V2, &MaskVec[0]); } @@ -4953,13 +5130,13 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT, // movssrr and movsdrr do not clear top bits. Try to use movd, movq // instead. MVT ExtVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32; - if ((ExtVT.SimpleTy != MVT::i64 || Subtarget->is64Bit()) && + if ((ExtVT != MVT::i64 || Subtarget->is64Bit()) && SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR && - SrcOp.getOperand(0).getOpcode() == ISD::BIT_CONVERT && + SrcOp.getOperand(0).getOpcode() == ISD::BITCAST && SrcOp.getOperand(0).getOperand(0).getValueType() == ExtVT) { // PR2108 OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32; - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, + return DAG.getNode(ISD::BITCAST, dl, VT, DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, OpVT, @@ -4969,9 +5146,9 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT, } } - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, + return DAG.getNode(ISD::BITCAST, dl, VT, DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT, - DAG.getNode(ISD::BIT_CONVERT, dl, + DAG.getNode(ISD::BITCAST, dl, OpVT, SrcOp))); } @@ -5125,7 +5302,7 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { } static bool MayFoldVectorLoad(SDValue V) { - if (V.hasOneUse() && V.getOpcode() == ISD::BIT_CONVERT) + if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST) V = V.getOperand(0); if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR) V = V.getOperand(0); @@ -5134,6 +5311,110 @@ static bool MayFoldVectorLoad(SDValue V) { return false; } +// FIXME: the version above should always be used. Since there's +// a bug where several vector shuffles can't be folded because the +// DAG is not updated during lowering and a node claims to have two +// uses while it only has one, use this version, and let isel match +// another instruction if the load really happens to have more than +// one use. Remove this version after this bug get fixed. 
+// rdar://8434668, PR8156 +static bool RelaxedMayFoldVectorLoad(SDValue V) { + if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST) + V = V.getOperand(0); + if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR) + V = V.getOperand(0); + if (ISD::isNormalLoad(V.getNode())) + return true; + return false; +} + +/// CanFoldShuffleIntoVExtract - Check if the current shuffle is used by +/// a vector extract, and if both can be later optimized into a single load. +/// This is done in visitEXTRACT_VECTOR_ELT and the conditions are checked +/// here because otherwise a target specific shuffle node is going to be +/// emitted for this shuffle, and the optimization not done. +/// FIXME: This is probably not the best approach, but fix the problem +/// until the right path is decided. +static +bool CanXFormVExtractWithShuffleIntoLoad(SDValue V, SelectionDAG &DAG, + const TargetLowering &TLI) { + EVT VT = V.getValueType(); + ShuffleVectorSDNode *SVOp = dyn_cast<ShuffleVectorSDNode>(V); + + // Be sure that the vector shuffle is present in a pattern like this: + // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), c) -> (f32 load $addr) + if (!V.hasOneUse()) + return false; + + SDNode *N = *V.getNode()->use_begin(); + if (N->getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return false; + + SDValue EltNo = N->getOperand(1); + if (!isa<ConstantSDNode>(EltNo)) + return false; + + // If the bit convert changed the number of elements, it is unsafe + // to examine the mask. + bool HasShuffleIntoBitcast = false; + if (V.getOpcode() == ISD::BITCAST) { + EVT SrcVT = V.getOperand(0).getValueType(); + if (SrcVT.getVectorNumElements() != VT.getVectorNumElements()) + return false; + V = V.getOperand(0); + HasShuffleIntoBitcast = true; + } + + // Select the input vector, guarding against out of range extract vector. + unsigned NumElems = VT.getVectorNumElements(); + unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + int Idx = (Elt > NumElems) ? -1 : SVOp->getMaskElt(Elt); + V = (Idx < (int)NumElems) ? V.getOperand(0) : V.getOperand(1); + + // Skip one more bit_convert if necessary + if (V.getOpcode() == ISD::BITCAST) + V = V.getOperand(0); + + if (ISD::isNormalLoad(V.getNode())) { + // Is the original load suitable? + LoadSDNode *LN0 = cast<LoadSDNode>(V); + + // FIXME: avoid the multi-use bug that is preventing lots of + // of foldings to be detected, this is still wrong of course, but + // give the temporary desired behavior, and if it happens that + // the load has real more uses, during isel it will not fold, and + // will generate poor code. + if (!LN0 || LN0->isVolatile()) // || !LN0->hasOneUse() + return false; + + if (!HasShuffleIntoBitcast) + return true; + + // If there's a bitcast before the shuffle, check if the load type and + // alignment is valid. + unsigned Align = LN0->getAlignment(); + unsigned NewAlign = + TLI.getTargetData()->getABITypeAlignment( + VT.getTypeForEVT(*DAG.getContext())); + + if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT)) + return false; + } + + return true; +} + +static +SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); + + // Canonizalize to v2f64. 
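Editorial note: CanXFormVExtractWithShuffleIntoLoad above guards the fold of (extract_elt (shuffle (load p), mask), c) into a plain scalar load; the real routine also checks single-use, volatility, and post-bitcast alignment. The sketch below models only the index mapping through the shuffle mask, with made-up helper names.

#include <cassert>
#include <vector>

// Extracting lane Elt from shuffle(V0, V1, Mask) reads lane Mask[Elt] of V0
// when Mask[Elt] < NumElems, or lane Mask[Elt] - NumElems of V1 otherwise;
// a scalar load can then be taken directly from that operand's memory.
struct SourceLane { int Operand; int Lane; };   // Operand: 0, 1, or -1 (undef)

static SourceLane mapExtractThroughShuffle(const std::vector<int> &Mask,
                                           unsigned Elt) {
  int NumElems = (int)Mask.size();
  int Idx = (Elt >= Mask.size()) ? -1 : Mask[Elt];
  if (Idx < 0)
    return {-1, -1};                            // undef element
  if (Idx < NumElems)
    return {0, Idx};                            // comes from the first input
  return {1, Idx - NumElems};                   // comes from the second input
}

int main() {
  // shuffle(V0, V1, <1,u,u,u>): extracting element 0 reads V0's lane 1, so
  // (extract (shuffle (load p), ...), 0) can become a load of p[1].
  SourceLane S = mapExtractThroughShuffle({1, -1, -1, -1}, 0);
  assert(S.Operand == 0 && S.Lane == 1);
  SourceLane T = mapExtractThroughShuffle({1, -1, 6, -1}, 2);
  assert(T.Operand == 1 && T.Lane == 2);
  return 0;
}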
+ V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1); + return DAG.getNode(ISD::BITCAST, dl, VT, + getTargetShuffleNode(X86ISD::MOVDDUP, dl, MVT::v2f64, + V1, DAG)); +} + static SDValue getMOVLowToHigh(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) { @@ -5191,6 +5472,10 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) { if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op)) CanFoldLoad = true; + // Both of them can't be memory operations though. + if (MayFoldVectorLoad(V1) && MayFoldVectorLoad(V2)) + CanFoldLoad = false; + if (CanFoldLoad) { if (HasSSE2 && NumElems == 2) return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG); @@ -5228,7 +5513,7 @@ static inline unsigned getUNPCKLOpcode(EVT VT) { case MVT::v16i8: return X86ISD::PUNPCKLBW; case MVT::v8i16: return X86ISD::PUNPCKLWD; default: - llvm_unreachable("Unknow type for unpckl"); + llvm_unreachable("Unknown type for unpckl"); } return 0; } @@ -5242,63 +5527,111 @@ static inline unsigned getUNPCKHOpcode(EVT VT) { case MVT::v16i8: return X86ISD::PUNPCKHBW; case MVT::v8i16: return X86ISD::PUNPCKHWD; default: - llvm_unreachable("Unknow type for unpckh"); + llvm_unreachable("Unknown type for unpckh"); } return 0; } -SDValue -X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { +static +SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI, + const X86Subtarget *Subtarget) { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); - SDValue V1 = Op.getOperand(0); - SDValue V2 = Op.getOperand(1); EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); - unsigned NumElems = VT.getVectorNumElements(); - bool isMMX = VT.getSizeInBits() == 64; - bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; - bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; - bool V1IsSplat = false; - bool V2IsSplat = false; - bool HasSSE2 = Subtarget->hasSSE2() || Subtarget->hasAVX(); - bool HasSSE3 = Subtarget->hasSSE3() || Subtarget->hasAVX(); - MachineFunction &MF = DAG.getMachineFunction(); - bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); if (isZeroShuffle(SVOp)) return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl); - // Promote splats to v4f32. + // Handle splat operations if (SVOp->isSplat()) { - if (isMMX || NumElems < 4) + // Special case, this is the only place now where it's + // allowed to return a vector_shuffle operation without + // using a target specific node, because *hopefully* it + // will be optimized away by the dag combiner. + if (VT.getVectorNumElements() <= 4 && + CanXFormVExtractWithShuffleIntoLoad(Op, DAG, TLI)) return Op; + + // Handle splats by matching through known masks + if (VT.getVectorNumElements() <= 4) + return SDValue(); + + // Canonicalize all of the remaining to v4f32. return PromoteSplat(SVOp, DAG); } // If the shuffle can be profitably rewritten as a narrower shuffle, then // do it! if (VT == MVT::v8i16 || VT == MVT::v16i8) { - SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl); + SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl); if (NewOp.getNode()) - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, - LowerVECTOR_SHUFFLE(NewOp, DAG)); + return DAG.getNode(ISD::BITCAST, dl, VT, NewOp); } else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) { // FIXME: Figure out a cleaner way to do this. // Try to make use of movq to zero out the top part. 
if (ISD::isBuildVectorAllZeros(V2.getNode())) { - SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl); + SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl); if (NewOp.getNode()) { if (isCommutedMOVL(cast<ShuffleVectorSDNode>(NewOp), true, false)) return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(0), DAG, Subtarget, dl); } } else if (ISD::isBuildVectorAllZeros(V1.getNode())) { - SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl); + SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl); if (NewOp.getNode() && X86::isMOVLMask(cast<ShuffleVectorSDNode>(NewOp))) return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1), DAG, Subtarget, dl); } } + return SDValue(); +} + +SDValue +X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + unsigned NumElems = VT.getVectorNumElements(); + bool isMMX = VT.getSizeInBits() == 64; + bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; + bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; + bool V1IsSplat = false; + bool V2IsSplat = false; + bool HasSSE2 = Subtarget->hasSSE2() || Subtarget->hasAVX(); + bool HasSSE3 = Subtarget->hasSSE3() || Subtarget->hasAVX(); + bool HasSSSE3 = Subtarget->hasSSSE3() || Subtarget->hasAVX(); + MachineFunction &MF = DAG.getMachineFunction(); + bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); + + // Shuffle operations on MMX not supported. + if (isMMX) + return Op; + + // Vector shuffle lowering takes 3 steps: + // + // 1) Normalize the input vectors. Here splats, zeroed vectors, profitable + // narrowing and commutation of operands should be handled. + // 2) Matching of shuffles with known shuffle masks to x86 target specific + // shuffle nodes. + // 3) Rewriting of unmatched masks into new generic shuffle operations, + // so the shuffle can be broken into other shuffles and the legalizer can + // try the lowering again. + // + // The general ideia is that no vector_shuffle operation should be left to + // be matched during isel, all of them must be converted to a target specific + // node here. + + // Normalize the input vectors. Here splats, zeroed vectors, profitable + // narrowing and commutation of operands should be handled. The actual code + // doesn't include all of those, work in progress... + SDValue NewOp = NormalizeVectorShuffle(Op, DAG, *this, Subtarget); + if (NewOp.getNode()) + return NewOp; // NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and // unpckh_undef). Only use pshufd if speed is more important than size. 
@@ -5309,6 +5642,18 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { if (VT != MVT::v2i64 && VT != MVT::v2f64) return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); + if (X86::isMOVDDUPMask(SVOp) && HasSSE3 && V2IsUndef && + RelaxedMayFoldVectorLoad(V1)) + return getMOVDDup(Op, dl, V1, DAG); + + if (X86::isMOVHLPS_v_undef_Mask(SVOp)) + return getMOVHighToLow(Op, dl, DAG); + + // Use to match splats + if (HasSSE2 && X86::isUNPCKHMask(SVOp) && V2IsUndef && + (VT == MVT::v2f64 || VT == MVT::v2i64)) + return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); + if (X86::isPSHUFDMask(SVOp)) { // The actual implementation will match the mask in the if above and then // during isel it can match several different instructions, not only pshufd @@ -5349,7 +5694,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return V2; if (ISD::isBuildVectorAllZeros(V1.getNode())) return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl); - if (!isMMX && !X86::isMOVLPMask(SVOp)) { + if (!X86::isMOVLPMask(SVOp)) { if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64)) return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG); @@ -5359,22 +5704,20 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { } // FIXME: fold these into legal mask. - if (!isMMX) { - if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp)) - return getMOVLowToHigh(Op, dl, DAG, HasSSE2); + if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp)) + return getMOVLowToHigh(Op, dl, DAG, HasSSE2); - if (X86::isMOVHLPSMask(SVOp)) - return getMOVHighToLow(Op, dl, DAG); + if (X86::isMOVHLPSMask(SVOp)) + return getMOVHighToLow(Op, dl, DAG); - if (X86::isMOVSHDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4) - return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG); + if (X86::isMOVSHDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4) + return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG); - if (X86::isMOVSLDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4) - return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG); + if (X86::isMOVSLDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4) + return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG); - if (X86::isMOVLPMask(SVOp)) - return getMOVLP(Op, dl, DAG, HasSSE2); - } + if (X86::isMOVLPMask(SVOp)) + return getMOVLP(Op, dl, DAG, HasSSE2); if (ShouldXformToMOVHLPS(SVOp) || ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp)) @@ -5414,13 +5757,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getMOVL(DAG, dl, VT, V2, V1); } - if (X86::isUNPCKL_v_undef_Mask(SVOp) || X86::isUNPCKLMask(SVOp)) - return (isMMX) ? - Op : getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG); + if (X86::isUNPCKLMask(SVOp)) + return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG); - if (X86::isUNPCKH_v_undef_Mask(SVOp) || X86::isUNPCKHMask(SVOp)) - return (isMMX) ? 
- Op : getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG); + if (X86::isUNPCKHMask(SVOp)) + return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG); if (V2IsSplat) { // Normalize mask so all entries that point to V2 points to its first @@ -5443,19 +5784,15 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { SDValue NewOp = CommuteVectorShuffle(SVOp, DAG); ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp); - if (X86::isUNPCKL_v_undef_Mask(NewSVOp) || X86::isUNPCKLMask(NewSVOp)) - return (isMMX) ? - NewOp : getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG); + if (X86::isUNPCKLMask(NewSVOp)) + return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG); - if (X86::isUNPCKH_v_undef_Mask(NewSVOp) || X86::isUNPCKHMask(NewSVOp)) - return (isMMX) ? - NewOp : getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG); + if (X86::isUNPCKHMask(NewSVOp)) + return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG); } - // FIXME: for mmx, bitcast v2i32 to v4i16 for shuffle. - // Normalize the node to match x86 shuffle ops if needed - if (!isMMX && V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp)) + if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp)) return CommuteVectorShuffle(SVOp, DAG); // The checks below are all present in isShuffleMaskLegal, but they are @@ -5464,15 +5801,18 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { SmallVector<int, 16> M; SVOp->getMask(M); - // Very little shuffling can be done for 64-bit vectors right now. - if (VT.getSizeInBits() == 64) - return isPALIGNRMask(M, VT, Subtarget->hasSSSE3()) ? Op : SDValue(); + if (isPALIGNRMask(M, VT, HasSSSE3)) + return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2, + X86::getShufflePALIGNRImmediate(SVOp), + DAG); - // FIXME: pshufb, blends, shifts. - if (VT.getVectorNumElements() == 2 || - ShuffleVectorSDNode::isSplatMask(&M[0], VT) || - isPALIGNRMask(M, VT, Subtarget->hasSSSE3())) - return Op; + if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) && + SVOp->getSplatIndex() == 0 && V2IsUndef) { + if (VT == MVT::v2f64) + return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG); + if (VT == MVT::v2i64) + return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG); + } if (isPSHUFHWMask(M, VT)) return getTargetShuffleNode(X86ISD::PSHUFHW, dl, VT, V1, @@ -5494,6 +5834,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { TargetMask, DAG); } + if (X86::isUNPCKL_v_undef_Mask(SVOp)) + if (VT != MVT::v2i64 && VT != MVT::v2f64) + return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG); + if (X86::isUNPCKH_v_undef_Mask(SVOp)) + if (VT != MVT::v2i64 && VT != MVT::v2f64) + return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); + // Handle v8i16 specifically since SSE can do byte extraction and insertion. if (VT == MVT::v8i16) { SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(Op, DAG); @@ -5507,8 +5854,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return NewOp; } - // Handle all 4 wide cases with a number of shuffles except for MMX. - if (NumElems == 4 && !isMMX) + // Handle all 4 wide cases with a number of shuffles. 
+ if (NumElems == 4) return LowerVECTOR_SHUFFLE_4wide(SVOp, DAG); return SDValue(); @@ -5531,7 +5878,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, if (Idx == 0) return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, - DAG.getNode(ISD::BIT_CONVERT, dl, + DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)), Op.getOperand(1))); @@ -5552,14 +5899,14 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, if ((User->getOpcode() != ISD::STORE || (isa<ConstantSDNode>(Op.getOperand(1)) && cast<ConstantSDNode>(Op.getOperand(1))->isNullValue())) && - (User->getOpcode() != ISD::BIT_CONVERT || + (User->getOpcode() != ISD::BITCAST || User->getValueType(0) != MVT::i32)) return SDValue(); SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, + DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)), Op.getOperand(1)); - return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Extract); + return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Extract); } else if (VT == MVT::i32) { // ExtractPS works with constant index. if (isa<ConstantSDNode>(Op.getOperand(1))) @@ -5575,6 +5922,38 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, if (!isa<ConstantSDNode>(Op.getOperand(1))) return SDValue(); + SDValue Vec = Op.getOperand(0); + EVT VecVT = Vec.getValueType(); + + // If this is a 256-bit vector result, first extract the 128-bit + // vector and then extract from the 128-bit vector. + if (VecVT.getSizeInBits() > 128) { + DebugLoc dl = Op.getNode()->getDebugLoc(); + unsigned NumElems = VecVT.getVectorNumElements(); + SDValue Idx = Op.getOperand(1); + + if (!isa<ConstantSDNode>(Idx)) + return SDValue(); + + unsigned ExtractNumElems = NumElems / (VecVT.getSizeInBits() / 128); + unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + + // Get the 128-bit vector. + bool Upper = IdxVal >= ExtractNumElems; + Vec = Extract128BitVector(Vec, Idx, DAG, dl); + + // Extract from it. + SDValue ScaledIdx = Idx; + if (Upper) + ScaledIdx = DAG.getNode(ISD::SUB, dl, Idx.getValueType(), Idx, + DAG.getConstant(ExtractNumElems, + Idx.getValueType())); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec, + ScaledIdx); + } + + assert(Vec.getValueSizeInBits() <= 128 && "Unexpected vector length"); + if (Subtarget->hasSSE41()) { SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG); if (Res.getNode()) @@ -5590,7 +5969,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, if (Idx == 0) return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, - DAG.getNode(ISD::BIT_CONVERT, dl, + DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Vec), Op.getOperand(1))); // Transform it so it match pextrw which produces a 32-bit result. @@ -5650,8 +6029,6 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, unsigned Opc; if (VT == MVT::v8i16) Opc = X86ISD::PINSRW; - else if (VT == MVT::v4i16) - Opc = X86ISD::MMX_PINSRW; else if (VT == MVT::v16i8) Opc = X86ISD::PINSRB; else @@ -5689,17 +6066,45 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); EVT EltVT = VT.getVectorElementType(); + DebugLoc dl = Op.getDebugLoc(); + SDValue N0 = Op.getOperand(0); + SDValue N1 = Op.getOperand(1); + SDValue N2 = Op.getOperand(2); + + // If this is a 256-bit vector result, first insert into a 128-bit + // vector and then insert into the 256-bit vector. 
+ if (VT.getSizeInBits() > 128) { + if (!isa<ConstantSDNode>(N2)) + return SDValue(); + + // Get the 128-bit vector. + unsigned NumElems = VT.getVectorNumElements(); + unsigned IdxVal = cast<ConstantSDNode>(N2)->getZExtValue(); + bool Upper = IdxVal >= NumElems / 2; + + SDValue SubN0 = Extract128BitVector(N0, N2, DAG, dl); + + // Insert into it. + SDValue ScaledN2 = N2; + if (Upper) + ScaledN2 = DAG.getNode(ISD::SUB, dl, N2.getValueType(), N2, + DAG.getConstant(NumElems / + (VT.getSizeInBits() / 128), + N2.getValueType())); + Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubN0.getValueType(), SubN0, + N1, ScaledN2); + + // Insert the 128-bit vector + // FIXME: Why UNDEF? + return Insert128BitVector(N0, Op, N2, DAG, dl); + } + if (Subtarget->hasSSE41()) return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG); if (EltVT == MVT::i8) return SDValue(); - DebugLoc dl = Op.getDebugLoc(); - SDValue N0 = Op.getOperand(0); - SDValue N1 = Op.getOperand(1); - SDValue N2 = Op.getOperand(2); - if (EltVT.getSizeInBits() == 16 && isa<ConstantSDNode>(N2)) { // Transform it so it match pinsrw which expects a 16-bit value in a GR32 // as its second argument. @@ -5707,31 +6112,79 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1); if (N2.getValueType() != MVT::i32) N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue()); - return DAG.getNode(VT == MVT::v8i16 ? X86ISD::PINSRW : X86ISD::MMX_PINSRW, - dl, VT, N0, N1, N2); + return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2); } return SDValue(); } SDValue X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { + LLVMContext *Context = DAG.getContext(); DebugLoc dl = Op.getDebugLoc(); - + EVT OpVT = Op.getValueType(); + + // If this is a 256-bit vector result, first insert into a 128-bit + // vector and then insert into the 256-bit vector. + if (OpVT.getSizeInBits() > 128) { + // Insert into a 128-bit vector. + EVT VT128 = EVT::getVectorVT(*Context, + OpVT.getVectorElementType(), + OpVT.getVectorNumElements() / 2); + + Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT128, Op.getOperand(0)); + + // Insert the 128-bit vector. + return Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, OpVT), Op, + DAG.getConstant(0, MVT::i32), + DAG, dl); + } + if (Op.getValueType() == MVT::v1i64 && Op.getOperand(0).getValueType() == MVT::i64) return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0)); SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0)); - EVT VT = MVT::v2i32; - switch (Op.getValueType().getSimpleVT().SimpleTy) { - default: break; - case MVT::v16i8: - case MVT::v8i16: - VT = MVT::v4i32; - break; + assert(Op.getValueType().getSimpleVT().getSizeInBits() == 128 && + "Expected an SSE type!"); + return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), + DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,AnyExt)); +} + +// Lower a node with an EXTRACT_SUBVECTOR opcode. This may result in +// a simple subregister reference or explicit instructions to grab +// upper bits of a vector. 
+SDValue +X86TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const { + if (Subtarget->hasAVX()) { + DebugLoc dl = Op.getNode()->getDebugLoc(); + SDValue Vec = Op.getNode()->getOperand(0); + SDValue Idx = Op.getNode()->getOperand(1); + + if (Op.getNode()->getValueType(0).getSizeInBits() == 128 + && Vec.getNode()->getValueType(0).getSizeInBits() == 256) { + return Extract128BitVector(Vec, Idx, DAG, dl); + } } - return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), - DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, AnyExt)); + return SDValue(); +} + +// Lower a node with an INSERT_SUBVECTOR opcode. This may result in a +// simple superregister reference or explicit instructions to insert +// the upper bits of a vector. +SDValue +X86TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const { + if (Subtarget->hasAVX()) { + DebugLoc dl = Op.getNode()->getDebugLoc(); + SDValue Vec = Op.getNode()->getOperand(0); + SDValue SubVec = Op.getNode()->getOperand(1); + SDValue Idx = Op.getNode()->getOperand(2); + + if (Op.getNode()->getValueType(0).getSizeInBits() == 256 + && SubVec.getNode()->getValueType(0).getSizeInBits() == 128) { + return Insert128BitVector(Vec, SubVec, Idx, DAG, dl); + } + } + return SDValue(); } // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as @@ -5797,12 +6250,11 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result); // With PIC, the address is actually $g + Offset. - if (OpFlag) { + if (OpFlag) Result = DAG.getNode(ISD::ADD, DL, getPointerTy(), DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), getPointerTy()), Result); - } return Result; } @@ -5906,7 +6358,7 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, // load. if (isGlobalStubReference(OpFlags)) Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result, - PseudoSourceValue::getGOT(), 0, false, false, 0); + MachinePointerInfo::getGOT(), false, false, 0); // If there was a non-zero offset that we didn't fold, create an explicit // addition for it. @@ -5929,7 +6381,7 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg, unsigned char OperandFlags) { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); DebugLoc dl = GA->getDebugLoc(); SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0), @@ -5978,14 +6430,14 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT, TLSModel::Model model, bool is64Bit) { DebugLoc dl = GA->getDebugLoc(); - // Get the Thread Pointer - SDValue Base = DAG.getNode(X86ISD::SegmentBaseAddress, - DebugLoc(), PtrVT, - DAG.getRegister(is64Bit? X86::FS : X86::GS, - MVT::i32)); - SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Base, - NULL, 0, false, false, 0); + // Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit). + Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(*DAG.getContext(), + is64Bit ? 257 : 256)); + + SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), + DAG.getIntPtrConstant(0), + MachinePointerInfo(Ptr), false, false, 0); unsigned char OperandFlags = 0; // Most TLS accesses are not RIP relative, even on x86-64. 
One exception is @@ -6004,14 +6456,14 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial // exec) - SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, + SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), OperandFlags); SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA); if (model == TLSModel::InitialExec) Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset, - PseudoSourceValue::getGOT(), 0, false, false, 0); + MachinePointerInfo::getGOT(), false, false, 0); // The address of the thread local variable is the add of the thread // pointer with the offset of the variable. @@ -6020,29 +6472,29 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, SDValue X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { - + GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); const GlobalValue *GV = GA->getGlobal(); if (Subtarget->isTargetELF()) { // TODO: implement the "local dynamic" model // TODO: implement the "initial exec"model for pic executables - + // If GV is an alias then use the aliasee for determining // thread-localness. if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) GV = GA->resolveAliasedGlobal(false); - - TLSModel::Model model + + TLSModel::Model model = getTLSModel(GV, getTargetMachine().getRelocationModel()); - + switch (model) { case TLSModel::GeneralDynamic: case TLSModel::LocalDynamic: // not implemented if (Subtarget->is64Bit()) return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy()); return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy()); - + case TLSModel::InitialExec: case TLSModel::LocalExec: return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, @@ -6053,7 +6505,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { unsigned char OpFlag = 0; unsigned WrapperKind = Subtarget->isPICStyleRIPRel() ? X86ISD::WrapperRIP : X86ISD::Wrapper; - + // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the // global base reg. bool PIC32 = (getTargetMachine().getRelocationModel() == Reloc::PIC_) && @@ -6062,24 +6514,26 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { OpFlag = X86II::MO_TLVP_PIC_BASE; else OpFlag = X86II::MO_TLVP; - DebugLoc DL = Op.getDebugLoc(); + DebugLoc DL = Op.getDebugLoc(); SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL, - getPointerTy(), + GA->getValueType(0), GA->getOffset(), OpFlag); SDValue Offset = DAG.getNode(WrapperKind, DL, getPointerTy(), Result); - + // With PIC32, the address is actually $g + Offset. if (PIC32) Offset = DAG.getNode(ISD::ADD, DL, getPointerTy(), DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), getPointerTy()), Offset); - + // Lowering the machine isd will make sure everything is in the right // location. - SDValue Args[] = { Offset }; - SDValue Chain = DAG.getNode(X86ISD::TLSCALL, DL, MVT::Other, Args, 1); - + SDValue Chain = DAG.getEntryNode(); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue Args[] = { Chain, Offset }; + Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args, 2); + // TLSCALL will be codegen'ed as call. Inform MFI that function has calls. 
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setAdjustsStack(true); @@ -6089,7 +6543,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX; return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy()); } - + assert(false && "TLS not implemented for this target."); @@ -6148,12 +6602,8 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { EVT SrcVT = Op.getOperand(0).getValueType(); - if (SrcVT.isVector()) { - if (SrcVT == MVT::v2i32 && Op.getValueType() == MVT::v2f64) { - return Op; - } + if (SrcVT.isVector()) return SDValue(); - } assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 && "Unknown SINT_TO_FP to lower!"); @@ -6174,25 +6624,36 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot, - PseudoSourceValue::getFixedStack(SSFI), 0, + MachinePointerInfo::getFixedStack(SSFI), false, false, 0); return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG); } SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, - SDValue StackSlot, + SDValue StackSlot, SelectionDAG &DAG) const { // Build the FILD - DebugLoc dl = Op.getDebugLoc(); + DebugLoc DL = Op.getDebugLoc(); SDVTList Tys; bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType()); if (useSSE) - Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag); + Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Glue); else Tys = DAG.getVTList(Op.getValueType(), MVT::Other); + + unsigned ByteSize = SrcVT.getSizeInBits()/8; + + int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex(); + MachineMemOperand *MMO = + DAG.getMachineFunction() + .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), + MachineMemOperand::MOLoad, ByteSize, ByteSize); + SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(SrcVT) }; - SDValue Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG : X86ISD::FILD, dl, - Tys, Ops, array_lengthof(Ops)); + SDValue Result = DAG.getMemIntrinsicNode(useSSE ? X86ISD::FILD_FLAG : + X86ISD::FILD, DL, + Tys, Ops, array_lengthof(Ops), + SrcVT, MMO); if (useSSE) { Chain = Result.getValue(1); @@ -6202,15 +6663,23 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, // shouldn't be necessary except that RFP cannot be live across // multiple blocks. When stackifier is fixed, they can be uncoupled. 
MachineFunction &MF = DAG.getMachineFunction(); - int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false); + unsigned SSFISize = Op.getValueType().getSizeInBits()/8; + int SSFI = MF.getFrameInfo()->CreateStackObject(SSFISize, SSFISize, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); Tys = DAG.getVTList(MVT::Other); SDValue Ops[] = { Chain, Result, StackSlot, DAG.getValueType(Op.getValueType()), InFlag }; - Chain = DAG.getNode(X86ISD::FST, dl, Tys, Ops, array_lengthof(Ops)); - Result = DAG.getLoad(Op.getValueType(), dl, Chain, StackSlot, - PseudoSourceValue::getFixedStack(SSFI), 0, + MachineMemOperand *MMO = + DAG.getMachineFunction() + .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), + MachineMemOperand::MOStore, SSFISize, SSFISize); + + Chain = DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys, + Ops, array_lengthof(Ops), + Op.getValueType(), MMO); + Result = DAG.getLoad(Op.getValueType(), DL, Chain, StackSlot, + MachinePointerInfo::getFixedStack(SSFI), false, false, 0); } @@ -6284,12 +6753,12 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, DAG.getIntPtrConstant(0))); SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2); SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 16); SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0); - SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Unpck2); + SDValue XR2F = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Unpck2); SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 16); SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1); @@ -6317,19 +6786,19 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, DAG.getIntPtrConstant(0))); Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Load), + DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Load), DAG.getIntPtrConstant(0)); // Or the load with the bias. SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, + DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Load)), - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, + DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Bias))); Or = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Or), + DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Or), DAG.getIntPtrConstant(0)); // Subtract the bias. 
@@ -6374,24 +6843,34 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackSlot, WordOff); SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), - StackSlot, NULL, 0, false, false, 0); + StackSlot, MachinePointerInfo(), + false, false, 0); SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32), - OffsetSlot, NULL, 0, false, false, 0); + OffsetSlot, MachinePointerInfo(), + false, false, 0); SDValue Fild = BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG); return Fild; } assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP"); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), - StackSlot, NULL, 0, false, false, 0); + StackSlot, MachinePointerInfo(), + false, false, 0); // For i64 source, we need to add the appropriate power of 2 if the input // was negative. This is the same as the optimization in // DAGTypeLegalizer::ExpandIntOp_UNIT_TO_FP, and for it to be safe here, // we must be careful to do the computation in x87 extended precision, not // in SSE. (The generic code can't know it's OK to do this, or how to.) + int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex(); + MachineMemOperand *MMO = + DAG.getMachineFunction() + .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), + MachineMemOperand::MOLoad, 8, 8); + SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) }; - SDValue Fild = DAG.getNode(X86ISD::FILD, dl, Tys, Ops, 3); + SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, 3, + MVT::i64, MMO); APInt FF(32, 0x5F800000ULL); @@ -6414,9 +6893,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, // Load the value out, extending it from f32 to f80. // FIXME: Avoid the extend by constructing the right constant pool? - SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, MVT::f80, dl, DAG.getEntryNode(), - FudgePtr, PseudoSourceValue::getConstantPool(), - 0, MVT::f32, false, false, 4); + SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(), + FudgePtr, MachinePointerInfo::getConstantPool(), + MVT::f32, false, false, 4); // Extend everything to 80 bits to force it to be done on x87. 
SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge); return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0)); @@ -6424,7 +6903,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, std::pair<SDValue,SDValue> X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const { - DebugLoc dl = Op.getDebugLoc(); + DebugLoc DL = Op.getDebugLoc(); EVT DstTy = Op.getValueType(); @@ -6453,6 +6932,8 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const { int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); + + unsigned Opc; switch (DstTy.getSimpleVT().SimpleTy) { default: llvm_unreachable("Invalid FP_TO_SINT to lower!"); @@ -6463,37 +6944,43 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const { SDValue Chain = DAG.getEntryNode(); SDValue Value = Op.getOperand(0); - if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) { + EVT TheVT = Op.getOperand(0).getValueType(); + if (isScalarFPTypeInSSEReg(TheVT)) { assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!"); - Chain = DAG.getStore(Chain, dl, Value, StackSlot, - PseudoSourceValue::getFixedStack(SSFI), 0, + Chain = DAG.getStore(Chain, DL, Value, StackSlot, + MachinePointerInfo::getFixedStack(SSFI), false, false, 0); SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); SDValue Ops[] = { - Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType()) + Chain, StackSlot, DAG.getValueType(TheVT) }; - Value = DAG.getNode(X86ISD::FLD, dl, Tys, Ops, 3); + + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), + MachineMemOperand::MOLoad, MemSize, MemSize); + Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, 3, + DstTy, MMO); Chain = Value.getValue(1); SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false); StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); } + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), + MachineMemOperand::MOStore, MemSize, MemSize); + // Build the FP_TO_INT*_IN_MEM SDValue Ops[] = { Chain, Value, StackSlot }; - SDValue FIST = DAG.getNode(Opc, dl, MVT::Other, Ops, 3); + SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other), + Ops, 3, DstTy, MMO); return std::make_pair(FIST, StackSlot); } SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const { - if (Op.getValueType().isVector()) { - if (Op.getValueType() == MVT::v2i32 && - Op.getOperand(0).getValueType() == MVT::v2f64) { - return Op; - } + if (Op.getValueType().isVector()) return SDValue(); - } std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, true); SDValue FIST = Vals.first, StackSlot = Vals.second; @@ -6502,7 +6989,7 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, // Load the result. return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(), - FIST, StackSlot, NULL, 0, false, false, 0); + FIST, StackSlot, MachinePointerInfo(), false, false, 0); } SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, @@ -6513,7 +7000,7 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, // Load the result. 
return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(), - FIST, StackSlot, NULL, 0, false, false, 0); + FIST, StackSlot, MachinePointerInfo(), false, false, 0); } SDValue X86TargetLowering::LowerFABS(SDValue Op, @@ -6539,7 +7026,7 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 16); return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask); } @@ -6566,14 +7053,14 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const { Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 16); if (VT.isVector()) { - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, + return DAG.getNode(ISD::BITCAST, dl, VT, DAG.getNode(ISD::XOR, dl, MVT::v2i64, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, + DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op.getOperand(0)), - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, Mask))); + DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Mask))); } else { return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask); } @@ -6615,7 +7102,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 16); SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1); @@ -6625,7 +7112,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, SignBit); SignBit = DAG.getNode(X86ISD::FSRL, dl, MVT::v2f64, SignBit, DAG.getConstant(32, MVT::i32)); - SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, SignBit); + SignBit = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, SignBit); SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, SignBit, DAG.getIntPtrConstant(0)); } @@ -6644,7 +7131,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { C = ConstantVector::get(CV); CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 16); SDValue Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Mask2); @@ -6884,8 +7371,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { // Lower (X & (1 << N)) == 0 to BT(X, N). // Lower ((X >>u N) & 1) != 0 to BT(X, N). // Lower ((X >>s N) & 1) != 0 to BT(X, N). - if (Op0.getOpcode() == ISD::AND && - Op0.hasOneUse() && + if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && Op1.getOpcode() == ISD::Constant && cast<ConstantSDNode>(Op1)->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { @@ -6894,19 +7380,25 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { return NewSetCC; } - // Look for "(setcc) == / != 1" to avoid unncessary setcc. - if (Op0.getOpcode() == X86ISD::SETCC && - Op1.getOpcode() == ISD::Constant && + // Look for X == 0, X == 1, X != 0, or X != 1. 
We can simplify some forms of + // these. + if (Op1.getOpcode() == ISD::Constant && (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 || cast<ConstantSDNode>(Op1)->isNullValue()) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { - X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0); - bool Invert = (CC == ISD::SETNE) ^ - cast<ConstantSDNode>(Op1)->isNullValue(); - if (Invert) + + // If the input is a setcc, then reuse the input setcc or use a new one with + // the inverted condition. + if (Op0.getOpcode() == X86ISD::SETCC) { + X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0); + bool Invert = (CC == ISD::SETNE) ^ + cast<ConstantSDNode>(Op1)->isNullValue(); + if (!Invert) return Op0; + CCode = X86::GetOppositeBranchCondition(CCode); - return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, - DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1)); + return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1)); + } } bool isFP = Op1.getValueType().isFloatingPoint(); @@ -6914,17 +7406,9 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { if (X86CC == X86::COND_INVALID) return SDValue(); - SDValue Cond = EmitCmp(Op0, Op1, X86CC, DAG); - - // Use sbb x, x to materialize carry bit into a GPR. - if (X86CC == X86::COND_B) - return DAG.getNode(ISD::AND, dl, MVT::i8, - DAG.getNode(X86ISD::SETCC_CARRY, dl, MVT::i8, - DAG.getConstant(X86CC, MVT::i8), Cond), - DAG.getConstant(1, MVT::i8)); - + SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG); return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, - DAG.getConstant(X86CC, MVT::i8), Cond); + DAG.getConstant(X86CC, MVT::i8), EFLAGS); } SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { @@ -6996,11 +7480,8 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { switch (VT.getSimpleVT().SimpleTy) { default: break; - case MVT::v8i8: case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break; - case MVT::v4i16: case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break; - case MVT::v2i32: case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break; case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break; } @@ -7051,6 +7532,8 @@ static bool isX86LogicalCmp(SDValue Op) { if (Op.getResNo() == 1 && (Opc == X86ISD::ADD || Opc == X86ISD::SUB || + Opc == X86ISD::ADC || + Opc == X86ISD::SBB || Opc == X86ISD::SMUL || Opc == X86ISD::UMUL || Opc == X86ISD::INC || @@ -7060,13 +7543,28 @@ static bool isX86LogicalCmp(SDValue Op) { Opc == X86ISD::AND)) return true; + if (Op.getResNo() == 2 && Opc == X86ISD::UMUL) + return true; + return false; } +static bool isZero(SDValue V) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(V); + return C && C->isNullValue(); +} + +static bool isAllOnes(SDValue V) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(V); + return C && C->isAllOnesValue(); +} + SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { bool addTest = true; SDValue Cond = Op.getOperand(0); - DebugLoc dl = Op.getDebugLoc(); + SDValue Op1 = Op.getOperand(1); + SDValue Op2 = Op.getOperand(2); + DebugLoc DL = Op.getDebugLoc(); SDValue CC; if (Cond.getOpcode() == ISD::SETCC) { @@ -7075,34 +7573,44 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { Cond = NewCond; } - // (select (x == 0), -1, 0) -> (sign_bit (x - 1)) - SDValue Op1 = Op.getOperand(1); - SDValue Op2 = Op.getOperand(2); + // (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y + // 
(select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y + // (select (x != 0), y, -1) -> (sign_bit (x - 1)) | y + // (select (x != 0), -1, y) -> ~(sign_bit (x - 1)) | y if (Cond.getOpcode() == X86ISD::SETCC && - cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue() == X86::COND_E) { + Cond.getOperand(1).getOpcode() == X86ISD::CMP && + isZero(Cond.getOperand(1).getOperand(1))) { SDValue Cmp = Cond.getOperand(1); - if (Cmp.getOpcode() == X86ISD::CMP) { - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(Op1); + + unsigned CondCode =cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue(); + + if ((isAllOnes(Op1) || isAllOnes(Op2)) && + (CondCode == X86::COND_E || CondCode == X86::COND_NE)) { + SDValue Y = isAllOnes(Op2) ? Op1 : Op2; + + SDValue CmpOp0 = Cmp.getOperand(0); + Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, + CmpOp0, DAG.getConstant(1, CmpOp0.getValueType())); + + SDValue Res = // Res = 0 or -1. + DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), + DAG.getConstant(X86::COND_B, MVT::i8), Cmp); + + if (isAllOnes(Op1) != (CondCode == X86::COND_E)) + Res = DAG.getNOT(DL, Res, Res.getValueType()); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(Op2); - ConstantSDNode *RHSC = - dyn_cast<ConstantSDNode>(Cmp.getOperand(1).getNode()); - if (N1C && N1C->isAllOnesValue() && - N2C && N2C->isNullValue() && - RHSC && RHSC->isNullValue()) { - SDValue CmpOp0 = Cmp.getOperand(0); - Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32, - CmpOp0, DAG.getConstant(1, CmpOp0.getValueType())); - return DAG.getNode(X86ISD::SETCC_CARRY, dl, Op.getValueType(), - DAG.getConstant(X86::COND_B, MVT::i8), Cmp); - } + if (N2C == 0 || !N2C->isNullValue()) + Res = DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y); + return Res; } } - // Look pass (and (setcc_carry (cmp ...)), 1). + // Look past (and (setcc_carry (cmp ...)), 1). if (Cond.getOpcode() == ISD::AND && Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) { ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1)); - if (C && C->getAPIntValue() == 1) + if (C && C->getAPIntValue() == 1) Cond = Cond.getOperand(0); } @@ -7135,8 +7643,8 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // We know the result of AND is compared against zero. Try to match // it to BT. - if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { - SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG); + if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { + SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, DL, DAG); if (NewSetCC.getNode()) { CC = NewSetCC.getOperand(0); Cond = NewSetCC.getOperand(1); @@ -7150,11 +7658,28 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { Cond = EmitTest(Cond, X86::COND_NE, DAG); } + // a < b ? -1 : 0 -> RES = ~setcc_carry + // a < b ? 0 : -1 -> RES = setcc_carry + // a >= b ? -1 : 0 -> RES = setcc_carry + // a >= b ? 0 : -1 -> RES = ~setcc_carry + if (Cond.getOpcode() == X86ISD::CMP) { + unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue(); + + if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) && + (isAllOnes(Op1) || isAllOnes(Op2)) && (isZero(Op1) || isZero(Op2))) { + SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), + DAG.getConstant(X86::COND_B, MVT::i8), Cond); + if (isAllOnes(Op1) != (CondCode == X86::COND_B)) + return DAG.getNOT(DL, Res, Res.getValueType()); + return Res; + } + } + // X86ISD::CMOV means set the result (which is operand 1) to the RHS if // condition is true. 
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag); + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); SDValue Ops[] = { Op2, Op1, CC, Cond }; - return DAG.getNode(X86ISD::CMOV, dl, VTs, Ops, array_lengthof(Ops)); + return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops)); } // isAndOrOfSingleUseSetCCs - Return true if node is an ISD::AND or @@ -7209,7 +7734,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { if (Cond.getOpcode() == ISD::AND && Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) { ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1)); - if (C && C->getAPIntValue() == 1) + if (C && C->getAPIntValue() == 1) Cond = Cond.getOperand(0); } @@ -7310,7 +7835,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { // We know the result of AND is compared against zero. Try to match // it to BT. - if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { + if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG); if (NewSetCC.getNode()) { CC = NewSetCC.getOperand(0); @@ -7337,8 +7862,8 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { - assert(Subtarget->isTargetCygMing() && - "This should be used only on Cygwin/Mingw targets"); + assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows()) && + "This should be used only on Windows targets"); DebugLoc dl = Op.getDebugLoc(); // Get the inputs. @@ -7353,9 +7878,9 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag); Flag = Chain.getValue(1); - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - Chain = DAG.getNode(X86ISD::MINGW_ALLOCA, dl, NodeTys, Chain, Flag); + Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag); Flag = Chain.getValue(1); Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1); @@ -7369,15 +7894,15 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - DebugLoc dl = Op.getDebugLoc(); + DebugLoc DL = Op.getDebugLoc(); - if (!Subtarget->is64Bit()) { + if (!Subtarget->is64Bit() || Subtarget->isTargetWin64()) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. 
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), getPointerTy()); - return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, - false, false, 0); + return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1), + MachinePointerInfo(SV), false, false, 0); } // __va_list_tag: @@ -7388,48 +7913,107 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { SmallVector<SDValue, 8> MemOps; SDValue FIN = Op.getOperand(1); // Store gp_offset - SDValue Store = DAG.getStore(Op.getOperand(0), dl, + SDValue Store = DAG.getStore(Op.getOperand(0), DL, DAG.getConstant(FuncInfo->getVarArgsGPOffset(), MVT::i32), - FIN, SV, 0, false, false, 0); + FIN, MachinePointerInfo(SV), false, false, 0); MemOps.push_back(Store); // Store fp_offset - FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), + FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, DAG.getIntPtrConstant(4)); - Store = DAG.getStore(Op.getOperand(0), dl, + Store = DAG.getStore(Op.getOperand(0), DL, DAG.getConstant(FuncInfo->getVarArgsFPOffset(), MVT::i32), - FIN, SV, 4, false, false, 0); + FIN, MachinePointerInfo(SV, 4), false, false, 0); MemOps.push_back(Store); // Store ptr to overflow_arg_area - FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), + FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, DAG.getIntPtrConstant(4)); SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), getPointerTy()); - Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 8, + Store = DAG.getStore(Op.getOperand(0), DL, OVFIN, FIN, + MachinePointerInfo(SV, 8), false, false, 0); MemOps.push_back(Store); // Store ptr to reg_save_area. - FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), + FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, DAG.getIntPtrConstant(8)); SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), getPointerTy()); - Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 16, - false, false, 0); + Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN, + MachinePointerInfo(SV, 16), false, false, 0); MemOps.push_back(Store); - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0], MemOps.size()); } SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { - // X86-64 va_list is a struct { i32, i32, i8*, i8* }. - assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!"); + assert(Subtarget->is64Bit() && + "LowerVAARG only handles 64-bit va_arg!"); + assert((Subtarget->isTargetLinux() || + Subtarget->isTargetDarwin()) && + "Unhandled target in LowerVAARG"); + assert(Op.getNode()->getNumOperands() == 4); + SDValue Chain = Op.getOperand(0); + SDValue SrcPtr = Op.getOperand(1); + const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); + unsigned Align = Op.getConstantOperandVal(3); + DebugLoc dl = Op.getDebugLoc(); - report_fatal_error("VAArgInst is not yet implemented for x86-64!"); - return SDValue(); + EVT ArgVT = Op.getNode()->getValueType(0); + const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + uint32_t ArgSize = getTargetData()->getTypeAllocSize(ArgTy); + uint8_t ArgMode; + + // Decide which area this value should be read from. + // TODO: Implement the AMD64 ABI in its entirety. This simple + // selection mechanism works only for the basic types. + if (ArgVT == MVT::f80) { + llvm_unreachable("va_arg for f80 not yet implemented"); + } else if (ArgVT.isFloatingPoint() && ArgSize <= 16 /*bytes*/) { + ArgMode = 2; // Argument passed in XMM register. 
Use fp_offset. + } else if (ArgVT.isInteger() && ArgSize <= 32 /*bytes*/) { + ArgMode = 1; // Argument passed in GPR64 register(s). Use gp_offset. + } else { + llvm_unreachable("Unhandled argument type in LowerVAARG"); + } + + if (ArgMode == 2) { + // Sanity Check: Make sure using fp_offset makes sense. + assert(!UseSoftFloat && + !(DAG.getMachineFunction() + .getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) && + Subtarget->hasXMM()); + } + + // Insert VAARG_64 node into the DAG + // VAARG_64 returns two values: Variable Argument Address, Chain + SmallVector<SDValue, 11> InstOps; + InstOps.push_back(Chain); + InstOps.push_back(SrcPtr); + InstOps.push_back(DAG.getConstant(ArgSize, MVT::i32)); + InstOps.push_back(DAG.getConstant(ArgMode, MVT::i8)); + InstOps.push_back(DAG.getConstant(Align, MVT::i32)); + SDVTList VTs = DAG.getVTList(getPointerTy(), MVT::Other); + SDValue VAARG = DAG.getMemIntrinsicNode(X86ISD::VAARG_64, dl, + VTs, &InstOps[0], InstOps.size(), + MVT::i64, + MachinePointerInfo(SV), + /*Align=*/0, + /*Volatile=*/false, + /*ReadMem=*/true, + /*WriteMem=*/true); + Chain = VAARG.getValue(1); + + // Load the next argument and return it + return DAG.getLoad(ArgVT, dl, + Chain, + VAARG, + MachinePointerInfo(), + false, false, 0); } SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { @@ -7440,11 +8024,12 @@ SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { SDValue SrcPtr = Op.getOperand(2); const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); - DebugLoc dl = Op.getDebugLoc(); + DebugLoc DL = Op.getDebugLoc(); - return DAG.getMemcpy(Chain, dl, DstPtr, SrcPtr, + return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(24), 8, /*isVolatile*/false, - false, DstSV, 0, SrcSV, 0); + false, + MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } SDValue @@ -7713,10 +8298,11 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 4); } else { ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2); +// FIXME this must be lowered to get rid of the invalid type. } EVT VT = Op.getValueType(); - ShAmt = DAG.getNode(ISD::BIT_CONVERT, dl, VT, ShAmt); + ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(NewIntNo, MVT::i32), Op.getOperand(1), ShAmt); @@ -7740,13 +8326,13 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameAddr, Offset), - NULL, 0, false, false, 0); + MachinePointerInfo(), false, false, 0); } // Just load the return address. SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - RetAddrFI, NULL, 0, false, false, 0); + RetAddrFI, MachinePointerInfo(), false, false, 0); } SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { @@ -7759,7 +8345,8 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { unsigned FrameReg = Subtarget->is64Bit() ? 
X86::RBP : X86::EBP; SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) - FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0, + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, + MachinePointerInfo(), false, false, 0); return FrameAddr; } @@ -7784,7 +8371,8 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame, DAG.getIntPtrConstant(TD->getPointerSize())); StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset); - Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0, false, false, 0); + Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(), + false, false, 0); Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr); MF.getRegInfo().addLiveOut(StoreAddrReg); @@ -7819,11 +8407,13 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11 SDValue Addr = Trmp; OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16), - Addr, TrmpAddr, 0, false, false, 0); + Addr, MachinePointerInfo(TrmpAddr), + false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(2, MVT::i64)); - OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, TrmpAddr, 2, + OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, + MachinePointerInfo(TrmpAddr, 2), false, false, 2); // Load the 'nest' parameter value into R10. @@ -7832,11 +8422,13 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(10, MVT::i64)); OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16), - Addr, TrmpAddr, 10, false, false, 0); + Addr, MachinePointerInfo(TrmpAddr, 10), + false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(12, MVT::i64)); - OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 12, + OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, + MachinePointerInfo(TrmpAddr, 12), false, false, 2); // Jump to the nested function. @@ -7844,13 +8436,15 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(20, MVT::i64)); OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16), - Addr, TrmpAddr, 20, false, false, 0); + Addr, MachinePointerInfo(TrmpAddr, 20), + false, false, 0); unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11 Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(22, MVT::i64)); OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, MVT::i8), Addr, - TrmpAddr, 22, false, false, 0); + MachinePointerInfo(TrmpAddr, 22), + false, false, 0); SDValue Ops[] = { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6) }; @@ -7912,22 +8506,26 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, const unsigned char N86Reg = RegInfo->getX86RegNum(NestReg); OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(MOV32ri|N86Reg, MVT::i8), - Trmp, TrmpAddr, 0, false, false, 0); + Trmp, MachinePointerInfo(TrmpAddr), + false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32)); - OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 1, + OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, + MachinePointerInfo(TrmpAddr, 1), false, false, 1); const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode. 
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32)); OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, MVT::i8), Addr, - TrmpAddr, 5, false, false, 1); + MachinePointerInfo(TrmpAddr, 5), + false, false, 1); Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32)); - OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, TrmpAddr, 6, + OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, + MachinePointerInfo(TrmpAddr, 6), false, false, 1); SDValue Ops[] = @@ -7959,44 +8557,51 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); const TargetMachine &TM = MF.getTarget(); - const TargetFrameInfo &TFI = *TM.getFrameInfo(); + const TargetFrameLowering &TFI = *TM.getFrameLowering(); unsigned StackAlignment = TFI.getStackAlignment(); EVT VT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + DebugLoc DL = Op.getDebugLoc(); // Save FP Control Word to stack slot int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); - SDValue Chain = DAG.getNode(X86ISD::FNSTCW16m, dl, MVT::Other, - DAG.getEntryNode(), StackSlot); + + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), + MachineMemOperand::MOStore, 2, 2); + + SDValue Ops[] = { DAG.getEntryNode(), StackSlot }; + SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL, + DAG.getVTList(MVT::Other), + Ops, 2, MVT::i16, MMO); // Load FP Control Word from stack slot - SDValue CWD = DAG.getLoad(MVT::i16, dl, Chain, StackSlot, NULL, 0, - false, false, 0); + SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot, + MachinePointerInfo(), false, false, 0); // Transform as necessary SDValue CWD1 = - DAG.getNode(ISD::SRL, dl, MVT::i16, - DAG.getNode(ISD::AND, dl, MVT::i16, + DAG.getNode(ISD::SRL, DL, MVT::i16, + DAG.getNode(ISD::AND, DL, MVT::i16, CWD, DAG.getConstant(0x800, MVT::i16)), DAG.getConstant(11, MVT::i8)); SDValue CWD2 = - DAG.getNode(ISD::SRL, dl, MVT::i16, - DAG.getNode(ISD::AND, dl, MVT::i16, + DAG.getNode(ISD::SRL, DL, MVT::i16, + DAG.getNode(ISD::AND, DL, MVT::i16, CWD, DAG.getConstant(0x400, MVT::i16)), DAG.getConstant(9, MVT::i8)); SDValue RetVal = - DAG.getNode(ISD::AND, dl, MVT::i16, - DAG.getNode(ISD::ADD, dl, MVT::i16, - DAG.getNode(ISD::OR, dl, MVT::i16, CWD1, CWD2), + DAG.getNode(ISD::AND, DL, MVT::i16, + DAG.getNode(ISD::ADD, DL, MVT::i16, + DAG.getNode(ISD::OR, DL, MVT::i16, CWD1, CWD2), DAG.getConstant(1, MVT::i16)), DAG.getConstant(3, MVT::i16)); return DAG.getNode((VT.getSizeInBits() < 16 ? 
- ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal); + ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal); } SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const { @@ -8122,16 +8727,16 @@ SDValue X86TargetLowering::LowerSHL(SDValue Op, SelectionDAG &DAG) const { Op.getOperand(1), DAG.getConstant(23, MVT::i32)); ConstantInt *CI = ConstantInt::get(*Context, APInt(32, 0x3f800000U)); - + std::vector<Constant*> CV(4, CI); Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 16); Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend); - Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, Op); + Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op); Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op); return DAG.getNode(ISD::MUL, dl, VT, Op, R); } @@ -8149,7 +8754,7 @@ SDValue X86TargetLowering::LowerSHL(SDValue Op, SelectionDAG &DAG) const { Constant *C = ConstantVector::get(CVM1); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 16); // r = pblendv(r, psllw(r & (char16)15, 4), a); @@ -8157,31 +8762,27 @@ SDValue X86TargetLowering::LowerSHL(SDValue Op, SelectionDAG &DAG) const { M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M, DAG.getConstant(4, MVT::i32)); - R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32), - R, M, Op); + R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op); // a += a Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op); - + C = ConstantVector::get(CVM2); CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, false, false, 16); - + MachinePointerInfo::getConstantPool(), + false, false, 16); + // r = pblendv(r, psllw(r & (char16)63, 2), a); M = DAG.getNode(ISD::AND, dl, VT, R, M); M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M, DAG.getConstant(2, MVT::i32)); - R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32), - R, M, Op); + R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op); // a += a Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op); - + // return pblendv(r, r+r, a); - R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32), + R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, DAG.getNode(ISD::ADD, dl, VT, R, R), Op); return R; } @@ -8198,8 +8799,7 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { SDValue RHS = N->getOperand(1); unsigned BaseOp = 0; unsigned Cond = 0; - DebugLoc dl = Op.getDebugLoc(); - + DebugLoc DL = Op.getDebugLoc(); switch (Op.getOpcode()) { default: llvm_unreachable("Unknown ovf instruction!"); case ISD::SADDO: @@ -8238,19 +8838,29 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { BaseOp = X86ISD::SMUL; Cond = X86::COND_O; break; - case ISD::UMULO: - BaseOp = X86ISD::UMUL; - Cond = X86::COND_B; - break; + case ISD::UMULO: { // i64, i8 = umulo lhs, rhs --> i64, i64, i32 umul lhs,rhs + SDVTList VTs = DAG.getVTList(N->getValueType(0), N->getValueType(0), + MVT::i32); + SDValue 
Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS); + + SDValue SetCC = + DAG.getNode(X86ISD::SETCC, DL, MVT::i8, + DAG.getConstant(X86::COND_O, MVT::i32), + SDValue(Sum.getNode(), 2)); + + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC); + return Sum; + } } // Also sets EFLAGS. SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32); - SDValue Sum = DAG.getNode(BaseOp, dl, VTs, LHS, RHS); + SDValue Sum = DAG.getNode(BaseOp, DL, VTs, LHS, RHS); SDValue SetCC = - DAG.getNode(X86ISD::SETCC, dl, N->getValueType(1), - DAG.getConstant(Cond, MVT::i32), SDValue(Sum.getNode(), 1)); + DAG.getNode(X86ISD::SETCC, DL, N->getValueType(1), + DAG.getConstant(Cond, MVT::i32), + SDValue(Sum.getNode(), 1)); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC); return Sum; @@ -8258,10 +8868,10 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{ DebugLoc dl = Op.getDebugLoc(); - + if (!Subtarget->hasSSE2()) { SDValue Chain = Op.getOperand(0); - SDValue Zero = DAG.getConstant(0, + SDValue Zero = DAG.getConstant(0, Subtarget->is64Bit() ? MVT::i64 : MVT::i32); SDValue Ops[] = { DAG.getRegister(X86::ESP, MVT::i32), // Base @@ -8272,37 +8882,37 @@ SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{ Zero, Chain }; - SDNode *Res = + SDNode *Res = DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops, array_lengthof(Ops)); return SDValue(Res, 0); } - + unsigned isDev = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue(); if (!isDev) return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); - + unsigned Op1 = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); unsigned Op2 = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); unsigned Op3 = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); unsigned Op4 = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); - + // def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>; if (!Op1 && !Op2 && !Op3 && Op4) return DAG.getNode(X86ISD::SFENCE, dl, MVT::Other, Op.getOperand(0)); - + // def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>; if (Op1 && !Op2 && !Op3 && !Op4) return DAG.getNode(X86ISD::LFENCE, dl, MVT::Other, Op.getOperand(0)); - - // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)), + + // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)), // (MFENCE)>; return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0)); } SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const { EVT T = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + DebugLoc DL = Op.getDebugLoc(); unsigned Reg = 0; unsigned size = 0; switch(T.getSimpleVT().SimpleTy) { @@ -8316,24 +8926,26 @@ SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const { Reg = X86::RAX; size = 8; break; } - SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), dl, Reg, + SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg, Op.getOperand(2), SDValue()); SDValue Ops[] = { cpIn.getValue(0), Op.getOperand(1), Op.getOperand(3), DAG.getTargetConstant(size, MVT::i8), cpIn.getValue(1) }; - SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); - SDValue Result = DAG.getNode(X86ISD::LCMPXCHG_DAG, dl, Tys, Ops, 5); + SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); + MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand(); + SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys, + Ops, 5, 
T, MMO); SDValue cpOut = - DAG.getCopyFromReg(Result.getValue(0), dl, Reg, T, Result.getValue(1)); + DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1)); return cpOut; } SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->is64Bit() && "Result not type legalized?"); - SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); + SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue TheChain = Op.getOperand(0); DebugLoc dl = Op.getDebugLoc(); SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1); @@ -8349,16 +8961,15 @@ SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op, return DAG.getMergeValues(Ops, 2, dl); } -SDValue X86TargetLowering::LowerBIT_CONVERT(SDValue Op, +SDValue X86TargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const { EVT SrcVT = Op.getOperand(0).getValueType(); EVT DstVT = Op.getValueType(); - assert((Subtarget->is64Bit() && !Subtarget->hasSSE2() && - Subtarget->hasMMX() && !DisableMMX) && - "Unexpected custom BIT_CONVERT"); - assert((DstVT == MVT::i64 || + assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() && + Subtarget->hasMMX() && "Unexpected custom BITCAST"); + assert((DstVT == MVT::i64 || (DstVT.isVector() && DstVT.getSizeInBits()==64)) && - "Unexpected custom BIT_CONVERT"); + "Unexpected custom BITCAST"); // i64 <=> MMX conversions are Legal. if (SrcVT==MVT::i64 && DstVT.isVector()) return Op; @@ -8370,6 +8981,7 @@ SDValue X86TargetLowering::LowerBIT_CONVERT(SDValue Op, // All other conversions need to be expanded. return SDValue(); } + SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); @@ -8384,6 +8996,32 @@ SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const { cast<AtomicSDNode>(Node)->getAlignment()); } +static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getNode()->getValueType(0); + + // Let legalize expand this if it isn't a legal type yet. + if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) + return SDValue(); + + SDVTList VTs = DAG.getVTList(VT, MVT::i32); + + unsigned Opc; + bool ExtraOp = false; + switch (Op.getOpcode()) { + default: assert(0 && "Invalid code"); + case ISD::ADDC: Opc = X86ISD::ADD; break; + case ISD::ADDE: Opc = X86ISD::ADC; ExtraOp = true; break; + case ISD::SUBC: Opc = X86ISD::SUB; break; + case ISD::SUBE: Opc = X86ISD::SBB; ExtraOp = true; break; + } + + if (!ExtraOp) + return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0), + Op.getOperand(1)); + return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0), + Op.getOperand(1), Op.getOperand(2)); +} + /// LowerOperation - Provide custom lowering hooks for some operations. 
/// SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { @@ -8397,6 +9035,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); + case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG); + case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, DAG); case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); @@ -8441,7 +9081,11 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SMULO: case ISD::UMULO: return LowerXALUO(Op, DAG); case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG); - case ISD::BIT_CONVERT: return LowerBIT_CONVERT(Op, DAG); + case ISD::BITCAST: return LowerBITCAST(Op, DAG); + case ISD::ADDC: + case ISD::ADDE: + case ISD::SUBC: + case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); } } @@ -8478,6 +9122,12 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, default: assert(false && "Do not know how to custom type legalize this operation!"); return; + case ISD::ADDC: + case ISD::ADDE: + case ISD::SUBC: + case ISD::SUBE: + // We don't want to expand or promote these. + return; case ISD::FP_TO_SINT: { std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(SDValue(N, 0), DAG, true); @@ -8485,13 +9135,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, if (FIST.getNode() != 0) { EVT VT = N->getValueType(0); // Return a load from the stack slot. - Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, NULL, 0, - false, false, 0)); + Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, + MachinePointerInfo(), false, false, 0)); } return; } case ISD::READCYCLECOUNTER: { - SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); + SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue TheChain = N->getOperand(0); SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1); SDValue eax = DAG.getCopyFromReg(rd, dl, X86::EAX, MVT::i32, @@ -8527,8 +9177,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, SDValue Ops[] = { swapInH.getValue(0), N->getOperand(1), swapInH.getValue(1) }; - SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); - SDValue Result = DAG.getNode(X86ISD::LCMPXCHG8_DAG, dl, Tys, Ops, 3); + SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); + MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand(); + SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG8_DAG, dl, Tys, + Ops, 3, T, MMO); SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, X86::EAX, MVT::i32, Result.getValue(1)); SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl, X86::EDX, @@ -8601,15 +9253,18 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::INSERTPS: return "X86ISD::INSERTPS"; case X86ISD::PINSRB: return "X86ISD::PINSRB"; case X86ISD::PINSRW: return "X86ISD::PINSRW"; - case X86ISD::MMX_PINSRW: return "X86ISD::MMX_PINSRW"; case X86ISD::PSHUFB: return "X86ISD::PSHUFB"; + case X86ISD::PANDN: return "X86ISD::PANDN"; + case X86ISD::PSIGNB: return "X86ISD::PSIGNB"; + case X86ISD::PSIGNW: return "X86ISD::PSIGNW"; + case X86ISD::PSIGND: return "X86ISD::PSIGND"; + case X86ISD::PBLENDVB: return "X86ISD::PBLENDVB"; case X86ISD::FMAX: return "X86ISD::FMAX"; 
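The new ADDC/ADDE (and SUBC/SUBE) cases handled above map multi-word arithmetic onto X86ISD::ADD/ADC and SUB/SBB, i.e. onto the flag-producing and flag-consuming forms of the instructions. As a rough illustration of what that pair computes, here is the same carry chain written in portable C++; add128 and its operand names are invented for the example and are not part of the patch.

    #include <cstdint>
    #include <cstdio>

    // Add two 128-bit values held as (lo, hi) 64-bit halves.
    // The low half corresponds to ADDC / X86ISD::ADD (produces the carry),
    // the high half to ADDE / X86ISD::ADC (consumes it).
    static void add128(uint64_t alo, uint64_t ahi, uint64_t blo, uint64_t bhi,
                       uint64_t &rlo, uint64_t &rhi) {
      rlo = alo + blo;
      uint64_t carry = (rlo < alo);   // carry out of the low addition
      rhi = ahi + bhi + carry;        // add-with-carry on the high half
    }

    int main() {
      uint64_t lo, hi;
      add128(~0ULL, 0, 1, 0, lo, hi); // (2^64 - 1) + 1 = 2^64
      std::printf("hi=%llu lo=%llu\n",
                  (unsigned long long)hi, (unsigned long long)lo); // hi=1 lo=0
    }
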
case X86ISD::FMIN: return "X86ISD::FMIN"; case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; case X86ISD::FRCP: return "X86ISD::FRCP"; case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; case X86ISD::TLSCALL: return "X86ISD::TLSCALL"; - case X86ISD::SegmentBaseAddress: return "X86ISD::SegmentBaseAddress"; case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN"; case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m"; @@ -8637,6 +9292,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PCMPGTQ: return "X86ISD::PCMPGTQ"; case X86ISD::ADD: return "X86ISD::ADD"; case X86ISD::SUB: return "X86ISD::SUB"; + case X86ISD::ADC: return "X86ISD::ADC"; + case X86ISD::SBB: return "X86ISD::SBB"; case X86ISD::SMUL: return "X86ISD::SMUL"; case X86ISD::UMUL: return "X86ISD::UMUL"; case X86ISD::INC: return "X86ISD::INC"; @@ -8681,7 +9338,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ"; case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ"; case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; - case X86ISD::MINGW_ALLOCA: return "X86ISD::MINGW_ALLOCA"; + case X86ISD::VAARG_64: return "X86ISD::VAARG_64"; + case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA"; } } @@ -9203,15 +9861,12 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, MachineBasicBlock * X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, unsigned numArgs, bool memArg) const { - assert((Subtarget->hasSSE42() || Subtarget->hasAVX()) && "Target must have SSE4.2 or AVX features enabled"); DebugLoc dl = MI->getDebugLoc(); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - unsigned Opc; - if (!Subtarget->hasAVX()) { if (memArg) Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm; @@ -9224,24 +9879,318 @@ X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, Opc = numArgs == 3 ? X86::VPCMPISTRM128rr : X86::VPCMPESTRM128rr; } - MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(Opc)); - + MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc)); for (unsigned i = 0; i < numArgs; ++i) { MachineOperand &Op = MI->getOperand(i+1); - if (!(Op.isReg() && Op.isImplicit())) MIB.addOperand(Op); } - - BuildMI(BB, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg()) + BuildMI(*BB, MI, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg()) .addReg(X86::XMM0); MI->eraseFromParent(); + return BB; +} +MachineBasicBlock * +X86TargetLowering::EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB) const { + DebugLoc dl = MI->getDebugLoc(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + // Address into RAX/EAX, other two args into ECX, EDX. + unsigned MemOpc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r; + unsigned MemReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX; + MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg); + for (int i = 0; i < X86::AddrNumOperands; ++i) + MIB.addOperand(MI->getOperand(i)); + + unsigned ValOps = X86::AddrNumOperands; + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX) + .addReg(MI->getOperand(ValOps).getReg()); + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EDX) + .addReg(MI->getOperand(ValOps+1).getReg()); + + // The instruction doesn't actually take any operands though. + BuildMI(*BB, MI, dl, TII->get(X86::MONITORrrr)); + + MI->eraseFromParent(); // The pseudo is gone now. 
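EmitMonitor above (and EmitMwait just below) only marshal operands into the fixed registers the instructions read: MONITOR takes the address in RAX/EAX with its two hint words in ECX and EDX, and MWAIT reads ECX and EAX. A minimal sketch of the same convention using the standard SSE3 intrinsics follows; note that MONITOR/MWAIT are normally privileged, so this is meant to show the operand mapping, not something to run unprivileged.

    #include <pmmintrin.h>

    // _mm_monitor(addr, extensions, hints): addr -> RAX/EAX, extensions -> ECX, hints -> EDX.
    // _mm_mwait(extensions, hints):         extensions -> ECX, hints -> EAX.
    static void wait_for_store(volatile int *flag) {
      _mm_monitor((const void *)flag, /*extensions=*/0, /*hints=*/0);
      if (*flag == 0)
        _mm_mwait(/*extensions=*/0, /*hints=*/0);
    }
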
+ return BB; +} + +MachineBasicBlock * +X86TargetLowering::EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const { + DebugLoc dl = MI->getDebugLoc(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + // First arg in ECX, the second in EAX. + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX) + .addReg(MI->getOperand(0).getReg()); + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX) + .addReg(MI->getOperand(1).getReg()); + + // The instruction doesn't actually take any operands though. + BuildMI(*BB, MI, dl, TII->get(X86::MWAITrr)); + + MI->eraseFromParent(); // The pseudo is gone now. return BB; } MachineBasicBlock * +X86TargetLowering::EmitVAARG64WithCustomInserter( + MachineInstr *MI, + MachineBasicBlock *MBB) const { + // Emit va_arg instruction on X86-64. + + // Operands to this pseudo-instruction: + // 0 ) Output : destination address (reg) + // 1-5) Input : va_list address (addr, i64mem) + // 6 ) ArgSize : Size (in bytes) of vararg type + // 7 ) ArgMode : 0=overflow only, 1=use gp_offset, 2=use fp_offset + // 8 ) Align : Alignment of type + // 9 ) EFLAGS (implicit-def) + + assert(MI->getNumOperands() == 10 && "VAARG_64 should have 10 operands!"); + assert(X86::AddrNumOperands == 5 && "VAARG_64 assumes 5 address operands"); + + unsigned DestReg = MI->getOperand(0).getReg(); + MachineOperand &Base = MI->getOperand(1); + MachineOperand &Scale = MI->getOperand(2); + MachineOperand &Index = MI->getOperand(3); + MachineOperand &Disp = MI->getOperand(4); + MachineOperand &Segment = MI->getOperand(5); + unsigned ArgSize = MI->getOperand(6).getImm(); + unsigned ArgMode = MI->getOperand(7).getImm(); + unsigned Align = MI->getOperand(8).getImm(); + + // Memory Reference + assert(MI->hasOneMemOperand() && "Expected VAARG_64 to have one memoperand"); + MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); + MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); + + // Machine Information + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64); + const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32); + DebugLoc DL = MI->getDebugLoc(); + + // struct va_list { + // i32 gp_offset + // i32 fp_offset + // i64 overflow_area (address) + // i64 reg_save_area (address) + // } + // sizeof(va_list) = 24 + // alignment(va_list) = 8 + + unsigned TotalNumIntRegs = 6; + unsigned TotalNumXMMRegs = 8; + bool UseGPOffset = (ArgMode == 1); + bool UseFPOffset = (ArgMode == 2); + unsigned MaxOffset = TotalNumIntRegs * 8 + + (UseFPOffset ? TotalNumXMMRegs * 16 : 0); + + /* Align ArgSize to a multiple of 8 */ + unsigned ArgSizeA8 = (ArgSize + 7) & ~7; + bool NeedsAlign = (Align > 8); + + MachineBasicBlock *thisMBB = MBB; + MachineBasicBlock *overflowMBB; + MachineBasicBlock *offsetMBB; + MachineBasicBlock *endMBB; + + unsigned OffsetDestReg = 0; // Argument address computed by offsetMBB + unsigned OverflowDestReg = 0; // Argument address computed by overflowMBB + unsigned OffsetReg = 0; + + if (!UseGPOffset && !UseFPOffset) { + // If we only pull from the overflow region, we don't create a branch. + // We don't need to alter control flow. + OffsetDestReg = 0; // unused + OverflowDestReg = DestReg; + + offsetMBB = NULL; + overflowMBB = thisMBB; + endMBB = thisMBB; + } else { + // First emit code to check if gp_offset (or fp_offset) is below the bound. + // If so, pull the argument from reg_save_area. 
(branch to offsetMBB) + // If not, pull from overflow_area. (branch to overflowMBB) + // + // thisMBB + // | . + // | . + // offsetMBB overflowMBB + // | . + // | . + // endMBB + + // Registers for the PHI in endMBB + OffsetDestReg = MRI.createVirtualRegister(AddrRegClass); + OverflowDestReg = MRI.createVirtualRegister(AddrRegClass); + + const BasicBlock *LLVM_BB = MBB->getBasicBlock(); + MachineFunction *MF = MBB->getParent(); + overflowMBB = MF->CreateMachineBasicBlock(LLVM_BB); + offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB); + endMBB = MF->CreateMachineBasicBlock(LLVM_BB); + + MachineFunction::iterator MBBIter = MBB; + ++MBBIter; + + // Insert the new basic blocks + MF->insert(MBBIter, offsetMBB); + MF->insert(MBBIter, overflowMBB); + MF->insert(MBBIter, endMBB); + + // Transfer the remainder of MBB and its successor edges to endMBB. + endMBB->splice(endMBB->begin(), thisMBB, + llvm::next(MachineBasicBlock::iterator(MI)), + thisMBB->end()); + endMBB->transferSuccessorsAndUpdatePHIs(thisMBB); + + // Make offsetMBB and overflowMBB successors of thisMBB + thisMBB->addSuccessor(offsetMBB); + thisMBB->addSuccessor(overflowMBB); + + // endMBB is a successor of both offsetMBB and overflowMBB + offsetMBB->addSuccessor(endMBB); + overflowMBB->addSuccessor(endMBB); + + // Load the offset value into a register + OffsetReg = MRI.createVirtualRegister(OffsetRegClass); + BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg) + .addOperand(Base) + .addOperand(Scale) + .addOperand(Index) + .addDisp(Disp, UseFPOffset ? 4 : 0) + .addOperand(Segment) + .setMemRefs(MMOBegin, MMOEnd); + + // Check if there is enough room left to pull this argument. + BuildMI(thisMBB, DL, TII->get(X86::CMP32ri)) + .addReg(OffsetReg) + .addImm(MaxOffset + 8 - ArgSizeA8); + + // Branch to "overflowMBB" if offset >= max + // Fall through to "offsetMBB" otherwise + BuildMI(thisMBB, DL, TII->get(X86::GetCondBranchFromCond(X86::COND_AE))) + .addMBB(overflowMBB); + } + + // In offsetMBB, emit code to use the reg_save_area. + if (offsetMBB) { + assert(OffsetReg != 0); + + // Read the reg_save_area address. + unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass); + BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg) + .addOperand(Base) + .addOperand(Scale) + .addOperand(Index) + .addDisp(Disp, 16) + .addOperand(Segment) + .setMemRefs(MMOBegin, MMOEnd); + + // Zero-extend the offset + unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass); + BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64) + .addImm(0) + .addReg(OffsetReg) + .addImm(X86::sub_32bit); + + // Add the offset to the reg_save_area to get the final address. + BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg) + .addReg(OffsetReg64) + .addReg(RegSaveReg); + + // Compute the offset for the next argument + unsigned NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass); + BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg) + .addReg(OffsetReg) + .addImm(UseFPOffset ? 16 : 8); + + // Store it back into the va_list. + BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr)) + .addOperand(Base) + .addOperand(Scale) + .addOperand(Index) + .addDisp(Disp, UseFPOffset ? 4 : 0) + .addOperand(Segment) + .addReg(NextOffsetReg) + .setMemRefs(MMOBegin, MMOEnd); + + // Jump to endMBB + BuildMI(offsetMBB, DL, TII->get(X86::JMP_4)) + .addMBB(endMBB); + } + + // + // Emit code to use overflow area + // + + // Load the overflow_area address into a register. 
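The overflow-area handling that follows is the last piece of the VAARG_64 expansion. Taken together, the emitted MOV/ADD/AND/CMP sequence implements the usual System V x86-64 va_list walk; the sketch below models it as plain scalar C++ for a GPR-class argument (the struct layout mirrors the comment above, while the function name and the GPR-only specialisation are assumptions for the example — the FP path would test fp_offset and bump it by 16 instead).

    #include <cstdint>

    struct VAList64 {            // layout from the comment above
      uint32_t gp_offset;        // offset into reg_save_area for the next GPR arg
      uint32_t fp_offset;        // offset into reg_save_area for the next XMM arg
      void    *overflow_arg_area;
      void    *reg_save_area;
    };

    static void *va_arg_gp(VAList64 &ap, unsigned arg_size, unsigned align) {
      unsigned size8 = (arg_size + 7) & ~7u;       // ArgSizeA8
      const unsigned max_gp = 6 * 8;               // TotalNumIntRegs * 8
      if (ap.gp_offset + size8 <= max_gp) {        // same bound test as the CMP32ri/JAE pair
        void *p = (char *)ap.reg_save_area + ap.gp_offset;
        ap.gp_offset += 8;                         // GPR case of (UseFPOffset ? 16 : 8)
        return p;
      }
      uintptr_t addr = (uintptr_t)ap.overflow_arg_area;
      if (align > 8)                               // aligned_addr = (addr + (align-1)) & ~(align-1)
        addr = (addr + (align - 1)) & ~(uintptr_t)(align - 1);
      ap.overflow_arg_area = (void *)(addr + size8);
      return (void *)addr;
    }
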
+ unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass); + BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg) + .addOperand(Base) + .addOperand(Scale) + .addOperand(Index) + .addDisp(Disp, 8) + .addOperand(Segment) + .setMemRefs(MMOBegin, MMOEnd); + + // If we need to align it, do so. Otherwise, just copy the address + // to OverflowDestReg. + if (NeedsAlign) { + // Align the overflow address + assert((Align & (Align-1)) == 0 && "Alignment must be a power of 2"); + unsigned TmpReg = MRI.createVirtualRegister(AddrRegClass); + + // aligned_addr = (addr + (align-1)) & ~(align-1) + BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg) + .addReg(OverflowAddrReg) + .addImm(Align-1); + + BuildMI(overflowMBB, DL, TII->get(X86::AND64ri32), OverflowDestReg) + .addReg(TmpReg) + .addImm(~(uint64_t)(Align-1)); + } else { + BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg) + .addReg(OverflowAddrReg); + } + + // Compute the next overflow address after this argument. + // (the overflow address should be kept 8-byte aligned) + unsigned NextAddrReg = MRI.createVirtualRegister(AddrRegClass); + BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg) + .addReg(OverflowDestReg) + .addImm(ArgSizeA8); + + // Store the new overflow address. + BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr)) + .addOperand(Base) + .addOperand(Scale) + .addOperand(Index) + .addDisp(Disp, 8) + .addOperand(Segment) + .addReg(NextAddrReg) + .setMemRefs(MMOBegin, MMOEnd); + + // If we branched, emit the PHI to the front of endMBB. + if (offsetMBB) { + BuildMI(*endMBB, endMBB->begin(), DL, + TII->get(X86::PHI), DestReg) + .addReg(OffsetDestReg).addMBB(offsetMBB) + .addReg(OverflowDestReg).addMBB(overflowMBB); + } + + // Erase the pseudo instruction + MI->eraseFromParent(); + + return endMBB; +} + +MachineBasicBlock * X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( MachineInstr *MI, MachineBasicBlock *MBB) const { @@ -9296,8 +10245,8 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( int64_t Offset = (i - 3) * 16 + VarArgsFPOffset; MachineMemOperand *MMO = F->getMachineMemOperand( - PseudoSourceValue::getFixedStack(RegSaveFrameIndex), - MachineMemOperand::MOStore, Offset, + MachinePointerInfo::getFixedStack(RegSaveFrameIndex, Offset), + MachineMemOperand::MOStore, /*Size=*/16, /*Align=*/16); BuildMI(XMMSaveMBB, DL, TII->get(X86::MOVAPSmr)) .addFrameIndex(RegSaveFrameIndex) @@ -9389,7 +10338,7 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, } MachineBasicBlock * -X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI, +X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI, MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); @@ -9399,8 +10348,11 @@ X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI, // FIXME: The code should be tweaked as soon as we'll try to do codegen for // mingw-w64. + const char *StackProbeSymbol = + Subtarget->isTargetWindows() ? "_chkstk" : "_alloca"; + BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32)) - .addExternalSymbol("_alloca") + .addExternalSymbol(StackProbeSymbol) .addReg(X86::EAX, RegState::Implicit) .addReg(X86::ESP, RegState::Implicit) .addReg(X86::EAX, RegState::Define | RegState::Implicit) @@ -9418,30 +10370,30 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI, // our load from the relocation, sticking it in either RDI (x86-64) // or EAX and doing an indirect call. 
The return value will then // be in the normal return register. - const X86InstrInfo *TII + const X86InstrInfo *TII = static_cast<const X86InstrInfo*>(getTargetMachine().getInstrInfo()); DebugLoc DL = MI->getDebugLoc(); MachineFunction *F = BB->getParent(); - bool IsWin64 = Subtarget->isTargetWin64(); - + + assert(Subtarget->isTargetDarwin() && "Darwin only instr emitted?"); assert(MI->getOperand(3).isGlobal() && "This should be a global"); - + if (Subtarget->is64Bit()) { MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(X86::MOV64rm), X86::RDI) .addReg(X86::RIP) .addImm(0).addReg(0) - .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, + .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, MI->getOperand(3).getTargetFlags()) .addReg(0); - MIB = BuildMI(*BB, MI, DL, TII->get(IsWin64 ? X86::WINCALL64m : X86::CALL64m)); + MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m)); addDirectMem(MIB, X86::RDI); } else if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(X86::MOV32rm), X86::EAX) .addReg(0) .addImm(0).addReg(0) - .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, + .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, MI->getOperand(3).getTargetFlags()) .addReg(0); MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m)); @@ -9451,13 +10403,13 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI, TII->get(X86::MOV32rm), X86::EAX) .addReg(TII->getGlobalBaseReg(F)) .addImm(0).addReg(0) - .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, + .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, MI->getOperand(3).getTargetFlags()) .addReg(0); MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m)); addDirectMem(MIB, X86::EAX); } - + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } @@ -9467,13 +10419,36 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { switch (MI->getOpcode()) { default: assert(false && "Unexpected instr type to insert"); - case X86::MINGW_ALLOCA: - return EmitLoweredMingwAlloca(MI, BB); + case X86::TAILJMPd64: + case X86::TAILJMPr64: + case X86::TAILJMPm64: + assert(!"TAILJMP64 would not be touched here."); + case X86::TCRETURNdi64: + case X86::TCRETURNri64: + case X86::TCRETURNmi64: + // Defs of TCRETURNxx64 has Win64's callee-saved registers, as subset. + // On AMD64, additional defs should be added before register allocation. + if (!Subtarget->isTargetWin64()) { + MI->addRegisterDefined(X86::RSI); + MI->addRegisterDefined(X86::RDI); + MI->addRegisterDefined(X86::XMM6); + MI->addRegisterDefined(X86::XMM7); + MI->addRegisterDefined(X86::XMM8); + MI->addRegisterDefined(X86::XMM9); + MI->addRegisterDefined(X86::XMM10); + MI->addRegisterDefined(X86::XMM11); + MI->addRegisterDefined(X86::XMM12); + MI->addRegisterDefined(X86::XMM13); + MI->addRegisterDefined(X86::XMM14); + MI->addRegisterDefined(X86::XMM15); + } + return BB; + case X86::WIN_ALLOCA: + return EmitLoweredWinAlloca(MI, BB); case X86::TLSCall_32: case X86::TLSCall_64: return EmitLoweredTLSCall(MI, BB); case X86::CMOV_GR8: - case X86::CMOV_V1I64: case X86::CMOV_FR32: case X86::CMOV_FR64: case X86::CMOV_V4F32: @@ -9583,6 +10558,12 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::VPCMPESTRM128MEM: return EmitPCMP(MI, BB, 5, true /* in mem */); + // Thread synchronization. + case X86::MONITOR: + return EmitMonitor(MI, BB); + case X86::MWAIT: + return EmitMwait(MI, BB); + // Atomic Lowering. 
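The ATOM* cases that follow hand pseudo-instructions such as ATOMAND32 to EmitAtomicBitwiseWithCustomInserter. Assuming the usual load / operate / LOCK CMPXCHG retry structure that inserter builds (the helper name below is invented), the expansion computes roughly:

    #include <atomic>
    #include <cstdint>

    // Returns the old value, like the ATOMAND32 pseudo's result operand.
    static uint32_t atomic_and_old(std::atomic<uint32_t> &mem, uint32_t mask) {
      uint32_t old = mem.load();
      // compare_exchange_weak reloads 'old' on failure, giving the
      // classic LOCK CMPXCHG retry loop.
      while (!mem.compare_exchange_weak(old, old & mask))
        ;
      return old;
    }
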
case X86::ATOMAND32: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, @@ -9747,6 +10728,9 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, false); case X86::VASTART_SAVE_XMM_REGS: return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB); + + case X86::VAARG_64: + return EmitVAARG64WithCustomInserter(MI, BB); } } @@ -9773,6 +10757,8 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, default: break; case X86ISD::ADD: case X86ISD::SUB: + case X86ISD::ADC: + case X86ISD::SBB: case X86ISD::SMUL: case X86ISD::UMUL: case X86ISD::INC: @@ -9791,6 +10777,16 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, } } +unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, + unsigned Depth) const { + // SETCC_CARRY sets the dest to ~0 for true or 0 for false. + if (Op.getOpcode() == X86ISD::SETCC_CARRY) + return Op.getValueType().getScalarType().getSizeInBits(); + + // Fallback case. + return 1; +} + /// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the /// node is a GlobalAddress + offset. bool X86TargetLowering::isGAPlusOffset(SDNode *N, @@ -9811,13 +10807,18 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N, /// if the load addresses are consecutive, non-overlapping, and in the right /// order. static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, - const TargetLowering &TLI) { + TargetLowering::DAGCombinerInfo &DCI) { DebugLoc dl = N->getDebugLoc(); EVT VT = N->getValueType(0); if (VT.getSizeInBits() != 128) return SDValue(); + // Don't create instructions with illegal types after legalize types has run. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType())) + return SDValue(); + SmallVector<SDValue, 16> Elts; for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) Elts.push_back(getShuffleScalarElt(N, i, DAG, 0)); @@ -9877,8 +10878,8 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, // Store the value to a temporary stack slot. SDValue StackPtr = DAG.CreateStackTemporary(InputVector.getValueType()); - SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr, NULL, - 0, false, false, 0); + SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr, + MachinePointerInfo(), false, false, 0); // Replace each use (extract) with a load of the appropriate element. for (SmallVectorImpl<SDNode *>::iterator UI = Uses.begin(), @@ -9893,11 +10894,12 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy()); SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), - OffsetVal, StackPtr); + StackPtr, OffsetVal); // Load the scalar. SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch, - ScalarAddr, NULL, 0, false, false, 0); + ScalarAddr, MachinePointerInfo(), + false, false, 0); // Replace the exact with the load. DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), LoadScalar); @@ -10473,6 +11475,36 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, return SDValue(); } + +static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget *Subtarget) { + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + // Want to form PANDN nodes, in the hopes of then easily combining them with + // OR and AND nodes to form PBLEND/PSIGN. 
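PANDN here is the SSE2 "and-not" (pandn a, b computes (~a) & b), so the code below simply looks for an explicit xor-with-all-ones feeding one side of the AND. For reference, the same identity written with the SSE2 intrinsics — a small standalone check, not part of the patch:

    #include <emmintrin.h>
    #include <cstdio>

    int main() {
      __m128i mask = _mm_set1_epi32(0x00ff00ff);
      __m128i val  = _mm_set1_epi32(0x12345678);
      __m128i ones = _mm_set1_epi32(-1);
      __m128i via_xor   = _mm_and_si128(_mm_xor_si128(mask, ones), val); // (xor mask, ~0) & val
      __m128i via_pandn = _mm_andnot_si128(mask, val);                   // pandn mask, val
      std::printf("%08x %08x\n",
                  (unsigned)_mm_cvtsi128_si32(via_xor),
                  (unsigned)_mm_cvtsi128_si32(via_pandn));               // both 12005600
    }
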
+ EVT VT = N->getValueType(0); + if (VT != MVT::v2i64) + return SDValue(); + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + DebugLoc DL = N->getDebugLoc(); + + // Check LHS for vnot + if (N0.getOpcode() == ISD::XOR && + ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode())) + return DAG.getNode(X86ISD::PANDN, DL, VT, N0.getOperand(0), N1); + + // Check RHS for vnot + if (N1.getOpcode() == ISD::XOR && + ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode())) + return DAG.getNode(X86ISD::PANDN, DL, VT, N1.getOperand(0), N0); + + return SDValue(); +} + static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { @@ -10480,12 +11512,99 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); EVT VT = N->getValueType(0); - if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64) + if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64 && VT != MVT::v2i64) return SDValue(); - // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c) SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); + + // look for psign/blend + if (Subtarget->hasSSSE3()) { + if (VT == MVT::v2i64) { + // Canonicalize pandn to RHS + if (N0.getOpcode() == X86ISD::PANDN) + std::swap(N0, N1); + // or (and (m, x), (pandn m, y)) + if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::PANDN) { + SDValue Mask = N1.getOperand(0); + SDValue X = N1.getOperand(1); + SDValue Y; + if (N0.getOperand(0) == Mask) + Y = N0.getOperand(1); + if (N0.getOperand(1) == Mask) + Y = N0.getOperand(0); + + // Check to see if the mask appeared in both the AND and PANDN and + if (!Y.getNode()) + return SDValue(); + + // Validate that X, Y, and Mask are BIT_CONVERTS, and see through them. + if (Mask.getOpcode() != ISD::BITCAST || + X.getOpcode() != ISD::BITCAST || + Y.getOpcode() != ISD::BITCAST) + return SDValue(); + + // Look through mask bitcast. + Mask = Mask.getOperand(0); + EVT MaskVT = Mask.getValueType(); + + // Validate that the Mask operand is a vector sra node. The sra node + // will be an intrinsic. + if (Mask.getOpcode() != ISD::INTRINSIC_WO_CHAIN) + return SDValue(); + + // FIXME: what to do for bytes, since there is a psignb/pblendvb, but + // there is no psrai.b + switch (cast<ConstantSDNode>(Mask.getOperand(0))->getZExtValue()) { + case Intrinsic::x86_sse2_psrai_w: + case Intrinsic::x86_sse2_psrai_d: + break; + default: return SDValue(); + } + + // Check that the SRA is all signbits. + SDValue SraC = Mask.getOperand(2); + unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue(); + unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits(); + if ((SraAmt + 1) != EltBits) + return SDValue(); + + DebugLoc DL = N->getDebugLoc(); + + // Now we know we at least have a plendvb with the mask val. See if + // we can form a psignb/w/d. 
+ // psign = x.type == y.type == mask.type && y = sub(0, x); + X = X.getOperand(0); + Y = Y.getOperand(0); + if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X && + ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) && + X.getValueType() == MaskVT && X.getValueType() == Y.getValueType()){ + unsigned Opc = 0; + switch (EltBits) { + case 8: Opc = X86ISD::PSIGNB; break; + case 16: Opc = X86ISD::PSIGNW; break; + case 32: Opc = X86ISD::PSIGND; break; + default: break; + } + if (Opc) { + SDValue Sign = DAG.getNode(Opc, DL, MaskVT, X, Mask.getOperand(1)); + return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Sign); + } + } + // PBLENDVB only available on SSE 4.1 + if (!Subtarget->hasSSE41()) + return SDValue(); + + X = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, X); + Y = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Y); + Mask = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Mask); + Mask = DAG.getNode(X86ISD::PBLENDVB, DL, MVT::v16i8, X, Y, Mask); + return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Mask); + } + } + } + + // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c) if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL) std::swap(N0, N1); if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) @@ -10600,9 +11719,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, // pair instead. if (Subtarget->is64Bit() || F64IsLegal) { EVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64; - SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), - Ld->getBasePtr(), Ld->getSrcValue(), - Ld->getSrcValueOffset(), Ld->isVolatile(), + SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(), + Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), Ld->getAlignment()); SDValue NewChain = NewLd.getValue(1); if (TokenFactorIndex != -1) { @@ -10611,7 +11729,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, Ops.size()); } return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(), - St->getSrcValue(), St->getSrcValueOffset(), + St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), St->getAlignment()); } @@ -10622,11 +11740,11 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, DAG.getConstant(4, MVT::i32)); SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr, - Ld->getSrcValue(), Ld->getSrcValueOffset(), + Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), Ld->getAlignment()); SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr, - Ld->getSrcValue(), Ld->getSrcValueOffset()+4, + Ld->getPointerInfo().getWithOffset(4), Ld->isVolatile(), Ld->isNonTemporal(), MinAlign(Ld->getAlignment(), 4)); @@ -10643,12 +11761,11 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, DAG.getConstant(4, MVT::i32)); SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr, - St->getSrcValue(), St->getSrcValueOffset(), + St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), St->getAlignment()); SDValue HiSt = DAG.getStore(NewChain, StDL, HiLd, HiAddr, - St->getSrcValue(), - St->getSrcValueOffset() + 4, + St->getPointerInfo().getWithOffset(4), St->isVolatile(), St->isNonTemporal(), MinAlign(St->getAlignment(), 4)); @@ -10706,13 +11823,13 @@ static SDValue PerformBTCombine(SDNode *N, static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) { SDValue Op = N->getOperand(0); - if (Op.getOpcode() == ISD::BIT_CONVERT) + if (Op.getOpcode() == ISD::BITCAST) Op = Op.getOperand(0); EVT VT = N->getValueType(0), OpVT = Op.getValueType(); if (Op.getOpcode() == X86ISD::VZEXT_LOAD && 
VT.getVectorElementType().getSizeInBits() == OpVT.getVectorElementType().getSizeInBits()) { - return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op); + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op); } return SDValue(); } @@ -10743,19 +11860,106 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +// Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT +static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) { + unsigned X86CC = N->getConstantOperandVal(0); + SDValue EFLAG = N->getOperand(1); + DebugLoc DL = N->getDebugLoc(); + + // Materialize "setb reg" as "sbb reg,reg", since it can be extended without + // a zext and produces an all-ones bit which is more useful than 0/1 in some + // cases. + if (X86CC == X86::COND_B) + return DAG.getNode(ISD::AND, DL, MVT::i8, + DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8, + DAG.getConstant(X86CC, MVT::i8), EFLAG), + DAG.getConstant(1, MVT::i8)); + + return SDValue(); +} + +// Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS +static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG, + X86TargetLowering::DAGCombinerInfo &DCI) { + // If the LHS and RHS of the ADC node are zero, then it can't overflow and + // the result is either zero or one (depending on the input carry bit). + // Strength reduce this down to a "set on carry" aka SETCC_CARRY&1. + if (X86::isZeroNode(N->getOperand(0)) && + X86::isZeroNode(N->getOperand(1)) && + // We don't have a good way to replace an EFLAGS use, so only do this when + // dead right now. + SDValue(N, 1).use_empty()) { + DebugLoc DL = N->getDebugLoc(); + EVT VT = N->getValueType(0); + SDValue CarryOut = DAG.getConstant(0, N->getValueType(1)); + SDValue Res1 = DAG.getNode(ISD::AND, DL, VT, + DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, + DAG.getConstant(X86::COND_B,MVT::i8), + N->getOperand(2)), + DAG.getConstant(1, VT)); + return DCI.CombineTo(N, Res1, CarryOut); + } + + return SDValue(); +} + +// fold (add Y, (sete X, 0)) -> adc 0, Y +// (add Y, (setne X, 0)) -> sbb -1, Y +// (sub (sete X, 0), Y) -> sbb 0, Y +// (sub (setne X, 0), Y) -> adc -1, Y +static SDValue OptimizeConditonalInDecrement(SDNode *N, SelectionDAG &DAG) { + DebugLoc DL = N->getDebugLoc(); + + // Look through ZExts. + SDValue Ext = N->getOperand(N->getOpcode() == ISD::SUB ? 1 : 0); + if (Ext.getOpcode() != ISD::ZERO_EXTEND || !Ext.hasOneUse()) + return SDValue(); + + SDValue SetCC = Ext.getOperand(0); + if (SetCC.getOpcode() != X86ISD::SETCC || !SetCC.hasOneUse()) + return SDValue(); + + X86::CondCode CC = (X86::CondCode)SetCC.getConstantOperandVal(0); + if (CC != X86::COND_E && CC != X86::COND_NE) + return SDValue(); + + SDValue Cmp = SetCC.getOperand(1); + if (Cmp.getOpcode() != X86ISD::CMP || !Cmp.hasOneUse() || + !X86::isZeroNode(Cmp.getOperand(1)) || + !Cmp.getOperand(0).getValueType().isInteger()) + return SDValue(); + + SDValue CmpOp0 = Cmp.getOperand(0); + SDValue NewCmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpOp0, + DAG.getConstant(1, CmpOp0.getValueType())); + + SDValue OtherVal = N->getOperand(N->getOpcode() == ISD::SUB ? 0 : 1); + if (CC == X86::COND_NE) + return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::ADC : X86ISD::SBB, + DL, OtherVal.getValueType(), OtherVal, + DAG.getConstant(-1ULL, OtherVal.getValueType()), NewCmp); + return DAG.getNode(N->getOpcode() == ISD::SUB ? 
X86ISD::SBB : X86ISD::ADC, + DL, OtherVal.getValueType(), OtherVal, + DAG.getConstant(0, OtherVal.getValueType()), NewCmp); +} + SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; switch (N->getOpcode()) { default: break; case ISD::EXTRACT_VECTOR_ELT: - return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this); + return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this); case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget); case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI); + case ISD::ADD: + case ISD::SUB: return OptimizeConditonalInDecrement(N, DAG); + case X86ISD::ADC: return PerformADCCombine(N, DAG, DCI); case ISD::MUL: return PerformMulCombine(N, DAG, DCI); case ISD::SHL: case ISD::SRA: case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget); + case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget); case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget); case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget); case X86ISD::FXOR: @@ -10764,8 +11968,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::BT: return PerformBTCombine(N, DAG, DCI); case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG); case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG); + case X86ISD::SETCC: return PerformSETCCCombine(N, DAG); case X86ISD::SHUFPS: // Handle all target specific shuffles case X86ISD::SHUFPD: + case X86ISD::PALIGN: case X86ISD::PUNPCKHBW: case X86ISD::PUNPCKHWD: case X86ISD::PUNPCKHDQ: @@ -10785,7 +11991,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::PSHUFLW: case X86ISD::MOVSS: case X86ISD::MOVSD: - case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this); + case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI); } return SDValue(); @@ -10892,44 +12098,14 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const { // X86 Inline Assembly Support //===----------------------------------------------------------------------===// -static bool LowerToBSwap(CallInst *CI) { - // FIXME: this should verify that we are targetting a 486 or better. If not, - // we will turn this bswap into something that will be lowered to logical ops - // instead of emitting the bswap asm. For now, we don't support 486 or lower - // so don't worry about this. - - // Verify this is a simple bswap. - if (CI->getNumArgOperands() != 1 || - CI->getType() != CI->getArgOperand(0)->getType() || - !CI->getType()->isIntegerTy()) - return false; - - const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); - if (!Ty || Ty->getBitWidth() % 16 != 0) - return false; - - // Okay, we can do this xform, do so now. - const Type *Tys[] = { Ty }; - Module *M = CI->getParent()->getParent()->getParent(); - Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1); - - Value *Op = CI->getArgOperand(0); - Op = CallInst::Create(Int, Op, CI->getName(), CI); - - CI->replaceAllUsesWith(Op); - CI->eraseFromParent(); - return true; -} - bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue()); - std::vector<InlineAsm::ConstraintInfo> Constraints = IA->ParseConstraints(); std::string AsmStr = IA->getAsmString(); // TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a" SmallVector<StringRef, 4> AsmPieces; - SplitString(AsmStr, AsmPieces, "\n"); // ; as separator? 
+ SplitString(AsmStr, AsmPieces, ";\n"); switch (AsmPieces.size()) { default: return false; @@ -10938,6 +12114,10 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { AsmPieces.clear(); SplitString(AsmStr, AsmPieces, " \t"); // Split with whitespace. + // FIXME: this should verify that we are targetting a 486 or better. If not, + // we will turn this bswap into something that will be lowered to logical ops + // instead of emitting the bswap asm. For now, we don't support 486 or lower + // so don't worry about this. // bswap $0 if (AsmPieces.size() == 2 && (AsmPieces[0] == "bswap" || @@ -10947,7 +12127,10 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { AsmPieces[1] == "${0:q}")) { // No need to check constraints, nothing other than the equivalent of // "=r,0" would be valid here. - return LowerToBSwap(CI); + const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); + if (!Ty || Ty->getBitWidth() % 16 != 0) + return false; + return IntrinsicLowering::LowerToByteSwap(CI); } // rorw $$8, ${0:w} --> llvm.bswap.i16 if (CI->getType()->isIntegerTy(16) && @@ -10957,35 +12140,76 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { AsmPieces[2] == "${0:w}" && IA->getConstraintString().compare(0, 5, "=r,0,") == 0) { AsmPieces.clear(); - const std::string &Constraints = IA->getConstraintString(); - SplitString(StringRef(Constraints).substr(5), AsmPieces, ","); + const std::string &ConstraintsStr = IA->getConstraintString(); + SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ","); std::sort(AsmPieces.begin(), AsmPieces.end()); if (AsmPieces.size() == 4 && AsmPieces[0] == "~{cc}" && AsmPieces[1] == "~{dirflag}" && AsmPieces[2] == "~{flags}" && AsmPieces[3] == "~{fpsr}") { - return LowerToBSwap(CI); + const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); + if (!Ty || Ty->getBitWidth() % 16 != 0) + return false; + return IntrinsicLowering::LowerToByteSwap(CI); } } break; case 3: - if (CI->getType()->isIntegerTy(64) && - Constraints.size() >= 2 && - Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" && - Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") { - // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64 + if (CI->getType()->isIntegerTy(32) && + IA->getConstraintString().compare(0, 5, "=r,0,") == 0) { SmallVector<StringRef, 4> Words; - SplitString(AsmPieces[0], Words, " \t"); - if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") { + SplitString(AsmPieces[0], Words, " \t,"); + if (Words.size() == 3 && Words[0] == "rorw" && Words[1] == "$$8" && + Words[2] == "${0:w}") { Words.clear(); - SplitString(AsmPieces[1], Words, " \t"); - if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%edx") { + SplitString(AsmPieces[1], Words, " \t,"); + if (Words.size() == 3 && Words[0] == "rorl" && Words[1] == "$$16" && + Words[2] == "$0") { Words.clear(); SplitString(AsmPieces[2], Words, " \t,"); - if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" && - Words[2] == "%edx") { - return LowerToBSwap(CI); + if (Words.size() == 3 && Words[0] == "rorw" && Words[1] == "$$8" && + Words[2] == "${0:w}") { + AsmPieces.clear(); + const std::string &ConstraintsStr = IA->getConstraintString(); + SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ","); + std::sort(AsmPieces.begin(), AsmPieces.end()); + if (AsmPieces.size() == 4 && + AsmPieces[0] == "~{cc}" && + AsmPieces[1] == "~{dirflag}" && + AsmPieces[2] == "~{flags}" && + AsmPieces[3] == "~{fpsr}") { + const IntegerType *Ty = 
dyn_cast<IntegerType>(CI->getType()); + if (!Ty || Ty->getBitWidth() % 16 != 0) + return false; + return IntrinsicLowering::LowerToByteSwap(CI); + } + } + } + } + } + + if (CI->getType()->isIntegerTy(64)) { + InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints(); + if (Constraints.size() >= 2 && + Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" && + Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") { + // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64 + SmallVector<StringRef, 4> Words; + SplitString(AsmPieces[0], Words, " \t"); + if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") { + Words.clear(); + SplitString(AsmPieces[1], Words, " \t"); + if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%edx") { + Words.clear(); + SplitString(AsmPieces[2], Words, " \t,"); + if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" && + Words[2] == "%edx") { + const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); + if (!Ty || Ty->getBitWidth() % 16 != 0) + return false; + return IntrinsicLowering::LowerToByteSwap(CI); + } } } } @@ -11003,18 +12227,32 @@ X86TargetLowering::ConstraintType X86TargetLowering::getConstraintType(const std::string &Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { - case 'A': - return C_Register; - case 'f': - case 'r': case 'R': - case 'l': case 'q': case 'Q': - case 'x': + case 'f': + case 't': + case 'u': case 'y': + case 'x': case 'Y': return C_RegisterClass; + case 'a': + case 'b': + case 'c': + case 'd': + case 'S': + case 'D': + case 'A': + return C_Register; + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'G': + case 'C': case 'e': case 'Z': return C_Other; @@ -11025,6 +12263,110 @@ X86TargetLowering::getConstraintType(const std::string &Constraint) const { return TargetLowering::getConstraintType(Constraint); } +/// Examine constraint type and operand type and determine a weight value. +/// This object must already have been set up with the operand type +/// and the current alternative constraint selected. +TargetLowering::ConstraintWeight + X86TargetLowering::getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const { + ConstraintWeight weight = CW_Invalid; + Value *CallOperandVal = info.CallOperandVal; + // If we don't have a value, we can't do a match, + // but allow it at the lowest weight. + if (CallOperandVal == NULL) + return CW_Default; + const Type *type = CallOperandVal->getType(); + // Look at the constraint type. 
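The switch below scores the single-letter x86 asm constraints. As a reminder of what a few of them look like at the source level (GCC/Clang inline-asm syntax; both snippets are illustrations only): 'I' is an immediate in 0..31, which is why the weight code checks getZExtValue() <= 31, while letters such as 'a' and 'd' pin a specific register.

    // 'I': shift/rotate count, must fit in 0..31.
    static inline unsigned rotl5(unsigned x) {
      asm("roll %1, %0" : "+r"(x) : "I"(5));
      return x;
    }

    // 'a' / 'd': specific registers (EAX/EDX), e.g. for RDTSC's fixed outputs.
    static inline unsigned long long rdtsc(void) {
      unsigned lo, hi;
      asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
      return ((unsigned long long)hi << 32) | lo;
    }
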
+ switch (*constraint) { + default: + weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); + case 'R': + case 'q': + case 'Q': + case 'a': + case 'b': + case 'c': + case 'd': + case 'S': + case 'D': + case 'A': + if (CallOperandVal->getType()->isIntegerTy()) + weight = CW_SpecificReg; + break; + case 'f': + case 't': + case 'u': + if (type->isFloatingPointTy()) + weight = CW_SpecificReg; + break; + case 'y': + if (type->isX86_MMXTy() && Subtarget->hasMMX()) + weight = CW_SpecificReg; + break; + case 'x': + case 'Y': + if ((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasXMM()) + weight = CW_Register; + break; + case 'I': + if (ConstantInt *C = dyn_cast<ConstantInt>(info.CallOperandVal)) { + if (C->getZExtValue() <= 31) + weight = CW_Constant; + } + break; + case 'J': + if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) { + if (C->getZExtValue() <= 63) + weight = CW_Constant; + } + break; + case 'K': + if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) { + if ((C->getSExtValue() >= -0x80) && (C->getSExtValue() <= 0x7f)) + weight = CW_Constant; + } + break; + case 'L': + if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) { + if ((C->getZExtValue() == 0xff) || (C->getZExtValue() == 0xffff)) + weight = CW_Constant; + } + break; + case 'M': + if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) { + if (C->getZExtValue() <= 3) + weight = CW_Constant; + } + break; + case 'N': + if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) { + if (C->getZExtValue() <= 0xff) + weight = CW_Constant; + } + break; + case 'G': + case 'C': + if (dyn_cast<ConstantFP>(CallOperandVal)) { + weight = CW_Constant; + } + break; + case 'e': + if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) { + if ((C->getSExtValue() >= -0x80000000LL) && + (C->getSExtValue() <= 0x7fffffffLL)) + weight = CW_Constant; + } + break; + case 'Z': + if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) { + if (C->getZExtValue() <= 0xffffffff) + weight = CW_Constant; + } + break; + } + return weight; +} + /// LowerXConstraint - try to replace an X constraint, which matches anything, /// with another that has more specific requirements based on the type of the /// corresponding operand. @@ -11033,9 +12375,9 @@ LowerXConstraint(EVT ConstraintVT) const { // FP X constraints get lowered to SSE1/2 registers if available, otherwise // 'f' like normal targets. if (ConstraintVT.isFloatingPoint()) { - if (Subtarget->hasSSE2()) + if (Subtarget->hasXMMInt()) return "Y"; - if (Subtarget->hasSSE1()) + if (Subtarget->hasXMM()) return "x"; } @@ -11265,10 +12607,10 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, if (!Subtarget->hasMMX()) break; return std::make_pair(0U, X86::VR64RegisterClass); case 'Y': // SSE_REGS if SSE2 allowed - if (!Subtarget->hasSSE2()) break; + if (!Subtarget->hasXMMInt()) break; // FALL THROUGH. case 'x': // SSE_REGS if SSE1 allowed - if (!Subtarget->hasSSE1()) break; + if (!Subtarget->hasXMM()) break; switch (VT.getSimpleVT().SimpleTy) { default: break; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index d2d9b28..419da37 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -57,35 +57,6 @@ namespace llvm { /// corresponds to X86::PSRLDQ. FSRL, - /// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the - /// integer source in memory and FP reg result. This corresponds to the - /// X86::FILD*m instructions. 
It has three inputs (token chain, address, - /// and source type) and two outputs (FP value and token chain). FILD_FLAG - /// also produces a flag). - FILD, - FILD_FLAG, - - /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the - /// integer destination in memory and a FP reg source. This corresponds - /// to the X86::FIST*m instructions and the rounding mode change stuff. It - /// has two inputs (token chain and address) and two outputs (int value - /// and token chain). - FP_TO_INT16_IN_MEM, - FP_TO_INT32_IN_MEM, - FP_TO_INT64_IN_MEM, - - /// FLD - This instruction implements an extending load to FP stack slots. - /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain - /// operand, ptr to load from, and a ValueType node indicating the type - /// to load to. - FLD, - - /// FST - This instruction implements a truncating store to FP stack - /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a - /// chain operand, value to store, address, and a ValueType to store it - /// as. - FST, - /// CALL - These operations represent an abstract X86 call /// instruction, which includes a bunch of information. In particular the /// operands of these node are: @@ -105,7 +76,7 @@ namespace llvm { /// CALL, - /// RDTSC_DAG - This operation implements the lowering for + /// RDTSC_DAG - This operation implements the lowering for /// readcyclecounter RDTSC_DAG, @@ -115,13 +86,13 @@ namespace llvm { /// X86 bit-test instructions. BT, - /// X86 SetCC. Operand 0 is condition code, and operand 1 is the flag - /// operand produced by a CMP instruction. + /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS + /// operand, usually produced by a CMP instruction. SETCC, // Same as SETCC except it's materialized with a sbb and the value is all // one's or all zero's. - SETCC_CARRY, + SETCC_CARRY, // R = carry_bit ? ~0 : 0 /// X86 conditional moves. Operand 0 and operand 1 are the two values /// to select from. Operand 2 is the condition code, and operand 3 is the @@ -157,11 +128,15 @@ namespace llvm { /// relative displacements. WrapperRIP, - /// MOVQ2DQ - Copies a 64-bit value from a vector to another vector. - /// Can be used to move a vector value from a MMX register to a XMM - /// register. + /// MOVQ2DQ - Copies a 64-bit value from an MMX vector to the low word + /// of an XMM vector, with the high word zero filled. MOVQ2DQ, + /// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector + /// to an MMX vector. If you think this is too close to the previous + /// mnemonic, so do I; blame Intel. + MOVDQ2Q, + /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to /// i32, corresponds to X86::PEXTRB. PEXTRB, @@ -184,7 +159,16 @@ namespace llvm { /// PSHUFB - Shuffle 16 8-bit values within a vector. PSHUFB, - + + /// PANDN - and with not'd value. + PANDN, + + /// PSIGNB/W/D - Copy integer sign. + PSIGNB, PSIGNW, PSIGND, + + /// PBLENDVB - Variable blend + PBLENDVB, + /// FMAX, FMIN - Floating point max and min. /// FMAX, FMIN, @@ -196,17 +180,14 @@ namespace llvm { // TLSADDR - Thread Local Storage. TLSADDR, - + // TLSCALL - Thread Local Storage. When calling to an OS provided // thunk at the address from an earlier relocation. TLSCALL, - // SegmentBaseAddress - The address segment:0 - SegmentBaseAddress, - // EH_RETURN - Exception Handling helpers. EH_RETURN, - + /// TC_RETURN - Tail call return. 
/// operand #0 chain /// operand #1 callee (register or absolute) @@ -214,37 +195,29 @@ namespace llvm { /// operand #3 optional in flag TC_RETURN, - // LCMPXCHG_DAG, LCMPXCHG8_DAG - Compare and swap. - LCMPXCHG_DAG, - LCMPXCHG8_DAG, - - // FNSTCW16m - Store FP control world into i16 memory. - FNSTCW16m, - // VZEXT_MOVL - Vector move low and zero extend. VZEXT_MOVL, - // VZEXT_LOAD - Load, scalar_to_vector, and zero extend. - VZEXT_LOAD, - // VSHL, VSRL - Vector logical left / right shift. VSHL, VSRL, // CMPPD, CMPPS - Vector double/float comparison. // CMPPD, CMPPS - Vector double/float comparison. CMPPD, CMPPS, - + // PCMP* - Vector integer comparisons. PCMPEQB, PCMPEQW, PCMPEQD, PCMPEQQ, PCMPGTB, PCMPGTW, PCMPGTD, PCMPGTQ, - // ADD, SUB, SMUL, UMUL, etc. - Arithmetic operations with FLAGS results. - ADD, SUB, SMUL, UMUL, + // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results. + ADD, SUB, ADC, SBB, SMUL, INC, DEC, OR, XOR, AND, + + UMUL, // LOW, HI, FLAGS = umul LHS, RHS // MUL_IMM - X86 specific multiply by immediate. MUL_IMM, - + // PTEST - Vector bitwise comparisons PTEST, @@ -291,11 +264,17 @@ namespace llvm { // with control flow. VASTART_SAVE_XMM_REGS, - // MINGW_ALLOCA - MingW's __alloca call to do stack probing. - MINGW_ALLOCA, + // WIN_ALLOCA - Windows's _chkstk call to do stack probing. + WIN_ALLOCA, + + // Memory barrier + MEMBARRIER, + MFENCE, + SFENCE, + LFENCE, - // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG, - // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG - + // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG, + // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG - // Atomic 64-bit binary operations. ATOMADD64_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE, ATOMSUB64_DAG, @@ -304,12 +283,49 @@ namespace llvm { ATOMAND64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG, - - // Memory barrier - MEMBARRIER, - MFENCE, - SFENCE, - LFENCE + + // LCMPXCHG_DAG, LCMPXCHG8_DAG - Compare and swap. + LCMPXCHG_DAG, + LCMPXCHG8_DAG, + + // VZEXT_LOAD - Load, scalar_to_vector, and zero extend. + VZEXT_LOAD, + + // FNSTCW16m - Store FP control world into i16 memory. + FNSTCW16m, + + /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the + /// integer destination in memory and a FP reg source. This corresponds + /// to the X86::FIST*m instructions and the rounding mode change stuff. It + /// has two inputs (token chain and address) and two outputs (int value + /// and token chain). + FP_TO_INT16_IN_MEM, + FP_TO_INT32_IN_MEM, + FP_TO_INT64_IN_MEM, + + /// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the + /// integer source in memory and FP reg result. This corresponds to the + /// X86::FILD*m instructions. It has three inputs (token chain, address, + /// and source type) and two outputs (FP value and token chain). FILD_FLAG + /// also produces a flag). + FILD, + FILD_FLAG, + + /// FLD - This instruction implements an extending load to FP stack slots. + /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain + /// operand, ptr to load from, and a ValueType node indicating the type + /// to load to. + FLD, + + /// FST - This instruction implements a truncating store to FP stack + /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a + /// chain operand, value to store, address, and a ValueType to store it + /// as. + FST, + + /// VAARG_64 - This instruction grabs the address of the next argument + /// from a va_list. 
(reads and modifies the va_list in memory) + VAARG_64 // WARNING: Do not add anything in the end unless you want the node to // have memop! In fact, starting from ATOMADD64_DAG all opcodes will be @@ -392,6 +408,16 @@ namespace llvm { /// specifies a shuffle of elements that is suitable for input to PALIGNR. bool isPALIGNRMask(ShuffleVectorSDNode *N); + /// isVEXTRACTF128Index - Return true if the specified + /// EXTRACT_SUBVECTOR operand specifies a vector extract that is + /// suitable for input to VEXTRACTF128. + bool isVEXTRACTF128Index(SDNode *N); + + /// isVINSERTF128Index - Return true if the specified + /// INSERT_SUBVECTOR operand specifies a subvector insert that is + /// suitable for input to VINSERTF128. + bool isVINSERTF128Index(SDNode *N); + /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* /// instructions. @@ -409,6 +435,16 @@ namespace llvm { /// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction. unsigned getShufflePALIGNRImmediate(SDNode *N); + /// getExtractVEXTRACTF128Immediate - Return the appropriate + /// immediate to extract the specified EXTRACT_SUBVECTOR index + /// with VEXTRACTF128 instructions. + unsigned getExtractVEXTRACTF128Immediate(SDNode *N); + + /// getInsertVINSERTF128Immediate - Return the appropriate + /// immediate to insert at the specified INSERT_SUBVECTOR index + /// with VINSERTF128 instructions. + unsigned getInsertVINSERTF128Immediate(SDNode *N); + /// isZeroNode - Returns true if Elt is a constant zero or a floating point /// constant +0.0. bool isZeroNode(SDValue Elt); @@ -425,16 +461,13 @@ namespace llvm { public: explicit X86TargetLowering(X86TargetMachine &TM); - /// getPICBaseSymbol - Return the X86-32 PIC base. - MCSymbol *getPICBaseSymbol(const MachineFunction *MF, MCContext &Ctx) const; - virtual unsigned getJumpTableEncoding() const; virtual const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const; - + /// getPICJumpTableRelocaBase - Returns relocation base for the given PIC /// jumptable. virtual SDValue getPICJumpTableRelocBase(SDValue Table, @@ -442,7 +475,7 @@ namespace llvm { virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const; - + /// getStackPtrReg - Return the stack pointer register we are using: either /// ESP or RSP. unsigned getStackPtrReg() const { return X86StackPtr; } @@ -486,7 +519,7 @@ namespace llvm { virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, SelectionDAG &DAG) const; - + virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; /// isTypeDesirableForOp - Return true if the target has native support for @@ -505,7 +538,7 @@ namespace llvm { EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; - + /// getTargetNodeName - This method returns the name of a target specific /// DAG node. 
virtual const char *getTargetNodeName(unsigned Opcode) const; @@ -513,26 +546,36 @@ namespace llvm { /// getSetCCResultType - Return the ISD::SETCC ValueType virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; - /// computeMaskedBitsForTargetNode - Determine which of the bits specified - /// in Mask are known to be either zero or one and return them in the + /// computeMaskedBitsForTargetNode - Determine which of the bits specified + /// in Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. virtual void computeMaskedBitsForTargetNode(const SDValue Op, const APInt &Mask, - APInt &KnownZero, + APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth = 0) const; + // ComputeNumSignBitsForTargetNode - Determine the number of bits in the + // operation that are sign bits. + virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, + unsigned Depth) const; + virtual bool isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const; - + SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const; virtual bool ExpandInlineAsm(CallInst *CI) const; - + ConstraintType getConstraintType(const std::string &Constraint) const; - - std::vector<unsigned> + + /// Examine constraint string and operand type and determine a weight value. + /// The operand object must already have been set up with the operand type. + virtual ConstraintWeight getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const; + + std::vector<unsigned> getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; @@ -546,15 +589,15 @@ namespace llvm { char ConstraintLetter, std::vector<SDValue> &Ops, SelectionDAG &DAG) const; - + /// getRegForInlineAsmConstraint - Given a physical register constraint /// (e.g. {edx}), return the register number and the register class for the /// register. This should only be used for C_Register constraints. On /// error, this returns a register number of 0. - std::pair<unsigned, const TargetRegisterClass*> + std::pair<unsigned, const TargetRegisterClass*> getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; - + /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const; @@ -609,7 +652,7 @@ namespace llvm { // shrink long double fp constant since fldt is very slow. return !X86ScalarSSEf64 || VT == MVT::f80; } - + const X86Subtarget* getSubtarget() const { return Subtarget; } @@ -650,8 +693,8 @@ namespace llvm { /// X86StackPtr - X86 physical register used as stack ptr. unsigned X86StackPtr; - - /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87 + + /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87 /// floating point ops. /// When SSE is available, use it for f32 operations. /// When SSE2 is available, use it for f64 operations. 
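As a rough aside on the ComputeNumSignBitsForTargetNode hook added above (standalone C++, not from this patch): a target node such as SETCC_CARRY, whose result is either all ones or all zeros, consists entirely of copies of its sign bit, so the hook can report the full bit width for it. The helper names below are hypothetical, not the LLVM API.

#include <cstdint>

// A value known to be either ~0 or 0 is made up entirely of copies of its
// sign bit, so a sign-bit-counting hook can simply return the bit width.
unsigned numSignBitsOfAllOnesOrZero(unsigned BitWidth) {
  return BitWidth;   // e.g. 64 for an i64 SETCC_CARRY-style result
}

// By contrast, an arbitrary value only guarantees the sign bit itself.
unsigned numSignBitsDefault() {
  return 1;
}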
@@ -702,7 +745,6 @@ namespace llvm { SDValue Chain, bool IsTailCall, bool Is64Bit, int FPDiff, DebugLoc dl) const; - CCAssignFn *CCAssignFnForNode(CallingConv::ID CallConv) const; unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG &DAG) const; @@ -719,6 +761,8 @@ namespace llvm { SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, @@ -729,7 +773,7 @@ namespace llvm { SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const; SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot, SelectionDAG &DAG) const; - SDValue LowerBIT_CONVERT(SDValue op, SelectionDAG &DAG) const; + SDValue LowerBITCAST(SDValue op, SelectionDAG &DAG) const; SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const; @@ -794,6 +838,8 @@ namespace llvm { const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; + virtual bool isUsedByReturnOnly(SDNode *N) const; + virtual bool CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, @@ -810,6 +856,13 @@ namespace llvm { MachineBasicBlock *EmitPCMP(MachineInstr *BInstr, MachineBasicBlock *BB, unsigned argNum, bool inMem) const; + /// Utility functions to emit monitor and mwait instructions. These + /// need to make sure that the arguments to the intrinsic are in the + /// correct registers. + MachineBasicBlock *EmitMonitor(MachineInstr *MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const; + /// Utility function to emit atomic bitwise operations (and, or, xor). /// It takes the bitwise instruction to expand, the associated machine basic /// block, and the associated X86 opcodes for reg/reg and reg/imm. @@ -833,7 +886,7 @@ namespace llvm { unsigned immOpcL, unsigned immOpcH, bool invSrc = false) const; - + /// Utility function to emit atomic min and max. It takes the min/max /// instruction to expand, the associated basic block, and the associated /// cmov opcode for moving the min or max value. @@ -841,6 +894,11 @@ namespace llvm { MachineBasicBlock *BB, unsigned cmovOpc) const; + // Utility function to emit the low-level va_arg code for X86-64. + MachineBasicBlock *EmitVAARG64WithCustomInserter( + MachineInstr *MI, + MachineBasicBlock *MBB) const; + /// Utility function to emit the xmm reg save portion of va_start. 
MachineBasicBlock *EmitVAStartSaveXMMRegsWithCustomInserter( MachineInstr *BInstr, @@ -849,12 +907,15 @@ namespace llvm { MachineBasicBlock *EmitLoweredSelect(MachineInstr *I, MachineBasicBlock *BB) const; - MachineBasicBlock *EmitLoweredMingwAlloca(MachineInstr *MI, + MachineBasicBlock *EmitLoweredWinAlloca(MachineInstr *MI, MachineBasicBlock *BB) const; - + MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI, MachineBasicBlock *BB) const; + MachineBasicBlock *emitLoweredTLSAddr(MachineInstr *MI, + MachineBasicBlock *BB) const; + /// Emit nodes that will be selected as "test Op0,Op0", or something /// equivalent, for use with the given x86 condition code. SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG) const; diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td new file mode 100644 index 0000000..45d1c6b --- /dev/null +++ b/lib/Target/X86/X86Instr3DNow.td @@ -0,0 +1,77 @@ +//====- X86Instr3DNow.td - The 3DNow! Instruction Set ------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the 3DNow! instruction set, which extends MMX to support +// floating point and also adds a few more random instructions for good measure. +// +//===----------------------------------------------------------------------===// + +// FIXME: We don't support any intrinsics for these instructions yet. + +class I3DNow<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : I<o, F, outs, ins, asm, pattern>, TB, Requires<[Has3DNow]> { +} + +class I3DNow_binop<bits<8> o, Format F, dag ins, string Mnemonic> + : I<o, F, (outs VR64:$dst), ins, + !strconcat(Mnemonic, "\t{$src2, $dst|$dst, $src2}"), []>, + TB, Requires<[Has3DNow]>, Has3DNow0F0FOpcode { + // FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet. + let isAsmParserOnly = 1; +} + + +let Constraints = "$src1 = $dst" in { + // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic. + // When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp. 
+ multiclass I3DNow_binop_rm<bits<8> opc, string Mn> { + def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn>; + def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn>; + } +} + +defm PAVGUSB : I3DNow_binop_rm<0xBF, "pavgusb">; +defm PF2ID : I3DNow_binop_rm<0x1D, "pf2id">; +defm PFACC : I3DNow_binop_rm<0xAE, "pfacc">; +defm PFADD : I3DNow_binop_rm<0x9E, "pfadd">; +defm PFCMPEQ : I3DNow_binop_rm<0xB0, "pfcmpeq">; +defm PFCMPGE : I3DNow_binop_rm<0x90, "pfcmpge">; +defm PFCMPGT : I3DNow_binop_rm<0xA0, "pfcmpgt">; +defm PFMAX : I3DNow_binop_rm<0xA4, "pfmax">; +defm PFMIN : I3DNow_binop_rm<0x94, "pfmin">; +defm PFMUL : I3DNow_binop_rm<0xB4, "pfmul">; +defm PFRCP : I3DNow_binop_rm<0x96, "pfrcp">; +defm PFRCPIT1 : I3DNow_binop_rm<0xA6, "pfrcpit1">; +defm PFRCPIT2 : I3DNow_binop_rm<0xB6, "pfrcpit2">; +defm PFRSQIT1 : I3DNow_binop_rm<0xA7, "pfrsqit1">; +defm PFRSQRT : I3DNow_binop_rm<0x97, "pfrsqrt">; +defm PFSUB : I3DNow_binop_rm<0x9A, "pfsub">; +defm PFSUBR : I3DNow_binop_rm<0xAA, "pfsubr">; +defm PI2FD : I3DNow_binop_rm<0x0D, "pi2fd">; +defm PMULHRW : I3DNow_binop_rm<0xB7, "pmulhrw">; + + +def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>; + +def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i32mem:$addr), + "prefetch $addr", []>; + +// FIXME: Diassembler gets a bogus decode conflict. +let isAsmParserOnly = 1 in { +def PREFETCHW : I3DNow<0x0D, MRM1m, (outs), (ins i16mem:$addr), + "prefetchw $addr", []>; +} + +// "3DNowA" instructions +defm PF2IW : I3DNow_binop_rm<0x1C, "pf2iw">; +defm PI2FW : I3DNow_binop_rm<0x0C, "pi2fw">; +defm PFNACC : I3DNow_binop_rm<0x8A, "pfnacc">; +defm PFPNACC : I3DNow_binop_rm<0x8E, "pfpnacc">; +defm PSWAPD : I3DNow_binop_rm<0xBB, "pswapd">; diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td deleted file mode 100644 index 0884b61..0000000 --- a/lib/Target/X86/X86Instr64bit.td +++ /dev/null @@ -1,2250 +0,0 @@ -//====- X86Instr64bit.td - Describe X86-64 Instructions ----*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file describes the X86-64 instruction set, defining the instructions, -// and properties of the instructions which are needed for code generation, -// machine code emission, and analysis. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Operand Definitions. -// - -// 64-bits but only 32 bits are significant. -def i64i32imm : Operand<i64> { - let ParserMatchClass = ImmSExti64i32AsmOperand; -} - -// 64-bits but only 32 bits are significant, and those bits are treated as being -// pc relative. -def i64i32imm_pcrel : Operand<i64> { - let PrintMethod = "print_pcrel_imm"; - let ParserMatchClass = X86AbsMemAsmOperand; -} - - -// 64-bits but only 8 bits are significant. -def i64i8imm : Operand<i64> { - let ParserMatchClass = ImmSExti64i8AsmOperand; -} - -def lea64_32mem : Operand<i32> { - let PrintMethod = "printi32mem"; - let AsmOperandLowerMethod = "lower_lea64_32mem"; - let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm, i8imm); - let ParserMatchClass = X86MemAsmOperand; -} - - -// Special i64mem for addresses of load folding tail calls. 
These are not -// allowed to use callee-saved registers since they must be scheduled -// after callee-saved register are popped. -def i64mem_TC : Operand<i64> { - let PrintMethod = "printi64mem"; - let MIOperandInfo = (ops GR64_TC, i8imm, GR64_TC, i32imm, i8imm); - let ParserMatchClass = X86MemAsmOperand; -} - -//===----------------------------------------------------------------------===// -// Complex Pattern Definitions. -// -def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr", - [add, sub, mul, X86mul_imm, shl, or, frameindex, - X86WrapperRIP], []>; - -def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr", - [tglobaltlsaddr], []>; - -//===----------------------------------------------------------------------===// -// Pattern fragments. -// - -def i64immSExt8 : PatLeaf<(i64 immSext8)>; - -def GetLo32XForm : SDNodeXForm<imm, [{ - // Transformation function: get the low 32 bits. - return getI32Imm((unsigned)N->getZExtValue()); -}]>; - -def i64immSExt32 : PatLeaf<(i64 imm), [{ return i64immSExt32(N); }]>; - - -def i64immZExt32 : PatLeaf<(i64 imm), [{ - // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit - // unsignedsign extended field. - return (uint64_t)N->getZExtValue() == (uint32_t)N->getZExtValue(); -}]>; - -def sextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>; -def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>; -def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>; - -def zextloadi64i1 : PatFrag<(ops node:$ptr), (i64 (zextloadi1 node:$ptr))>; -def zextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (zextloadi8 node:$ptr))>; -def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>; -def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>; - -def extloadi64i1 : PatFrag<(ops node:$ptr), (i64 (extloadi1 node:$ptr))>; -def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>; -def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>; -def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>; - -//===----------------------------------------------------------------------===// -// Instruction list... -// - -// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into -// a stack adjustment and the codegen must know that they may modify the stack -// pointer before prolog-epilog rewriting occurs. -// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become -// sub / add which can clobber EFLAGS. -let Defs = [RSP, EFLAGS], Uses = [RSP] in { -def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt), - "#ADJCALLSTACKDOWN", - [(X86callseq_start timm:$amt)]>, - Requires<[In64BitMode]>; -def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), - "#ADJCALLSTACKUP", - [(X86callseq_end timm:$amt1, timm:$amt2)]>, - Requires<[In64BitMode]>; -} - -// Interrupt Instructions -def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iret{q}", []>; - -//===----------------------------------------------------------------------===// -// Call Instructions... -// -let isCall = 1 in - // All calls clobber the non-callee saved registers. RSP is marked as - // a use to prevent stack-pointer assignments that appear immediately - // before calls from potentially appearing dead. Uses for argument - // registers are added manually. 
- let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, - FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1, - MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, - XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], - Uses = [RSP] in { - - // NOTE: this pattern doesn't match "X86call imm", because we do not know - // that the offset between an arbitrary immediate and the call will fit in - // the 32-bit pcrel field that we have. - def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm, - (outs), (ins i64i32imm_pcrel:$dst, variable_ops), - "call{q}\t$dst", []>, - Requires<[In64BitMode, NotWin64]>; - def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops), - "call{q}\t{*}$dst", [(X86call GR64:$dst)]>, - Requires<[NotWin64]>; - def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops), - "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>, - Requires<[NotWin64]>; - - def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst), - "lcall{q}\t{*}$dst", []>; - } - - // FIXME: We need to teach codegen about single list of call-clobbered - // registers. -let isCall = 1, isCodeGenOnly = 1 in - // All calls clobber the non-callee saved registers. RSP is marked as - // a use to prevent stack-pointer assignments that appear immediately - // before calls from potentially appearing dead. Uses for argument - // registers are added manually. - let Defs = [RAX, RCX, RDX, R8, R9, R10, R11, - FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1, - MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, - XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS], - Uses = [RSP] in { - def WINCALL64pcrel32 : Ii32PCRel<0xE8, RawFrm, - (outs), (ins i64i32imm_pcrel:$dst, variable_ops), - "call\t$dst", []>, - Requires<[IsWin64]>; - def WINCALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops), - "call\t{*}$dst", - [(X86call GR64:$dst)]>, Requires<[IsWin64]>; - def WINCALL64m : I<0xFF, MRM2m, (outs), - (ins i64mem:$dst, variable_ops), "call\t{*}$dst", - [(X86call (loadi64 addr:$dst))]>, - Requires<[IsWin64]>; - } - - -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, - isCodeGenOnly = 1 in - let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, - FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1, - MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, - XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], - Uses = [RSP] in { - def TCRETURNdi64 : I<0, Pseudo, (outs), - (ins i64i32imm_pcrel:$dst, i32imm:$offset, variable_ops), - "#TC_RETURN $dst $offset", []>; - def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64_TC:$dst, i32imm:$offset, - variable_ops), - "#TC_RETURN $dst $offset", []>; - let mayLoad = 1 in - def TCRETURNmi64 : I<0, Pseudo, (outs), - (ins i64mem_TC:$dst, i32imm:$offset, variable_ops), - "#TC_RETURN $dst $offset", []>; - - def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs), - (ins i64i32imm_pcrel:$dst, variable_ops), - "jmp\t$dst # TAILCALL", []>; - def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64_TC:$dst, variable_ops), - "jmp{q}\t{*}$dst # TAILCALL", []>; - - let mayLoad = 1 in - def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops), - "jmp{q}\t{*}$dst # TAILCALL", []>; -} - -// Branches -let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { - def JMP64pcrel32 : I<0xE9, RawFrm, (outs), (ins brtarget:$dst), - "jmp{q}\t$dst", []>; - def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst", - [(brind GR64:$dst)]>, Requires<[In64BitMode]>; - def JMP64m : I<0xFF, MRM4m, 
(outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst", - [(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>; - def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst), - "ljmp{q}\t{*}$dst", []>; -} - -//===----------------------------------------------------------------------===// -// EH Pseudo Instructions -// -let isTerminator = 1, isReturn = 1, isBarrier = 1, - hasCtrlDep = 1, isCodeGenOnly = 1 in { -def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr), - "ret\t#eh_return, addr: $addr", - [(X86ehret GR64:$addr)]>; - -} - -//===----------------------------------------------------------------------===// -// Miscellaneous Instructions... -// - -def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), - "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS; -let mayLoad = 1 in -def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), - "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS; - -let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in -def LEAVE64 : I<0xC9, RawFrm, - (outs), (ins), "leave", []>, Requires<[In64BitMode]>; -let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in { -let mayLoad = 1 in { -def POP64r : I<0x58, AddRegFrm, - (outs GR64:$reg), (ins), "pop{q}\t$reg", []>; -def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>; -def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", []>; -} -let mayStore = 1 in { -def PUSH64r : I<0x50, AddRegFrm, - (outs), (ins GR64:$reg), "push{q}\t$reg", []>; -def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>; -def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>; -} -} - -let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in { -def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm), - "push{q}\t$imm", []>; -def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), - "push{q}\t$imm", []>; -def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm), - "push{q}\t$imm", []>; -} - -let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in -def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", []>, - Requires<[In64BitMode]>; -let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in -def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", []>, - Requires<[In64BitMode]>; - -def LEA64_32r : I<0x8D, MRMSrcMem, - (outs GR32:$dst), (ins lea64_32mem:$src), - "lea{l}\t{$src|$dst}, {$dst|$src}", - [(set GR32:$dst, lea32addr:$src)]>, Requires<[In64BitMode]>; - -let isReMaterializable = 1 in -def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), - "lea{q}\t{$src|$dst}, {$dst|$src}", - [(set GR64:$dst, lea64addr:$src)]>; - -let Constraints = "$src = $dst" in -def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src), - "bswap{q}\t$dst", - [(set GR64:$dst, (bswap GR64:$src))]>, TB; - -// Bit scan instructions. 
-let Defs = [EFLAGS] in { -def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), - "bsf{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))]>, TB; -def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), - "bsf{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))]>, TB; - -def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), - "bsr{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))]>, TB; -def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), - "bsr{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))]>, TB; -} // Defs = [EFLAGS] - -// Repeat string ops -let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in -def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}", - [(X86rep_movs i64)]>, REP; -let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in -def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}", - [(X86rep_stos i64)]>, REP; - -let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in -def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", []>; - -let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in -def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", []>; - -def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", []>; - -def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>; - -// Fast system-call instructions -def SYSEXIT64 : RI<0x35, RawFrm, - (outs), (ins), "sysexit", []>, TB, Requires<[In64BitMode]>; - -//===----------------------------------------------------------------------===// -// Move Instructions... -// - -let neverHasSideEffects = 1 in -def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>; - -let isReMaterializable = 1, isAsCheapAsAMove = 1 in { -def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src), - "movabs{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, imm:$src)]>; -def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src), - "mov{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, i64immSExt32:$src)]>; -} - -// The assembler accepts movq of a 64-bit immediate as an alternate spelling of -// movabsq. -let isAsmParserOnly = 1 in { -def MOV64ri_alt : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>; -} - -let isCodeGenOnly = 1 in { -def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>; -} - -let canFoldAsLoad = 1, isReMaterializable = 1 in -def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), - "mov{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (load addr:$src))]>; - -def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), - "mov{q}\t{$src, $dst|$dst, $src}", - [(store GR64:$src, addr:$dst)]>; -def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src), - "mov{q}\t{$src, $dst|$dst, $src}", - [(store i64immSExt32:$src, addr:$dst)]>; - -/// Versions of MOV64rr, MOV64rm, and MOV64mr for i64mem_TC and GR64_TC. 
-let isCodeGenOnly = 1 in { -let neverHasSideEffects = 1 in -def MOV64rr_TC : RI<0x89, MRMDestReg, (outs GR64_TC:$dst), (ins GR64_TC:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>; - -let mayLoad = 1, - canFoldAsLoad = 1, isReMaterializable = 1 in -def MOV64rm_TC : RI<0x8B, MRMSrcMem, (outs GR64_TC:$dst), (ins i64mem_TC:$src), - "mov{q}\t{$src, $dst|$dst, $src}", - []>; - -let mayStore = 1 in -def MOV64mr_TC : RI<0x89, MRMDestMem, (outs), (ins i64mem_TC:$dst, GR64_TC:$src), - "mov{q}\t{$src, $dst|$dst, $src}", - []>; -} - -// FIXME: These definitions are utterly broken -// Just leave them commented out for now because they're useless outside -// of the large code model, and most compilers won't generate the instructions -// in question. -/* -def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins offset8:$src), - "mov{q}\t{$src, %rax|%rax, $src}", []>; -def MOV64o64a : RIi32<0xA1, RawFrm, (outs), (ins offset64:$src), - "mov{q}\t{$src, %rax|%rax, $src}", []>; -def MOV64ao8 : RIi8<0xA2, RawFrm, (outs offset8:$dst), (ins), - "mov{q}\t{%rax, $dst|$dst, %rax}", []>; -def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins), - "mov{q}\t{%rax, $dst|$dst, %rax}", []>; -*/ - -// Moves to and from segment registers -def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>; -def MOV64ms : RI<0x8C, MRMDestMem, (outs i64mem:$dst), (ins SEGMENT_REG:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>; -def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>; -def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>; - -// Moves to and from debug registers -def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; -def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; - -// Moves to and from control registers -def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; -def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; - -// Sign/Zero extenders - -// MOVSX64rr8 always has a REX prefix and it has an 8-bit register -// operand, which makes it a rare instruction with an 8-bit register -// operand that can never access an h register. If support for h registers -// were generalized, this would require a special register class. 
-def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src), - "movs{bq|x}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sext GR8:$src))]>, TB; -def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src), - "movs{bq|x}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sextloadi64i8 addr:$src))]>, TB; -def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), - "movs{wq|x}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sext GR16:$src))]>, TB; -def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), - "movs{wq|x}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sextloadi64i16 addr:$src))]>, TB; -def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src), - "movs{lq|xd}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sext GR32:$src))]>; -def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), - "movs{lq|xd}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sextloadi64i32 addr:$src))]>; - -// movzbq and movzwq encodings for the disassembler -def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src), - "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB; -def MOVZX64rm8_Q : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src), - "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB; -def MOVZX64rr16_Q : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), - "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB; -def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), - "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB; - -// Use movzbl instead of movzbq when the destination is a register; it's -// equivalent due to implicit zero-extending, and it has a smaller encoding. -def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src), - "", [(set GR64:$dst, (zext GR8:$src))]>, TB; -def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src), - "", [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB; -// Use movzwl instead of movzwq when the destination is a register; it's -// equivalent due to implicit zero-extending, and it has a smaller encoding. -def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), - "", [(set GR64:$dst, (zext GR16:$src))]>, TB; -def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), - "", [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB; - -// There's no movzlq instruction, but movl can be used for this purpose, using -// implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero -// extension on x86-64 is to use a SUBREG_TO_REG to utilize implicit -// zero-extension, however this isn't possible when the 32-bit value is -// defined by a truncate or is copied from something where the high bits aren't -// necessarily all zero. In such cases, we fall back to these explicit zext -// instructions. -def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src), - "", [(set GR64:$dst, (zext GR32:$src))]>; -def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), - "", [(set GR64:$dst, (zextloadi64i32 addr:$src))]>; - -// Any instruction that defines a 32-bit result leaves the high half of the -// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may -// be copying from a truncate. And x86's cmov doesn't do anything if the -// condition is false. But any other 32-bit operation will zero-extend -// up to 64 bits. 
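The def32 pattern that follows relies on the architectural rule restated in the comment above: a 32-bit operation writes zeros into the upper half of the destination 64-bit register. A rough standalone C++ illustration of why the zero extension is then free (illustrative only, not from this patch):

#include <cstdint>

// The 32-bit add compiles to a 32-bit instruction whose result implicitly
// zero-extends into the full 64-bit register, so returning it as uint64_t
// needs no extra instruction; this is what the SUBREG_TO_REG pattern models.
uint64_t add_then_zext(uint32_t a, uint32_t b) {
  uint32_t lo = a + b;
  return lo;
}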
-def def32 : PatLeaf<(i32 GR32:$src), [{ - return N->getOpcode() != ISD::TRUNCATE && - N->getOpcode() != TargetOpcode::EXTRACT_SUBREG && - N->getOpcode() != ISD::CopyFromReg && - N->getOpcode() != X86ISD::CMOV; -}]>; - -// In the case of a 32-bit def that is known to implicitly zero-extend, -// we can use a SUBREG_TO_REG. -def : Pat<(i64 (zext def32:$src)), - (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>; - -let neverHasSideEffects = 1 in { - let Defs = [RAX], Uses = [EAX] in - def CDQE : RI<0x98, RawFrm, (outs), (ins), - "{cltq|cdqe}", []>; // RAX = signext(EAX) - - let Defs = [RAX,RDX], Uses = [RAX] in - def CQO : RI<0x99, RawFrm, (outs), (ins), - "{cqto|cqo}", []>; // RDX:RAX = signext(RAX) -} - -//===----------------------------------------------------------------------===// -// Arithmetic Instructions... -// - -let Defs = [EFLAGS] in { - -def ADD64i32 : RIi32<0x05, RawFrm, (outs), (ins i64i32imm:$src), - "add{q}\t{$src, %rax|%rax, $src}", []>; - -let Constraints = "$src1 = $dst" in { -let isConvertibleToThreeAddress = 1 in { -let isCommutable = 1 in -// Register-Register Addition -def ADD64rr : RI<0x01, MRMDestReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - "add{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86add_flag GR64:$src1, GR64:$src2))]>; - -// These are alternate spellings for use by the disassembler, we mark them as -// code gen only to ensure they aren't matched by the assembler. -let isCodeGenOnly = 1 in { - def ADD64rr_alt : RI<0x03, MRMSrcReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - "add{l}\t{$src2, $dst|$dst, $src2}", []>; -} - -// Register-Integer Addition -def ADD64ri8 : RIi8<0x83, MRM0r, (outs GR64:$dst), - (ins GR64:$src1, i64i8imm:$src2), - "add{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86add_flag GR64:$src1, i64immSExt8:$src2))]>; -def ADD64ri32 : RIi32<0x81, MRM0r, (outs GR64:$dst), - (ins GR64:$src1, i64i32imm:$src2), - "add{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86add_flag GR64:$src1, i64immSExt32:$src2))]>; -} // isConvertibleToThreeAddress - -// Register-Memory Addition -def ADD64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), - (ins GR64:$src1, i64mem:$src2), - "add{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86add_flag GR64:$src1, (load addr:$src2)))]>; - -} // Constraints = "$src1 = $dst" - -// Memory-Register Addition -def ADD64mr : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), - "add{q}\t{$src2, $dst|$dst, $src2}", - [(store (add (load addr:$dst), GR64:$src2), addr:$dst), - (implicit EFLAGS)]>; -def ADD64mi8 : RIi8<0x83, MRM0m, (outs), (ins i64mem:$dst, i64i8imm :$src2), - "add{q}\t{$src2, $dst|$dst, $src2}", - [(store (add (load addr:$dst), i64immSExt8:$src2), addr:$dst), - (implicit EFLAGS)]>; -def ADD64mi32 : RIi32<0x81, MRM0m, (outs), (ins i64mem:$dst, i64i32imm :$src2), - "add{q}\t{$src2, $dst|$dst, $src2}", - [(store (add (load addr:$dst), i64immSExt32:$src2), addr:$dst), - (implicit EFLAGS)]>; - -let Uses = [EFLAGS] in { - -def ADC64i32 : RIi32<0x15, RawFrm, (outs), (ins i64i32imm:$src), - "adc{q}\t{$src, %rax|%rax, $src}", []>; - -let Constraints = "$src1 = $dst" in { -let isCommutable = 1 in -def ADC64rr : RI<0x11, MRMDestReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - "adc{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (adde GR64:$src1, GR64:$src2))]>; - -let isCodeGenOnly = 1 in { -def ADC64rr_REV : RI<0x13, MRMSrcReg , (outs GR32:$dst), - (ins GR64:$src1, GR64:$src2), - "adc{q}\t{$src2, $dst|$dst, $src2}", []>; -} - -def 
ADC64rm : RI<0x13, MRMSrcMem , (outs GR64:$dst), - (ins GR64:$src1, i64mem:$src2), - "adc{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (adde GR64:$src1, (load addr:$src2)))]>; - -def ADC64ri8 : RIi8<0x83, MRM2r, (outs GR64:$dst), - (ins GR64:$src1, i64i8imm:$src2), - "adc{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (adde GR64:$src1, i64immSExt8:$src2))]>; -def ADC64ri32 : RIi32<0x81, MRM2r, (outs GR64:$dst), - (ins GR64:$src1, i64i32imm:$src2), - "adc{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (adde GR64:$src1, i64immSExt32:$src2))]>; -} // Constraints = "$src1 = $dst" - -def ADC64mr : RI<0x11, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), - "adc{q}\t{$src2, $dst|$dst, $src2}", - [(store (adde (load addr:$dst), GR64:$src2), addr:$dst)]>; -def ADC64mi8 : RIi8<0x83, MRM2m, (outs), (ins i64mem:$dst, i64i8imm :$src2), - "adc{q}\t{$src2, $dst|$dst, $src2}", - [(store (adde (load addr:$dst), i64immSExt8:$src2), - addr:$dst)]>; -def ADC64mi32 : RIi32<0x81, MRM2m, (outs), (ins i64mem:$dst, i64i32imm:$src2), - "adc{q}\t{$src2, $dst|$dst, $src2}", - [(store (adde (load addr:$dst), i64immSExt32:$src2), - addr:$dst)]>; -} // Uses = [EFLAGS] - -let Constraints = "$src1 = $dst" in { -// Register-Register Subtraction -def SUB64rr : RI<0x29, MRMDestReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - "sub{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86sub_flag GR64:$src1, GR64:$src2))]>; - -let isCodeGenOnly = 1 in { -def SUB64rr_REV : RI<0x2B, MRMSrcReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - "sub{q}\t{$src2, $dst|$dst, $src2}", []>; -} - -// Register-Memory Subtraction -def SUB64rm : RI<0x2B, MRMSrcMem, (outs GR64:$dst), - (ins GR64:$src1, i64mem:$src2), - "sub{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86sub_flag GR64:$src1, (load addr:$src2)))]>; - -// Register-Integer Subtraction -def SUB64ri8 : RIi8<0x83, MRM5r, (outs GR64:$dst), - (ins GR64:$src1, i64i8imm:$src2), - "sub{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86sub_flag GR64:$src1, i64immSExt8:$src2))]>; -def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst), - (ins GR64:$src1, i64i32imm:$src2), - "sub{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86sub_flag GR64:$src1, i64immSExt32:$src2))]>; -} // Constraints = "$src1 = $dst" - -def SUB64i32 : RIi32<0x2D, RawFrm, (outs), (ins i64i32imm:$src), - "sub{q}\t{$src, %rax|%rax, $src}", []>; - -// Memory-Register Subtraction -def SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), - "sub{q}\t{$src2, $dst|$dst, $src2}", - [(store (sub (load addr:$dst), GR64:$src2), addr:$dst), - (implicit EFLAGS)]>; - -// Memory-Integer Subtraction -def SUB64mi8 : RIi8<0x83, MRM5m, (outs), (ins i64mem:$dst, i64i8imm :$src2), - "sub{q}\t{$src2, $dst|$dst, $src2}", - [(store (sub (load addr:$dst), i64immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)]>; -def SUB64mi32 : RIi32<0x81, MRM5m, (outs), (ins i64mem:$dst, i64i32imm:$src2), - "sub{q}\t{$src2, $dst|$dst, $src2}", - [(store (sub (load addr:$dst), i64immSExt32:$src2), - addr:$dst), - (implicit EFLAGS)]>; - -let Uses = [EFLAGS] in { -let Constraints = "$src1 = $dst" in { -def SBB64rr : RI<0x19, MRMDestReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - "sbb{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (sube GR64:$src1, GR64:$src2))]>; - -let isCodeGenOnly = 1 in { -def SBB64rr_REV : RI<0x1B, MRMSrcReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - "sbb{q}\t{$src2, $dst|$dst, $src2}", []>; -} - -def SBB64rm : RI<0x1B, 
MRMSrcMem, (outs GR64:$dst), - (ins GR64:$src1, i64mem:$src2), - "sbb{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (sube GR64:$src1, (load addr:$src2)))]>; - -def SBB64ri8 : RIi8<0x83, MRM3r, (outs GR64:$dst), - (ins GR64:$src1, i64i8imm:$src2), - "sbb{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (sube GR64:$src1, i64immSExt8:$src2))]>; -def SBB64ri32 : RIi32<0x81, MRM3r, (outs GR64:$dst), - (ins GR64:$src1, i64i32imm:$src2), - "sbb{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>; -} // Constraints = "$src1 = $dst" - -def SBB64i32 : RIi32<0x1D, RawFrm, (outs), (ins i64i32imm:$src), - "sbb{q}\t{$src, %rax|%rax, $src}", []>; - -def SBB64mr : RI<0x19, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), - "sbb{q}\t{$src2, $dst|$dst, $src2}", - [(store (sube (load addr:$dst), GR64:$src2), addr:$dst)]>; -def SBB64mi8 : RIi8<0x83, MRM3m, (outs), (ins i64mem:$dst, i64i8imm :$src2), - "sbb{q}\t{$src2, $dst|$dst, $src2}", - [(store (sube (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>; -def SBB64mi32 : RIi32<0x81, MRM3m, (outs), (ins i64mem:$dst, i64i32imm:$src2), - "sbb{q}\t{$src2, $dst|$dst, $src2}", - [(store (sube (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>; -} // Uses = [EFLAGS] -} // Defs = [EFLAGS] - -// Unsigned multiplication -let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in { -def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src), - "mul{q}\t$src", []>; // RAX,RDX = RAX*GR64 -let mayLoad = 1 in -def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src), - "mul{q}\t$src", []>; // RAX,RDX = RAX*[mem64] - -// Signed multiplication -def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), - "imul{q}\t$src", []>; // RAX,RDX = RAX*GR64 -let mayLoad = 1 in -def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src), - "imul{q}\t$src", []>; // RAX,RDX = RAX*[mem64] -} - -let Defs = [EFLAGS] in { -let Constraints = "$src1 = $dst" in { -let isCommutable = 1 in -// Register-Register Signed Integer Multiplication -def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - "imul{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86smul_flag GR64:$src1, GR64:$src2))]>, TB; - -// Register-Memory Signed Integer Multiplication -def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst), - (ins GR64:$src1, i64mem:$src2), - "imul{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86smul_flag GR64:$src1, (load addr:$src2)))]>, TB; -} // Constraints = "$src1 = $dst" - -// Suprisingly enough, these are not two address instructions! 
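The note above refers to the immediate forms defined next: unlike most x86 arithmetic, IMUL with an immediate has a destination distinct from its source, computing dst = src * imm rather than a read-modify-write. A rough standalone C++ equivalent of the sign-extended-immediate form (illustrative only, not from this patch):

#include <cstdint>

// imulq $imm, %src, %dst: the 32-bit immediate is sign-extended to 64 bits
// and multiplied by the source; only the low 64 bits land in the destination.
uint64_t imul_rri(uint64_t src, int32_t imm) {
  return src * uint64_t(int64_t(imm));
}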
- -// Register-Integer Signed Integer Multiplication -def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8 - (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), - "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR64:$dst, EFLAGS, - (X86smul_flag GR64:$src1, i64immSExt8:$src2))]>; -def IMUL64rri32 : RIi32<0x69, MRMSrcReg, // GR64 = GR64*I32 - (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), - "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR64:$dst, EFLAGS, - (X86smul_flag GR64:$src1, i64immSExt32:$src2))]>; - -// Memory-Integer Signed Integer Multiplication -def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8 - (outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2), - "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR64:$dst, EFLAGS, - (X86smul_flag (load addr:$src1), - i64immSExt8:$src2))]>; -def IMUL64rmi32 : RIi32<0x69, MRMSrcMem, // GR64 = [mem64]*I32 - (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2), - "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR64:$dst, EFLAGS, - (X86smul_flag (load addr:$src1), - i64immSExt32:$src2))]>; -} // Defs = [EFLAGS] - -// Unsigned division / remainder -let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in { -// RDX:RAX/r64 = RAX,RDX -def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src), - "div{q}\t$src", []>; -// Signed division / remainder -// RDX:RAX/r64 = RAX,RDX -def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src), - "idiv{q}\t$src", []>; -let mayLoad = 1 in { -// RDX:RAX/[mem64] = RAX,RDX -def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src), - "div{q}\t$src", []>; -// RDX:RAX/[mem64] = RAX,RDX -def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src), - "idiv{q}\t$src", []>; -} -} - -// Unary instructions -let Defs = [EFLAGS], CodeSize = 2 in { -let Constraints = "$src = $dst" in -def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src), "neg{q}\t$dst", - [(set GR64:$dst, (ineg GR64:$src)), - (implicit EFLAGS)]>; -def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst", - [(store (ineg (loadi64 addr:$dst)), addr:$dst), - (implicit EFLAGS)]>; - -let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in -def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src), "inc{q}\t$dst", - [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src))]>; -def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst", - [(store (add (loadi64 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)]>; - -let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in -def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src), "dec{q}\t$dst", - [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src))]>; -def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", - [(store (add (loadi64 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)]>; - -// In 64-bit mode, single byte INC and DEC cannot be encoded. -let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in { -// Can transform into LEA. 
-def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src), - "inc{w}\t$dst", - [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src))]>, - OpSize, Requires<[In64BitMode]>; -def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src), - "inc{l}\t$dst", - [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src))]>, - Requires<[In64BitMode]>; -def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src), - "dec{w}\t$dst", - [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src))]>, - OpSize, Requires<[In64BitMode]>; -def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src), - "dec{l}\t$dst", - [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src))]>, - Requires<[In64BitMode]>; -} // Constraints = "$src = $dst", isConvertibleToThreeAddress - -// These are duplicates of their 32-bit counterparts. Only needed so X86 knows -// how to unfold them. -def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst", - [(store (add (loadi16 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)]>, - OpSize, Requires<[In64BitMode]>; -def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst", - [(store (add (loadi32 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)]>, - Requires<[In64BitMode]>; -def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst", - [(store (add (loadi16 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)]>, - OpSize, Requires<[In64BitMode]>; -def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", - [(store (add (loadi32 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)]>, - Requires<[In64BitMode]>; -} // Defs = [EFLAGS], CodeSize - - -let Defs = [EFLAGS] in { -// Shift instructions -let Constraints = "$src1 = $dst" in { -let Uses = [CL] in -def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1), - "shl{q}\t{%cl, $dst|$dst, %CL}", - [(set GR64:$dst, (shl GR64:$src1, CL))]>; -let isConvertibleToThreeAddress = 1 in // Can transform into LEA. -def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), - (ins GR64:$src1, i8imm:$src2), - "shl{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>; -// NOTE: We don't include patterns for shifts of a register by one, because -// 'add reg,reg' is cheaper. 
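The note just above explains why no patterns are given for register shifts by one: a left shift by one computes the same result as adding the value to itself, and 'add reg,reg' is the cheaper form. A rough standalone C++ restatement (illustrative only, not from this patch):

#include <cstdint>

// x << 1 and x + x compute the same value, so the backend prefers
// 'add reg,reg' over 'shl reg,1' when the shift amount is exactly one.
uint64_t shift_left_by_one(uint64_t x) {
  return x + x;
}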
-def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1), - "shl{q}\t$dst", []>; -} // Constraints = "$src1 = $dst" - -let Uses = [CL] in -def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst), - "shl{q}\t{%cl, $dst|$dst, %CL}", - [(store (shl (loadi64 addr:$dst), CL), addr:$dst)]>; -def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, i8imm:$src), - "shl{q}\t{$src, $dst|$dst, $src}", - [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; -def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst), - "shl{q}\t$dst", - [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; - -let Constraints = "$src1 = $dst" in { -let Uses = [CL] in -def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src1), - "shr{q}\t{%cl, $dst|$dst, %CL}", - [(set GR64:$dst, (srl GR64:$src1, CL))]>; -def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), - "shr{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))]>; -def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1), - "shr{q}\t$dst", - [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>; -} // Constraints = "$src1 = $dst" - -let Uses = [CL] in -def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst), - "shr{q}\t{%cl, $dst|$dst, %CL}", - [(store (srl (loadi64 addr:$dst), CL), addr:$dst)]>; -def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, i8imm:$src), - "shr{q}\t{$src, $dst|$dst, $src}", - [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; -def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst), - "shr{q}\t$dst", - [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; - -let Constraints = "$src1 = $dst" in { -let Uses = [CL] in -def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src1), - "sar{q}\t{%cl, $dst|$dst, %CL}", - [(set GR64:$dst, (sra GR64:$src1, CL))]>; -def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst), - (ins GR64:$src1, i8imm:$src2), - "sar{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))]>; -def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1), - "sar{q}\t$dst", - [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>; -} // Constraints = "$src = $dst" - -let Uses = [CL] in -def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst), - "sar{q}\t{%cl, $dst|$dst, %CL}", - [(store (sra (loadi64 addr:$dst), CL), addr:$dst)]>; -def SAR64mi : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, i8imm:$src), - "sar{q}\t{$src, $dst|$dst, $src}", - [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; -def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst), - "sar{q}\t$dst", - [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; - -// Rotate instructions - -let Constraints = "$src = $dst" in { -def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src), - "rcl{q}\t{1, $dst|$dst, 1}", []>; -def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), - "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; - -def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src), - "rcr{q}\t{1, $dst|$dst, 1}", []>; -def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), - "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; - -let Uses = [CL] in { -def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src), - "rcl{q}\t{%cl, $dst|$dst, CL}", []>; -def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src), - "rcr{q}\t{%cl, $dst|$dst, CL}", []>; -} -} // Constraints = "$src = $dst" - -def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst), - "rcl{q}\t{1, $dst|$dst, 1}", 
[]>; -def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt), - "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst), - "rcr{q}\t{1, $dst|$dst, 1}", []>; -def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, i8imm:$cnt), - "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; - -let Uses = [CL] in { -def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst), - "rcl{q}\t{%cl, $dst|$dst, CL}", []>; -def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst), - "rcr{q}\t{%cl, $dst|$dst, CL}", []>; -} - -let Constraints = "$src1 = $dst" in { -let Uses = [CL] in -def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1), - "rol{q}\t{%cl, $dst|$dst, %CL}", - [(set GR64:$dst, (rotl GR64:$src1, CL))]>; -def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst), - (ins GR64:$src1, i8imm:$src2), - "rol{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))]>; -def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1), - "rol{q}\t$dst", - [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>; -} // Constraints = "$src1 = $dst" - -let Uses = [CL] in -def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst), - "rol{q}\t{%cl, $dst|$dst, %CL}", - [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>; -def ROL64mi : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, i8imm:$src), - "rol{q}\t{$src, $dst|$dst, $src}", - [(store (rotl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; -def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst), - "rol{q}\t$dst", - [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; - -let Constraints = "$src1 = $dst" in { -let Uses = [CL] in -def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1), - "ror{q}\t{%cl, $dst|$dst, %CL}", - [(set GR64:$dst, (rotr GR64:$src1, CL))]>; -def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst), - (ins GR64:$src1, i8imm:$src2), - "ror{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))]>; -def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1), - "ror{q}\t$dst", - [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>; -} // Constraints = "$src1 = $dst" - -let Uses = [CL] in -def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst), - "ror{q}\t{%cl, $dst|$dst, %CL}", - [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)]>; -def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, i8imm:$src), - "ror{q}\t{$src, $dst|$dst, $src}", - [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; -def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst), - "ror{q}\t$dst", - [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; - -// Double shift instructions (generalizations of rotate) -let Constraints = "$src1 = $dst" in { -let Uses = [CL] in { -def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}", - [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))]>, - TB; -def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}", - [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))]>, - TB; -} - -let isCommutable = 1 in { // FIXME: Update X86InstrInfo::commuteInstruction -def SHLD64rri8 : RIi8<0xA4, MRMDestReg, - (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2, i8imm:$src3), - "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, - (i8 imm:$src3)))]>, - TB; -def SHRD64rri8 : RIi8<0xAC, MRMDestReg, - (outs GR64:$dst), - (ins 
GR64:$src1, GR64:$src2, i8imm:$src3), - "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, - (i8 imm:$src3)))]>, - TB; -} // isCommutable -} // Constraints = "$src1 = $dst" - -let Uses = [CL] in { -def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), - "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}", - [(store (X86shld (loadi64 addr:$dst), GR64:$src2, CL), - addr:$dst)]>, TB; -def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), - "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}", - [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, CL), - addr:$dst)]>, TB; -} -def SHLD64mri8 : RIi8<0xA4, MRMDestMem, - (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3), - "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(store (X86shld (loadi64 addr:$dst), GR64:$src2, - (i8 imm:$src3)), addr:$dst)]>, - TB; -def SHRD64mri8 : RIi8<0xAC, MRMDestMem, - (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3), - "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, - (i8 imm:$src3)), addr:$dst)]>, - TB; -} // Defs = [EFLAGS] - -//===----------------------------------------------------------------------===// -// Logical Instructions... -// - -let Constraints = "$src = $dst" , AddedComplexity = 15 in -def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src), "not{q}\t$dst", - [(set GR64:$dst, (not GR64:$src))]>; -def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst", - [(store (not (loadi64 addr:$dst)), addr:$dst)]>; - -let Defs = [EFLAGS] in { -def AND64i32 : RIi32<0x25, RawFrm, (outs), (ins i64i32imm:$src), - "and{q}\t{$src, %rax|%rax, $src}", []>; - -let Constraints = "$src1 = $dst" in { -let isCommutable = 1 in -def AND64rr : RI<0x21, MRMDestReg, - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "and{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86and_flag GR64:$src1, GR64:$src2))]>; -let isCodeGenOnly = 1 in { -def AND64rr_REV : RI<0x23, MRMSrcReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - "and{q}\t{$src2, $dst|$dst, $src2}", []>; -} -def AND64rm : RI<0x23, MRMSrcMem, - (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "and{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86and_flag GR64:$src1, (load addr:$src2)))]>; -def AND64ri8 : RIi8<0x83, MRM4r, - (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), - "and{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86and_flag GR64:$src1, i64immSExt8:$src2))]>; -def AND64ri32 : RIi32<0x81, MRM4r, - (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), - "and{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86and_flag GR64:$src1, i64immSExt32:$src2))]>; -} // Constraints = "$src1 = $dst" - -def AND64mr : RI<0x21, MRMDestMem, - (outs), (ins i64mem:$dst, GR64:$src), - "and{q}\t{$src, $dst|$dst, $src}", - [(store (and (load addr:$dst), GR64:$src), addr:$dst), - (implicit EFLAGS)]>; -def AND64mi8 : RIi8<0x83, MRM4m, - (outs), (ins i64mem:$dst, i64i8imm :$src), - "and{q}\t{$src, $dst|$dst, $src}", - [(store (and (load addr:$dst), i64immSExt8:$src), addr:$dst), - (implicit EFLAGS)]>; -def AND64mi32 : RIi32<0x81, MRM4m, - (outs), (ins i64mem:$dst, i64i32imm:$src), - "and{q}\t{$src, $dst|$dst, $src}", - [(store (and (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst), - (implicit EFLAGS)]>; - -let Constraints = "$src1 = $dst" in { -let isCommutable = 1 in -def OR64rr : RI<0x09, MRMDestReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), 
- "or{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86or_flag GR64:$src1, GR64:$src2))]>; -let isCodeGenOnly = 1 in { -def OR64rr_REV : RI<0x0B, MRMSrcReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - "or{q}\t{$src2, $dst|$dst, $src2}", []>; -} -def OR64rm : RI<0x0B, MRMSrcMem , (outs GR64:$dst), - (ins GR64:$src1, i64mem:$src2), - "or{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86or_flag GR64:$src1, (load addr:$src2)))]>; -def OR64ri8 : RIi8<0x83, MRM1r, (outs GR64:$dst), - (ins GR64:$src1, i64i8imm:$src2), - "or{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86or_flag GR64:$src1, i64immSExt8:$src2))]>; -def OR64ri32 : RIi32<0x81, MRM1r, (outs GR64:$dst), - (ins GR64:$src1, i64i32imm:$src2), - "or{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86or_flag GR64:$src1, i64immSExt32:$src2))]>; -} // Constraints = "$src1 = $dst" - -def OR64mr : RI<0x09, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), - "or{q}\t{$src, $dst|$dst, $src}", - [(store (or (load addr:$dst), GR64:$src), addr:$dst), - (implicit EFLAGS)]>; -def OR64mi8 : RIi8<0x83, MRM1m, (outs), (ins i64mem:$dst, i64i8imm:$src), - "or{q}\t{$src, $dst|$dst, $src}", - [(store (or (load addr:$dst), i64immSExt8:$src), addr:$dst), - (implicit EFLAGS)]>; -def OR64mi32 : RIi32<0x81, MRM1m, (outs), (ins i64mem:$dst, i64i32imm:$src), - "or{q}\t{$src, $dst|$dst, $src}", - [(store (or (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst), - (implicit EFLAGS)]>; - -def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i64i32imm:$src), - "or{q}\t{$src, %rax|%rax, $src}", []>; - -let Constraints = "$src1 = $dst" in { -let isCommutable = 1 in -def XOR64rr : RI<0x31, MRMDestReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - "xor{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86xor_flag GR64:$src1, GR64:$src2))]>; -let isCodeGenOnly = 1 in { -def XOR64rr_REV : RI<0x33, MRMSrcReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - "xor{q}\t{$src2, $dst|$dst, $src2}", []>; -} -def XOR64rm : RI<0x33, MRMSrcMem, (outs GR64:$dst), - (ins GR64:$src1, i64mem:$src2), - "xor{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86xor_flag GR64:$src1, (load addr:$src2)))]>; -def XOR64ri8 : RIi8<0x83, MRM6r, (outs GR64:$dst), - (ins GR64:$src1, i64i8imm:$src2), - "xor{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86xor_flag GR64:$src1, i64immSExt8:$src2))]>; -def XOR64ri32 : RIi32<0x81, MRM6r, - (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), - "xor{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86xor_flag GR64:$src1, i64immSExt32:$src2))]>; -} // Constraints = "$src1 = $dst" - -def XOR64mr : RI<0x31, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), - "xor{q}\t{$src, $dst|$dst, $src}", - [(store (xor (load addr:$dst), GR64:$src), addr:$dst), - (implicit EFLAGS)]>; -def XOR64mi8 : RIi8<0x83, MRM6m, (outs), (ins i64mem:$dst, i64i8imm :$src), - "xor{q}\t{$src, $dst|$dst, $src}", - [(store (xor (load addr:$dst), i64immSExt8:$src), addr:$dst), - (implicit EFLAGS)]>; -def XOR64mi32 : RIi32<0x81, MRM6m, (outs), (ins i64mem:$dst, i64i32imm:$src), - "xor{q}\t{$src, $dst|$dst, $src}", - [(store (xor (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst), - (implicit EFLAGS)]>; - -def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i64i32imm:$src), - "xor{q}\t{$src, %rax|%rax, $src}", []>; - -} // Defs = [EFLAGS] - -//===----------------------------------------------------------------------===// -// Comparison Instructions... 
-// - -// Integer comparison -let Defs = [EFLAGS] in { -def TEST64i32 : RIi32<0xa9, RawFrm, (outs), (ins i64i32imm:$src), - "test{q}\t{$src, %rax|%rax, $src}", []>; -let isCommutable = 1 in -def TEST64rr : RI<0x85, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2), - "test{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (and GR64:$src1, GR64:$src2), 0))]>; -def TEST64rm : RI<0x85, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2), - "test{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (and GR64:$src1, (loadi64 addr:$src2)), - 0))]>; -def TEST64ri32 : RIi32<0xF7, MRM0r, (outs), - (ins GR64:$src1, i64i32imm:$src2), - "test{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (and GR64:$src1, i64immSExt32:$src2), - 0))]>; -def TEST64mi32 : RIi32<0xF7, MRM0m, (outs), - (ins i64mem:$src1, i64i32imm:$src2), - "test{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (and (loadi64 addr:$src1), - i64immSExt32:$src2), 0))]>; - - -def CMP64i32 : RIi32<0x3D, RawFrm, (outs), (ins i64i32imm:$src), - "cmp{q}\t{$src, %rax|%rax, $src}", []>; -def CMP64rr : RI<0x39, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), - "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp GR64:$src1, GR64:$src2))]>; - -// These are alternate spellings for use by the disassembler, we mark them as -// code gen only to ensure they aren't matched by the assembler. -let isCodeGenOnly = 1 in { - def CMP64mrmrr : RI<0x3B, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2), - "cmp{q}\t{$src2, $src1|$src1, $src2}", []>; -} - -def CMP64mr : RI<0x39, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), - "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (loadi64 addr:$src1), GR64:$src2))]>; -def CMP64rm : RI<0x3B, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2), - "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp GR64:$src1, (loadi64 addr:$src2)))]>; -def CMP64ri8 : RIi8<0x83, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2), - "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp GR64:$src1, i64immSExt8:$src2))]>; -def CMP64ri32 : RIi32<0x81, MRM7r, (outs), (ins GR64:$src1, i64i32imm:$src2), - "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp GR64:$src1, i64immSExt32:$src2))]>; -def CMP64mi8 : RIi8<0x83, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2), - "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (loadi64 addr:$src1), - i64immSExt8:$src2))]>; -def CMP64mi32 : RIi32<0x81, MRM7m, (outs), - (ins i64mem:$src1, i64i32imm:$src2), - "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (loadi64 addr:$src1), - i64immSExt32:$src2))]>; -} // Defs = [EFLAGS] - -// Bit tests. -// TODO: BTC, BTR, and BTS -let Defs = [EFLAGS] in { -def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), - "bt{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))]>, TB; - -// Unlike with the register+register form, the memory+register form of the -// bt instruction does not ignore the high bits of the index. From ISel's -// perspective, this is pretty bizarre. Disable these instructions for now. 
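A minimal C sketch of the mismatch described in the comment above, assuming the usual reading of the (X86bt GR64, GR64) node: the register form of bt masks the bit index to the operand width, while the memory form indexes an unbounded bit string starting at the operand's address, so it can touch bytes outside the quadword a loadi64 pattern would have loaded. The helper names are made up for illustration.

    #include <stdint.h>

    /* What the (X86bt GR64:$src1, GR64:$src2) pattern above expresses:
     * test bit (idx mod 64) of the value. */
    static int bt_reg_form(uint64_t val, uint64_t idx) {
        return (int)((val >> (idx & 63)) & 1);
    }

    /* What "bt %reg, (mem)" actually does: index a bit string starting at
     * base, reading the byte at base + (idx >> 3), which need not lie inside
     * the eight bytes the folded load would have touched. */
    static int bt_mem_form(const uint8_t *base, uint64_t idx) {
        return (base[idx >> 3] >> (idx & 7)) & 1;
    }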
-def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), - "bt{q}\t{$src2, $src1|$src1, $src2}", -// [(X86bt (loadi64 addr:$src1), GR64:$src2), -// (implicit EFLAGS)] - [] - >, TB; - -def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2), - "bt{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB; -// Note that these instructions don't need FastBTMem because that -// only applies when the other operand is in a register. When it's -// an immediate, bt is still fast. -def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2), - "bt{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86bt (loadi64 addr:$src1), - i64immSExt8:$src2))]>, TB; - -def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), - "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB; -def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), - "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB; -def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2), - "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB; -def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2), - "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB; - -def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), - "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB; -def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), - "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB; -def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2), - "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB; -def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2), - "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB; - -def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), - "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB; -def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), - "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB; -def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2), - "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB; -def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2), - "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB; -} // Defs = [EFLAGS] - -// Conditional moves -let Uses = [EFLAGS], Constraints = "$src1 = $dst" in { -let isCommutable = 1 in { -def CMOVB64rr : RI<0x42, MRMSrcReg, // if <u, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovb{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_B, EFLAGS))]>, TB; -def CMOVAE64rr: RI<0x43, MRMSrcReg, // if >=u, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovae{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_AE, EFLAGS))]>, TB; -def CMOVE64rr : RI<0x44, MRMSrcReg, // if ==, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmove{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_E, EFLAGS))]>, TB; -def CMOVNE64rr: RI<0x45, MRMSrcReg, // if !=, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovne{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_NE, EFLAGS))]>, TB; -def CMOVBE64rr: RI<0x46, MRMSrcReg, // if <=u, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovbe{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_BE, EFLAGS))]>, TB; 
-def CMOVA64rr : RI<0x47, MRMSrcReg, // if >u, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmova{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_A, EFLAGS))]>, TB; -def CMOVL64rr : RI<0x4C, MRMSrcReg, // if <s, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovl{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_L, EFLAGS))]>, TB; -def CMOVGE64rr: RI<0x4D, MRMSrcReg, // if >=s, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovge{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_GE, EFLAGS))]>, TB; -def CMOVLE64rr: RI<0x4E, MRMSrcReg, // if <=s, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovle{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_LE, EFLAGS))]>, TB; -def CMOVG64rr : RI<0x4F, MRMSrcReg, // if >s, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovg{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_G, EFLAGS))]>, TB; -def CMOVS64rr : RI<0x48, MRMSrcReg, // if signed, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovs{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_S, EFLAGS))]>, TB; -def CMOVNS64rr: RI<0x49, MRMSrcReg, // if !signed, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovns{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_NS, EFLAGS))]>, TB; -def CMOVP64rr : RI<0x4A, MRMSrcReg, // if parity, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovp{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_P, EFLAGS))]>, TB; -def CMOVNP64rr : RI<0x4B, MRMSrcReg, // if !parity, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovnp{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_NP, EFLAGS))]>, TB; -def CMOVO64rr : RI<0x40, MRMSrcReg, // if overflow, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovo{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_O, EFLAGS))]>, TB; -def CMOVNO64rr : RI<0x41, MRMSrcReg, // if !overflow, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovno{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_NO, EFLAGS))]>, TB; -} // isCommutable = 1 - -def CMOVB64rm : RI<0x42, MRMSrcMem, // if <u, GR64 = [mem64] - (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmovb{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), - X86_COND_B, EFLAGS))]>, TB; -def CMOVAE64rm: RI<0x43, MRMSrcMem, // if >=u, GR64 = [mem64] - (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmovae{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), - X86_COND_AE, EFLAGS))]>, TB; -def CMOVE64rm : RI<0x44, MRMSrcMem, // if ==, GR64 = [mem64] - (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmove{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), - X86_COND_E, EFLAGS))]>, TB; -def CMOVNE64rm: RI<0x45, MRMSrcMem, // if !=, GR64 = [mem64] - (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmovne{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov 
GR64:$src1, (loadi64 addr:$src2), - X86_COND_NE, EFLAGS))]>, TB; -def CMOVBE64rm: RI<0x46, MRMSrcMem, // if <=u, GR64 = [mem64] - (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmovbe{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), - X86_COND_BE, EFLAGS))]>, TB; -def CMOVA64rm : RI<0x47, MRMSrcMem, // if >u, GR64 = [mem64] - (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmova{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), - X86_COND_A, EFLAGS))]>, TB; -def CMOVL64rm : RI<0x4C, MRMSrcMem, // if <s, GR64 = [mem64] - (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmovl{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), - X86_COND_L, EFLAGS))]>, TB; -def CMOVGE64rm: RI<0x4D, MRMSrcMem, // if >=s, GR64 = [mem64] - (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmovge{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), - X86_COND_GE, EFLAGS))]>, TB; -def CMOVLE64rm: RI<0x4E, MRMSrcMem, // if <=s, GR64 = [mem64] - (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmovle{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), - X86_COND_LE, EFLAGS))]>, TB; -def CMOVG64rm : RI<0x4F, MRMSrcMem, // if >s, GR64 = [mem64] - (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmovg{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), - X86_COND_G, EFLAGS))]>, TB; -def CMOVS64rm : RI<0x48, MRMSrcMem, // if signed, GR64 = [mem64] - (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmovs{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), - X86_COND_S, EFLAGS))]>, TB; -def CMOVNS64rm: RI<0x49, MRMSrcMem, // if !signed, GR64 = [mem64] - (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmovns{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), - X86_COND_NS, EFLAGS))]>, TB; -def CMOVP64rm : RI<0x4A, MRMSrcMem, // if parity, GR64 = [mem64] - (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmovp{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), - X86_COND_P, EFLAGS))]>, TB; -def CMOVNP64rm : RI<0x4B, MRMSrcMem, // if !parity, GR64 = [mem64] - (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmovnp{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), - X86_COND_NP, EFLAGS))]>, TB; -def CMOVO64rm : RI<0x40, MRMSrcMem, // if overflow, GR64 = [mem64] - (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmovo{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), - X86_COND_O, EFLAGS))]>, TB; -def CMOVNO64rm : RI<0x41, MRMSrcMem, // if !overflow, GR64 = [mem64] - (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmovno{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), - X86_COND_NO, EFLAGS))]>, TB; -} // Constraints = "$src1 = $dst" - -// Use sbb to materialize carry flag into a GPR. -// FIXME: This are pseudo ops that should be replaced with Pat<> patterns. -// However, Pat<> can't replicate the destination reg into the inputs of the -// result. -// FIXME: Change this to have encoding Pseudo when X86MCCodeEmitter replaces -// X86CodeEmitter. 
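A small C sketch of the identity behind SETB_C64r, assuming the pseudo is ultimately emitted as a subtract-with-borrow of a register from itself (an "sbb %reg, %reg" style expansion): the result depends only on the carry flag, not on the register's prior contents.

    #include <assert.h>
    #include <stdint.h>

    /* r - r - CF is 0 when the carry flag is clear and all ones when it is
     * set, regardless of r, so the destination can double as an input
     * without creating a real data dependency. */
    static uint64_t sbb_same_reg(uint64_t r, unsigned cf) {
        return r - r - (uint64_t)cf;
    }

    int main(void) {
        assert(sbb_same_reg(42, 0) == 0);
        assert(sbb_same_reg(42, 1) == ~0ULL);
        return 0;
    }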
-let Defs = [EFLAGS], Uses = [EFLAGS], isCodeGenOnly = 1 in -def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "", - [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; - -def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), - (SETB_C64r)>; - -//===----------------------------------------------------------------------===// -// Descriptor-table support instructions - -// LLDT is not interpreted specially in 64-bit mode because there is no sign -// extension. -def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins), - "sldt{q}\t$dst", []>, TB; -def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins), - "sldt{q}\t$dst", []>, TB; - -//===----------------------------------------------------------------------===// -// Alias Instructions -//===----------------------------------------------------------------------===// - -// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a -// smaller encoding, but doing so at isel time interferes with rematerialization -// in the current register allocator. For now, this is rewritten when the -// instruction is lowered to an MCInst. -// FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove -// when we have a better way to specify isel priority. -let Defs = [EFLAGS], - AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in -def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "", - [(set GR64:$dst, 0)]>; - -// Materialize i64 constant where top 32-bits are zero. This could theoretically -// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however -// that would make it more difficult to rematerialize. -let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in -def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src), - "", [(set GR64:$dst, i64immZExt32:$src)]>; - -//===----------------------------------------------------------------------===// -// Thread Local Storage Instructions -//===----------------------------------------------------------------------===// - -// ELF TLS Support -// All calls clobber the non-callee saved registers. RSP is marked as -// a use to prevent stack-pointer assignments that appear immediately -// before calls from potentially appearing dead. -let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, - FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1, - MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, - XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], - Uses = [RSP] in -def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym), - ".byte\t0x66; " - "leaq\t$sym(%rip), %rdi; " - ".word\t0x6666; " - "rex64; " - "call\t__tls_get_addr@PLT", - [(X86tlsaddr tls64addr:$sym)]>, - Requires<[In64BitMode]>; - -// Darwin TLS Support -// For x86_64, the address of the thunk is passed in %rdi, on return -// the address of the variable is in %rax. All other registers are preserved. 
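For illustration only, a C fragment of the kind of access that gets lowered through the TLSCall_64 pseudo defined below; the calling convention is the one stated in the comment above (descriptor address in %rdi, variable address returned in %rax, all other registers preserved). The variable and function names are made up.

    /* Hypothetical example: taking the address of a __thread variable on
     * Darwin/x86-64 is resolved by calling a per-variable getter; modelling
     * that call as TLSCall_64 lets the register allocator assume only %rdi
     * is read and only %rax is clobbered. */
    static __thread int tls_counter;

    int *tls_counter_addr(void) {
        return &tls_counter;   /* lowered via X86TLSCall -> TLSCall_64 */
    }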
-let Defs = [RAX], - Uses = [RDI], - usesCustomInserter = 1 in -def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym), - "# TLSCall_64", - [(X86TLSCall addr:$sym)]>, - Requires<[In64BitMode]>; - -let AddedComplexity = 5, isCodeGenOnly = 1 in -def MOV64GSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), - "movq\t%gs:$src, $dst", - [(set GR64:$dst, (gsload addr:$src))]>, SegGS; - -let AddedComplexity = 5, isCodeGenOnly = 1 in -def MOV64FSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), - "movq\t%fs:$src, $dst", - [(set GR64:$dst, (fsload addr:$src))]>, SegFS; - -//===----------------------------------------------------------------------===// -// Atomic Instructions -//===----------------------------------------------------------------------===// - -// TODO: Get this to fold the constant into the instruction. -let hasSideEffects = 1, Defs = [ESP] in -def Int_MemBarrierNoSSE64 : RI<0x09, MRM1r, (outs), (ins GR64:$zero), - "lock\n\t" - "or{q}\t{$zero, (%rsp)|(%rsp), $zero}", - [(X86MemBarrierNoSSE GR64:$zero)]>, - Requires<[In64BitMode]>, LOCK; - -let Defs = [RAX, EFLAGS], Uses = [RAX] in { -def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap), - "lock\n\t" - "cmpxchgq\t$swap,$ptr", - [(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK; -} - -let Constraints = "$val = $dst" in { -let Defs = [EFLAGS] in -def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val,i64mem:$ptr), - "lock\n\t" - "xadd\t$val, $ptr", - [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>, - TB, LOCK; - -def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst), - (ins GR64:$val,i64mem:$ptr), - "xchg{q}\t{$val, $ptr|$ptr, $val}", - [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))]>; - -def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src), - "xchg{q}\t{$val, $src|$src, $val}", []>; -} - -def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), - "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB; -let mayLoad = 1, mayStore = 1 in -def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), - "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB; - -def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), - "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB; -let mayLoad = 1, mayStore = 1 in -def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), - "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB; - -let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in -def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst), - "cmpxchg16b\t$dst", []>, TB; - -def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src), - "xchg{q}\t{$src, %rax|%rax, $src}", []>; - -// Optimized codegen when the non-memory output is not used. -let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in { -// FIXME: Use normal add / sub instructions and add lock prefix dynamically. 
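A brief C11 sketch (illustrative only, not part of the diff) of when the LOCK_* forms below are useful: the register result of an atomic read-modify-write is discarded, so a plain lock-prefixed add/sub/inc/dec is enough and no xadd or cmpxchg loop is required.

    #include <stdatomic.h>

    static _Atomic long counter;

    void bump(void) {
        /* Result unused: can be emitted as "lock incq" or "lock addq $1"
         * on the memory operand directly. */
        atomic_fetch_add_explicit(&counter, 1, memory_order_seq_cst);
    }

    long read_and_bump(void) {
        /* Result used: needs LXADD64 (lock xadd) so the old value comes back
         * in a register. */
        return atomic_fetch_add_explicit(&counter, 1, memory_order_seq_cst);
    }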
-def LOCK_ADD64mr : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), - "lock\n\t" - "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; -def LOCK_ADD64mi8 : RIi8<0x83, MRM0m, (outs), - (ins i64mem:$dst, i64i8imm :$src2), - "lock\n\t" - "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; -def LOCK_ADD64mi32 : RIi32<0x81, MRM0m, (outs), - (ins i64mem:$dst, i64i32imm :$src2), - "lock\n\t" - "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; -def LOCK_SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), - "lock\n\t" - "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; -def LOCK_SUB64mi8 : RIi8<0x83, MRM5m, (outs), - (ins i64mem:$dst, i64i8imm :$src2), - "lock\n\t" - "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; -def LOCK_SUB64mi32 : RIi32<0x81, MRM5m, (outs), - (ins i64mem:$dst, i64i32imm:$src2), - "lock\n\t" - "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; -def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), - "lock\n\t" - "inc{q}\t$dst", []>, LOCK; -def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), - "lock\n\t" - "dec{q}\t$dst", []>, LOCK; -} -// Atomic exchange, and, or, xor -let Constraints = "$val = $dst", Defs = [EFLAGS], - usesCustomInserter = 1 in { -def ATOMAND64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), - "#ATOMAND64 PSEUDO!", - [(set GR64:$dst, (atomic_load_and_64 addr:$ptr, GR64:$val))]>; -def ATOMOR64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), - "#ATOMOR64 PSEUDO!", - [(set GR64:$dst, (atomic_load_or_64 addr:$ptr, GR64:$val))]>; -def ATOMXOR64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), - "#ATOMXOR64 PSEUDO!", - [(set GR64:$dst, (atomic_load_xor_64 addr:$ptr, GR64:$val))]>; -def ATOMNAND64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), - "#ATOMNAND64 PSEUDO!", - [(set GR64:$dst, (atomic_load_nand_64 addr:$ptr, GR64:$val))]>; -def ATOMMIN64: I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$ptr, GR64:$val), - "#ATOMMIN64 PSEUDO!", - [(set GR64:$dst, (atomic_load_min_64 addr:$ptr, GR64:$val))]>; -def ATOMMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), - "#ATOMMAX64 PSEUDO!", - [(set GR64:$dst, (atomic_load_max_64 addr:$ptr, GR64:$val))]>; -def ATOMUMIN64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), - "#ATOMUMIN64 PSEUDO!", - [(set GR64:$dst, (atomic_load_umin_64 addr:$ptr, GR64:$val))]>; -def ATOMUMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), - "#ATOMUMAX64 PSEUDO!", - [(set GR64:$dst, (atomic_load_umax_64 addr:$ptr, GR64:$val))]>; -} - -// Segmentation support instructions - -// i16mem operand in LAR64rm and GR32 operand in LAR32rr is not a typo. 
-def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), - "lar{q}\t{$src, $dst|$dst, $src}", []>, TB; -def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src), - "lar{q}\t{$src, $dst|$dst, $src}", []>, TB; - -def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), - "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB; -def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), - "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB; - -def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", []>, TB; - -def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins), - "push{q}\t%fs", []>, TB; -def PUSHGS64 : I<0xa8, RawFrm, (outs), (ins), - "push{q}\t%gs", []>, TB; - -def POPFS64 : I<0xa1, RawFrm, (outs), (ins), - "pop{q}\t%fs", []>, TB; -def POPGS64 : I<0xa9, RawFrm, (outs), (ins), - "pop{q}\t%gs", []>, TB; - -def LSS64rm : RI<0xb2, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src), - "lss{q}\t{$src, $dst|$dst, $src}", []>, TB; -def LFS64rm : RI<0xb4, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src), - "lfs{q}\t{$src, $dst|$dst, $src}", []>, TB; -def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src), - "lgs{q}\t{$src, $dst|$dst, $src}", []>, TB; - -// Specialized register support - -// no m form encodable; use SMSW16m -def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins), - "smsw{q}\t$dst", []>, TB; - -// String manipulation instructions - -def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", []>; - -//===----------------------------------------------------------------------===// -// Non-Instruction Patterns -//===----------------------------------------------------------------------===// - -// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable when not in small -// code model mode, should use 'movabs'. FIXME: This is really a hack, the -// 'movabs' predicate should handle this sort of thing. -def : Pat<(i64 (X86Wrapper tconstpool :$dst)), - (MOV64ri tconstpool :$dst)>, Requires<[FarData]>; -def : Pat<(i64 (X86Wrapper tjumptable :$dst)), - (MOV64ri tjumptable :$dst)>, Requires<[FarData]>; -def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)), - (MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>; -def : Pat<(i64 (X86Wrapper texternalsym:$dst)), - (MOV64ri texternalsym:$dst)>, Requires<[FarData]>; -def : Pat<(i64 (X86Wrapper tblockaddress:$dst)), - (MOV64ri tblockaddress:$dst)>, Requires<[FarData]>; - -// In static codegen with small code model, we can get the address of a label -// into a register with 'movl'. FIXME: This is a hack, the 'imm' predicate of -// the MOV64ri64i32 should accept these. -def : Pat<(i64 (X86Wrapper tconstpool :$dst)), - (MOV64ri64i32 tconstpool :$dst)>, Requires<[SmallCode]>; -def : Pat<(i64 (X86Wrapper tjumptable :$dst)), - (MOV64ri64i32 tjumptable :$dst)>, Requires<[SmallCode]>; -def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)), - (MOV64ri64i32 tglobaladdr :$dst)>, Requires<[SmallCode]>; -def : Pat<(i64 (X86Wrapper texternalsym:$dst)), - (MOV64ri64i32 texternalsym:$dst)>, Requires<[SmallCode]>; -def : Pat<(i64 (X86Wrapper tblockaddress:$dst)), - (MOV64ri64i32 tblockaddress:$dst)>, Requires<[SmallCode]>; - -// In kernel code model, we can get the address of a label -// into a register with 'movq'. FIXME: This is a hack, the 'imm' predicate of -// the MOV64ri32 should accept these. 
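A hedged illustration of how these code-model-specific pattern groups differ: each code model changes which move instruction can carry a symbol's address as an immediate. The symbol name below is made up, and the listed encodings simply restate the instructions named in the surrounding patterns.

    /* Illustrative only. */
    extern int some_global;

    int *addr_of_global(void) {
        return &some_global;
        /* small, static : movl    $some_global, %eax  (MOV64ri64i32, zero-extended) */
        /* kernel        : movq    $some_global, %rax  (MOV64ri32, sign-extended)    */
        /* far data      : movabsq $some_global, %rax  (MOV64ri, full 64-bit imm)    */
    }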
-def : Pat<(i64 (X86Wrapper tconstpool :$dst)), - (MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>; -def : Pat<(i64 (X86Wrapper tjumptable :$dst)), - (MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>; -def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)), - (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>; -def : Pat<(i64 (X86Wrapper texternalsym:$dst)), - (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>; -def : Pat<(i64 (X86Wrapper tblockaddress:$dst)), - (MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>; - -// If we have small model and -static mode, it is safe to store global addresses -// directly as immediates. FIXME: This is really a hack, the 'imm' predicate -// for MOV64mi32 should handle this sort of thing. -def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst), - (MOV64mi32 addr:$dst, tconstpool:$src)>, - Requires<[NearData, IsStatic]>; -def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst), - (MOV64mi32 addr:$dst, tjumptable:$src)>, - Requires<[NearData, IsStatic]>; -def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst), - (MOV64mi32 addr:$dst, tglobaladdr:$src)>, - Requires<[NearData, IsStatic]>; -def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst), - (MOV64mi32 addr:$dst, texternalsym:$src)>, - Requires<[NearData, IsStatic]>; -def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst), - (MOV64mi32 addr:$dst, tblockaddress:$src)>, - Requires<[NearData, IsStatic]>; - -// Calls -// Direct PC relative function call for small code model. 32-bit displacement -// sign extended to 64-bit. -def : Pat<(X86call (i64 tglobaladdr:$dst)), - (CALL64pcrel32 tglobaladdr:$dst)>, Requires<[NotWin64]>; -def : Pat<(X86call (i64 texternalsym:$dst)), - (CALL64pcrel32 texternalsym:$dst)>, Requires<[NotWin64]>; - -def : Pat<(X86call (i64 tglobaladdr:$dst)), - (WINCALL64pcrel32 tglobaladdr:$dst)>, Requires<[IsWin64]>; -def : Pat<(X86call (i64 texternalsym:$dst)), - (WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>; - -// tailcall stuff -def : Pat<(X86tcret GR64_TC:$dst, imm:$off), - (TCRETURNri64 GR64_TC:$dst, imm:$off)>, - Requires<[In64BitMode]>; - -def : Pat<(X86tcret (load addr:$dst), imm:$off), - (TCRETURNmi64 addr:$dst, imm:$off)>, - Requires<[In64BitMode]>; - -def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off), - (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>, - Requires<[In64BitMode]>; - -def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off), - (TCRETURNdi64 texternalsym:$dst, imm:$off)>, - Requires<[In64BitMode]>; - -// tls has some funny stuff here... -// This corresponds to movabs $foo@tpoff, %rax -def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)), - (MOV64ri tglobaltlsaddr :$dst)>; -// This corresponds to add $foo@tpoff, %rax -def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)), - (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>; -// This corresponds to mov foo@tpoff(%rbx), %eax -def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))), - (MOV64rm tglobaltlsaddr :$dst)>; - -// Comparisons. - -// TEST R,R is smaller than CMP R,0 -def : Pat<(X86cmp GR64:$src1, 0), - (TEST64rr GR64:$src1, GR64:$src1)>; - -// Conditional moves with folded loads with operands swapped and conditions -// inverted. 
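A short C sketch of why the operands are swapped and the condition inverted in the patterns that follow, under the assumption (from the CMOV*64rm defs earlier in this file) that the rm form can only load the operand that is moved when the condition holds.

    #include <stdint.h>

    /* "below ? r : *p" cannot place the load in the not-moved operand of a
     * cmovb, so it is selected as the equivalent "above-or-equal ? *p : r",
     * i.e. CMOVAE64rm with the load folded, matching the first pattern
     * below. */
    static uint64_t select_below(uint64_t a, uint64_t b,
                                 uint64_t r, const uint64_t *p) {
        return (a < b) ? r : *p;
    }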
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_B, EFLAGS), - (CMOVAE64rm GR64:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_AE, EFLAGS), - (CMOVB64rm GR64:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_E, EFLAGS), - (CMOVNE64rm GR64:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NE, EFLAGS), - (CMOVE64rm GR64:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_BE, EFLAGS), - (CMOVA64rm GR64:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_A, EFLAGS), - (CMOVBE64rm GR64:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_L, EFLAGS), - (CMOVGE64rm GR64:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_GE, EFLAGS), - (CMOVL64rm GR64:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_LE, EFLAGS), - (CMOVG64rm GR64:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_G, EFLAGS), - (CMOVLE64rm GR64:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_P, EFLAGS), - (CMOVNP64rm GR64:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NP, EFLAGS), - (CMOVP64rm GR64:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_S, EFLAGS), - (CMOVNS64rm GR64:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NS, EFLAGS), - (CMOVS64rm GR64:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_O, EFLAGS), - (CMOVNO64rm GR64:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NO, EFLAGS), - (CMOVO64rm GR64:$src2, addr:$src1)>; - -// zextload bool -> zextload byte -def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>; - -// extload -// When extloading from 16-bit and smaller memory locations into 64-bit -// registers, use zero-extending loads so that the entire 64-bit register is -// defined, avoiding partial-register updates. -def : Pat<(extloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>; -def : Pat<(extloadi64i8 addr:$src), (MOVZX64rm8 addr:$src)>; -def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>; -// For other extloads, use subregs, since the high contents of the register are -// defined after an extload. -def : Pat<(extloadi64i32 addr:$src), - (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), - sub_32bit)>; - -// anyext. Define these to do an explicit zero-extend to -// avoid partial-register updates. -def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8 GR8 :$src)>; -def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>; -def : Pat<(i64 (anyext GR32:$src)), - (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>; - -//===----------------------------------------------------------------------===// -// Some peepholes -//===----------------------------------------------------------------------===// - -// Odd encoding trick: -128 fits into an 8-bit immediate field while -// +128 doesn't, so in this special case use a sub instead of an add. -def : Pat<(add GR64:$src1, 128), - (SUB64ri8 GR64:$src1, -128)>; -def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst), - (SUB64mi8 addr:$dst, -128)>; - -// The same trick applies for 32-bit immediate fields in 64-bit -// instructions. 
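The identity behind both versions of the trick, checked in a tiny C program (a sketch, not part of the diff): in two's complement, adding +128 or +2^31 wraps to the same result as subtracting -128 or -2^31, and the negative constants fit the shorter sign-extended immediate encodings.

    #include <assert.h>
    #include <stdint.h>

    int main(void) {
        uint64_t x = 0x123456789abcdef0ULL;
        /* add $128   -> sub $-128  : imm8 instead of imm32            */
        assert(x + 128 == x - (uint64_t)(int64_t)-128);
        /* add $2^31  -> sub $-2^31 : still a sign-extended imm32      */
        assert(x + 0x0000000080000000ULL == x - 0xffffffff80000000ULL);
        return 0;
    }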
-def : Pat<(add GR64:$src1, 0x0000000080000000), - (SUB64ri32 GR64:$src1, 0xffffffff80000000)>; -def : Pat<(store (add (loadi64 addr:$dst), 0x00000000800000000), addr:$dst), - (SUB64mi32 addr:$dst, 0xffffffff80000000)>; - -// Use a 32-bit and with implicit zero-extension instead of a 64-bit and if it -// has an immediate with at least 32 bits of leading zeros, to avoid needing to -// materialize that immediate in a register first. -def : Pat<(and GR64:$src, i64immZExt32:$imm), - (SUBREG_TO_REG - (i64 0), - (AND32ri - (EXTRACT_SUBREG GR64:$src, sub_32bit), - (i32 (GetLo32XForm imm:$imm))), - sub_32bit)>; - -// r & (2^32-1) ==> movz -def : Pat<(and GR64:$src, 0x00000000FFFFFFFF), - (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>; -// r & (2^16-1) ==> movz -def : Pat<(and GR64:$src, 0xffff), - (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit)))>; -// r & (2^8-1) ==> movz -def : Pat<(and GR64:$src, 0xff), - (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit)))>; -// r & (2^8-1) ==> movz -def : Pat<(and GR32:$src1, 0xff), - (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>, - Requires<[In64BitMode]>; -// r & (2^8-1) ==> movz -def : Pat<(and GR16:$src1, 0xff), - (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, sub_8bit)))>, - Requires<[In64BitMode]>; - -// sext_inreg patterns -def : Pat<(sext_inreg GR64:$src, i32), - (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>; -def : Pat<(sext_inreg GR64:$src, i16), - (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>; -def : Pat<(sext_inreg GR64:$src, i8), - (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>; -def : Pat<(sext_inreg GR32:$src, i8), - (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>, - Requires<[In64BitMode]>; -def : Pat<(sext_inreg GR16:$src, i8), - (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, sub_8bit)))>, - Requires<[In64BitMode]>; - -// trunc patterns -def : Pat<(i32 (trunc GR64:$src)), - (EXTRACT_SUBREG GR64:$src, sub_32bit)>; -def : Pat<(i16 (trunc GR64:$src)), - (EXTRACT_SUBREG GR64:$src, sub_16bit)>; -def : Pat<(i8 (trunc GR64:$src)), - (EXTRACT_SUBREG GR64:$src, sub_8bit)>; -def : Pat<(i8 (trunc GR32:$src)), - (EXTRACT_SUBREG GR32:$src, sub_8bit)>, - Requires<[In64BitMode]>; -def : Pat<(i8 (trunc GR16:$src)), - (EXTRACT_SUBREG GR16:$src, sub_8bit)>, - Requires<[In64BitMode]>; - -// h-register tricks. -// For now, be conservative on x86-64 and use an h-register extract only if the -// value is immediately zero-extended or stored, which are somewhat common -// cases. This uses a bunch of code to prevent a register requiring a REX prefix -// from being allocated in the same instruction as the h register, as there's -// currently no way to describe this requirement to the register allocator. - -// h-register extract and zero-extend. 
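A minimal C example of the code shape these h-register patterns target. The constraint being worked around is that an instruction carrying a REX prefix cannot name %ah, %bh, %ch or %dh, which is why the patterns below restrict themselves to the GR*_ABCD register classes and the *_NOREX opcodes.

    #include <stdint.h>

    /* Extract bits 8..15: with the value in %eax this is ideally a single
     * "movzbl %ah, %ecx" rather than a shift followed by an and. */
    static uint32_t second_byte(uint32_t x) {
        return (x >> 8) & 0xff;
    }

    /* The store flavour matched further below ("h-register extract and
     * store"). */
    static void store_second_byte(uint32_t x, uint8_t *p) {
        *p = (uint8_t)(x >> 8);
    }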
-def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)), - (SUBREG_TO_REG - (i64 0), - (MOVZX32_NOREXrr8 - (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)), - sub_8bit_hi)), - sub_32bit)>; -def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), - (MOVZX32_NOREXrr8 - (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - sub_8bit_hi))>, - Requires<[In64BitMode]>; -def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)), - (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, - GR32_ABCD)), - sub_8bit_hi))>, - Requires<[In64BitMode]>; -def : Pat<(srl GR16:$src, (i8 8)), - (EXTRACT_SUBREG - (MOVZX32_NOREXrr8 - (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - sub_8bit_hi)), - sub_16bit)>, - Requires<[In64BitMode]>; -def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))), - (MOVZX32_NOREXrr8 - (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - sub_8bit_hi))>, - Requires<[In64BitMode]>; -def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))), - (MOVZX32_NOREXrr8 - (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - sub_8bit_hi))>, - Requires<[In64BitMode]>; -def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))), - (SUBREG_TO_REG - (i64 0), - (MOVZX32_NOREXrr8 - (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - sub_8bit_hi)), - sub_32bit)>; -def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))), - (SUBREG_TO_REG - (i64 0), - (MOVZX32_NOREXrr8 - (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - sub_8bit_hi)), - sub_32bit)>; - -// h-register extract and store. -def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst), - (MOV8mr_NOREX - addr:$dst, - (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)), - sub_8bit_hi))>; -def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst), - (MOV8mr_NOREX - addr:$dst, - (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - sub_8bit_hi))>, - Requires<[In64BitMode]>; -def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst), - (MOV8mr_NOREX - addr:$dst, - (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - sub_8bit_hi))>, - Requires<[In64BitMode]>; - -// (shl x, 1) ==> (add x, x) -def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>; - -// (shl x (and y, 63)) ==> (shl x, y) -def : Pat<(shl GR64:$src1, (and CL, 63)), - (SHL64rCL GR64:$src1)>; -def : Pat<(store (shl (loadi64 addr:$dst), (and CL, 63)), addr:$dst), - (SHL64mCL addr:$dst)>; - -def : Pat<(srl GR64:$src1, (and CL, 63)), - (SHR64rCL GR64:$src1)>; -def : Pat<(store (srl (loadi64 addr:$dst), (and CL, 63)), addr:$dst), - (SHR64mCL addr:$dst)>; - -def : Pat<(sra GR64:$src1, (and CL, 63)), - (SAR64rCL GR64:$src1)>; -def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst), - (SAR64mCL addr:$dst)>; - -// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. -let AddedComplexity = 5 in { // Try this before the selecting to OR -def : Pat<(or_is_add GR64:$src1, i64immSExt8:$src2), - (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>; -def : Pat<(or_is_add GR64:$src1, i64immSExt32:$src2), - (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>; -def : Pat<(or_is_add GR64:$src1, GR64:$src2), - (ADD64rr GR64:$src1, GR64:$src2)>; -} // AddedComplexity - -// X86 specific add which produces a flag. 
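A hedged C illustration, using the compiler's __int128 extension, of where addc and subc nodes typically come from: double-word arithmetic splits into a flag-producing add or sub for the low halves, which the patterns below map to ADD64rr/SUB64rr and friends, while the flag-consuming half is presumably picked up by the adc/sbb definitions elsewhere in these files.

    /* Illustrative only; unsigned __int128 is a GCC/Clang extension. */
    typedef unsigned __int128 u128;

    static u128 add128(u128 a, u128 b) {
        /* Low halves:  addc -> ADD64rr (produces the carry).
         * High halves: the carry is consumed by an adc. */
        return a + b;
    }

    static u128 sub128(u128 a, u128 b) {
        /* Likewise subc -> SUB64rr for the low halves, sbb for the high. */
        return a - b;
    }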
-def : Pat<(addc GR64:$src1, GR64:$src2), - (ADD64rr GR64:$src1, GR64:$src2)>; -def : Pat<(addc GR64:$src1, (load addr:$src2)), - (ADD64rm GR64:$src1, addr:$src2)>; -def : Pat<(addc GR64:$src1, i64immSExt8:$src2), - (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>; -def : Pat<(addc GR64:$src1, i64immSExt32:$src2), - (ADD64ri32 GR64:$src1, imm:$src2)>; - -def : Pat<(subc GR64:$src1, GR64:$src2), - (SUB64rr GR64:$src1, GR64:$src2)>; -def : Pat<(subc GR64:$src1, (load addr:$src2)), - (SUB64rm GR64:$src1, addr:$src2)>; -def : Pat<(subc GR64:$src1, i64immSExt8:$src2), - (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>; -def : Pat<(subc GR64:$src1, imm:$src2), - (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>; - -//===----------------------------------------------------------------------===// -// EFLAGS-defining Patterns -//===----------------------------------------------------------------------===// - -// addition -def : Pat<(add GR64:$src1, GR64:$src2), - (ADD64rr GR64:$src1, GR64:$src2)>; -def : Pat<(add GR64:$src1, i64immSExt8:$src2), - (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>; -def : Pat<(add GR64:$src1, i64immSExt32:$src2), - (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>; -def : Pat<(add GR64:$src1, (loadi64 addr:$src2)), - (ADD64rm GR64:$src1, addr:$src2)>; - -// subtraction -def : Pat<(sub GR64:$src1, GR64:$src2), - (SUB64rr GR64:$src1, GR64:$src2)>; -def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)), - (SUB64rm GR64:$src1, addr:$src2)>; -def : Pat<(sub GR64:$src1, i64immSExt8:$src2), - (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>; -def : Pat<(sub GR64:$src1, i64immSExt32:$src2), - (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>; - -// Multiply -def : Pat<(mul GR64:$src1, GR64:$src2), - (IMUL64rr GR64:$src1, GR64:$src2)>; -def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)), - (IMUL64rm GR64:$src1, addr:$src2)>; -def : Pat<(mul GR64:$src1, i64immSExt8:$src2), - (IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>; -def : Pat<(mul GR64:$src1, i64immSExt32:$src2), - (IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>; -def : Pat<(mul (loadi64 addr:$src1), i64immSExt8:$src2), - (IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>; -def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2), - (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>; - -// inc/dec -def : Pat<(add GR16:$src, 1), (INC64_16r GR16:$src)>, Requires<[In64BitMode]>; -def : Pat<(add GR16:$src, -1), (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>; -def : Pat<(add GR32:$src, 1), (INC64_32r GR32:$src)>, Requires<[In64BitMode]>; -def : Pat<(add GR32:$src, -1), (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>; -def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>; -def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>; - -// or -def : Pat<(or GR64:$src1, GR64:$src2), - (OR64rr GR64:$src1, GR64:$src2)>; -def : Pat<(or GR64:$src1, i64immSExt8:$src2), - (OR64ri8 GR64:$src1, i64immSExt8:$src2)>; -def : Pat<(or GR64:$src1, i64immSExt32:$src2), - (OR64ri32 GR64:$src1, i64immSExt32:$src2)>; -def : Pat<(or GR64:$src1, (loadi64 addr:$src2)), - (OR64rm GR64:$src1, addr:$src2)>; - -// xor -def : Pat<(xor GR64:$src1, GR64:$src2), - (XOR64rr GR64:$src1, GR64:$src2)>; -def : Pat<(xor GR64:$src1, i64immSExt8:$src2), - (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>; -def : Pat<(xor GR64:$src1, i64immSExt32:$src2), - (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>; -def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)), - (XOR64rm GR64:$src1, addr:$src2)>; - -// and -def : Pat<(and GR64:$src1, GR64:$src2), - (AND64rr GR64:$src1, GR64:$src2)>; -def : Pat<(and GR64:$src1, i64immSExt8:$src2), - (AND64ri8 GR64:$src1, 
i64immSExt8:$src2)>; -def : Pat<(and GR64:$src1, i64immSExt32:$src2), - (AND64ri32 GR64:$src1, i64immSExt32:$src2)>; -def : Pat<(and GR64:$src1, (loadi64 addr:$src2)), - (AND64rm GR64:$src1, addr:$src2)>; - -//===----------------------------------------------------------------------===// -// X86-64 SSE Instructions -//===----------------------------------------------------------------------===// - -// Move instructions... - -def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v2i64 (scalar_to_vector GR64:$src)))]>; -def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), - (iPTR 0)))]>; - -def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", - [(set FR64:$dst, (bitconvert GR64:$src))]>; -def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), - "movq\t{$src, $dst|$dst, $src}", - [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>; - -def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (bitconvert FR64:$src))]>; -def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), - "movq\t{$src, $dst|$dst, $src}", - [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>; - diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td new file mode 100644 index 0000000..f0ea068 --- /dev/null +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -0,0 +1,1125 @@ +//===- X86InstrArithmetic.td - Integer Arithmetic Instrs ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the integer arithmetic instructions in the X86 +// architecture. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// LEA - Load Effective Address + +let neverHasSideEffects = 1 in +def LEA16r : I<0x8D, MRMSrcMem, + (outs GR16:$dst), (ins i32mem:$src), + "lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize; +let isReMaterializable = 1 in +def LEA32r : I<0x8D, MRMSrcMem, + (outs GR32:$dst), (ins i32mem:$src), + "lea{l}\t{$src|$dst}, {$dst|$src}", + [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>; + +def LEA64_32r : I<0x8D, MRMSrcMem, + (outs GR32:$dst), (ins lea64_32mem:$src), + "lea{l}\t{$src|$dst}, {$dst|$src}", + [(set GR32:$dst, lea32addr:$src)]>, Requires<[In64BitMode]>; + +let isReMaterializable = 1 in +def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "lea{q}\t{$src|$dst}, {$dst|$src}", + [(set GR64:$dst, lea64addr:$src)]>; + + + +//===----------------------------------------------------------------------===// +// Fixed-Register Multiplication and Division Instructions. +// + +// Extra precision multiplication + +// AL is really implied by AX, but the registers in Defs must match the +// SDNode results (i8, i32). +let Defs = [AL,EFLAGS,AX], Uses = [AL] in +def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src", + // FIXME: Used for 8-bit mul, ignore result upper 8 bits. 
+ // This probably ought to be moved to a def : Pat<> if the + // syntax can be accepted. + [(set AL, (mul AL, GR8:$src)), + (implicit EFLAGS)]>; // AL,AH = AL*GR8 + +let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in +def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src), + "mul{w}\t$src", + []>, OpSize; // AX,DX = AX*GR16 + +let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in +def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src), + "mul{l}\t$src", // EAX,EDX = EAX*GR32 + [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>; +let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in +def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src), + "mul{q}\t$src", // RAX,RDX = RAX*GR64 + [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/]>; + +let Defs = [AL,EFLAGS,AX], Uses = [AL] in +def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), + "mul{b}\t$src", + // FIXME: Used for 8-bit mul, ignore result upper 8 bits. + // This probably ought to be moved to a def : Pat<> if the + // syntax can be accepted. + [(set AL, (mul AL, (loadi8 addr:$src))), + (implicit EFLAGS)]>; // AL,AH = AL*[mem8] + +let mayLoad = 1, neverHasSideEffects = 1 in { +let Defs = [AX,DX,EFLAGS], Uses = [AX] in +def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src), + "mul{w}\t$src", + []>, OpSize; // AX,DX = AX*[mem16] + +let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in +def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src), + "mul{l}\t$src", + []>; // EAX,EDX = EAX*[mem32] +let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in +def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src), + "mul{q}\t$src", []>; // RAX,RDX = RAX*[mem64] +} + +let neverHasSideEffects = 1 in { +let Defs = [AL,EFLAGS,AX], Uses = [AL] in +def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>; + // AL,AH = AL*GR8 +let Defs = [AX,DX,EFLAGS], Uses = [AX] in +def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", []>, + OpSize; // AX,DX = AX*GR16 +let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in +def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>; + // EAX,EDX = EAX*GR32 +let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in +def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", []>; + // RAX,RDX = RAX*GR64 + +let mayLoad = 1 in { +let Defs = [AL,EFLAGS,AX], Uses = [AL] in +def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src), + "imul{b}\t$src", []>; // AL,AH = AL*[mem8] +let Defs = [AX,DX,EFLAGS], Uses = [AX] in +def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src), + "imul{w}\t$src", []>, OpSize; // AX,DX = AX*[mem16] +let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in +def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src), + "imul{l}\t$src", []>; // EAX,EDX = EAX*[mem32] +let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in +def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src), + "imul{q}\t$src", []>; // RAX,RDX = RAX*[mem64] +} +} // neverHasSideEffects + + +let Defs = [EFLAGS] in { +let Constraints = "$src1 = $dst" in { + +let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y +// Register-Register Signed Integer Multiply +def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2), + "imul{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, EFLAGS, + (X86smul_flag GR16:$src1, GR16:$src2))]>, TB, OpSize; +def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2), + "imul{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, EFLAGS, + 
(X86smul_flag GR32:$src1, GR32:$src2))]>, TB; +def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2), + "imul{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, EFLAGS, + (X86smul_flag GR64:$src1, GR64:$src2))]>, TB; +} + +// Register-Memory Signed Integer Multiply +def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst), + (ins GR16:$src1, i16mem:$src2), + "imul{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, EFLAGS, + (X86smul_flag GR16:$src1, (load addr:$src2)))]>, + TB, OpSize; +def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst), + (ins GR32:$src1, i32mem:$src2), + "imul{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, EFLAGS, + (X86smul_flag GR32:$src1, (load addr:$src2)))]>, TB; +def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst), + (ins GR64:$src1, i64mem:$src2), + "imul{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, EFLAGS, + (X86smul_flag GR64:$src1, (load addr:$src2)))]>, TB; +} // Constraints = "$src1 = $dst" + +} // Defs = [EFLAGS] + +// Suprisingly enough, these are not two address instructions! +let Defs = [EFLAGS] in { +// Register-Integer Signed Integer Multiply +def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16 + (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), + "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR16:$dst, EFLAGS, + (X86smul_flag GR16:$src1, imm:$src2))]>, OpSize; +def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8 + (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), + "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR16:$dst, EFLAGS, + (X86smul_flag GR16:$src1, i16immSExt8:$src2))]>, + OpSize; +def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32 + (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), + "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR32:$dst, EFLAGS, + (X86smul_flag GR32:$src1, imm:$src2))]>; +def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8 + (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), + "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR32:$dst, EFLAGS, + (X86smul_flag GR32:$src1, i32immSExt8:$src2))]>; +def IMUL64rri32 : RIi32<0x69, MRMSrcReg, // GR64 = GR64*I32 + (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), + "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR64:$dst, EFLAGS, + (X86smul_flag GR64:$src1, i64immSExt32:$src2))]>; +def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8 + (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), + "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR64:$dst, EFLAGS, + (X86smul_flag GR64:$src1, i64immSExt8:$src2))]>; + + +// Memory-Integer Signed Integer Multiply +def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16 + (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2), + "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR16:$dst, EFLAGS, + (X86smul_flag (load addr:$src1), imm:$src2))]>, + OpSize; +def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8 + (outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2), + "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR16:$dst, EFLAGS, + (X86smul_flag (load addr:$src1), + i16immSExt8:$src2))]>, OpSize; +def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32 + (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2), + "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR32:$dst, EFLAGS, + (X86smul_flag (load addr:$src1), imm:$src2))]>; +def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8 + (outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2), + "imul{l}\t{$src2, $src1, 
$dst|$dst, $src1, $src2}", + [(set GR32:$dst, EFLAGS, + (X86smul_flag (load addr:$src1), + i32immSExt8:$src2))]>; +def IMUL64rmi32 : RIi32<0x69, MRMSrcMem, // GR64 = [mem64]*I32 + (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2), + "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR64:$dst, EFLAGS, + (X86smul_flag (load addr:$src1), + i64immSExt32:$src2))]>; +def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8 + (outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2), + "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR64:$dst, EFLAGS, + (X86smul_flag (load addr:$src1), + i64immSExt8:$src2))]>; +} // Defs = [EFLAGS] + + + + +// unsigned division/remainder +let Defs = [AL,EFLAGS,AX], Uses = [AX] in +def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH + "div{b}\t$src", []>; +let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in +def DIV16r : I<0xF7, MRM6r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX + "div{w}\t$src", []>, OpSize; +let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in +def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX + "div{l}\t$src", []>; +// RDX:RAX/r64 = RAX,RDX +let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in +def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src), + "div{q}\t$src", []>; + +let mayLoad = 1 in { +let Defs = [AL,EFLAGS,AX], Uses = [AX] in +def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH + "div{b}\t$src", []>; +let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in +def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX + "div{w}\t$src", []>, OpSize; +let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX +def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src), + "div{l}\t$src", []>; +// RDX:RAX/[mem64] = RAX,RDX +let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in +def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src), + "div{q}\t$src", []>; +} + +// Signed division/remainder. +let Defs = [AL,EFLAGS,AX], Uses = [AX] in +def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH + "idiv{b}\t$src", []>; +let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in +def IDIV16r: I<0xF7, MRM7r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX + "idiv{w}\t$src", []>, OpSize; +let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in +def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX + "idiv{l}\t$src", []>; +// RDX:RAX/r64 = RAX,RDX +let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in +def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src), + "idiv{q}\t$src", []>; + +let mayLoad = 1, mayLoad = 1 in { +let Defs = [AL,EFLAGS,AX], Uses = [AX] in +def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH + "idiv{b}\t$src", []>; +let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in +def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX + "idiv{w}\t$src", []>, OpSize; +let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX +def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src), + "idiv{l}\t$src", []>; +let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX +def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src), + "idiv{q}\t$src", []>; +} + +//===----------------------------------------------------------------------===// +// Two address Instructions. 
+// + +// unary instructions +let CodeSize = 2 in { +let Defs = [EFLAGS] in { +let Constraints = "$src1 = $dst" in { +def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1), + "neg{b}\t$dst", + [(set GR8:$dst, (ineg GR8:$src1)), + (implicit EFLAGS)]>; +def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src1), + "neg{w}\t$dst", + [(set GR16:$dst, (ineg GR16:$src1)), + (implicit EFLAGS)]>, OpSize; +def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1), + "neg{l}\t$dst", + [(set GR32:$dst, (ineg GR32:$src1)), + (implicit EFLAGS)]>; +def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "neg{q}\t$dst", + [(set GR64:$dst, (ineg GR64:$src1)), + (implicit EFLAGS)]>; +} // Constraints = "$src1 = $dst" + +def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst), + "neg{b}\t$dst", + [(store (ineg (loadi8 addr:$dst)), addr:$dst), + (implicit EFLAGS)]>; +def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst), + "neg{w}\t$dst", + [(store (ineg (loadi16 addr:$dst)), addr:$dst), + (implicit EFLAGS)]>, OpSize; +def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst), + "neg{l}\t$dst", + [(store (ineg (loadi32 addr:$dst)), addr:$dst), + (implicit EFLAGS)]>; +def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst", + [(store (ineg (loadi64 addr:$dst)), addr:$dst), + (implicit EFLAGS)]>; +} // Defs = [EFLAGS] + + +// Note: NOT does not set EFLAGS! + +let Constraints = "$src1 = $dst" in { +// Match xor -1 to not. Favors these over a move imm + xor to save code size. +let AddedComplexity = 15 in { +def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1), + "not{b}\t$dst", + [(set GR8:$dst, (not GR8:$src1))]>; +def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src1), + "not{w}\t$dst", + [(set GR16:$dst, (not GR16:$src1))]>, OpSize; +def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1), + "not{l}\t$dst", + [(set GR32:$dst, (not GR32:$src1))]>; +def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "not{q}\t$dst", + [(set GR64:$dst, (not GR64:$src1))]>; +} +} // Constraints = "$src1 = $dst" + +def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst), + "not{b}\t$dst", + [(store (not (loadi8 addr:$dst)), addr:$dst)]>; +def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst), + "not{w}\t$dst", + [(store (not (loadi16 addr:$dst)), addr:$dst)]>, OpSize; +def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst), + "not{l}\t$dst", + [(store (not (loadi32 addr:$dst)), addr:$dst)]>; +def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst", + [(store (not (loadi64 addr:$dst)), addr:$dst)]>; +} // CodeSize + +// TODO: inc/dec is slow for P4, but fast for Pentium-M. +let Defs = [EFLAGS] in { +let Constraints = "$src1 = $dst" in { +let CodeSize = 2 in +def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), + "inc{b}\t$dst", + [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>; + +let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA. 
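// Sketch of the transform referred to above: when the two-address constraint
// would otherwise force a copy, e.g.
//   movl %esi, %edi
//   incl %edi
// the pass can instead emit a single three-address instruction (assuming the
// flag result is not needed):
//   leal 1(%esi), %edi
// The DEC forms further below allow the same rewrite with a -1 displacement.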
+def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), + "inc{w}\t$dst", + [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>, + OpSize, Requires<[In32BitMode]>; +def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), + "inc{l}\t$dst", + [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>, + Requires<[In32BitMode]>; +def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "inc{q}\t$dst", + [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src1))]>; +} // isConvertibleToThreeAddress = 1, CodeSize = 1 + + +// In 64-bit mode, single byte INC and DEC cannot be encoded. +let isConvertibleToThreeAddress = 1, CodeSize = 2 in { +// Can transform into LEA. +def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1), + "inc{w}\t$dst", + [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>, + OpSize, Requires<[In64BitMode]>; +def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1), + "inc{l}\t$dst", + [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>, + Requires<[In64BitMode]>; +def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1), + "dec{w}\t$dst", + [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>, + OpSize, Requires<[In64BitMode]>; +def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1), + "dec{l}\t$dst", + [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>, + Requires<[In64BitMode]>; +} // isConvertibleToThreeAddress = 1, CodeSize = 2 + +} // Constraints = "$src1 = $dst" + +let CodeSize = 2 in { + def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst", + [(store (add (loadi8 addr:$dst), 1), addr:$dst), + (implicit EFLAGS)]>; + def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst", + [(store (add (loadi16 addr:$dst), 1), addr:$dst), + (implicit EFLAGS)]>, + OpSize, Requires<[In32BitMode]>; + def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst", + [(store (add (loadi32 addr:$dst), 1), addr:$dst), + (implicit EFLAGS)]>, + Requires<[In32BitMode]>; + def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst", + [(store (add (loadi64 addr:$dst), 1), addr:$dst), + (implicit EFLAGS)]>; + +// These are duplicates of their 32-bit counterparts. Only needed so X86 knows +// how to unfold them. +// FIXME: What is this for?? +def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst", + [(store (add (loadi16 addr:$dst), 1), addr:$dst), + (implicit EFLAGS)]>, + OpSize, Requires<[In64BitMode]>; +def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst", + [(store (add (loadi32 addr:$dst), 1), addr:$dst), + (implicit EFLAGS)]>, + Requires<[In64BitMode]>; +def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst", + [(store (add (loadi16 addr:$dst), -1), addr:$dst), + (implicit EFLAGS)]>, + OpSize, Requires<[In64BitMode]>; +def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", + [(store (add (loadi32 addr:$dst), -1), addr:$dst), + (implicit EFLAGS)]>, + Requires<[In64BitMode]>; +} // CodeSize = 2 + +let Constraints = "$src1 = $dst" in { +let CodeSize = 2 in +def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), + "dec{b}\t$dst", + [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>; +let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA. 
+def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), + "dec{w}\t$dst", + [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>, + OpSize, Requires<[In32BitMode]>; +def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), + "dec{l}\t$dst", + [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>, + Requires<[In32BitMode]>; +def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst", + [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))]>; +} // CodeSize = 2 +} // Constraints = "$src1 = $dst" + + +let CodeSize = 2 in { + def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst", + [(store (add (loadi8 addr:$dst), -1), addr:$dst), + (implicit EFLAGS)]>; + def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst", + [(store (add (loadi16 addr:$dst), -1), addr:$dst), + (implicit EFLAGS)]>, + OpSize, Requires<[In32BitMode]>; + def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", + [(store (add (loadi32 addr:$dst), -1), addr:$dst), + (implicit EFLAGS)]>, + Requires<[In32BitMode]>; + def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", + [(store (add (loadi64 addr:$dst), -1), addr:$dst), + (implicit EFLAGS)]>; +} // CodeSize = 2 +} // Defs = [EFLAGS] + + +/// X86TypeInfo - This is a bunch of information that describes relevant X86 +/// information about value types. For example, it can tell you what the +/// register class and preferred load to use. +class X86TypeInfo<ValueType vt, string instrsuffix, RegisterClass regclass, + PatFrag loadnode, X86MemOperand memoperand, ImmType immkind, + Operand immoperand, SDPatternOperator immoperator, + Operand imm8operand, SDPatternOperator imm8operator, + bit hasOddOpcode, bit hasOpSizePrefix, bit hasREX_WPrefix> { + /// VT - This is the value type itself. + ValueType VT = vt; + + /// InstrSuffix - This is the suffix used on instructions with this type. For + /// example, i8 -> "b", i16 -> "w", i32 -> "l", i64 -> "q". + string InstrSuffix = instrsuffix; + + /// RegClass - This is the register class associated with this type. For + /// example, i8 -> GR8, i16 -> GR16, i32 -> GR32, i64 -> GR64. + RegisterClass RegClass = regclass; + + /// LoadNode - This is the load node associated with this type. For + /// example, i8 -> loadi8, i16 -> loadi16, i32 -> loadi32, i64 -> loadi64. + PatFrag LoadNode = loadnode; + + /// MemOperand - This is the memory operand associated with this type. For + /// example, i8 -> i8mem, i16 -> i16mem, i32 -> i32mem, i64 -> i64mem. + X86MemOperand MemOperand = memoperand; + + /// ImmEncoding - This is the encoding of an immediate of this type. For + /// example, i8 -> Imm8, i16 -> Imm16, i32 -> Imm32. Note that i64 -> Imm32 + /// since the immediate fields of i64 instructions is a 32-bit sign extended + /// value. + ImmType ImmEncoding = immkind; + + /// ImmOperand - This is the operand kind of an immediate of this type. For + /// example, i8 -> i8imm, i16 -> i16imm, i32 -> i32imm. Note that i64 -> + /// i64i32imm since the immediate fields of i64 instructions is a 32-bit sign + /// extended value. + Operand ImmOperand = immoperand; + + /// ImmOperator - This is the operator that should be used to match an + /// immediate of this kind in a pattern (e.g. imm, or i64immSExt32). + SDPatternOperator ImmOperator = immoperator; + + /// Imm8Operand - This is the operand kind to use for an imm8 of this type. + /// For example, i8 -> <invalid>, i16 -> i16i8imm, i32 -> i32i8imm. 
This is + /// only used for instructions that have a sign-extended imm8 field form. + Operand Imm8Operand = imm8operand; + + /// Imm8Operator - This is the operator that should be used to match an 8-bit + /// sign extended immediate of this kind in a pattern (e.g. imm16immSExt8). + SDPatternOperator Imm8Operator = imm8operator; + + /// HasOddOpcode - This bit is true if the instruction should have an odd (as + /// opposed to even) opcode. Operations on i8 are usually even, operations on + /// other datatypes are odd. + bit HasOddOpcode = hasOddOpcode; + + /// HasOpSizePrefix - This bit is set to true if the instruction should have + /// the 0x66 operand size prefix. This is set for i16 types. + bit HasOpSizePrefix = hasOpSizePrefix; + + /// HasREX_WPrefix - This bit is set to true if the instruction should have + /// the 0x40 REX prefix. This is set for i64 types. + bit HasREX_WPrefix = hasREX_WPrefix; +} + +def invalid_node : SDNode<"<<invalid_node>>", SDTIntLeaf,[],"<<invalid_node>>">; + + +def Xi8 : X86TypeInfo<i8 , "b", GR8 , loadi8 , i8mem , + Imm8 , i8imm , imm, i8imm , invalid_node, + 0, 0, 0>; +def Xi16 : X86TypeInfo<i16, "w", GR16, loadi16, i16mem, + Imm16, i16imm, imm, i16i8imm, i16immSExt8, + 1, 1, 0>; +def Xi32 : X86TypeInfo<i32, "l", GR32, loadi32, i32mem, + Imm32, i32imm, imm, i32i8imm, i32immSExt8, + 1, 0, 0>; +def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem, + Imm32, i64i32imm, i64immSExt32, i64i8imm, i64immSExt8, + 1, 0, 1>; + +/// ITy - This instruction base class takes the type info for the instruction. +/// Using this, it: +/// 1. Concatenates together the instruction mnemonic with the appropriate +/// suffix letter, a tab, and the arguments. +/// 2. Infers whether the instruction should have a 0x66 prefix byte. +/// 3. Infers whether the instruction should have a 0x40 REX_W prefix. +/// 4. Infers whether the low bit of the opcode should be 0 (for i8 operations) +/// or 1 (for i16,i32,i64 operations). +class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins, + string mnemonic, string args, list<dag> pattern> + : I<{opcode{7}, opcode{6}, opcode{5}, opcode{4}, + opcode{3}, opcode{2}, opcode{1}, typeinfo.HasOddOpcode }, + f, outs, ins, + !strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern> { + + // Infer instruction prefixes from type info. + let hasOpSizePrefix = typeinfo.HasOpSizePrefix; + let hasREX_WPrefix = typeinfo.HasREX_WPrefix; +} + +// BinOpRR - Instructions like "add reg, reg, reg". +class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + dag outlist, list<dag> pattern, Format f = MRMDestReg> + : ITy<opcode, f, typeinfo, outlist, + (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), + mnemonic, "{$src2, $src1|$src1, $src2}", pattern>; + +// BinOpRR_R - Instructions like "add reg, reg, reg", where the pattern has +// just a regclass (no eflags) as a result. +class BinOpRR_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDNode opnode> + : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), + [(set typeinfo.RegClass:$dst, + (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>; + +// BinOpRR_F - Instructions like "cmp reg, Reg", where the pattern has +// just a EFLAGS as a result. 
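// For instance, the TEST definitions near the end of this file instantiate
// this class directly; a sketch of one such record and the pattern it gets:
//   def TEST32rr : BinOpRR_F<0x84, "test", Xi32, X86testpat, MRMSrcReg>;
//   // pattern: (set EFLAGS, (X86testpat GR32:$src1, GR32:$src2))
// i.e. two register operands are read and the only result is EFLAGS.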
+class BinOpRR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDPatternOperator opnode, Format f = MRMDestReg> + : BinOpRR<opcode, mnemonic, typeinfo, (outs), + [(set EFLAGS, + (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))], + f>; + +// BinOpRR_RF - Instructions like "add reg, reg, reg", where the pattern has +// both a regclass and EFLAGS as a result. +class BinOpRR_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDNode opnode> + : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), + [(set typeinfo.RegClass:$dst, EFLAGS, + (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>; + +// BinOpRR_RFF - Instructions like "adc reg, reg, reg", where the pattern has +// both a regclass and EFLAGS as a result, and has EFLAGS as input. +class BinOpRR_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDNode opnode> + : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), + [(set typeinfo.RegClass:$dst, EFLAGS, + (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2, + EFLAGS))]>; + +// BinOpRR_Rev - Instructions like "add reg, reg, reg" (reversed encoding). +class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> + : ITy<opcode, MRMSrcReg, typeinfo, + (outs typeinfo.RegClass:$dst), + (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), + mnemonic, "{$src2, $dst|$dst, $src2}", []> { + // The disassembler should know about this, but not the asmparser. + let isCodeGenOnly = 1; +} + +// BinOpRM - Instructions like "add reg, reg, [mem]". +class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + dag outlist, list<dag> pattern> + : ITy<opcode, MRMSrcMem, typeinfo, outlist, + (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2), + mnemonic, "{$src2, $src1|$src1, $src2}", pattern>; + +// BinOpRM_R - Instructions like "add reg, reg, [mem]". +class BinOpRM_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDNode opnode> + : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), + [(set typeinfo.RegClass:$dst, + (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>; + +// BinOpRM_F - Instructions like "cmp reg, [mem]". +class BinOpRM_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDPatternOperator opnode> + : BinOpRM<opcode, mnemonic, typeinfo, (outs), + [(set EFLAGS, + (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>; + +// BinOpRM_RF - Instructions like "add reg, reg, [mem]". +class BinOpRM_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDNode opnode> + : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), + [(set typeinfo.RegClass:$dst, EFLAGS, + (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>; + +// BinOpRM_RFF - Instructions like "adc reg, reg, [mem]". +class BinOpRM_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDNode opnode> + : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), + [(set typeinfo.RegClass:$dst, EFLAGS, + (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2), + EFLAGS))]>; + +// BinOpRI - Instructions like "add reg, reg, imm". +class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + Format f, dag outlist, list<dag> pattern> + : ITy<opcode, f, typeinfo, outlist, + (ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2), + mnemonic, "{$src2, $src1|$src1, $src2}", pattern> { + let ImmT = typeinfo.ImmEncoding; +} + +// BinOpRI_R - Instructions like "add reg, reg, imm". 
+class BinOpRI_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDNode opnode, Format f> + : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), + [(set typeinfo.RegClass:$dst, + (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>; + +// BinOpRI_F - Instructions like "cmp reg, imm". +class BinOpRI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDPatternOperator opnode, Format f> + : BinOpRI<opcode, mnemonic, typeinfo, f, (outs), + [(set EFLAGS, + (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>; + +// BinOpRI_RF - Instructions like "add reg, reg, imm". +class BinOpRI_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDNode opnode, Format f> + : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), + [(set typeinfo.RegClass:$dst, EFLAGS, + (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>; + +// BinOpRI_RFF - Instructions like "adc reg, reg, imm". +class BinOpRI_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDNode opnode, Format f> + : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), + [(set typeinfo.RegClass:$dst, EFLAGS, + (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2, + EFLAGS))]>; + +// BinOpRI8 - Instructions like "add reg, reg, imm8". +class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + Format f, dag outlist, list<dag> pattern> + : ITy<opcode, f, typeinfo, outlist, + (ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2), + mnemonic, "{$src2, $src1|$src1, $src2}", pattern> { + let ImmT = Imm8; // Always 8-bit immediate. +} + +// BinOpRI8_R - Instructions like "add reg, reg, imm8". +class BinOpRI8_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDNode opnode, Format f> + : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), + [(set typeinfo.RegClass:$dst, + (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>; + +// BinOpRI8_F - Instructions like "cmp reg, imm8". +class BinOpRI8_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDNode opnode, Format f> + : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs), + [(set EFLAGS, + (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>; + +// BinOpRI8_RF - Instructions like "add reg, reg, imm8". +class BinOpRI8_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDNode opnode, Format f> + : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), + [(set typeinfo.RegClass:$dst, EFLAGS, + (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>; + +// BinOpRI8_RFF - Instructions like "adc reg, reg, imm8". +class BinOpRI8_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDNode opnode, Format f> + : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), + [(set typeinfo.RegClass:$dst, EFLAGS, + (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2, + EFLAGS))]>; + +// BinOpMR - Instructions like "add [mem], reg". +class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + list<dag> pattern> + : ITy<opcode, MRMDestMem, typeinfo, + (outs), (ins typeinfo.MemOperand:$dst, typeinfo.RegClass:$src), + mnemonic, "{$src, $dst|$dst, $src}", pattern>; + +// BinOpMR_RMW - Instructions like "add [mem], reg". 
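// For instance, the ADD definitions further down expand (via ArithBinOp_RF)
// to records roughly like this sketch:
//   def ADD32mr : BinOpMR_RMW<0x00, "add", Xi32, add>;
//   // pattern: (store (add (load addr:$dst), GR32:$src), addr:$dst),
//   //          (implicit EFLAGS)
// i.e. a read-modify-write of the memory operand that also sets EFLAGS.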
+class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDNode opnode> + : BinOpMR<opcode, mnemonic, typeinfo, + [(store (opnode (load addr:$dst), typeinfo.RegClass:$src), addr:$dst), + (implicit EFLAGS)]>; + +// BinOpMR_RMW_FF - Instructions like "adc [mem], reg". +class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDNode opnode> + : BinOpMR<opcode, mnemonic, typeinfo, + [(store (opnode (load addr:$dst), typeinfo.RegClass:$src, EFLAGS), + addr:$dst), + (implicit EFLAGS)]>; + +// BinOpMR_F - Instructions like "cmp [mem], reg". +class BinOpMR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDNode opnode> + : BinOpMR<opcode, mnemonic, typeinfo, + [(set EFLAGS, (opnode (load addr:$dst), typeinfo.RegClass:$src))]>; + +// BinOpMI - Instructions like "add [mem], imm". +class BinOpMI<string mnemonic, X86TypeInfo typeinfo, + Format f, list<dag> pattern, bits<8> opcode = 0x80> + : ITy<opcode, f, typeinfo, + (outs), (ins typeinfo.MemOperand:$dst, typeinfo.ImmOperand:$src), + mnemonic, "{$src, $dst|$dst, $src}", pattern> { + let ImmT = typeinfo.ImmEncoding; +} + +// BinOpMI_RMW - Instructions like "add [mem], imm". +class BinOpMI_RMW<string mnemonic, X86TypeInfo typeinfo, + SDNode opnode, Format f> + : BinOpMI<mnemonic, typeinfo, f, + [(store (opnode (typeinfo.VT (load addr:$dst)), + typeinfo.ImmOperator:$src), addr:$dst), + (implicit EFLAGS)]>; + +// BinOpMI_RMW_FF - Instructions like "adc [mem], imm". +class BinOpMI_RMW_FF<string mnemonic, X86TypeInfo typeinfo, + SDNode opnode, Format f> + : BinOpMI<mnemonic, typeinfo, f, + [(store (opnode (typeinfo.VT (load addr:$dst)), + typeinfo.ImmOperator:$src, EFLAGS), addr:$dst), + (implicit EFLAGS)]>; + +// BinOpMI_F - Instructions like "cmp [mem], imm". +class BinOpMI_F<string mnemonic, X86TypeInfo typeinfo, + SDPatternOperator opnode, Format f, bits<8> opcode = 0x80> + : BinOpMI<mnemonic, typeinfo, f, + [(set EFLAGS, (opnode (typeinfo.VT (load addr:$dst)), + typeinfo.ImmOperator:$src))], + opcode>; + +// BinOpMI8 - Instructions like "add [mem], imm8". +class BinOpMI8<string mnemonic, X86TypeInfo typeinfo, + Format f, list<dag> pattern> + : ITy<0x82, f, typeinfo, + (outs), (ins typeinfo.MemOperand:$dst, typeinfo.Imm8Operand:$src), + mnemonic, "{$src, $dst|$dst, $src}", pattern> { + let ImmT = Imm8; // Always 8-bit immediate. +} + +// BinOpMI8_RMW - Instructions like "add [mem], imm8". +class BinOpMI8_RMW<string mnemonic, X86TypeInfo typeinfo, + SDNode opnode, Format f> + : BinOpMI8<mnemonic, typeinfo, f, + [(store (opnode (load addr:$dst), + typeinfo.Imm8Operator:$src), addr:$dst), + (implicit EFLAGS)]>; + +// BinOpMI8_RMW_FF - Instructions like "adc [mem], imm8". +class BinOpMI8_RMW_FF<string mnemonic, X86TypeInfo typeinfo, + SDNode opnode, Format f> + : BinOpMI8<mnemonic, typeinfo, f, + [(store (opnode (load addr:$dst), + typeinfo.Imm8Operator:$src, EFLAGS), addr:$dst), + (implicit EFLAGS)]>; + +// BinOpMI8_F - Instructions like "cmp [mem], imm8". +class BinOpMI8_F<string mnemonic, X86TypeInfo typeinfo, + SDNode opnode, Format f> + : BinOpMI8<mnemonic, typeinfo, f, + [(set EFLAGS, (opnode (load addr:$dst), + typeinfo.Imm8Operator:$src))]>; + +// BinOpAI - Instructions like "add %eax, %eax, imm". 
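// These are the short accumulator forms. For instance, the ADD definitions
// further down expand to a record roughly like this sketch:
//   def ADD32i32 : BinOpAI<0x04, "add", Xi32, EAX>;
// which prints as "addl $imm, %eax" and both uses and defines EAX; since ITy
// substitutes HasOddOpcode into the low opcode bit, the byte actually emitted
// for the 32-bit form is 0x05.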
+class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + Register areg> + : ITy<opcode, RawFrm, typeinfo, + (outs), (ins typeinfo.ImmOperand:$src), + mnemonic, !strconcat("{$src, %", areg.AsmName, "|%", + areg.AsmName, ", $src}"), []> { + let ImmT = typeinfo.ImmEncoding; + let Uses = [areg]; + let Defs = [areg]; +} + +/// ArithBinOp_RF - This is an arithmetic binary operator where the pattern is +/// defined with "(set GPR:$dst, EFLAGS, (...". +/// +/// It would be nice to get rid of the second and third argument here, but +/// tblgen can't handle dependent type references aggressively enough: PR8330 +multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, + string mnemonic, Format RegMRM, Format MemMRM, + SDNode opnodeflag, SDNode opnode, + bit CommutableRR, bit ConvertibleToThreeAddress> { + let Defs = [EFLAGS] in { + let Constraints = "$src1 = $dst" in { + let isCommutable = CommutableRR, + isConvertibleToThreeAddress = ConvertibleToThreeAddress in { + def #NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>; + def #NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>; + def #NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>; + def #NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>; + } // isCommutable + + def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>; + def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>; + def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>; + def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>; + + def #NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>; + def #NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>; + def #NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>; + def #NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>; + + let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { + // NOTE: These are order specific, we want the ri8 forms to be listed + // first so that they are slightly preferred to the ri forms. + def #NAME#16ri8 : BinOpRI8_RF<0x82, mnemonic, Xi16, opnodeflag, RegMRM>; + def #NAME#32ri8 : BinOpRI8_RF<0x82, mnemonic, Xi32, opnodeflag, RegMRM>; + def #NAME#64ri8 : BinOpRI8_RF<0x82, mnemonic, Xi64, opnodeflag, RegMRM>; + + def #NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>; + def #NAME#16ri : BinOpRI_RF<0x80, mnemonic, Xi16, opnodeflag, RegMRM>; + def #NAME#32ri : BinOpRI_RF<0x80, mnemonic, Xi32, opnodeflag, RegMRM>; + def #NAME#64ri32: BinOpRI_RF<0x80, mnemonic, Xi64, opnodeflag, RegMRM>; + } + } // Constraints = "$src1 = $dst" + + def #NAME#8mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi8 , opnode>; + def #NAME#16mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi16, opnode>; + def #NAME#32mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi32, opnode>; + def #NAME#64mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi64, opnode>; + + // NOTE: These are order specific, we want the mi8 forms to be listed + // first so that they are slightly preferred to the mi forms. 
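// In practice this makes a small immediate, e.g. "addl $8, (%eax)", end up in
// the sign-extended-imm8 encoding (0x83 /0 once ITy sets the low opcode bit,
// one immediate byte) rather than the full-immediate 0x81 /0 encoding with
// four immediate bytes.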
+ def #NAME#16mi8 : BinOpMI8_RMW<mnemonic, Xi16, opnode, MemMRM>; + def #NAME#32mi8 : BinOpMI8_RMW<mnemonic, Xi32, opnode, MemMRM>; + def #NAME#64mi8 : BinOpMI8_RMW<mnemonic, Xi64, opnode, MemMRM>; + + def #NAME#8mi : BinOpMI_RMW<mnemonic, Xi8 , opnode, MemMRM>; + def #NAME#16mi : BinOpMI_RMW<mnemonic, Xi16, opnode, MemMRM>; + def #NAME#32mi : BinOpMI_RMW<mnemonic, Xi32, opnode, MemMRM>; + def #NAME#64mi32 : BinOpMI_RMW<mnemonic, Xi64, opnode, MemMRM>; + + def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>; + def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>; + def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>; + def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>; + } +} + +/// ArithBinOp_RFF - This is an arithmetic binary operator where the pattern is +/// defined with "(set GPR:$dst, EFLAGS, (node LHS, RHS, EFLAGS))" like ADC and +/// SBB. +/// +/// It would be nice to get rid of the second and third argument here, but +/// tblgen can't handle dependent type references aggressively enough: PR8330 +multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, + string mnemonic, Format RegMRM, Format MemMRM, + SDNode opnode, bit CommutableRR, + bit ConvertibleToThreeAddress> { + let Defs = [EFLAGS] in { + let Constraints = "$src1 = $dst" in { + let isCommutable = CommutableRR, + isConvertibleToThreeAddress = ConvertibleToThreeAddress in { + def #NAME#8rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi8 , opnode>; + def #NAME#16rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi16, opnode>; + def #NAME#32rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi32, opnode>; + def #NAME#64rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi64, opnode>; + } // isCommutable + + def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>; + def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>; + def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>; + def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>; + + def #NAME#8rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi8 , opnode>; + def #NAME#16rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi16, opnode>; + def #NAME#32rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi32, opnode>; + def #NAME#64rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi64, opnode>; + + let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { + // NOTE: These are order specific, we want the ri8 forms to be listed + // first so that they are slightly preferred to the ri forms. + def #NAME#16ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi16, opnode, RegMRM>; + def #NAME#32ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi32, opnode, RegMRM>; + def #NAME#64ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi64, opnode, RegMRM>; + + def #NAME#8ri : BinOpRI_RFF<0x80, mnemonic, Xi8 , opnode, RegMRM>; + def #NAME#16ri : BinOpRI_RFF<0x80, mnemonic, Xi16, opnode, RegMRM>; + def #NAME#32ri : BinOpRI_RFF<0x80, mnemonic, Xi32, opnode, RegMRM>; + def #NAME#64ri32: BinOpRI_RFF<0x80, mnemonic, Xi64, opnode, RegMRM>; + } + } // Constraints = "$src1 = $dst" + + def #NAME#8mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi8 , opnode>; + def #NAME#16mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi16, opnode>; + def #NAME#32mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi32, opnode>; + def #NAME#64mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi64, opnode>; + + // NOTE: These are order specific, we want the mi8 forms to be listed + // first so that they are slightly preferred to the mi forms. 
+ def #NAME#16mi8 : BinOpMI8_RMW_FF<mnemonic, Xi16, opnode, MemMRM>; + def #NAME#32mi8 : BinOpMI8_RMW_FF<mnemonic, Xi32, opnode, MemMRM>; + def #NAME#64mi8 : BinOpMI8_RMW_FF<mnemonic, Xi64, opnode, MemMRM>; + + def #NAME#8mi : BinOpMI_RMW_FF<mnemonic, Xi8 , opnode, MemMRM>; + def #NAME#16mi : BinOpMI_RMW_FF<mnemonic, Xi16, opnode, MemMRM>; + def #NAME#32mi : BinOpMI_RMW_FF<mnemonic, Xi32, opnode, MemMRM>; + def #NAME#64mi32 : BinOpMI_RMW_FF<mnemonic, Xi64, opnode, MemMRM>; + + def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>; + def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>; + def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>; + def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>; + } +} + +/// ArithBinOp_F - This is an arithmetic binary operator where the pattern is +/// defined with "(set EFLAGS, (...". It would be really nice to find a way +/// to factor this with the other ArithBinOp_*. +/// +multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, + string mnemonic, Format RegMRM, Format MemMRM, + SDNode opnode, + bit CommutableRR, bit ConvertibleToThreeAddress> { + let Defs = [EFLAGS] in { + let isCommutable = CommutableRR, + isConvertibleToThreeAddress = ConvertibleToThreeAddress in { + def #NAME#8rr : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>; + def #NAME#16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>; + def #NAME#32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>; + def #NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>; + } // isCommutable + + def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>; + def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>; + def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>; + def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>; + + def #NAME#8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>; + def #NAME#16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>; + def #NAME#32rm : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>; + def #NAME#64rm : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>; + + let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { + // NOTE: These are order specific, we want the ri8 forms to be listed + // first so that they are slightly preferred to the ri forms. + def #NAME#16ri8 : BinOpRI8_F<0x82, mnemonic, Xi16, opnode, RegMRM>; + def #NAME#32ri8 : BinOpRI8_F<0x82, mnemonic, Xi32, opnode, RegMRM>; + def #NAME#64ri8 : BinOpRI8_F<0x82, mnemonic, Xi64, opnode, RegMRM>; + + def #NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>; + def #NAME#16ri : BinOpRI_F<0x80, mnemonic, Xi16, opnode, RegMRM>; + def #NAME#32ri : BinOpRI_F<0x80, mnemonic, Xi32, opnode, RegMRM>; + def #NAME#64ri32: BinOpRI_F<0x80, mnemonic, Xi64, opnode, RegMRM>; + } + + def #NAME#8mr : BinOpMR_F<BaseOpc, mnemonic, Xi8 , opnode>; + def #NAME#16mr : BinOpMR_F<BaseOpc, mnemonic, Xi16, opnode>; + def #NAME#32mr : BinOpMR_F<BaseOpc, mnemonic, Xi32, opnode>; + def #NAME#64mr : BinOpMR_F<BaseOpc, mnemonic, Xi64, opnode>; + + // NOTE: These are order specific, we want the mi8 forms to be listed + // first so that they are slightly preferred to the mi forms. 
+    def #NAME#16mi8 : BinOpMI8_F<mnemonic, Xi16, opnode, MemMRM>;
+    def #NAME#32mi8 : BinOpMI8_F<mnemonic, Xi32, opnode, MemMRM>;
+    def #NAME#64mi8 : BinOpMI8_F<mnemonic, Xi64, opnode, MemMRM>;
+
+    def #NAME#8mi    : BinOpMI_F<mnemonic, Xi8 , opnode, MemMRM>;
+    def #NAME#16mi   : BinOpMI_F<mnemonic, Xi16, opnode, MemMRM>;
+    def #NAME#32mi   : BinOpMI_F<mnemonic, Xi32, opnode, MemMRM>;
+    def #NAME#64mi32 : BinOpMI_F<mnemonic, Xi64, opnode, MemMRM>;
+
+    def #NAME#8i8   : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>;
+    def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>;
+    def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>;
+    def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>;
+  }
+}
+
+
+defm AND : ArithBinOp_RF<0x20, 0x22, 0x24, "and", MRM4r, MRM4m,
+                         X86and_flag, and, 1, 0>;
+defm OR  : ArithBinOp_RF<0x08, 0x0A, 0x0C, "or", MRM1r, MRM1m,
+                         X86or_flag, or, 1, 0>;
+defm XOR : ArithBinOp_RF<0x30, 0x32, 0x34, "xor", MRM6r, MRM6m,
+                         X86xor_flag, xor, 1, 0>;
+defm ADD : ArithBinOp_RF<0x00, 0x02, 0x04, "add", MRM0r, MRM0m,
+                         X86add_flag, add, 1, 1>;
+defm SUB : ArithBinOp_RF<0x28, 0x2A, 0x2C, "sub", MRM5r, MRM5m,
+                         X86sub_flag, sub, 0, 0>;
+
+// Arithmetic.
+let Uses = [EFLAGS] in {
+  defm ADC : ArithBinOp_RFF<0x10, 0x12, 0x14, "adc", MRM2r, MRM2m, X86adc_flag,
+                            1, 0>;
+  defm SBB : ArithBinOp_RFF<0x18, 0x1A, 0x1C, "sbb", MRM3r, MRM3m, X86sbb_flag,
+                            0, 0>;
+}
+
+defm CMP : ArithBinOp_F<0x38, 0x3A, 0x3C, "cmp", MRM7r, MRM7m, X86cmp, 0, 0>;
+
+
+//===----------------------------------------------------------------------===//
+// Semantically, test instructions are similar to AND, except they don't
+// generate a result. From an encoding perspective, they are very different:
+// they don't have all the usual imm8 and REV forms, and are encoded into a
+// different space.
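// For example, "testl %ecx, %ecx" computes ECX & ECX only to set SF/ZF/PF and
// is the usual way to check a register against zero without clobbering it;
// the X86testpat fragment below matches exactly that shape: an AND whose only
// use is a compare against 0.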
+def X86testpat : PatFrag<(ops node:$lhs, node:$rhs), + (X86cmp (and_su node:$lhs, node:$rhs), 0)>; + +let Defs = [EFLAGS] in { + let isCommutable = 1 in { + def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , X86testpat, MRMSrcReg>; + def TEST16rr : BinOpRR_F<0x84, "test", Xi16, X86testpat, MRMSrcReg>; + def TEST32rr : BinOpRR_F<0x84, "test", Xi32, X86testpat, MRMSrcReg>; + def TEST64rr : BinOpRR_F<0x84, "test", Xi64, X86testpat, MRMSrcReg>; + } // isCommutable + + def TEST8rm : BinOpRM_F<0x84, "test", Xi8 , X86testpat>; + def TEST16rm : BinOpRM_F<0x84, "test", Xi16, X86testpat>; + def TEST32rm : BinOpRM_F<0x84, "test", Xi32, X86testpat>; + def TEST64rm : BinOpRM_F<0x84, "test", Xi64, X86testpat>; + + def TEST8ri : BinOpRI_F<0xF6, "test", Xi8 , X86testpat, MRM0r>; + def TEST16ri : BinOpRI_F<0xF6, "test", Xi16, X86testpat, MRM0r>; + def TEST32ri : BinOpRI_F<0xF6, "test", Xi32, X86testpat, MRM0r>; + def TEST64ri32 : BinOpRI_F<0xF6, "test", Xi64, X86testpat, MRM0r>; + + def TEST8mi : BinOpMI_F<"test", Xi8 , X86testpat, MRM0m, 0xF6>; + def TEST16mi : BinOpMI_F<"test", Xi16, X86testpat, MRM0m, 0xF6>; + def TEST32mi : BinOpMI_F<"test", Xi32, X86testpat, MRM0m, 0xF6>; + def TEST64mi32 : BinOpMI_F<"test", Xi64, X86testpat, MRM0m, 0xF6>; + + def TEST8i8 : BinOpAI<0xA8, "test", Xi8 , AL>; + def TEST16i16 : BinOpAI<0xA8, "test", Xi16, AX>; + def TEST32i32 : BinOpAI<0xA8, "test", Xi32, EAX>; + def TEST64i32 : BinOpAI<0xA8, "test", Xi64, RAX>; +} + diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h index 2a6a71d..1ea8071 100644 --- a/lib/Target/X86/X86InstrBuilder.h +++ b/lib/Target/X86/X86InstrBuilder.h @@ -56,6 +56,31 @@ struct X86AddressMode { : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0), GVOpFlags(0) { Base.Reg = 0; } + + + void getFullAddress(SmallVectorImpl<MachineOperand> &MO) { + assert(Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8); + + if (BaseType == X86AddressMode::RegBase) + MO.push_back(MachineOperand::CreateReg(Base.Reg, false, false, + false, false, false, 0, false)); + else { + assert(BaseType == X86AddressMode::FrameIndexBase); + MO.push_back(MachineOperand::CreateFI(Base.FrameIndex)); + } + + MO.push_back(MachineOperand::CreateImm(Scale)); + MO.push_back(MachineOperand::CreateReg(IndexReg, false, false, + false, false, false, 0, false)); + + if (GV) + MO.push_back(MachineOperand::CreateGA(GV, Disp, GVOpFlags)); + else + MO.push_back(MachineOperand::CreateImm(Disp)); + + MO.push_back(MachineOperand::CreateReg(0, false, false, + false, false, false, 0, false)); + } }; /// addDirectMem - This function is used to add a direct memory reference to the @@ -101,10 +126,11 @@ addFullAddress(const MachineInstrBuilder &MIB, if (AM.BaseType == X86AddressMode::RegBase) MIB.addReg(AM.Base.Reg); - else if (AM.BaseType == X86AddressMode::FrameIndexBase) + else { + assert(AM.BaseType == X86AddressMode::FrameIndexBase); MIB.addFrameIndex(AM.Base.FrameIndex); - else - assert (0); + } + MIB.addImm(AM.Scale).addReg(AM.IndexReg); if (AM.GV) MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags); @@ -131,9 +157,8 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) { if (TID.mayStore()) Flags |= MachineMemOperand::MOStore; MachineMemOperand *MMO = - MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), - Flags, Offset, - MFI.getObjectSize(FI), + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI, Offset), + Flags, MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); return addOffset(MIB.addFrameIndex(FI), Offset) 
.addMemOperand(MMO); diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td new file mode 100644 index 0000000..3a43b22 --- /dev/null +++ b/lib/Target/X86/X86InstrCMovSetCC.td @@ -0,0 +1,104 @@ +//===- X86InstrCMovSetCC.td - Conditional Move and SetCC ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the X86 conditional move and set on condition +// instructions. +// +//===----------------------------------------------------------------------===// + + +// SetCC instructions. +multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> { + let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", + isCommutable = 1 in { + def #NAME#16rr + : I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"), + [(set GR16:$dst, + (X86cmov GR16:$src1, GR16:$src2, CondNode, EFLAGS))]>,TB,OpSize; + def #NAME#32rr + : I<opc, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + !strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"), + [(set GR32:$dst, + (X86cmov GR32:$src1, GR32:$src2, CondNode, EFLAGS))]>, TB; + def #NAME#64rr + :RI<opc, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), + !strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"), + [(set GR64:$dst, + (X86cmov GR64:$src1, GR64:$src2, CondNode, EFLAGS))]>, TB; + } + + let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" in { + def #NAME#16rm + : I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), + !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"), + [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), + CondNode, EFLAGS))]>, TB, OpSize; + def #NAME#32rm + : I<opc, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), + !strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"), + [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), + CondNode, EFLAGS))]>, TB; + def #NAME#64rm + :RI<opc, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), + !strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"), + [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), + CondNode, EFLAGS))]>, TB; + } // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" +} // end multiclass + + +// Conditional Moves. +defm CMOVO : CMOV<0x40, "cmovo" , X86_COND_O>; +defm CMOVNO : CMOV<0x41, "cmovno", X86_COND_NO>; +defm CMOVB : CMOV<0x42, "cmovb" , X86_COND_B>; +defm CMOVAE : CMOV<0x43, "cmovae", X86_COND_AE>; +defm CMOVE : CMOV<0x44, "cmove" , X86_COND_E>; +defm CMOVNE : CMOV<0x45, "cmovne", X86_COND_NE>; +defm CMOVBE : CMOV<0x46, "cmovbe", X86_COND_BE>; +defm CMOVA : CMOV<0x47, "cmova" , X86_COND_A>; +defm CMOVS : CMOV<0x48, "cmovs" , X86_COND_S>; +defm CMOVNS : CMOV<0x49, "cmovns", X86_COND_NS>; +defm CMOVP : CMOV<0x4A, "cmovp" , X86_COND_P>; +defm CMOVNP : CMOV<0x4B, "cmovnp", X86_COND_NP>; +defm CMOVL : CMOV<0x4C, "cmovl" , X86_COND_L>; +defm CMOVGE : CMOV<0x4D, "cmovge", X86_COND_GE>; +defm CMOVLE : CMOV<0x4E, "cmovle", X86_COND_LE>; +defm CMOVG : CMOV<0x4F, "cmovg" , X86_COND_G>; + + +// SetCC instructions. 
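// Each defm below expands to a register form and a memory form, e.g. SETEr
// and SETEm; after a compare, "sete %al" materializes ZF as 0 or 1 in an
// 8-bit register, and "sete (%ecx)" stores that byte straight to memory.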
+multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> { + let Uses = [EFLAGS] in { + def r : I<opc, MRM0r, (outs GR8:$dst), (ins), + !strconcat(Mnemonic, "\t$dst"), + [(set GR8:$dst, (X86setcc OpNode, EFLAGS))]>, TB; + def m : I<opc, MRM0m, (outs), (ins i8mem:$dst), + !strconcat(Mnemonic, "\t$dst"), + [(store (X86setcc OpNode, EFLAGS), addr:$dst)]>, TB; + } // Uses = [EFLAGS] +} + +defm SETO : SETCC<0x90, "seto", X86_COND_O>; // is overflow bit set +defm SETNO : SETCC<0x91, "setno", X86_COND_NO>; // is overflow bit not set +defm SETB : SETCC<0x92, "setb", X86_COND_B>; // unsigned less than +defm SETAE : SETCC<0x93, "setae", X86_COND_AE>; // unsigned greater or equal +defm SETE : SETCC<0x94, "sete", X86_COND_E>; // equal to +defm SETNE : SETCC<0x95, "setne", X86_COND_NE>; // not equal to +defm SETBE : SETCC<0x96, "setbe", X86_COND_BE>; // unsigned less than or equal +defm SETA : SETCC<0x97, "seta", X86_COND_A>; // unsigned greater than +defm SETS : SETCC<0x98, "sets", X86_COND_S>; // is signed bit set +defm SETNS : SETCC<0x99, "setns", X86_COND_NS>; // is not signed +defm SETP : SETCC<0x9A, "setp", X86_COND_P>; // is parity bit set +defm SETNP : SETCC<0x9B, "setnp", X86_COND_NP>; // is parity bit not set +defm SETL : SETCC<0x9C, "setl", X86_COND_L>; // signed less than +defm SETGE : SETCC<0x9D, "setge", X86_COND_GE>; // signed greater or equal +defm SETLE : SETCC<0x9E, "setle", X86_COND_LE>; // signed less than or equal +defm SETG : SETCC<0x9F, "setg", X86_COND_G>; // signed greater than + diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td new file mode 100644 index 0000000..4c915d9 --- /dev/null +++ b/lib/Target/X86/X86InstrCompiler.td @@ -0,0 +1,1626 @@ +//===- X86InstrCompiler.td - Compiler Pseudos and Patterns -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the various pseudo instructions used by the compiler, +// as well as Pat patterns used during instruction selection. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Pattern Matching Support + +def GetLo32XForm : SDNodeXForm<imm, [{ + // Transformation function: get the low 32 bits. + return getI32Imm((unsigned)N->getZExtValue()); +}]>; + +def GetLo8XForm : SDNodeXForm<imm, [{ + // Transformation function: get the low 8 bits. + return getI8Imm((uint8_t)N->getZExtValue()); +}]>; + + +//===----------------------------------------------------------------------===// +// Random Pseudo Instructions. + +// PIC base construction. This expands to code that looks like this: +// call $next_inst +// popl %destreg" +let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in + def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label), + "", []>; + + +// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into +// a stack adjustment and the codegen must know that they may modify the stack +// pointer before prolog-epilog rewriting occurs. +// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become +// sub / add which can clobber EFLAGS. 
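// Sketch: a call needing 16 bytes of outgoing argument space is bracketed by
// ADJCALLSTACKDOWN32 and ADJCALLSTACKUP32 pseudos, which typically lower to
//   subl $16, %esp
//   ...call sequence...
//   addl $16, %esp
// (or are folded away entirely), hence the pessimistic EFLAGS clobber.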
+let Defs = [ESP, EFLAGS], Uses = [ESP] in { +def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt), + "#ADJCALLSTACKDOWN", + [(X86callseq_start timm:$amt)]>, + Requires<[In32BitMode]>; +def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), + "#ADJCALLSTACKUP", + [(X86callseq_end timm:$amt1, timm:$amt2)]>, + Requires<[In32BitMode]>; +} + +// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into +// a stack adjustment and the codegen must know that they may modify the stack +// pointer before prolog-epilog rewriting occurs. +// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become +// sub / add which can clobber EFLAGS. +let Defs = [RSP, EFLAGS], Uses = [RSP] in { +def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt), + "#ADJCALLSTACKDOWN", + [(X86callseq_start timm:$amt)]>, + Requires<[In64BitMode]>; +def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), + "#ADJCALLSTACKUP", + [(X86callseq_end timm:$amt1, timm:$amt2)]>, + Requires<[In64BitMode]>; +} + + + +// x86-64 va_start lowering magic. +let usesCustomInserter = 1 in { +def VASTART_SAVE_XMM_REGS : I<0, Pseudo, + (outs), + (ins GR8:$al, + i64imm:$regsavefi, i64imm:$offset, + variable_ops), + "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset", + [(X86vastart_save_xmm_regs GR8:$al, + imm:$regsavefi, + imm:$offset)]>; + +// The VAARG_64 pseudo-instruction takes the address of the va_list, +// and places the address of the next argument into a register. +let Defs = [EFLAGS] in +def VAARG_64 : I<0, Pseudo, + (outs GR64:$dst), + (ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align), + "#VAARG_64 $dst, $ap, $size, $mode, $align", + [(set GR64:$dst, + (X86vaarg64 addr:$ap, imm:$size, imm:$mode, imm:$align)), + (implicit EFLAGS)]>; + +// Dynamic stack allocation yields a _chkstk or _alloca call for all Windows +// targets. These calls are needed to probe the stack when allocating more than +// 4k bytes in one go. Touching the stack at 4K increments is necessary to +// ensure that the guard pages used by the OS virtual memory manager are +// allocated in correct sequence. +// The main point of having separate instruction are extra unmodelled effects +// (compared to ordinary calls) like stack pointer change. + +let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in + def WIN_ALLOCA : I<0, Pseudo, (outs), (ins), + "# dynamic stack allocation", + [(X86WinAlloca)]>; +} + + + +//===----------------------------------------------------------------------===// +// EH Pseudo Instructions +// +let isTerminator = 1, isReturn = 1, isBarrier = 1, + hasCtrlDep = 1, isCodeGenOnly = 1 in { +def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr), + "ret\t#eh_return, addr: $addr", + [(X86ehret GR32:$addr)]>; + +} + +let isTerminator = 1, isReturn = 1, isBarrier = 1, + hasCtrlDep = 1, isCodeGenOnly = 1 in { +def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr), + "ret\t#eh_return, addr: $addr", + [(X86ehret GR64:$addr)]>; + +} + +//===----------------------------------------------------------------------===// +// Alias Instructions +//===----------------------------------------------------------------------===// + +// Alias instructions that map movr0 to xor. +// FIXME: remove when we can teach regalloc that xor reg, reg is ok. +// FIXME: Set encoding to pseudo. 
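// Sketch of the intended lowering: MOV32r0 is emitted as
//   xorl %eax, %eax
// (2 bytes) rather than "movl $0, %eax" (5 bytes); because the xor writes the
// flags, EFLAGS must appear in Defs below even though the instruction only
// materializes zero.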
+let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1, + isCodeGenOnly = 1 in { +def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "", + [(set GR8:$dst, 0)]>; + +// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller +// encoding and avoids a partial-register update sometimes, but doing so +// at isel time interferes with rematerialization in the current register +// allocator. For now, this is rewritten when the instruction is lowered +// to an MCInst. +def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins), + "", + [(set GR16:$dst, 0)]>, OpSize; + +// FIXME: Set encoding to pseudo. +def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "", + [(set GR32:$dst, 0)]>; +} + +// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a +// smaller encoding, but doing so at isel time interferes with rematerialization +// in the current register allocator. For now, this is rewritten when the +// instruction is lowered to an MCInst. +// FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove +// when we have a better way to specify isel priority. +let Defs = [EFLAGS], isCodeGenOnly=1, + AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in +def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "", + [(set GR64:$dst, 0)]>; + +// Materialize i64 constant where top 32-bits are zero. This could theoretically +// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however +// that would make it more difficult to rematerialize. +let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1, + isCodeGenOnly = 1 in +def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src), + "", [(set GR64:$dst, i64immZExt32:$src)]>; + +// Use sbb to materialize carry bit. +let Uses = [EFLAGS], Defs = [EFLAGS], isCodeGenOnly = 1 in { +// FIXME: These are pseudo ops that should be replaced with Pat<> patterns. +// However, Pat<> can't replicate the destination reg into the inputs of the +// result. +// FIXME: Change these to have encoding Pseudo when X86MCCodeEmitter replaces +// X86CodeEmitter. +def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), "", + [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; +def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), "", + [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>, + OpSize; +def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), "", + [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; +def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "", + [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; +} // isCodeGenOnly + + +def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), + (SETB_C16r)>; +def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), + (SETB_C32r)>; +def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), + (SETB_C64r)>; + +def : Pat<(i16 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), + (SETB_C16r)>; +def : Pat<(i32 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), + (SETB_C32r)>; +def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), + (SETB_C64r)>; + +// We canonicalize 'setb' to "(and (sbb reg,reg), 1)" on the hope that the and +// will be eliminated and that the sbb can be extended up to a wider type. When +// this happens, it is great. However, if we are left with an 8-bit sbb and an +// and, we might as well just match it as a setb. 
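// Sketch of the wider idiom: after a compare that sets the carry flag,
//   sbbl %eax, %eax    // EAX = 0 or -1 depending on CF
//   andl $1, %eax      // EAX = 0 or 1, what "setb %al" would have produced
// The pattern below recognizes the leftover 8-bit "and (sbb), 1" case and
// turns it back into SETBr.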
+def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1), + (SETBr)>; + +//===----------------------------------------------------------------------===// +// String Pseudo Instructions +// +let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in { +def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}", + [(X86rep_movs i8)]>, REP; +def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}", + [(X86rep_movs i16)]>, REP, OpSize; +def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}", + [(X86rep_movs i32)]>, REP; +} + +let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in +def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}", + [(X86rep_movs i64)]>, REP; + + +// FIXME: Should use "(X86rep_stos AL)" as the pattern. +let Defs = [ECX,EDI], Uses = [AL,ECX,EDI], isCodeGenOnly = 1 in +def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}", + [(X86rep_stos i8)]>, REP; +let Defs = [ECX,EDI], Uses = [AX,ECX,EDI], isCodeGenOnly = 1 in +def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}", + [(X86rep_stos i16)]>, REP, OpSize; +let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI], isCodeGenOnly = 1 in +def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}", + [(X86rep_stos i32)]>, REP; + +let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in +def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}", + [(X86rep_stos i64)]>, REP; + + +//===----------------------------------------------------------------------===// +// Thread Local Storage Instructions +// + +// ELF TLS Support +// All calls clobber the non-callee saved registers. ESP is marked as +// a use to prevent stack-pointer assignments that appear immediately +// before calls from potentially appearing dead. +let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, + MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], + Uses = [ESP] in +def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), + "# TLS_addr32", + [(X86tlsaddr tls32addr:$sym)]>, + Requires<[In32BitMode]>; + +// All calls clobber the non-callee saved registers. RSP is marked as +// a use to prevent stack-pointer assignments that appear immediately +// before calls from potentially appearing dead. +let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, + FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1, + MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], + Uses = [RSP] in +def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym), + "# TLS_addr64", + [(X86tlsaddr tls64addr:$sym)]>, + Requires<[In64BitMode]>; + +// Darwin TLS Support +// For i386, the address of the thunk is passed on the stack, on return the +// address of the variable is in %eax. %ecx is trashed during the function +// call. All other registers are preserved. +let Defs = [EAX, ECX, EFLAGS], + Uses = [ESP], + usesCustomInserter = 1 in +def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym), + "# TLSCall_32", + [(X86TLSCall addr:$sym)]>, + Requires<[In32BitMode]>; + +// For x86_64, the address of the thunk is passed in %rdi, on return +// the address of the variable is in %rax. All other registers are preserved. 
+let Defs = [RAX, EFLAGS], + Uses = [RSP, RDI], + usesCustomInserter = 1 in +def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym), + "# TLSCall_64", + [(X86TLSCall addr:$sym)]>, + Requires<[In64BitMode]>; + + +//===----------------------------------------------------------------------===// +// Conditional Move Pseudo Instructions + +let Constraints = "$src1 = $dst" in { + +// Conditional moves +let Uses = [EFLAGS] in { + +// X86 doesn't have 8-bit conditional moves. Use a customInserter to +// emit control flow. An alternative to this is to mark i8 SELECT as Promote, +// however that requires promoting the operands, and can induce additional +// i8 register pressure. Note that CMOV_GR8 is conservatively considered to +// clobber EFLAGS, because if one of the operands is zero, the expansion +// could involve an xor. +let usesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] in { +def CMOV_GR8 : I<0, Pseudo, + (outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond), + "#CMOV_GR8 PSEUDO!", + [(set GR8:$dst, (X86cmov GR8:$src1, GR8:$src2, + imm:$cond, EFLAGS))]>; + +let Predicates = [NoCMov] in { +def CMOV_GR32 : I<0, Pseudo, + (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$cond), + "#CMOV_GR32* PSEUDO!", + [(set GR32:$dst, + (X86cmov GR32:$src1, GR32:$src2, imm:$cond, EFLAGS))]>; +def CMOV_GR16 : I<0, Pseudo, + (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$cond), + "#CMOV_GR16* PSEUDO!", + [(set GR16:$dst, + (X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>; +def CMOV_RFP32 : I<0, Pseudo, + (outs RFP32:$dst), + (ins RFP32:$src1, RFP32:$src2, i8imm:$cond), + "#CMOV_RFP32 PSEUDO!", + [(set RFP32:$dst, + (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond, + EFLAGS))]>; +def CMOV_RFP64 : I<0, Pseudo, + (outs RFP64:$dst), + (ins RFP64:$src1, RFP64:$src2, i8imm:$cond), + "#CMOV_RFP64 PSEUDO!", + [(set RFP64:$dst, + (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond, + EFLAGS))]>; +def CMOV_RFP80 : I<0, Pseudo, + (outs RFP80:$dst), + (ins RFP80:$src1, RFP80:$src2, i8imm:$cond), + "#CMOV_RFP80 PSEUDO!", + [(set RFP80:$dst, + (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond, + EFLAGS))]>; +} // Predicates = [NoCMov] +} // UsesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] +} // Uses = [EFLAGS] + +} // Constraints = "$src1 = $dst" in + + +//===----------------------------------------------------------------------===// +// Atomic Instruction Pseudo Instructions +//===----------------------------------------------------------------------===// + +// Atomic exchange, and, or, xor +let Constraints = "$val = $dst", Defs = [EFLAGS], + usesCustomInserter = 1 in { + +def ATOMAND8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val), + "#ATOMAND8 PSEUDO!", + [(set GR8:$dst, (atomic_load_and_8 addr:$ptr, GR8:$val))]>; +def ATOMOR8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val), + "#ATOMOR8 PSEUDO!", + [(set GR8:$dst, (atomic_load_or_8 addr:$ptr, GR8:$val))]>; +def ATOMXOR8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val), + "#ATOMXOR8 PSEUDO!", + [(set GR8:$dst, (atomic_load_xor_8 addr:$ptr, GR8:$val))]>; +def ATOMNAND8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val), + "#ATOMNAND8 PSEUDO!", + [(set GR8:$dst, (atomic_load_nand_8 addr:$ptr, GR8:$val))]>; + +def ATOMAND16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), + "#ATOMAND16 PSEUDO!", + [(set GR16:$dst, (atomic_load_and_16 addr:$ptr, GR16:$val))]>; +def ATOMOR16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), + "#ATOMOR16 PSEUDO!", + [(set GR16:$dst, (atomic_load_or_16 
addr:$ptr, GR16:$val))]>; +def ATOMXOR16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), + "#ATOMXOR16 PSEUDO!", + [(set GR16:$dst, (atomic_load_xor_16 addr:$ptr, GR16:$val))]>; +def ATOMNAND16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), + "#ATOMNAND16 PSEUDO!", + [(set GR16:$dst, (atomic_load_nand_16 addr:$ptr, GR16:$val))]>; +def ATOMMIN16: I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val), + "#ATOMMIN16 PSEUDO!", + [(set GR16:$dst, (atomic_load_min_16 addr:$ptr, GR16:$val))]>; +def ATOMMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), + "#ATOMMAX16 PSEUDO!", + [(set GR16:$dst, (atomic_load_max_16 addr:$ptr, GR16:$val))]>; +def ATOMUMIN16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), + "#ATOMUMIN16 PSEUDO!", + [(set GR16:$dst, (atomic_load_umin_16 addr:$ptr, GR16:$val))]>; +def ATOMUMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), + "#ATOMUMAX16 PSEUDO!", + [(set GR16:$dst, (atomic_load_umax_16 addr:$ptr, GR16:$val))]>; + + +def ATOMAND32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMAND32 PSEUDO!", + [(set GR32:$dst, (atomic_load_and_32 addr:$ptr, GR32:$val))]>; +def ATOMOR32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMOR32 PSEUDO!", + [(set GR32:$dst, (atomic_load_or_32 addr:$ptr, GR32:$val))]>; +def ATOMXOR32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMXOR32 PSEUDO!", + [(set GR32:$dst, (atomic_load_xor_32 addr:$ptr, GR32:$val))]>; +def ATOMNAND32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMNAND32 PSEUDO!", + [(set GR32:$dst, (atomic_load_nand_32 addr:$ptr, GR32:$val))]>; +def ATOMMIN32: I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val), + "#ATOMMIN32 PSEUDO!", + [(set GR32:$dst, (atomic_load_min_32 addr:$ptr, GR32:$val))]>; +def ATOMMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMMAX32 PSEUDO!", + [(set GR32:$dst, (atomic_load_max_32 addr:$ptr, GR32:$val))]>; +def ATOMUMIN32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMUMIN32 PSEUDO!", + [(set GR32:$dst, (atomic_load_umin_32 addr:$ptr, GR32:$val))]>; +def ATOMUMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMUMAX32 PSEUDO!", + [(set GR32:$dst, (atomic_load_umax_32 addr:$ptr, GR32:$val))]>; + + + +def ATOMAND64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), + "#ATOMAND64 PSEUDO!", + [(set GR64:$dst, (atomic_load_and_64 addr:$ptr, GR64:$val))]>; +def ATOMOR64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), + "#ATOMOR64 PSEUDO!", + [(set GR64:$dst, (atomic_load_or_64 addr:$ptr, GR64:$val))]>; +def ATOMXOR64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), + "#ATOMXOR64 PSEUDO!", + [(set GR64:$dst, (atomic_load_xor_64 addr:$ptr, GR64:$val))]>; +def ATOMNAND64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), + "#ATOMNAND64 PSEUDO!", + [(set GR64:$dst, (atomic_load_nand_64 addr:$ptr, GR64:$val))]>; +def ATOMMIN64: I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$ptr, GR64:$val), + "#ATOMMIN64 PSEUDO!", + [(set GR64:$dst, (atomic_load_min_64 addr:$ptr, GR64:$val))]>; +def ATOMMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), + "#ATOMMAX64 PSEUDO!", + [(set GR64:$dst, (atomic_load_max_64 addr:$ptr, GR64:$val))]>; +def ATOMUMIN64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), + "#ATOMUMIN64 PSEUDO!", + [(set GR64:$dst, (atomic_load_umin_64 addr:$ptr, GR64:$val))]>; +def ATOMUMAX64: I<0, 
Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), + "#ATOMUMAX64 PSEUDO!", + [(set GR64:$dst, (atomic_load_umax_64 addr:$ptr, GR64:$val))]>; +} + +let Constraints = "$val1 = $dst1, $val2 = $dst2", + Defs = [EFLAGS, EAX, EBX, ECX, EDX], + Uses = [EAX, EBX, ECX, EDX], + mayLoad = 1, mayStore = 1, + usesCustomInserter = 1 in { +def ATOMAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + "#ATOMAND6432 PSEUDO!", []>; +def ATOMOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + "#ATOMOR6432 PSEUDO!", []>; +def ATOMXOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + "#ATOMXOR6432 PSEUDO!", []>; +def ATOMNAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + "#ATOMNAND6432 PSEUDO!", []>; +def ATOMADD6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + "#ATOMADD6432 PSEUDO!", []>; +def ATOMSUB6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + "#ATOMSUB6432 PSEUDO!", []>; +def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + "#ATOMSWAP6432 PSEUDO!", []>; +} + +//===----------------------------------------------------------------------===// +// Normal-Instructions-With-Lock-Prefix Pseudo Instructions +//===----------------------------------------------------------------------===// + +// FIXME: Use normal instructions and add lock prefix dynamically. + +// Memory barriers + +// TODO: Get this to fold the constant into the instruction. +let isCodeGenOnly = 1 in +def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero), + "lock\n\t" + "or{l}\t{$zero, $dst|$dst, $zero}", + []>, Requires<[In32BitMode]>, LOCK; + +let hasSideEffects = 1 in +def Int_MemBarrier : I<0, Pseudo, (outs), (ins), + "#MEMBARRIER", + [(X86MemBarrier)]>, Requires<[HasSSE2]>; + +// TODO: Get this to fold the constant into the instruction. +let hasSideEffects = 1, Defs = [ESP], isCodeGenOnly = 1 in +def Int_MemBarrierNoSSE64 : RI<0x09, MRM1r, (outs), (ins GR64:$zero), + "lock\n\t" + "or{q}\t{$zero, (%rsp)|(%rsp), $zero}", + [(X86MemBarrierNoSSE GR64:$zero)]>, + Requires<[In64BitMode]>, LOCK; + + +// Optimized codegen when the non-memory output is not used. 
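As the comment above says, these lock-prefixed forms cover the common case where the arithmetic result is only needed in memory. A hedged C++ illustration (the function and variable names are ours):

#include <atomic>

std::atomic<int> hits{0};

void record_hit() {
  // The value returned by fetch_add is discarded, so an x86 backend is free
  // to emit "lock add $1, hits(%rip)" (the LOCK_ADD* definitions that follow)
  // rather than "lock xadd", which would tie up a result register.
  hits.fetch_add(1, std::memory_order_relaxed);
}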
+let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in { +def LOCK_ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), + "lock\n\t" + "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_ADD16mr : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), + "lock\n\t" + "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK; +def LOCK_ADD32mr : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), + "lock\n\t" + "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_ADD64mr : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), + "lock\n\t" + "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; + +def LOCK_ADD8mi : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2), + "lock\n\t" + "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_ADD16mi : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2), + "lock\n\t" + "add{w}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_ADD32mi : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2), + "lock\n\t" + "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_ADD64mi32 : RIi32<0x81, MRM0m, (outs), + (ins i64mem:$dst, i64i32imm :$src2), + "lock\n\t" + "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; + +def LOCK_ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2), + "lock\n\t" + "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK; +def LOCK_ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2), + "lock\n\t" + "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_ADD64mi8 : RIi8<0x83, MRM0m, (outs), + (ins i64mem:$dst, i64i8imm :$src2), + "lock\n\t" + "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; + +def LOCK_SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2), + "lock\n\t" + "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_SUB16mr : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), + "lock\n\t" + "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK; +def LOCK_SUB32mr : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), + "lock\n\t" + "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), + "lock\n\t" + "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; + + +def LOCK_SUB8mi : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2), + "lock\n\t" + "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_SUB16mi : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2), + "lock\n\t" + "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK; +def LOCK_SUB32mi : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2), + "lock\n\t" + "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_SUB64mi32 : RIi32<0x81, MRM5m, (outs), + (ins i64mem:$dst, i64i32imm:$src2), + "lock\n\t" + "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; + + +def LOCK_SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2), + "lock\n\t" + "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK; +def LOCK_SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2), + "lock\n\t" + "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_SUB64mi8 : RIi8<0x83, MRM5m, (outs), + (ins i64mem:$dst, i64i8imm :$src2), + "lock\n\t" + "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; + +def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), + "lock\n\t" + "inc{b}\t$dst", []>, LOCK; +def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), + "lock\n\t" + "inc{w}\t$dst", []>, OpSize, LOCK; +def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins 
i32mem:$dst), + "lock\n\t" + "inc{l}\t$dst", []>, LOCK; +def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), + "lock\n\t" + "inc{q}\t$dst", []>, LOCK; + +def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), + "lock\n\t" + "dec{b}\t$dst", []>, LOCK; +def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), + "lock\n\t" + "dec{w}\t$dst", []>, OpSize, LOCK; +def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), + "lock\n\t" + "dec{l}\t$dst", []>, LOCK; +def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), + "lock\n\t" + "dec{q}\t$dst", []>, LOCK; +} + +// Atomic compare and swap. +let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX], + isCodeGenOnly = 1 in { +def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr), + "lock\n\t" + "cmpxchg8b\t$ptr", + [(X86cas8 addr:$ptr)]>, TB, LOCK; +} +let Defs = [AL, EFLAGS], Uses = [AL], isCodeGenOnly = 1 in { +def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap), + "lock\n\t" + "cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}", + [(X86cas addr:$ptr, GR8:$swap, 1)]>, TB, LOCK; +} + +let Defs = [AX, EFLAGS], Uses = [AX], isCodeGenOnly = 1 in { +def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap), + "lock\n\t" + "cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}", + [(X86cas addr:$ptr, GR16:$swap, 2)]>, TB, OpSize, LOCK; +} + +let Defs = [EAX, EFLAGS], Uses = [EAX], isCodeGenOnly = 1 in { +def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap), + "lock\n\t" + "cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}", + [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK; +} + +let Defs = [RAX, EFLAGS], Uses = [RAX], isCodeGenOnly = 1 in { +def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap), + "lock\n\t" + "cmpxchgq\t$swap,$ptr", + [(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK; +} + +// Atomic exchange and add +let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in { +def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr), + "lock\n\t" + "xadd{b}\t{$val, $ptr|$ptr, $val}", + [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>, + TB, LOCK; +def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins GR16:$val, i16mem:$ptr), + "lock\n\t" + "xadd{w}\t{$val, $ptr|$ptr, $val}", + [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>, + TB, OpSize, LOCK; +def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr), + "lock\n\t" + "xadd{l}\t{$val, $ptr|$ptr, $val}", + [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>, + TB, LOCK; +def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val,i64mem:$ptr), + "lock\n\t" + "xadd\t$val, $ptr", + [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>, + TB, LOCK; +} + +//===----------------------------------------------------------------------===// +// Conditional Move Pseudo Instructions. +//===----------------------------------------------------------------------===// + + +// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded after +// instruction selection into a branch sequence. 
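As a rough illustration of the SELECTs these pseudos stand for, consider a scalar floating-point pick between two values: when it is not folded into an SSE min/blend, it is represented by one of the CMOV_FR*/CMOV_V* pseudos below and later expanded into a compare-and-branch diamond. Sketch only, name ours:

float pick_smaller(float a, float b) {
  // An FP select on an EFLAGS-producing compare; x86 has no conditional
  // move for FR32/FR64 values, hence the custom-inserted branch sequence.
  return (a < b) ? a : b;
}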
+let Uses = [EFLAGS], usesCustomInserter = 1 in { + def CMOV_FR32 : I<0, Pseudo, + (outs FR32:$dst), (ins FR32:$t, FR32:$f, i8imm:$cond), + "#CMOV_FR32 PSEUDO!", + [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond, + EFLAGS))]>; + def CMOV_FR64 : I<0, Pseudo, + (outs FR64:$dst), (ins FR64:$t, FR64:$f, i8imm:$cond), + "#CMOV_FR64 PSEUDO!", + [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond, + EFLAGS))]>; + def CMOV_V4F32 : I<0, Pseudo, + (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond), + "#CMOV_V4F32 PSEUDO!", + [(set VR128:$dst, + (v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond, + EFLAGS)))]>; + def CMOV_V2F64 : I<0, Pseudo, + (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond), + "#CMOV_V2F64 PSEUDO!", + [(set VR128:$dst, + (v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond, + EFLAGS)))]>; + def CMOV_V2I64 : I<0, Pseudo, + (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond), + "#CMOV_V2I64 PSEUDO!", + [(set VR128:$dst, + (v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond, + EFLAGS)))]>; +} + + +//===----------------------------------------------------------------------===// +// DAG Pattern Matching Rules +//===----------------------------------------------------------------------===// + +// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable +def : Pat<(i32 (X86Wrapper tconstpool :$dst)), (MOV32ri tconstpool :$dst)>; +def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>; +def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>; +def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>; +def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>; +def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>; + +def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)), + (ADD32ri GR32:$src1, tconstpool:$src2)>; +def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)), + (ADD32ri GR32:$src1, tjumptable:$src2)>; +def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)), + (ADD32ri GR32:$src1, tglobaladdr:$src2)>; +def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)), + (ADD32ri GR32:$src1, texternalsym:$src2)>; +def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)), + (ADD32ri GR32:$src1, tblockaddress:$src2)>; + +def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst), + (MOV32mi addr:$dst, tglobaladdr:$src)>; +def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst), + (MOV32mi addr:$dst, texternalsym:$src)>; +def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst), + (MOV32mi addr:$dst, tblockaddress:$src)>; + + + +// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable when not in small +// code model mode, should use 'movabs'. FIXME: This is really a hack, the +// 'movabs' predicate should handle this sort of thing. +def : Pat<(i64 (X86Wrapper tconstpool :$dst)), + (MOV64ri tconstpool :$dst)>, Requires<[FarData]>; +def : Pat<(i64 (X86Wrapper tjumptable :$dst)), + (MOV64ri tjumptable :$dst)>, Requires<[FarData]>; +def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)), + (MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>; +def : Pat<(i64 (X86Wrapper texternalsym:$dst)), + (MOV64ri texternalsym:$dst)>, Requires<[FarData]>; +def : Pat<(i64 (X86Wrapper tblockaddress:$dst)), + (MOV64ri tblockaddress:$dst)>, Requires<[FarData]>; + +// In static codegen with small code model, we can get the address of a label +// into a register with 'movl'. FIXME: This is a hack, the 'imm' predicate of +// the MOV64ri64i32 should accept these. 
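The patterns around here choose how a wrapped global address is materialized: a plain 32-bit immediate in the small/static case, a sign-extended 32-bit immediate for the kernel code model, and a full movabs otherwise. A small C++ sketch of the source-level construct involved (the array name is illustrative, and the exact instruction chosen depends on the code model and relocation model):

static long lookup_table[64];

long *table_base() {
  // Taking a global's address; with a small code model and static linking
  // this is expected to be a single immediate move, per the patterns above.
  return &lookup_table[0];
}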
+def : Pat<(i64 (X86Wrapper tconstpool :$dst)), + (MOV64ri64i32 tconstpool :$dst)>, Requires<[SmallCode]>; +def : Pat<(i64 (X86Wrapper tjumptable :$dst)), + (MOV64ri64i32 tjumptable :$dst)>, Requires<[SmallCode]>; +def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)), + (MOV64ri64i32 tglobaladdr :$dst)>, Requires<[SmallCode]>; +def : Pat<(i64 (X86Wrapper texternalsym:$dst)), + (MOV64ri64i32 texternalsym:$dst)>, Requires<[SmallCode]>; +def : Pat<(i64 (X86Wrapper tblockaddress:$dst)), + (MOV64ri64i32 tblockaddress:$dst)>, Requires<[SmallCode]>; + +// In kernel code model, we can get the address of a label +// into a register with 'movq'. FIXME: This is a hack, the 'imm' predicate of +// the MOV64ri32 should accept these. +def : Pat<(i64 (X86Wrapper tconstpool :$dst)), + (MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>; +def : Pat<(i64 (X86Wrapper tjumptable :$dst)), + (MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>; +def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)), + (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>; +def : Pat<(i64 (X86Wrapper texternalsym:$dst)), + (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>; +def : Pat<(i64 (X86Wrapper tblockaddress:$dst)), + (MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>; + +// If we have small model and -static mode, it is safe to store global addresses +// directly as immediates. FIXME: This is really a hack, the 'imm' predicate +// for MOV64mi32 should handle this sort of thing. +def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst), + (MOV64mi32 addr:$dst, tconstpool:$src)>, + Requires<[NearData, IsStatic]>; +def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst), + (MOV64mi32 addr:$dst, tjumptable:$src)>, + Requires<[NearData, IsStatic]>; +def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst), + (MOV64mi32 addr:$dst, tglobaladdr:$src)>, + Requires<[NearData, IsStatic]>; +def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst), + (MOV64mi32 addr:$dst, texternalsym:$src)>, + Requires<[NearData, IsStatic]>; +def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst), + (MOV64mi32 addr:$dst, tblockaddress:$src)>, + Requires<[NearData, IsStatic]>; + + + +// Calls + +// tls has some funny stuff here... +// This corresponds to movabs $foo@tpoff, %rax +def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)), + (MOV64ri tglobaltlsaddr :$dst)>; +// This corresponds to add $foo@tpoff, %rax +def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)), + (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>; +// This corresponds to mov foo@tpoff(%rbx), %eax +def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))), + (MOV64rm tglobaltlsaddr :$dst)>; + + +// Direct PC relative function call for small code model. 32-bit displacement +// sign extended to 64-bit. +def : Pat<(X86call (i64 tglobaladdr:$dst)), + (CALL64pcrel32 tglobaladdr:$dst)>, Requires<[NotWin64]>; +def : Pat<(X86call (i64 texternalsym:$dst)), + (CALL64pcrel32 texternalsym:$dst)>, Requires<[NotWin64]>; + +def : Pat<(X86call (i64 tglobaladdr:$dst)), + (WINCALL64pcrel32 tglobaladdr:$dst)>, Requires<[IsWin64]>; +def : Pat<(X86call (i64 texternalsym:$dst)), + (WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>; + +// tailcall stuff +def : Pat<(X86tcret GR32_TC:$dst, imm:$off), + (TCRETURNri GR32_TC:$dst, imm:$off)>, + Requires<[In32BitMode]>; + +// FIXME: This is disabled for 32-bit PIC mode because the global base +// register which is part of the address mode may be assigned a +// callee-saved register. 
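The TCRETURN*/TAILJMP* patterns referenced here cover calls in tail position. A minimal sketch, assuming an external helper; both names are ours:

int helper(int);

int forward(int x) {
  // When calling conventions and stack usage allow it, "call helper; ret"
  // can be replaced by a single "jmp helper", which is what the tail-call
  // return patterns select.
  return helper(x + 1);
}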
+def : Pat<(X86tcret (load addr:$dst), imm:$off), + (TCRETURNmi addr:$dst, imm:$off)>, + Requires<[In32BitMode, IsNotPIC]>; + +def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off), + (TCRETURNdi texternalsym:$dst, imm:$off)>, + Requires<[In32BitMode]>; + +def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off), + (TCRETURNdi texternalsym:$dst, imm:$off)>, + Requires<[In32BitMode]>; + +def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), + (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>, + Requires<[In64BitMode]>; + +def : Pat<(X86tcret (load addr:$dst), imm:$off), + (TCRETURNmi64 addr:$dst, imm:$off)>, + Requires<[In64BitMode]>; + +def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off), + (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>, + Requires<[In64BitMode]>; + +def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off), + (TCRETURNdi64 texternalsym:$dst, imm:$off)>, + Requires<[In64BitMode]>; + +// Normal calls, with various flavors of addresses. +def : Pat<(X86call (i32 tglobaladdr:$dst)), + (CALLpcrel32 tglobaladdr:$dst)>; +def : Pat<(X86call (i32 texternalsym:$dst)), + (CALLpcrel32 texternalsym:$dst)>; +def : Pat<(X86call (i32 imm:$dst)), + (CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>; + +// Comparisons. + +// TEST R,R is smaller than CMP R,0 +def : Pat<(X86cmp GR8:$src1, 0), + (TEST8rr GR8:$src1, GR8:$src1)>; +def : Pat<(X86cmp GR16:$src1, 0), + (TEST16rr GR16:$src1, GR16:$src1)>; +def : Pat<(X86cmp GR32:$src1, 0), + (TEST32rr GR32:$src1, GR32:$src1)>; +def : Pat<(X86cmp GR64:$src1, 0), + (TEST64rr GR64:$src1, GR64:$src1)>; + +// Conditional moves with folded loads with operands swapped and conditions +// inverted. +multiclass CMOVmr<PatLeaf InvertedCond, Instruction Inst16, Instruction Inst32, + Instruction Inst64> { + def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS), + (Inst16 GR16:$src2, addr:$src1)>; + def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS), + (Inst32 GR32:$src2, addr:$src1)>; + def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS), + (Inst64 GR64:$src2, addr:$src1)>; +} + +defm : CMOVmr<X86_COND_B , CMOVAE16rm, CMOVAE32rm, CMOVAE64rm>; +defm : CMOVmr<X86_COND_AE, CMOVB16rm , CMOVB32rm , CMOVB64rm>; +defm : CMOVmr<X86_COND_E , CMOVNE16rm, CMOVNE32rm, CMOVNE64rm>; +defm : CMOVmr<X86_COND_NE, CMOVE16rm , CMOVE32rm , CMOVE64rm>; +defm : CMOVmr<X86_COND_BE, CMOVA16rm , CMOVA32rm , CMOVA64rm>; +defm : CMOVmr<X86_COND_A , CMOVBE16rm, CMOVBE32rm, CMOVBE64rm>; +defm : CMOVmr<X86_COND_L , CMOVGE16rm, CMOVGE32rm, CMOVGE64rm>; +defm : CMOVmr<X86_COND_GE, CMOVL16rm , CMOVL32rm , CMOVL64rm>; +defm : CMOVmr<X86_COND_LE, CMOVG16rm , CMOVG32rm , CMOVG64rm>; +defm : CMOVmr<X86_COND_G , CMOVLE16rm, CMOVLE32rm, CMOVLE64rm>; +defm : CMOVmr<X86_COND_P , CMOVNP16rm, CMOVNP32rm, CMOVNP64rm>; +defm : CMOVmr<X86_COND_NP, CMOVP16rm , CMOVP32rm , CMOVP64rm>; +defm : CMOVmr<X86_COND_S , CMOVNS16rm, CMOVNS32rm, CMOVNS64rm>; +defm : CMOVmr<X86_COND_NS, CMOVS16rm , CMOVS32rm , CMOVS64rm>; +defm : CMOVmr<X86_COND_O , CMOVNO16rm, CMOVNO32rm, CMOVNO64rm>; +defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>; + +// zextload bool -> zextload byte +def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>; +def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>; +def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>; +def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>; + +// extload bool -> extload byte +// When extloading from 16-bit and smaller memory locations into 64-bit +// registers, use zero-extending 
loads so that the entire 64-bit register is +// defined, avoiding partial-register updates. + +def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>; +def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>; +def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>; +def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>; +def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>; +def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>; + +def : Pat<(extloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>; +def : Pat<(extloadi64i8 addr:$src), (MOVZX64rm8 addr:$src)>; +def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>; +// For other extloads, use subregs, since the high contents of the register are +// defined after an extload. +def : Pat<(extloadi64i32 addr:$src), + (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), + sub_32bit)>; + +// anyext. Define these to do an explicit zero-extend to +// avoid partial-register updates. +def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>; +def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>; + +// Except for i16 -> i32 since isel expect i16 ops to be promoted to i32. +def : Pat<(i32 (anyext GR16:$src)), + (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>; + +def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8 GR8 :$src)>; +def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>; +def : Pat<(i64 (anyext GR32:$src)), + (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>; + + +// Any instruction that defines a 32-bit result leaves the high half of the +// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may +// be copying from a truncate. And x86's cmov doesn't do anything if the +// condition is false. But any other 32-bit operation will zero-extend +// up to 64 bits. +def def32 : PatLeaf<(i32 GR32:$src), [{ + return N->getOpcode() != ISD::TRUNCATE && + N->getOpcode() != TargetOpcode::EXTRACT_SUBREG && + N->getOpcode() != ISD::CopyFromReg && + N->getOpcode() != X86ISD::CMOV; +}]>; + +// In the case of a 32-bit def that is known to implicitly zero-extend, +// we can use a SUBREG_TO_REG. +def : Pat<(i64 (zext def32:$src)), + (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>; + +//===----------------------------------------------------------------------===// +// Pattern match OR as ADD +//===----------------------------------------------------------------------===// + +// If safe, we prefer to pattern match OR as ADD at isel time. ADD can be +// 3-addressified into an LEA instruction to avoid copies. However, we also +// want to finally emit these instructions as an or at the end of the code +// generator to make the generated code easier to read. To do this, we select +// into "disjoint bits" pseudo ops. + +// Treat an 'or' node is as an 'add' if the or'ed bits are known to be zero. +def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1))) + return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue()); + + unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits(); + APInt Mask = APInt::getAllOnesValue(BitWidth); + APInt KnownZero0, KnownOne0; + CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0); + APInt KnownZero1, KnownOne1; + CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0); + return (~KnownZero0 & ~KnownZero1) == 0; +}]>; + + +// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. 
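A concrete case of the "or as add" rule stated above: when the operands provably share no set bits, the OR can be selected as one of the ADD*_DB pseudos and later three-addressified into an LEA. Hedged C++ sketch (names ours):

#include <cstdint>

uint32_t pack_halves(uint16_t hi, uint16_t lo) {
  // The shifted operand occupies bits 16..31 and the other bits 0..15, so
  // MaskedValueIsZero-style reasoning proves this OR is really an ADD.
  return (uint32_t(hi) << 16) | lo;
}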
+let AddedComplexity = 5 in { // Try this before the selecting to OR + +let isConvertibleToThreeAddress = 1, + Constraints = "$src1 = $dst", Defs = [EFLAGS] in { +let isCommutable = 1 in { +def ADD16rr_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "", // orw/addw REG, REG + [(set GR16:$dst, (or_is_add GR16:$src1, GR16:$src2))]>; +def ADD32rr_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "", // orl/addl REG, REG + [(set GR32:$dst, (or_is_add GR32:$src1, GR32:$src2))]>; +def ADD64rr_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), + "", // orq/addq REG, REG + [(set GR64:$dst, (or_is_add GR64:$src1, GR64:$src2))]>; +} // isCommutable + +// NOTE: These are order specific, we want the ri8 forms to be listed +// first so that they are slightly preferred to the ri forms. + +def ADD16ri8_DB : I<0, Pseudo, + (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), + "", // orw/addw REG, imm8 + [(set GR16:$dst,(or_is_add GR16:$src1,i16immSExt8:$src2))]>; +def ADD16ri_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), + "", // orw/addw REG, imm + [(set GR16:$dst, (or_is_add GR16:$src1, imm:$src2))]>; + +def ADD32ri8_DB : I<0, Pseudo, + (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), + "", // orl/addl REG, imm8 + [(set GR32:$dst,(or_is_add GR32:$src1,i32immSExt8:$src2))]>; +def ADD32ri_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), + "", // orl/addl REG, imm + [(set GR32:$dst, (or_is_add GR32:$src1, imm:$src2))]>; + + +def ADD64ri8_DB : I<0, Pseudo, + (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), + "", // orq/addq REG, imm8 + [(set GR64:$dst, (or_is_add GR64:$src1, + i64immSExt8:$src2))]>; +def ADD64ri32_DB : I<0, Pseudo, + (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), + "", // orq/addq REG, imm + [(set GR64:$dst, (or_is_add GR64:$src1, + i64immSExt32:$src2))]>; +} +} // AddedComplexity + + +//===----------------------------------------------------------------------===// +// Some peepholes +//===----------------------------------------------------------------------===// + +// Odd encoding trick: -128 fits into an 8-bit immediate field while +// +128 doesn't, so in this special case use a sub instead of an add. +def : Pat<(add GR16:$src1, 128), + (SUB16ri8 GR16:$src1, -128)>; +def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst), + (SUB16mi8 addr:$dst, -128)>; + +def : Pat<(add GR32:$src1, 128), + (SUB32ri8 GR32:$src1, -128)>; +def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst), + (SUB32mi8 addr:$dst, -128)>; + +def : Pat<(add GR64:$src1, 128), + (SUB64ri8 GR64:$src1, -128)>; +def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst), + (SUB64mi8 addr:$dst, -128)>; + +// The same trick applies for 32-bit immediate fields in 64-bit +// instructions. +def : Pat<(add GR64:$src1, 0x0000000080000000), + (SUB64ri32 GR64:$src1, 0xffffffff80000000)>; +def : Pat<(store (add (loadi64 addr:$dst), 0x00000000800000000), addr:$dst), + (SUB64mi32 addr:$dst, 0xffffffff80000000)>; + +// To avoid needing to materialize an immediate in a register, use a 32-bit and +// with implicit zero-extension instead of a 64-bit and if the immediate has at +// least 32 bits of leading zeros. If in addition the last 32 bits can be +// represented with a sign extension of a 8 bit constant, use that. 
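Two of the peepholes described in this block, written out as plain C++ for illustration (the function names are not from the tree):

#include <cstdint>

int add_128(int x) {
  // +128 does not fit an 8-bit immediate but -128 does, so this can be
  // encoded as "sub $-128, reg" per the patterns above.
  return x + 128;
}

uint64_t low_32(uint64_t x) {
  // The mask has 32 leading zero bits, so a 32-bit operation with implicit
  // zero extension suffices; no 64-bit immediate needs to be materialized.
  return x & 0xffffffffull;
}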
+ +def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm), + (SUBREG_TO_REG + (i64 0), + (AND32ri8 + (EXTRACT_SUBREG GR64:$src, sub_32bit), + (i32 (GetLo8XForm imm:$imm))), + sub_32bit)>; + +def : Pat<(and GR64:$src, i64immZExt32:$imm), + (SUBREG_TO_REG + (i64 0), + (AND32ri + (EXTRACT_SUBREG GR64:$src, sub_32bit), + (i32 (GetLo32XForm imm:$imm))), + sub_32bit)>; + + +// r & (2^16-1) ==> movz +def : Pat<(and GR32:$src1, 0xffff), + (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>; +// r & (2^8-1) ==> movz +def : Pat<(and GR32:$src1, 0xff), + (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1, + GR32_ABCD)), + sub_8bit))>, + Requires<[In32BitMode]>; +// r & (2^8-1) ==> movz +def : Pat<(and GR16:$src1, 0xff), + (MOVZX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src1, + GR16_ABCD)), + sub_8bit))>, + Requires<[In32BitMode]>; + +// r & (2^32-1) ==> movz +def : Pat<(and GR64:$src, 0x00000000FFFFFFFF), + (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>; +// r & (2^16-1) ==> movz +def : Pat<(and GR64:$src, 0xffff), + (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit)))>; +// r & (2^8-1) ==> movz +def : Pat<(and GR64:$src, 0xff), + (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit)))>; +// r & (2^8-1) ==> movz +def : Pat<(and GR32:$src1, 0xff), + (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>, + Requires<[In64BitMode]>; +// r & (2^8-1) ==> movz +def : Pat<(and GR16:$src1, 0xff), + (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, sub_8bit)))>, + Requires<[In64BitMode]>; + + +// sext_inreg patterns +def : Pat<(sext_inreg GR32:$src, i16), + (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>; +def : Pat<(sext_inreg GR32:$src, i8), + (MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, + GR32_ABCD)), + sub_8bit))>, + Requires<[In32BitMode]>; +def : Pat<(sext_inreg GR16:$src, i8), + (MOVSX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, + GR16_ABCD)), + sub_8bit))>, + Requires<[In32BitMode]>; + +def : Pat<(sext_inreg GR64:$src, i32), + (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>; +def : Pat<(sext_inreg GR64:$src, i16), + (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>; +def : Pat<(sext_inreg GR64:$src, i8), + (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>; +def : Pat<(sext_inreg GR32:$src, i8), + (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>, + Requires<[In64BitMode]>; +def : Pat<(sext_inreg GR16:$src, i8), + (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, sub_8bit)))>, + Requires<[In64BitMode]>; + + +// trunc patterns +def : Pat<(i16 (trunc GR32:$src)), + (EXTRACT_SUBREG GR32:$src, sub_16bit)>; +def : Pat<(i8 (trunc GR32:$src)), + (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), + sub_8bit)>, + Requires<[In32BitMode]>; +def : Pat<(i8 (trunc GR16:$src)), + (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), + sub_8bit)>, + Requires<[In32BitMode]>; +def : Pat<(i32 (trunc GR64:$src)), + (EXTRACT_SUBREG GR64:$src, sub_32bit)>; +def : Pat<(i16 (trunc GR64:$src)), + (EXTRACT_SUBREG GR64:$src, sub_16bit)>; +def : Pat<(i8 (trunc GR64:$src)), + (EXTRACT_SUBREG GR64:$src, sub_8bit)>; +def : Pat<(i8 (trunc GR32:$src)), + (EXTRACT_SUBREG GR32:$src, sub_8bit)>, + Requires<[In64BitMode]>; +def : Pat<(i8 (trunc GR16:$src)), + (EXTRACT_SUBREG GR16:$src, sub_8bit)>, + Requires<[In64BitMode]>; + +// h-register tricks +def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))), + (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), + sub_8bit_hi)>, + Requires<[In32BitMode]>; +def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))), + 
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), + sub_8bit_hi)>, + Requires<[In32BitMode]>; +def : Pat<(srl GR16:$src, (i8 8)), + (EXTRACT_SUBREG + (MOVZX32rr8 + (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), + sub_8bit_hi)), + sub_16bit)>, + Requires<[In32BitMode]>; +def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))), + (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, + GR16_ABCD)), + sub_8bit_hi))>, + Requires<[In32BitMode]>; +def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))), + (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, + GR16_ABCD)), + sub_8bit_hi))>, + Requires<[In32BitMode]>; +def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), + (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, + GR32_ABCD)), + sub_8bit_hi))>, + Requires<[In32BitMode]>; +def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)), + (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, + GR32_ABCD)), + sub_8bit_hi))>, + Requires<[In32BitMode]>; + +// h-register tricks. +// For now, be conservative on x86-64 and use an h-register extract only if the +// value is immediately zero-extended or stored, which are somewhat common +// cases. This uses a bunch of code to prevent a register requiring a REX prefix +// from being allocated in the same instruction as the h register, as there's +// currently no way to describe this requirement to the register allocator. + +// h-register extract and zero-extend. +def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)), + (SUBREG_TO_REG + (i64 0), + (MOVZX32_NOREXrr8 + (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)), + sub_8bit_hi)), + sub_32bit)>; +def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), + (MOVZX32_NOREXrr8 + (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), + sub_8bit_hi))>, + Requires<[In64BitMode]>; +def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)), + (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, + GR32_ABCD)), + sub_8bit_hi))>, + Requires<[In64BitMode]>; +def : Pat<(srl GR16:$src, (i8 8)), + (EXTRACT_SUBREG + (MOVZX32_NOREXrr8 + (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), + sub_8bit_hi)), + sub_16bit)>, + Requires<[In64BitMode]>; +def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))), + (MOVZX32_NOREXrr8 + (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), + sub_8bit_hi))>, + Requires<[In64BitMode]>; +def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))), + (MOVZX32_NOREXrr8 + (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), + sub_8bit_hi))>, + Requires<[In64BitMode]>; +def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))), + (SUBREG_TO_REG + (i64 0), + (MOVZX32_NOREXrr8 + (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), + sub_8bit_hi)), + sub_32bit)>; +def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))), + (SUBREG_TO_REG + (i64 0), + (MOVZX32_NOREXrr8 + (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), + sub_8bit_hi)), + sub_32bit)>; + +// h-register extract and store. 
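The h-register patterns above extract bits 8..15 of a value by reading AH/BH/CH/DH directly instead of shifting. Illustrative C++ (name ours):

#include <cstdint>

uint8_t second_byte(uint32_t x) {
  // With x in an ABCD register this can be a single byte move from the
  // corresponding h-register, subject to the REX restrictions noted above.
  return static_cast<uint8_t>(x >> 8);
}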
+def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst), + (MOV8mr_NOREX + addr:$dst, + (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)), + sub_8bit_hi))>; +def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst), + (MOV8mr_NOREX + addr:$dst, + (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), + sub_8bit_hi))>, + Requires<[In64BitMode]>; +def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst), + (MOV8mr_NOREX + addr:$dst, + (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), + sub_8bit_hi))>, + Requires<[In64BitMode]>; + + +// (shl x, 1) ==> (add x, x) +def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>; +def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>; +def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>; +def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>; + +// (shl x (and y, 31)) ==> (shl x, y) +def : Pat<(shl GR8:$src1, (and CL, 31)), + (SHL8rCL GR8:$src1)>; +def : Pat<(shl GR16:$src1, (and CL, 31)), + (SHL16rCL GR16:$src1)>; +def : Pat<(shl GR32:$src1, (and CL, 31)), + (SHL32rCL GR32:$src1)>; +def : Pat<(store (shl (loadi8 addr:$dst), (and CL, 31)), addr:$dst), + (SHL8mCL addr:$dst)>; +def : Pat<(store (shl (loadi16 addr:$dst), (and CL, 31)), addr:$dst), + (SHL16mCL addr:$dst)>; +def : Pat<(store (shl (loadi32 addr:$dst), (and CL, 31)), addr:$dst), + (SHL32mCL addr:$dst)>; + +def : Pat<(srl GR8:$src1, (and CL, 31)), + (SHR8rCL GR8:$src1)>; +def : Pat<(srl GR16:$src1, (and CL, 31)), + (SHR16rCL GR16:$src1)>; +def : Pat<(srl GR32:$src1, (and CL, 31)), + (SHR32rCL GR32:$src1)>; +def : Pat<(store (srl (loadi8 addr:$dst), (and CL, 31)), addr:$dst), + (SHR8mCL addr:$dst)>; +def : Pat<(store (srl (loadi16 addr:$dst), (and CL, 31)), addr:$dst), + (SHR16mCL addr:$dst)>; +def : Pat<(store (srl (loadi32 addr:$dst), (and CL, 31)), addr:$dst), + (SHR32mCL addr:$dst)>; + +def : Pat<(sra GR8:$src1, (and CL, 31)), + (SAR8rCL GR8:$src1)>; +def : Pat<(sra GR16:$src1, (and CL, 31)), + (SAR16rCL GR16:$src1)>; +def : Pat<(sra GR32:$src1, (and CL, 31)), + (SAR32rCL GR32:$src1)>; +def : Pat<(store (sra (loadi8 addr:$dst), (and CL, 31)), addr:$dst), + (SAR8mCL addr:$dst)>; +def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst), + (SAR16mCL addr:$dst)>; +def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst), + (SAR32mCL addr:$dst)>; + +// (shl x (and y, 63)) ==> (shl x, y) +def : Pat<(shl GR64:$src1, (and CL, 63)), + (SHL64rCL GR64:$src1)>; +def : Pat<(store (shl (loadi64 addr:$dst), (and CL, 63)), addr:$dst), + (SHL64mCL addr:$dst)>; + +def : Pat<(srl GR64:$src1, (and CL, 63)), + (SHR64rCL GR64:$src1)>; +def : Pat<(store (srl (loadi64 addr:$dst), (and CL, 63)), addr:$dst), + (SHR64mCL addr:$dst)>; + +def : Pat<(sra GR64:$src1, (and CL, 63)), + (SAR64rCL GR64:$src1)>; +def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst), + (SAR64mCL addr:$dst)>; + + +// (anyext (setcc_carry)) -> (setcc_carry) +def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), + (SETB_C16r)>; +def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), + (SETB_C32r)>; +def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))), + (SETB_C32r)>; + + + + +//===----------------------------------------------------------------------===// +// EFLAGS-defining Patterns +//===----------------------------------------------------------------------===// + +// add reg, reg +def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr GR8 
:$src1, GR8 :$src2)>; +def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>; +def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>; + +// add reg, mem +def : Pat<(add GR8:$src1, (loadi8 addr:$src2)), + (ADD8rm GR8:$src1, addr:$src2)>; +def : Pat<(add GR16:$src1, (loadi16 addr:$src2)), + (ADD16rm GR16:$src1, addr:$src2)>; +def : Pat<(add GR32:$src1, (loadi32 addr:$src2)), + (ADD32rm GR32:$src1, addr:$src2)>; + +// add reg, imm +def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri GR8:$src1 , imm:$src2)>; +def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>; +def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>; +def : Pat<(add GR16:$src1, i16immSExt8:$src2), + (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>; +def : Pat<(add GR32:$src1, i32immSExt8:$src2), + (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>; + +// sub reg, reg +def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr GR8 :$src1, GR8 :$src2)>; +def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>; +def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>; + +// sub reg, mem +def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)), + (SUB8rm GR8:$src1, addr:$src2)>; +def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)), + (SUB16rm GR16:$src1, addr:$src2)>; +def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)), + (SUB32rm GR32:$src1, addr:$src2)>; + +// sub reg, imm +def : Pat<(sub GR8:$src1, imm:$src2), + (SUB8ri GR8:$src1, imm:$src2)>; +def : Pat<(sub GR16:$src1, imm:$src2), + (SUB16ri GR16:$src1, imm:$src2)>; +def : Pat<(sub GR32:$src1, imm:$src2), + (SUB32ri GR32:$src1, imm:$src2)>; +def : Pat<(sub GR16:$src1, i16immSExt8:$src2), + (SUB16ri8 GR16:$src1, i16immSExt8:$src2)>; +def : Pat<(sub GR32:$src1, i32immSExt8:$src2), + (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>; + +// mul reg, reg +def : Pat<(mul GR16:$src1, GR16:$src2), + (IMUL16rr GR16:$src1, GR16:$src2)>; +def : Pat<(mul GR32:$src1, GR32:$src2), + (IMUL32rr GR32:$src1, GR32:$src2)>; + +// mul reg, mem +def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)), + (IMUL16rm GR16:$src1, addr:$src2)>; +def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)), + (IMUL32rm GR32:$src1, addr:$src2)>; + +// mul reg, imm +def : Pat<(mul GR16:$src1, imm:$src2), + (IMUL16rri GR16:$src1, imm:$src2)>; +def : Pat<(mul GR32:$src1, imm:$src2), + (IMUL32rri GR32:$src1, imm:$src2)>; +def : Pat<(mul GR16:$src1, i16immSExt8:$src2), + (IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>; +def : Pat<(mul GR32:$src1, i32immSExt8:$src2), + (IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>; + +// reg = mul mem, imm +def : Pat<(mul (loadi16 addr:$src1), imm:$src2), + (IMUL16rmi addr:$src1, imm:$src2)>; +def : Pat<(mul (loadi32 addr:$src1), imm:$src2), + (IMUL32rmi addr:$src1, imm:$src2)>; +def : Pat<(mul (loadi16 addr:$src1), i16immSExt8:$src2), + (IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>; +def : Pat<(mul (loadi32 addr:$src1), i32immSExt8:$src2), + (IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>; + +// Optimize multiply by 2 with EFLAGS result. +let AddedComplexity = 2 in { +def : Pat<(X86smul_flag GR16:$src1, 2), (ADD16rr GR16:$src1, GR16:$src1)>; +def : Pat<(X86smul_flag GR32:$src1, 2), (ADD32rr GR32:$src1, GR32:$src1)>; +} + +// Patterns for nodes that do not produce flags, for instructions that do. 
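These patterns exist because ordinary add/sub/imul nodes whose flag results are never read are still selected to the flag-writing instructions; x86 has no non-flag-setting forms apart from LEA. A small sketch (names ours):

long scale_and_add(long a, long b) {
  // The multiply by two may become "add a, a" or fold into an LEA; either
  // way the selected instruction writes EFLAGS even though no flag result
  // is consumed here.
  return a * 2 + b;
}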
+ +// addition +def : Pat<(add GR64:$src1, GR64:$src2), + (ADD64rr GR64:$src1, GR64:$src2)>; +def : Pat<(add GR64:$src1, i64immSExt8:$src2), + (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>; +def : Pat<(add GR64:$src1, i64immSExt32:$src2), + (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>; +def : Pat<(add GR64:$src1, (loadi64 addr:$src2)), + (ADD64rm GR64:$src1, addr:$src2)>; + +// subtraction +def : Pat<(sub GR64:$src1, GR64:$src2), + (SUB64rr GR64:$src1, GR64:$src2)>; +def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)), + (SUB64rm GR64:$src1, addr:$src2)>; +def : Pat<(sub GR64:$src1, i64immSExt8:$src2), + (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>; +def : Pat<(sub GR64:$src1, i64immSExt32:$src2), + (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>; + +// Multiply +def : Pat<(mul GR64:$src1, GR64:$src2), + (IMUL64rr GR64:$src1, GR64:$src2)>; +def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)), + (IMUL64rm GR64:$src1, addr:$src2)>; +def : Pat<(mul GR64:$src1, i64immSExt8:$src2), + (IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>; +def : Pat<(mul GR64:$src1, i64immSExt32:$src2), + (IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>; +def : Pat<(mul (loadi64 addr:$src1), i64immSExt8:$src2), + (IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>; +def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2), + (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>; + +// Increment reg. +def : Pat<(add GR8 :$src, 1), (INC8r GR8 :$src)>; +def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>, Requires<[In32BitMode]>; +def : Pat<(add GR16:$src, 1), (INC64_16r GR16:$src)>, Requires<[In64BitMode]>; +def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>, Requires<[In32BitMode]>; +def : Pat<(add GR32:$src, 1), (INC64_32r GR32:$src)>, Requires<[In64BitMode]>; +def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>; + +// Decrement reg. +def : Pat<(add GR8 :$src, -1), (DEC8r GR8 :$src)>; +def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>, Requires<[In32BitMode]>; +def : Pat<(add GR16:$src, -1), (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>; +def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>, Requires<[In32BitMode]>; +def : Pat<(add GR32:$src, -1), (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>; +def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>; + +// or reg/reg. 
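On the increment/decrement patterns just above: in 64-bit mode the INC64_*/DEC64_* forms are used because the one-byte inc/dec opcodes were repurposed as REX prefixes, so the ModRM-based encoding is required. Trivial C++ for reference (names ours):

int next_value(int x) { return x + 1; }   // inc (or add $1) on the register
int prev_value(int x) { return x - 1; }   // dec (or sub $1) on the register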
+def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>; +def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>; +def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>; +def : Pat<(or GR64:$src1, GR64:$src2), (OR64rr GR64:$src1, GR64:$src2)>; + +// or reg/mem +def : Pat<(or GR8:$src1, (loadi8 addr:$src2)), + (OR8rm GR8:$src1, addr:$src2)>; +def : Pat<(or GR16:$src1, (loadi16 addr:$src2)), + (OR16rm GR16:$src1, addr:$src2)>; +def : Pat<(or GR32:$src1, (loadi32 addr:$src2)), + (OR32rm GR32:$src1, addr:$src2)>; +def : Pat<(or GR64:$src1, (loadi64 addr:$src2)), + (OR64rm GR64:$src1, addr:$src2)>; + +// or reg/imm +def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri GR8 :$src1, imm:$src2)>; +def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>; +def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>; +def : Pat<(or GR16:$src1, i16immSExt8:$src2), + (OR16ri8 GR16:$src1, i16immSExt8:$src2)>; +def : Pat<(or GR32:$src1, i32immSExt8:$src2), + (OR32ri8 GR32:$src1, i32immSExt8:$src2)>; +def : Pat<(or GR64:$src1, i64immSExt8:$src2), + (OR64ri8 GR64:$src1, i64immSExt8:$src2)>; +def : Pat<(or GR64:$src1, i64immSExt32:$src2), + (OR64ri32 GR64:$src1, i64immSExt32:$src2)>; + +// xor reg/reg +def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr GR8 :$src1, GR8 :$src2)>; +def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>; +def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>; +def : Pat<(xor GR64:$src1, GR64:$src2), (XOR64rr GR64:$src1, GR64:$src2)>; + +// xor reg/mem +def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)), + (XOR8rm GR8:$src1, addr:$src2)>; +def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)), + (XOR16rm GR16:$src1, addr:$src2)>; +def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)), + (XOR32rm GR32:$src1, addr:$src2)>; +def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)), + (XOR64rm GR64:$src1, addr:$src2)>; + +// xor reg/imm +def : Pat<(xor GR8:$src1, imm:$src2), + (XOR8ri GR8:$src1, imm:$src2)>; +def : Pat<(xor GR16:$src1, imm:$src2), + (XOR16ri GR16:$src1, imm:$src2)>; +def : Pat<(xor GR32:$src1, imm:$src2), + (XOR32ri GR32:$src1, imm:$src2)>; +def : Pat<(xor GR16:$src1, i16immSExt8:$src2), + (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>; +def : Pat<(xor GR32:$src1, i32immSExt8:$src2), + (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>; +def : Pat<(xor GR64:$src1, i64immSExt8:$src2), + (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>; +def : Pat<(xor GR64:$src1, i64immSExt32:$src2), + (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>; + +// and reg/reg +def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr GR8 :$src1, GR8 :$src2)>; +def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>; +def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>; +def : Pat<(and GR64:$src1, GR64:$src2), (AND64rr GR64:$src1, GR64:$src2)>; + +// and reg/mem +def : Pat<(and GR8:$src1, (loadi8 addr:$src2)), + (AND8rm GR8:$src1, addr:$src2)>; +def : Pat<(and GR16:$src1, (loadi16 addr:$src2)), + (AND16rm GR16:$src1, addr:$src2)>; +def : Pat<(and GR32:$src1, (loadi32 addr:$src2)), + (AND32rm GR32:$src1, addr:$src2)>; +def : Pat<(and GR64:$src1, (loadi64 addr:$src2)), + (AND64rm GR64:$src1, addr:$src2)>; + +// and reg/imm +def : Pat<(and GR8:$src1, imm:$src2), + (AND8ri GR8:$src1, imm:$src2)>; +def : Pat<(and GR16:$src1, imm:$src2), + (AND16ri GR16:$src1, imm:$src2)>; +def : Pat<(and GR32:$src1, imm:$src2), + (AND32ri GR32:$src1, imm:$src2)>; +def : Pat<(and GR16:$src1, i16immSExt8:$src2), + (AND16ri8 
GR16:$src1, i16immSExt8:$src2)>; +def : Pat<(and GR32:$src1, i32immSExt8:$src2), + (AND32ri8 GR32:$src1, i32immSExt8:$src2)>; +def : Pat<(and GR64:$src1, i64immSExt8:$src2), + (AND64ri8 GR64:$src1, i64immSExt8:$src2)>; +def : Pat<(and GR64:$src1, i64immSExt32:$src2), + (AND64ri32 GR64:$src1, i64immSExt32:$src2)>; diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td new file mode 100644 index 0000000..77f4725 --- /dev/null +++ b/lib/Target/X86/X86InstrControl.td @@ -0,0 +1,294 @@ +//===- X86InstrControl.td - Control Flow Instructions ------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the X86 jump, return, call, and related instructions. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Control Flow Instructions. +// + +// Return instructions. +let isTerminator = 1, isReturn = 1, isBarrier = 1, + hasCtrlDep = 1, FPForm = SpecialFP in { + def RET : I <0xC3, RawFrm, (outs), (ins variable_ops), + "ret", + [(X86retflag 0)]>; + def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops), + "ret\t$amt", + [(X86retflag timm:$amt)]>; + def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops), + "retw\t$amt", + []>, OpSize; + def LRETL : I <0xCB, RawFrm, (outs), (ins), + "lretl", []>; + def LRETQ : RI <0xCB, RawFrm, (outs), (ins), + "lretq", []>; + def LRETI : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt), + "lret\t$amt", []>; + def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt), + "lretw\t$amt", []>, OpSize; +} + +// Unconditional branches. +let isBarrier = 1, isBranch = 1, isTerminator = 1 in { + def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst), + "jmp\t$dst", [(br bb:$dst)]>; + def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst), + "jmp\t$dst", []>; + def JMP64pcrel32 : I<0xE9, RawFrm, (outs), (ins brtarget:$dst), + "jmp{q}\t$dst", []>; +} + +// Conditional Branches. +let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in { + multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> { + def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, []>; + def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm, + [(X86brcond bb:$dst, Cond, EFLAGS)]>, TB; + } +} + +defm JO : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>; +defm JNO : ICBr<0x71, 0x81, "jno\t$dst" , X86_COND_NO>; +defm JB : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>; +defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>; +defm JE : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>; +defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>; +defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>; +defm JA : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>; +defm JS : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>; +defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>; +defm JP : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>; +defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>; +defm JL : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>; +defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>; +defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>; +defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>; + +// jcx/jecx/jrcx instructions. 
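The ICBr multiclass above pairs each condition code with its short and near jump encodings. The usual source-level shape is a compare whose EFLAGS result feeds a jcc; a minimal sketch (name ours):

int sum_upto(int n) {
  int s = 0;
  for (int i = 0; i < n; ++i)   // cmp/test sets EFLAGS, then jl/jge/jne
    s += i;
  return s;
}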
+let isAsmParserOnly = 1, isBranch = 1, isTerminator = 1 in { + // These are the 32-bit versions of this instruction for the asmparser. In + // 32-bit mode, the address size prefix is jcxz and the unprefixed version is + // jecxz. + let Uses = [CX] in + def JCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), + "jcxz\t$dst", []>, AdSize, Requires<[In32BitMode]>; + let Uses = [ECX] in + def JECXZ_32 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), + "jecxz\t$dst", []>, Requires<[In32BitMode]>; + + // J*CXZ instruction: 64-bit versions of this instruction for the asmparser. + // In 64-bit mode, the address size prefix is jecxz and the unprefixed version + // is jrcxz. + let Uses = [ECX] in + def JECXZ_64 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), + "jecxz\t$dst", []>, AdSize, Requires<[In64BitMode]>; + let Uses = [RCX] in + def JRCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), + "jrcxz\t$dst", []>, Requires<[In64BitMode]>; +} + +// Indirect branches +let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { + def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst", + [(brind GR32:$dst)]>, Requires<[In32BitMode]>; + def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst", + [(brind (loadi32 addr:$dst))]>, Requires<[In32BitMode]>; + + def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst", + [(brind GR64:$dst)]>, Requires<[In64BitMode]>; + def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst", + [(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>; + + def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs), + (ins i16imm:$off, i16imm:$seg), + "ljmp{w}\t{$seg, $off|$off, $seg}", []>, OpSize; + def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs), + (ins i32imm:$off, i16imm:$seg), + "ljmp{l}\t{$seg, $off|$off, $seg}", []>; + def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst), + "ljmp{q}\t{*}$dst", []>; + + def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst), + "ljmp{w}\t{*}$dst", []>, OpSize; + def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst), + "ljmp{l}\t{*}$dst", []>; +} + + +// Loop instructions + +def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>; +def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>; +def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>; + +//===----------------------------------------------------------------------===// +// Call Instructions... +// +let isCall = 1 in + // All calls clobber the non-callee saved registers. ESP is marked as + // a use to prevent stack-pointer assignments that appear immediately + // before calls from potentially appearing dead. Uses for argument + // registers are added manually. 
+ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, + MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], + Uses = [ESP] in { + def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm, + (outs), (ins i32imm_pcrel:$dst,variable_ops), + "call{l}\t$dst", []>, Requires<[In32BitMode]>; + def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops), + "call{l}\t{*}$dst", [(X86call GR32:$dst)]>, + Requires<[In32BitMode]>; + def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops), + "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))]>, + Requires<[In32BitMode]>; + + def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs), + (ins i16imm:$off, i16imm:$seg), + "lcall{w}\t{$seg, $off|$off, $seg}", []>, OpSize; + def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs), + (ins i32imm:$off, i16imm:$seg), + "lcall{l}\t{$seg, $off|$off, $seg}", []>; + + def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst), + "lcall{w}\t{*}$dst", []>, OpSize; + def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst), + "lcall{l}\t{*}$dst", []>; + + // callw for 16 bit code for the assembler. + let isAsmParserOnly = 1 in + def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm, + (outs), (ins i16imm_pcrel:$dst, variable_ops), + "callw\t$dst", []>, OpSize; + } + + +// Tail call stuff. + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, + isCodeGenOnly = 1 in + let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, + MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], + Uses = [ESP] in { + def TCRETURNdi : PseudoI<(outs), + (ins i32imm_pcrel:$dst, i32imm:$offset, variable_ops), []>; + def TCRETURNri : PseudoI<(outs), + (ins GR32_TC:$dst, i32imm:$offset, variable_ops), []>; + let mayLoad = 1 in + def TCRETURNmi : PseudoI<(outs), + (ins i32mem_TC:$dst, i32imm:$offset, variable_ops), []>; + + // FIXME: The should be pseudo instructions that are lowered when going to + // mcinst. + def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs), + (ins i32imm_pcrel:$dst, variable_ops), + "jmp\t$dst # TAILCALL", + []>; + def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops), + "", []>; // FIXME: Remove encoding when JIT is dead. + let mayLoad = 1 in + def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops), + "jmp{l}\t{*}$dst # TAILCALL", []>; +} + + +//===----------------------------------------------------------------------===// +// Call Instructions... +// +let isCall = 1 in + // All calls clobber the non-callee saved registers. RSP is marked as + // a use to prevent stack-pointer assignments that appear immediately + // before calls from potentially appearing dead. Uses for argument + // registers are added manually. + let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, + FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1, + MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], + Uses = [RSP] in { + + // NOTE: this pattern doesn't match "X86call imm", because we do not know + // that the offset between an arbitrary immediate and the call will fit in + // the 32-bit pcrel field that we have. 
+ def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm, + (outs), (ins i64i32imm_pcrel:$dst, variable_ops), + "call{q}\t$dst", []>, + Requires<[In64BitMode, NotWin64]>; + def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops), + "call{q}\t{*}$dst", [(X86call GR64:$dst)]>, + Requires<[In64BitMode, NotWin64]>; + def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops), + "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>, + Requires<[In64BitMode, NotWin64]>; + + def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst), + "lcall{q}\t{*}$dst", []>; + } + + // FIXME: We need to teach codegen about single list of call-clobbered + // registers. +let isCall = 1, isCodeGenOnly = 1 in + // All calls clobber the non-callee saved registers. RSP is marked as + // a use to prevent stack-pointer assignments that appear immediately + // before calls from potentially appearing dead. Uses for argument + // registers are added manually. + let Defs = [RAX, RCX, RDX, R8, R9, R10, R11, + FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1, + MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS], + Uses = [RSP] in { + def WINCALL64pcrel32 : Ii32PCRel<0xE8, RawFrm, + (outs), (ins i64i32imm_pcrel:$dst, variable_ops), + "call{q}\t$dst", []>, + Requires<[IsWin64]>; + def WINCALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops), + "call{q}\t{*}$dst", + [(X86call GR64:$dst)]>, Requires<[IsWin64]>; + def WINCALL64m : I<0xFF, MRM2m, (outs), + (ins i64mem:$dst,variable_ops), + "call{q}\t{*}$dst", + [(X86call (loadi64 addr:$dst))]>, + Requires<[IsWin64]>; + } + + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, + isCodeGenOnly = 1 in + // AMD64 cc clobbers RSI, RDI, XMM6-XMM15. + let Defs = [RAX, RCX, RDX, R8, R9, R10, R11, + FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1, + MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS], + Uses = [RSP], + usesCustomInserter = 1 in { + def TCRETURNdi64 : PseudoI<(outs), + (ins i64i32imm_pcrel:$dst, i32imm:$offset, variable_ops), + []>; + def TCRETURNri64 : PseudoI<(outs), + (ins ptr_rc_tailcall:$dst, i32imm:$offset, variable_ops), []>; + let mayLoad = 1 in + def TCRETURNmi64 : PseudoI<(outs), + (ins i64mem_TC:$dst, i32imm:$offset, variable_ops), []>; + + def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs), + (ins i64i32imm_pcrel:$dst, variable_ops), + "jmp\t$dst # TAILCALL", []>; + def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst, variable_ops), + "jmp{q}\t{*}$dst # TAILCALL", []>; + + let mayLoad = 1 in + def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops), + "jmp{q}\t{*}$dst # TAILCALL", []>; +} diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td new file mode 100644 index 0000000..867c0f8 --- /dev/null +++ b/lib/Target/X86/X86InstrExtension.td @@ -0,0 +1,172 @@ +//===- X86InstrExtension.td - Sign and Zero Extensions -----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the sign and zero extension operations. 
+// +//===----------------------------------------------------------------------===// + +let neverHasSideEffects = 1 in { + let Defs = [AX], Uses = [AL] in + def CBW : I<0x98, RawFrm, (outs), (ins), + "{cbtw|cbw}", []>, OpSize; // AX = signext(AL) + let Defs = [EAX], Uses = [AX] in + def CWDE : I<0x98, RawFrm, (outs), (ins), + "{cwtl|cwde}", []>; // EAX = signext(AX) + + let Defs = [AX,DX], Uses = [AX] in + def CWD : I<0x99, RawFrm, (outs), (ins), + "{cwtd|cwd}", []>, OpSize; // DX:AX = signext(AX) + let Defs = [EAX,EDX], Uses = [EAX] in + def CDQ : I<0x99, RawFrm, (outs), (ins), + "{cltd|cdq}", []>; // EDX:EAX = signext(EAX) + + + let Defs = [RAX], Uses = [EAX] in + def CDQE : RI<0x98, RawFrm, (outs), (ins), + "{cltq|cdqe}", []>; // RAX = signext(EAX) + + let Defs = [RAX,RDX], Uses = [RAX] in + def CQO : RI<0x99, RawFrm, (outs), (ins), + "{cqto|cqo}", []>; // RDX:RAX = signext(RAX) +} + + +// Sign/Zero extenders +// Use movsbl intead of movsbw; we don't care about the high 16 bits +// of the register here. This has a smaller encoding and avoids a +// partial-register update. Actual movsbw included for the disassembler. +def MOVSX16rr8W : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), + "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; +def MOVSX16rm8W : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), + "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; + +// FIXME: Use a pat pattern or define a syntax here. +let isCodeGenOnly=1 in { +def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src), + "", [(set GR16:$dst, (sext GR8:$src))]>, TB; +def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src), + "", [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB; +} +def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src), + "movs{bl|x}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (sext GR8:$src))]>, TB; +def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src), + "movs{bl|x}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (sextloadi32i8 addr:$src))]>, TB; +def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src), + "movs{wl|x}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (sext GR16:$src))]>, TB; +def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), + "movs{wl|x}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB; + +// Use movzbl intead of movzbw; we don't care about the high 16 bits +// of the register here. This has a smaller encoding and avoids a +// partial-register update. Actual movzbw included for the disassembler. +def MOVZX16rr8W : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), + "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; +def MOVZX16rm8W : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), + "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; +// FIXME: Use a pat pattern or define a syntax here. 
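The movs*/movz* definitions in this new file pair each instruction with a sext or zext pattern. As a plain C++ reminder of what those two operations compute (this is just the generic semantics, not LLVM code):

#include <cassert>
#include <cstdint>

// MOVSX32rr8-style sign extension vs. MOVZX32rr8-style zero extension.
uint32_t signExtend8To32(uint8_t V) { return uint32_t(int32_t(int8_t(V))); }
uint32_t zeroExtend8To32(uint8_t V) { return uint32_t(V); }

int main() {
  assert(signExtend8To32(0x80) == 0xFFFFFF80u); // the sign bit is copied upward
  assert(zeroExtend8To32(0x80) == 0x00000080u); // the upper bits are cleared
  return 0;
}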
+let isCodeGenOnly=1 in { +def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src), + "", [(set GR16:$dst, (zext GR8:$src))]>, TB; +def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src), + "", [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB; +} +def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src), + "movz{bl|x}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (zext GR8:$src))]>, TB; +def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src), + "movz{bl|x}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (zextloadi32i8 addr:$src))]>, TB; +def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src), + "movz{wl|x}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (zext GR16:$src))]>, TB; +def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), + "movz{wl|x}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB; + +// These are the same as the regular MOVZX32rr8 and MOVZX32rm8 +// except that they use GR32_NOREX for the output operand register class +// instead of GR32. This allows them to operate on h registers on x86-64. +def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg, + (outs GR32_NOREX:$dst), (ins GR8:$src), + "movz{bl|x}\t{$src, $dst|$dst, $src}", + []>, TB; +let mayLoad = 1 in +def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem, + (outs GR32_NOREX:$dst), (ins i8mem:$src), + "movz{bl|x}\t{$src, $dst|$dst, $src}", + []>, TB; + +// MOVSX64rr8 always has a REX prefix and it has an 8-bit register +// operand, which makes it a rare instruction with an 8-bit register +// operand that can never access an h register. If support for h registers +// were generalized, this would require a special register class. +def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src), + "movs{bq|x}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (sext GR8:$src))]>, TB; +def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src), + "movs{bq|x}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (sextloadi64i8 addr:$src))]>, TB; +def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), + "movs{wq|x}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (sext GR16:$src))]>, TB; +def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), + "movs{wq|x}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (sextloadi64i16 addr:$src))]>, TB; +def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src), + "movs{lq|xd}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (sext GR32:$src))]>; +def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), + "movs{lq|xd}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (sextloadi64i32 addr:$src))]>; + +// movzbq and movzwq encodings for the disassembler +def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src), + "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB; +def MOVZX64rm8_Q : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src), + "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB; +def MOVZX64rr16_Q : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), + "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB; +def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), + "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB; + +// FIXME: These should be Pat patterns. +let isCodeGenOnly = 1 in { + +// Use movzbl instead of movzbq when the destination is a register; it's +// equivalent due to implicit zero-extending, and it has a smaller encoding. 
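The "use movzbl/movzwl instead of movzbq/movzwq" comments above, and the MOVZX64rr32-via-movl trick a little further down, all rest on one x86-64 rule: writing a 32-bit register clears bits 63:32 of the containing 64-bit register, so a 32-bit zero-extending move already yields a correct 64-bit value. A small C-level analogue:

#include <cassert>
#include <cstdint>

// A 32-bit write implicitly zero-extends to 64 bits on x86-64; in C terms the
// cast through uint32_t is all that is needed, and compilers typically lower
// this function to a single 32-bit register move.
uint64_t zeroExtend32To64(uint32_t V) { return V; }

int main() {
  assert(zeroExtend32To64(0xFFFFFFFFu) == 0x00000000FFFFFFFFull);
  return 0;
}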
+def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src), + "", [(set GR64:$dst, (zext GR8:$src))]>, TB; +def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src), + "", [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB; +// Use movzwl instead of movzwq when the destination is a register; it's +// equivalent due to implicit zero-extending, and it has a smaller encoding. +def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), + "", [(set GR64:$dst, (zext GR16:$src))]>, TB; +def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), + "", [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB; + +// There's no movzlq instruction, but movl can be used for this purpose, using +// implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero +// extension on x86-64 is to use a SUBREG_TO_REG to utilize implicit +// zero-extension, however this isn't possible when the 32-bit value is +// defined by a truncate or is copied from something where the high bits aren't +// necessarily all zero. In such cases, we fall back to these explicit zext +// instructions. +def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src), + "", [(set GR64:$dst, (zext GR32:$src))]>; +def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), + "", [(set GR64:$dst, (zextloadi64i32 addr:$src))]>; + + +} + diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index 9c9bcc7..b506f5e 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -32,21 +32,24 @@ def SDTX86FpToIMem : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>; def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld, - [SDNPHasChain, SDNPMayLoad]>; + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def X86fst : SDNode<"X86ISD::FST", SDTX86Fst, - [SDNPHasChain, SDNPInFlag, SDNPMayStore]>; + [SDNPHasChain, SDNPInGlue, SDNPMayStore, + SDNPMemOperand]>; def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild, - [SDNPHasChain, SDNPMayLoad]>; + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def X86fildflag : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild, - [SDNPHasChain, SDNPOutFlag, SDNPMayLoad]>; + [SDNPHasChain, SDNPOutGlue, SDNPMayLoad, + SDNPMemOperand]>; def X86fp_to_i16mem : SDNode<"X86ISD::FP_TO_INT16_IN_MEM", SDTX86FpToIMem, - [SDNPHasChain, SDNPMayStore]>; + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def X86fp_to_i32mem : SDNode<"X86ISD::FP_TO_INT32_IN_MEM", SDTX86FpToIMem, - [SDNPHasChain, SDNPMayStore]>; + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def X86fp_to_i64mem : SDNode<"X86ISD::FP_TO_INT64_IN_MEM", SDTX86FpToIMem, - [SDNPHasChain, SDNPMayStore]>; + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore, - [SDNPHasChain, SDNPMayStore, SDNPSideEffect]>; + [SDNPHasChain, SDNPMayStore, SDNPSideEffect, + SDNPMemOperand]>; //===----------------------------------------------------------------------===// // FPStack pattern fragments @@ -70,41 +73,23 @@ def fpimmneg1 : PatLeaf<(fpimm), [{ // Some 'special' instructions let usesCustomInserter = 1 in { // Expanded after instruction selection. 
- def FP32_TO_INT16_IN_MEM : I<0, Pseudo, - (outs), (ins i16mem:$dst, RFP32:$src), - "##FP32_TO_INT16_IN_MEM PSEUDO!", + def FP32_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP32:$src), [(X86fp_to_i16mem RFP32:$src, addr:$dst)]>; - def FP32_TO_INT32_IN_MEM : I<0, Pseudo, - (outs), (ins i32mem:$dst, RFP32:$src), - "##FP32_TO_INT32_IN_MEM PSEUDO!", + def FP32_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP32:$src), [(X86fp_to_i32mem RFP32:$src, addr:$dst)]>; - def FP32_TO_INT64_IN_MEM : I<0, Pseudo, - (outs), (ins i64mem:$dst, RFP32:$src), - "##FP32_TO_INT64_IN_MEM PSEUDO!", + def FP32_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP32:$src), [(X86fp_to_i64mem RFP32:$src, addr:$dst)]>; - def FP64_TO_INT16_IN_MEM : I<0, Pseudo, - (outs), (ins i16mem:$dst, RFP64:$src), - "##FP64_TO_INT16_IN_MEM PSEUDO!", + def FP64_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP64:$src), [(X86fp_to_i16mem RFP64:$src, addr:$dst)]>; - def FP64_TO_INT32_IN_MEM : I<0, Pseudo, - (outs), (ins i32mem:$dst, RFP64:$src), - "##FP64_TO_INT32_IN_MEM PSEUDO!", + def FP64_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP64:$src), [(X86fp_to_i32mem RFP64:$src, addr:$dst)]>; - def FP64_TO_INT64_IN_MEM : I<0, Pseudo, - (outs), (ins i64mem:$dst, RFP64:$src), - "##FP64_TO_INT64_IN_MEM PSEUDO!", + def FP64_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP64:$src), [(X86fp_to_i64mem RFP64:$src, addr:$dst)]>; - def FP80_TO_INT16_IN_MEM : I<0, Pseudo, - (outs), (ins i16mem:$dst, RFP80:$src), - "##FP80_TO_INT16_IN_MEM PSEUDO!", + def FP80_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP80:$src), [(X86fp_to_i16mem RFP80:$src, addr:$dst)]>; - def FP80_TO_INT32_IN_MEM : I<0, Pseudo, - (outs), (ins i32mem:$dst, RFP80:$src), - "##FP80_TO_INT32_IN_MEM PSEUDO!", + def FP80_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP80:$src), [(X86fp_to_i32mem RFP80:$src, addr:$dst)]>; - def FP80_TO_INT64_IN_MEM : I<0, Pseudo, - (outs), (ins i64mem:$dst, RFP80:$src), - "##FP80_TO_INT64_IN_MEM PSEUDO!", + def FP80_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP80:$src), [(X86fp_to_i64mem RFP80:$src, addr:$dst)]>; } @@ -212,11 +197,11 @@ def _Fp80m64: FpI_<(outs RFP80:$dst), [(set RFP80:$dst, (OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2))))]>; def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src), - !strconcat("f", !strconcat(asmstring, "{s}\t$src"))> { + !strconcat("f", asmstring, "{s}\t$src")> { let mayLoad = 1; } def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src), - !strconcat("f", !strconcat(asmstring, "{l}\t$src"))> { + !strconcat("f", asmstring, "{l}\t$src")> { let mayLoad = 1; } // ST(0) = ST(0) + [memint] @@ -245,11 +230,11 @@ def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2), [(set RFP80:$dst, (OpNode RFP80:$src1, (X86fild addr:$src2, i32)))]>; def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src), - !strconcat("fi", !strconcat(asmstring, "{s}\t$src"))> { + !strconcat("fi", asmstring, "{s}\t$src")> { let mayLoad = 1; } def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src), - !strconcat("fi", !strconcat(asmstring, "{l}\t$src"))> { + !strconcat("fi", asmstring, "{l}\t$src")> { let mayLoad = 1; } } @@ -580,16 +565,16 @@ def UCOM_FPPr : FPI<0xE9, RawFrm, // cmp ST(0) with ST(1), pop, pop def UCOM_FIr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i) (outs), (ins RST:$reg), - "fucomi\t{$reg, %st(0)|%ST(0), $reg}">, DB; + "fucomi\t$reg">, DB; def UCOM_FIPr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i), pop (outs), (ins RST:$reg), - "fucomip\t{$reg, 
%st(0)|%ST(0), $reg}">, DF; + "fucompi\t$reg">, DF; } def COM_FIr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg), - "fcomi\t{$reg, %st(0)|%ST(0), $reg}">, DB; + "fcomi\t$reg">, DB; def COM_FIPr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg), - "fcomip\t{$reg, %st(0)|%ST(0), $reg}">, DF; + "fcompi\t$reg">, DF; // Floating point flag ops. let Defs = [AX] in @@ -604,8 +589,8 @@ let mayLoad = 1 in def FLDCW16m : I<0xD9, MRM5m, // X87 control world = [mem16] (outs), (ins i16mem:$dst), "fldcw\t$dst", []>; -// Register free - +// FPU control instructions +def FNINIT : I<0xE3, RawFrm, (outs), (ins), "fninit", []>, DB; def FFREE : FPI<0xC0, AddRegFrm, (outs), (ins RST:$reg), "ffree\t$reg">, DD; @@ -613,7 +598,8 @@ def FFREE : FPI<0xC0, AddRegFrm, (outs), (ins RST:$reg), def FNCLEX : I<0xE2, RawFrm, (outs), (ins), "fnclex", []>, DB; -// Operandless floating-point instructions for the disassembler +// Operandless floating-point instructions for the disassembler. +def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>; def FNOP : I<0xD0, RawFrm, (outs), (ins), "fnop", []>, D9; def FXAM : I<0xE5, RawFrm, (outs), (ins), "fxam", []>, D9; @@ -639,8 +625,12 @@ def FCOMPP : I<0xD9, RawFrm, (outs), (ins), "fcompp", []>, DE; def FXSAVE : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins), "fxsave\t$dst", []>, TB; +def FXSAVE64 : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins), + "fxsaveq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>; def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src), "fxrstor\t$src", []>, TB; +def FXRSTOR64 : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src), + "fxrstorq\t$src", []>, TB, REX_W, Requires<[In64BitMode]>; //===----------------------------------------------------------------------===// // Non-Instruction Patterns diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 79187e9..344c14c 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -39,7 +39,8 @@ def MRM_E8 : Format<39>; def MRM_F0 : Format<40>; def MRM_F8 : Format<41>; def MRM_F9 : Format<42>; -def RawFrmImm16 : Format<43>; +def RawFrmImm8 : Format<43>; +def RawFrmImm16 : Format<44>; // ImmType - This specifies the immediate type used by an instruction. This is // part of the ad-hoc solution used to emit machine instruction encodings by our @@ -108,6 +109,7 @@ class VEX_W { bit hasVEX_WPrefix = 1; } class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; } class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; } class VEX_L { bit hasVEX_L = 1; } +class Has3DNow0F0FOpcode { bit has3DNow0F0FOpcode = 1; } class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, string AsmStr, Domain d = GenericDomain> @@ -123,6 +125,9 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, dag InOperandList = ins; string AsmString = AsmStr; + // If this is a pseudo instruction, mark it isCodeGenOnly. + let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo"); + // // Attributes specific to X86 instructions... // @@ -130,17 +135,18 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, bit hasAdSizePrefix = 0; // Does this inst have a 0x67 prefix? bits<4> Prefix = 0; // Which prefix byte does this inst have? - bit hasREX_WPrefix = 0; // Does this inst requires the REX.W prefix? + bit hasREX_WPrefix = 0; // Does this inst require the REX.W prefix? FPFormat FPForm = NotFP; // What flavor of FP instruction is this? bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix? bits<2> SegOvrBits = 0; // Segment override prefix. 
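The X86Inst attribute bits declared here and just below are packed into the 64-bit TSFlags word by the let TSFlags{...} assignments that follow, which is why the comment insists the layout stay in sync with X86InstrInfo.h. A minimal C++ sketch of that packing, using only bit positions visible in this hunk (the bits in between hold the remaining attributes; this is not LLVM code):

#include <cstdint>

// Shape of the TSFlags packing only.
uint64_t packTSFlags(uint8_t FormBits, bool HasVEX_L, bool Has3DNow0F0FOpcode) {
  uint64_t TSFlags = 0;
  TSFlags |= uint64_t(FormBits & 0x3F);            // TSFlags{5-0}  = FormBits
  TSFlags |= uint64_t(HasVEX_L) << 36;             // TSFlags{36}   = hasVEX_L
  TSFlags |= uint64_t(Has3DNow0F0FOpcode) << 37;   // TSFlags{37}   = has3DNow0F0FOpcode
  return TSFlags;
}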
Domain ExeDomain = d; - bit hasVEXPrefix = 0; // Does this inst requires a VEX prefix? + bit hasVEXPrefix = 0; // Does this inst require a VEX prefix? bit hasVEX_WPrefix = 0; // Does this inst set the VEX_W field? - bit hasVEX_4VPrefix = 0; // Does this inst requires the VEX.VVVV field? - bit hasVEX_i8ImmReg = 0; // Does this inst requires the last source register + bit hasVEX_4VPrefix = 0; // Does this inst require the VEX.VVVV field? + bit hasVEX_i8ImmReg = 0; // Does this inst require the last source register // to be encoded in a immediate field? - bit hasVEX_L = 0; // Does this inst uses large (256-bit) registers? + bit hasVEX_L = 0; // Does this inst use large (256-bit) registers? + bit has3DNow0F0FOpcode =0;// Wacky 3dNow! encoding? // TSFlags layout should be kept in sync with X86InstrInfo.h. let TSFlags{5-0} = FormBits; @@ -159,6 +165,12 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, let TSFlags{34} = hasVEX_4VPrefix; let TSFlags{35} = hasVEX_i8ImmReg; let TSFlags{36} = hasVEX_L; + let TSFlags{37} = has3DNow0F0FOpcode; +} + +class PseudoI<dag oops, dag iops, list<dag> pattern> + : X86Inst<0, Pseudo, NoImm, oops, iops, ""> { + let Pattern = pattern; } class I<bits<8> o, Format f, dag outs, dag ins, string asm, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 01149b6..5016c0f 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -15,51 +15,8 @@ // MMX Pattern Fragments //===----------------------------------------------------------------------===// -def load_mmx : PatFrag<(ops node:$ptr), (v1i64 (load node:$ptr))>; - -def bc_v8i8 : PatFrag<(ops node:$in), (v8i8 (bitconvert node:$in))>; -def bc_v4i16 : PatFrag<(ops node:$in), (v4i16 (bitconvert node:$in))>; -def bc_v2i32 : PatFrag<(ops node:$in), (v2i32 (bitconvert node:$in))>; -def bc_v1i64 : PatFrag<(ops node:$in), (v1i64 (bitconvert node:$in))>; - -//===----------------------------------------------------------------------===// -// MMX Masks -//===----------------------------------------------------------------------===// - -// MMX_SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to -// PSHUFW imm. 
-def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{ - return getI8Imm(X86::getShuffleSHUFImmediate(N)); -}]>; - -// Patterns for: vector_shuffle v1, v2, <2, 6, 3, 7, ...> -def mmx_unpckh : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -// Patterns for: vector_shuffle v1, v2, <0, 4, 2, 5, ...> -def mmx_unpckl : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -// Patterns for: vector_shuffle v1, <undef>, <0, 0, 1, 1, ...> -def mmx_unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); -}]>; - -// Patterns for: vector_shuffle v1, <undef>, <2, 2, 3, 3, ...> -def mmx_unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N)); -}], MMX_SHUFFLE_get_shuf_imm>; +def load_mmx : PatFrag<(ops node:$ptr), (x86mmx (load node:$ptr))>; +def bc_mmx : PatFrag<(ops node:$in), (x86mmx (bitconvert node:$in))>; //===----------------------------------------------------------------------===// // SSE specific DAG Nodes. @@ -86,6 +43,21 @@ def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>; def X86pshufb : SDNode<"X86ISD::PSHUFB", SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; +def X86pandn : SDNode<"X86ISD::PANDN", + SDTypeProfile<1, 2, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>>; +def X86psignb : SDNode<"X86ISD::PSIGNB", + SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>>; +def X86psignw : SDNode<"X86ISD::PSIGNW", + SDTypeProfile<1, 2, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>>; +def X86psignd : SDNode<"X86ISD::PSIGND", + SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>>; +def X86pblendv : SDNode<"X86ISD::PBLENDVB", + SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>; def X86pextrb : SDNode<"X86ISD::PEXTRB", SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>; def X86pextrw : SDNode<"X86ISD::PEXTRW", @@ -102,7 +74,7 @@ def X86insrtps : SDNode<"X86ISD::INSERTPS", def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL", SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>; def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad, - [SDNPHasChain, SDNPMayLoad]>; + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def X86vshl : SDNode<"X86ISD::VSHL", SDTIntShiftOp>; def X86vshr : SDNode<"X86ISD::VSRL", SDTIntShiftOp>; def X86cmpps : SDNode<"X86ISD::CMPPS", SDTX86VFCMP>; @@ -134,18 +106,12 @@ def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>, def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisInt<3>]>; -def SDTShuff2OpLdI : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisPtrTy<1>, - SDTCisInt<2>]>; - def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>; def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>; def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>; def X86PShuflw : SDNode<"X86ISD::PSHUFLW", SDTShuff2OpI>; -def X86PShufhwLd : SDNode<"X86ISD::PSHUFHW_LD", SDTShuff2OpLdI>; -def X86PShuflwLd : SDNode<"X86ISD::PSHUFLW_LD", 
SDTShuff2OpLdI>; - def X86Shufpd : SDNode<"X86ISD::SHUFPD", SDTShuff3OpI>; def X86Shufps : SDNode<"X86ISD::SHUFPS", SDTShuff3OpI>; @@ -187,9 +153,11 @@ def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>; // the top elements. These are used for the SSE 'ss' and 'sd' instruction // forms. def sse_load_f32 : ComplexPattern<v4f32, 5, "SelectScalarSSELoad", [], - [SDNPHasChain, SDNPMayLoad]>; + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, + SDNPWantRoot]>; def sse_load_f64 : ComplexPattern<v2f64, 5, "SelectScalarSSELoad", [], - [SDNPHasChain, SDNPMayLoad]>; + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, + SDNPWantRoot]>; def ssmem : Operand<v4f32> { let PrintMethod = "printf32mem"; @@ -273,6 +241,7 @@ def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>; def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>; def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>; def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>; +def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop node:$ptr))>; def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>; // 256-bit memop pattern fragments @@ -289,10 +258,7 @@ def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return cast<LoadSDNode>(N)->getAlignment() >= 8; }]>; -def memopv8i8 : PatFrag<(ops node:$ptr), (v8i8 (memop64 node:$ptr))>; -def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>; -def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop64 node:$ptr))>; -def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>; +def memopmmx : PatFrag<(ops node:$ptr), (x86mmx (memop64 node:$ptr))>; // MOVNT Support // Like 'store', but requires the non-temporal bit to be set @@ -376,6 +342,18 @@ def SHUFFLE_get_palign_imm : SDNodeXForm<vector_shuffle, [{ return getI8Imm(X86::getShufflePALIGNRImmediate(N)); }]>; +// EXTRACT_get_vextractf128_imm xform function: convert extract_subvector index +// to VEXTRACTF128 imm. +def EXTRACT_get_vextractf128_imm : SDNodeXForm<extract_subvector, [{ + return getI8Imm(X86::getExtractVEXTRACTF128Immediate(N)); +}]>; + +// INSERT_get_vinsertf128_imm xform function: convert insert_subvector index to +// VINSERTF128 imm. 
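The EXTRACT_get_vextractf128_imm xform above and the INSERT_get_vinsertf128_imm xform just below turn a subvector index into the one-byte immediate of VEXTRACTF128/VINSERTF128, which selects a 128-bit lane of a 256-bit register. The exact helpers are not shown in this diff; the sketch below is the presumed lane arithmetic, with an invented function name:

#include <cassert>

// Presumed mapping: scale the element index by the element width in bits to
// find which 128-bit half of the 256-bit vector the subvector starts in.
unsigned vextractf128Lane(unsigned ElementIndex, unsigned ElementBits) {
  unsigned Lane = (ElementIndex * ElementBits) / 128;
  assert(Lane < 2 && "a 256-bit register only has two 128-bit lanes");
  return Lane;
}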
+def INSERT_get_vinsertf128_imm : SDNodeXForm<insert_subvector, [{ + return getI8Imm(X86::getInsertVINSERTF128Immediate(N)); +}]>; + def splat_lo : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); @@ -466,3 +444,16 @@ def palign : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ return X86::isPALIGNRMask(cast<ShuffleVectorSDNode>(N)); }], SHUFFLE_get_palign_imm>; + +def vextractf128_extract : PatFrag<(ops node:$bigvec, node:$index), + (extract_subvector node:$bigvec, + node:$index), [{ + return X86::isVEXTRACTF128Index(N); +}], EXTRACT_get_vextractf128_imm>; + +def vinsertf128_insert : PatFrag<(ops node:$bigvec, node:$smallvec, + node:$index), + (insert_subvector node:$bigvec, node:$smallvec, + node:$index), [{ + return X86::isVINSERTF128Index(N); +}], INSERT_get_vinsertf128_imm>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 5280940..ceb1b65 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -34,7 +34,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" #include "llvm/MC/MCAsmInfo.h" - #include <limits> using namespace llvm; @@ -55,7 +54,11 @@ ReMatPICStubLoad("remat-pic-stub-load", X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) : TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)), TM(tm), RI(tm, *this) { - SmallVector<unsigned,16> AmbEntries; + enum { + TB_NOT_REVERSABLE = 1U << 31, + TB_FLAGS = TB_NOT_REVERSABLE + }; + static const unsigned OpTbl2Addr[][2] = { { X86::ADC32ri, X86::ADC32mi }, { X86::ADC32ri8, X86::ADC32mi8 }, @@ -65,13 +68,22 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::ADC64rr, X86::ADC64mr }, { X86::ADD16ri, X86::ADD16mi }, { X86::ADD16ri8, X86::ADD16mi8 }, + { X86::ADD16ri_DB, X86::ADD16mi | TB_NOT_REVERSABLE }, + { X86::ADD16ri8_DB, X86::ADD16mi8 | TB_NOT_REVERSABLE }, { X86::ADD16rr, X86::ADD16mr }, + { X86::ADD16rr_DB, X86::ADD16mr | TB_NOT_REVERSABLE }, { X86::ADD32ri, X86::ADD32mi }, { X86::ADD32ri8, X86::ADD32mi8 }, + { X86::ADD32ri_DB, X86::ADD32mi | TB_NOT_REVERSABLE }, + { X86::ADD32ri8_DB, X86::ADD32mi8 | TB_NOT_REVERSABLE }, { X86::ADD32rr, X86::ADD32mr }, + { X86::ADD32rr_DB, X86::ADD32mr | TB_NOT_REVERSABLE }, { X86::ADD64ri32, X86::ADD64mi32 }, { X86::ADD64ri8, X86::ADD64mi8 }, + { X86::ADD64ri32_DB,X86::ADD64mi32 | TB_NOT_REVERSABLE }, + { X86::ADD64ri8_DB, X86::ADD64mi8 | TB_NOT_REVERSABLE }, { X86::ADD64rr, X86::ADD64mr }, + { X86::ADD64rr_DB, X86::ADD64mr | TB_NOT_REVERSABLE }, { X86::ADD8ri, X86::ADD8mi }, { X86::ADD8rr, X86::ADD8mr }, { X86::AND16ri, X86::AND16mi }, @@ -216,16 +228,21 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) { unsigned RegOp = OpTbl2Addr[i][0]; - unsigned MemOp = OpTbl2Addr[i][1]; - if (!RegOp2MemOpTable2Addr.insert(std::make_pair((unsigned*)RegOp, - std::make_pair(MemOp,0))).second) - assert(false && "Duplicated entries?"); + unsigned MemOp = OpTbl2Addr[i][1] & ~TB_FLAGS; + assert(!RegOp2MemOpTable2Addr.count(RegOp) && "Duplicated entries?"); + RegOp2MemOpTable2Addr[RegOp] = std::make_pair(MemOp, 0U); + + // If this is not a reversable operation (because there is a many->one) + // mapping, don't insert the reverse of the operation into MemOp2RegOpTable. + if (OpTbl2Addr[i][1] & TB_NOT_REVERSABLE) + continue; + // Index 0, folded load and store, no alignment requirement. 
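The OpTbl2Addr loop above, and the OpTbl0/OpTbl1/OpTbl2 loops that follow, all use the same trick: an optional TB_NOT_REVERSABLE flag is packed into the high bit of the memory-form opcode, stripped with & ~TB_FLAGS when the entry is inserted, and used to skip the reverse (unfolding) direction, since several register forms can map onto one memory form. A standalone model of that scheme (std::map stands in for the DenseMaps):

#include <map>

enum { TB_NOT_REVERSABLE = 1u << 31, TB_FLAGS = TB_NOT_REVERSABLE };

void addFoldEntry(std::map<unsigned, unsigned> &RegOp2MemOp,
                  std::map<unsigned, unsigned> &MemOp2RegOp,
                  unsigned RegOp, unsigned MemOpWithFlags) {
  unsigned MemOp = MemOpWithFlags & ~unsigned(TB_FLAGS);  // strip the flag bits
  RegOp2MemOp[RegOp] = MemOp;                             // folding is always recorded
  if (MemOpWithFlags & TB_NOT_REVERSABLE)
    return;                                               // many->one: no unfold entry
  MemOp2RegOp[MemOp] = RegOp;                             // unfolding direction
}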
unsigned AuxInfo = 0 | (1 << 4) | (1 << 5); - if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, - std::make_pair(RegOp, - AuxInfo))).second) - AmbEntries.push_back(MemOp); + + assert(!MemOp2RegOpTable.count(MemOp) && + "Duplicated entries in unfolding maps?"); + MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo); } // If the third value is 1, then it's folding either a load or a store. @@ -252,8 +269,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::DIV64r, X86::DIV64m, 1, 0 }, { X86::DIV8r, X86::DIV8m, 1, 0 }, { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0, 16 }, - { X86::FsMOVAPDrr, X86::MOVSDmr, 0, 0 }, - { X86::FsMOVAPSrr, X86::MOVSSmr, 0, 0 }, + { X86::FsMOVAPDrr, X86::MOVSDmr | TB_NOT_REVERSABLE , 0, 0 }, + { X86::FsMOVAPSrr, X86::MOVSSmr | TB_NOT_REVERSABLE , 0, 0 }, { X86::IDIV16r, X86::IDIV16m, 1, 0 }, { X86::IDIV32r, X86::IDIV32m, 1, 0 }, { X86::IDIV64r, X86::IDIV64m, 1, 0 }, @@ -268,7 +285,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::MOV16rr, X86::MOV16mr, 0, 0 }, { X86::MOV32ri, X86::MOV32mi, 0, 0 }, { X86::MOV32rr, X86::MOV32mr, 0, 0 }, - { X86::MOV32rr_TC, X86::MOV32mr_TC, 0, 0 }, { X86::MOV64ri32, X86::MOV64mi32, 0, 0 }, { X86::MOV64rr, X86::MOV64mr, 0, 0 }, { X86::MOV8ri, X86::MOV8mi, 0, 0 }, @@ -312,19 +328,22 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) }; for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) { - unsigned RegOp = OpTbl0[i][0]; - unsigned MemOp = OpTbl0[i][1]; - unsigned Align = OpTbl0[i][3]; - if (!RegOp2MemOpTable0.insert(std::make_pair((unsigned*)RegOp, - std::make_pair(MemOp,Align))).second) - assert(false && "Duplicated entries?"); + unsigned RegOp = OpTbl0[i][0]; + unsigned MemOp = OpTbl0[i][1] & ~TB_FLAGS; unsigned FoldedLoad = OpTbl0[i][2]; + unsigned Align = OpTbl0[i][3]; + assert(!RegOp2MemOpTable0.count(RegOp) && "Duplicated entries?"); + RegOp2MemOpTable0[RegOp] = std::make_pair(MemOp, Align); + + // If this is not a reversable operation (because there is a many->one) + // mapping, don't insert the reverse of the operation into MemOp2RegOpTable. + if (OpTbl0[i][1] & TB_NOT_REVERSABLE) + continue; + // Index 0, folded load or store. 
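Each MemOp2RegOpTable value also carries a small AuxInfo word next to the register opcode. From the expressions in these loops, and from the (1 << 4) test used later when unfolding, the encoding appears to be: the low bits hold the operand index the memory reference was folded into, bit 4 marks a folded load, bit 5 marks a folded store. A compact sketch (the 0xF mask used for decoding is illustrative):

#include <cassert>

struct FoldInfo {
  unsigned OpIndex;     // operand the memory reference was folded into (0, 1 or 2 here)
  bool     FoldedLoad;  // bit 4
  bool     FoldedStore; // bit 5
};

unsigned encodeAuxInfo(const FoldInfo &F) {
  return F.OpIndex | (unsigned(F.FoldedLoad) << 4) | (unsigned(F.FoldedStore) << 5);
}

FoldInfo decodeAuxInfo(unsigned Bits) {
  return { Bits & 0xFu, (Bits & (1u << 4)) != 0, (Bits & (1u << 5)) != 0 };
}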
unsigned AuxInfo = 0 | (FoldedLoad << 4) | ((FoldedLoad^1) << 5); - if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr) - if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, - std::make_pair(RegOp, AuxInfo))).second) - AmbEntries.push_back(MemOp); + assert(!MemOp2RegOpTable.count(MemOp) && "Duplicated entries?"); + MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo); } static const unsigned OpTbl1[][3] = { @@ -342,8 +361,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 }, { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 }, { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 }, - { X86::FsMOVAPDrr, X86::MOVSDrm, 0 }, - { X86::FsMOVAPSrr, X86::MOVSSrm, 0 }, + { X86::FsMOVAPDrr, X86::MOVSDrm | TB_NOT_REVERSABLE , 0 }, + { X86::FsMOVAPSrr, X86::MOVSSrm | TB_NOT_REVERSABLE , 0 }, { X86::IMUL16rri, X86::IMUL16rmi, 0 }, { X86::IMUL16rri8, X86::IMUL16rmi8, 0 }, { X86::IMUL32rri, X86::IMUL32rmi, 0 }, @@ -360,8 +379,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, 16 }, { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, 16 }, { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 }, - { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm, 0 }, - { X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm, 0 }, + { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 }, + { X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 }, { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 }, { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 }, { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 }, @@ -370,8 +389,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 }, { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm, 0 }, { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm, 0 }, - { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm, 16 }, - { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm, 16 }, + { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, 16 }, + { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, 16 }, { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 }, { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 }, { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 }, @@ -380,7 +399,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, 0 }, { X86::MOV16rr, X86::MOV16rm, 0 }, { X86::MOV32rr, X86::MOV32rm, 0 }, - { X86::MOV32rr_TC, X86::MOV32rm_TC, 0 }, { X86::MOV64rr, X86::MOV64rm, 0 }, { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 }, { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 }, @@ -439,25 +457,31 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { unsigned RegOp = OpTbl1[i][0]; - unsigned MemOp = OpTbl1[i][1]; + unsigned MemOp = OpTbl1[i][1] & ~TB_FLAGS; unsigned Align = OpTbl1[i][2]; - if (!RegOp2MemOpTable1.insert(std::make_pair((unsigned*)RegOp, - std::make_pair(MemOp,Align))).second) - assert(false && "Duplicated entries?"); + assert(!RegOp2MemOpTable1.count(RegOp) && "Duplicate entries"); + RegOp2MemOpTable1[RegOp] = std::make_pair(MemOp, Align); + + // If this is not a reversable operation (because there is a many->one) + // mapping, don't insert the reverse of the operation into MemOp2RegOpTable. 
+ if (OpTbl1[i][1] & TB_NOT_REVERSABLE) + continue; + // Index 1, folded load unsigned AuxInfo = 1 | (1 << 4); - if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr) - if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, - std::make_pair(RegOp, AuxInfo))).second) - AmbEntries.push_back(MemOp); + assert(!MemOp2RegOpTable.count(MemOp) && "Duplicate entries"); + MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo); } static const unsigned OpTbl2[][3] = { { X86::ADC32rr, X86::ADC32rm, 0 }, { X86::ADC64rr, X86::ADC64rm, 0 }, { X86::ADD16rr, X86::ADD16rm, 0 }, + { X86::ADD16rr_DB, X86::ADD16rm | TB_NOT_REVERSABLE, 0 }, { X86::ADD32rr, X86::ADD32rm, 0 }, + { X86::ADD32rr_DB, X86::ADD32rm | TB_NOT_REVERSABLE, 0 }, { X86::ADD64rr, X86::ADD64rm, 0 }, + { X86::ADD64rr_DB, X86::ADD64rm | TB_NOT_REVERSABLE, 0 }, { X86::ADD8rr, X86::ADD8rm, 0 }, { X86::ADDPDrr, X86::ADDPDrm, 16 }, { X86::ADDPSrr, X86::ADDPSrm, 16 }, @@ -652,20 +676,23 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) { unsigned RegOp = OpTbl2[i][0]; - unsigned MemOp = OpTbl2[i][1]; + unsigned MemOp = OpTbl2[i][1] & ~TB_FLAGS; unsigned Align = OpTbl2[i][2]; - if (!RegOp2MemOpTable2.insert(std::make_pair((unsigned*)RegOp, - std::make_pair(MemOp,Align))).second) - assert(false && "Duplicated entries?"); + + assert(!RegOp2MemOpTable2.count(RegOp) && "Duplicate entry!"); + RegOp2MemOpTable2[RegOp] = std::make_pair(MemOp, Align); + + // If this is not a reversable operation (because there is a many->one) + // mapping, don't insert the reverse of the operation into MemOp2RegOpTable. + if (OpTbl2[i][1] & TB_NOT_REVERSABLE) + continue; + // Index 2, folded load unsigned AuxInfo = 2 | (1 << 4); - if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, - std::make_pair(RegOp, AuxInfo))).second) - AmbEntries.push_back(MemOp); + assert(!MemOp2RegOpTable.count(MemOp) && + "Duplicated entries in unfolding maps?"); + MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo); } - - // Remove ambiguous entries. 
- assert(AmbEntries.empty() && "Duplicated entries in unfolding maps?"); } bool @@ -745,9 +772,7 @@ static bool isFrameLoadOpcode(int Opcode) { case X86::MOV8rm: case X86::MOV16rm: case X86::MOV32rm: - case X86::MOV32rm_TC: case X86::MOV64rm: - case X86::MOV64rm_TC: case X86::LD_Fp64m: case X86::MOVSSrm: case X86::MOVSDrm: @@ -768,9 +793,7 @@ static bool isFrameStoreOpcode(int Opcode) { case X86::MOV8mr: case X86::MOV16mr: case X86::MOV32mr: - case X86::MOV32mr_TC: case X86::MOV64mr: - case X86::MOV64mr_TC: case X86::ST_FpP64m: case X86::MOVSSmr: case X86::MOVSDmr: @@ -785,7 +808,7 @@ static bool isFrameStoreOpcode(int Opcode) { return false; } -unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, +unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { if (isFrameLoadOpcode(MI->getOpcode())) if (MI->getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex)) @@ -793,7 +816,7 @@ unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, return 0; } -unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, +unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, int &FrameIndex) const { if (isFrameLoadOpcode(MI->getOpcode())) { unsigned Reg; @@ -923,10 +946,10 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, isPICBase = true; } return isPICBase; - } + } return false; } - + case X86::LEA32r: case X86::LEA64r: { if (MI->getOperand(2).isImm() && @@ -1099,11 +1122,11 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit() ? X86::LEA64_32r : X86::LEA32r; MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo(); - unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass); + unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass); unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass); - + // Build and insert into an implicit UNDEF value. This is OK because - // well be shifting and then extracting the lower 16-bits. + // well be shifting and then extracting the lower 16-bits. // This has the potential to cause partial register stall. e.g. // movw (%rbp,%rcx,2), %dx // leal -65(%rdx), %esi @@ -1137,9 +1160,12 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, break; case X86::ADD16ri: case X86::ADD16ri8: - addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm()); + case X86::ADD16ri_DB: + case X86::ADD16ri8_DB: + addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm()); break; - case X86::ADD16rr: { + case X86::ADD16rr: + case X86::ADD16rr_DB: { unsigned Src2 = MI->getOperand(2).getReg(); bool isKill2 = MI->getOperand(2).isKill(); unsigned leaInReg2 = 0; @@ -1149,9 +1175,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, // just a single insert_subreg. addRegReg(MIB, leaInReg, true, leaInReg, false); } else { - leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32RegClass); + leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass); // Build and insert into an implicit UNDEF value. This is OK because - // well be shifting and then extracting the lower 16-bits. + // well be shifting and then extracting the lower 16-bits. 
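convertToThreeAddressWithLEA, shown in pieces through this hunk, is the fallback used when forming a 16-bit LEA is disabled: it copies the 16-bit operands into fresh 32-bit virtual registers (hence the IMPLICIT_DEF plus COPY below), does the arithmetic with a 32-bit LEA, and keeps only the low 16 bits of the result. That is sound because the low 16 bits of a 32-bit sum depend only on the low 16 bits of the inputs:

#include <cassert>
#include <cstdint>

// Why widening is safe for the ADD16* cases handled here.
uint16_t add16ViaAdd32(uint16_t A, uint16_t B) {
  uint32_t Wide = uint32_t(A) + uint32_t(B);  // the 32-bit LEA does this part
  return uint16_t(Wide);                      // then only the low 16 bits are kept
}

int main() {
  assert(add16ViaAdd32(0xFFFF, 0x0002) == 0x0001); // wraps exactly like a 16-bit add
  return 0;
}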
BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2); InsMI2 = BuildMI(*MFI, MIB, MI->getDebugLoc(), get(TargetOpcode::COPY)) @@ -1218,7 +1244,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::SHUFPSrri: { assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!"); if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0; - + unsigned B = MI->getOperand(1).getReg(); unsigned C = MI->getOperand(2).getReg(); if (B != C) return 0; @@ -1236,6 +1262,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned ShAmt = MI->getOperand(2).getImm(); if (ShAmt == 0 || ShAmt >= 4) return 0; + // LEA can't handle RSP. + if (TargetRegisterInfo::isVirtualRegister(Src) && + !MF.getRegInfo().constrainRegClass(Src, &X86::GR64_NOSPRegClass)) + return 0; + NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)) .addReg(0).addImm(1 << ShAmt) @@ -1250,6 +1281,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned ShAmt = MI->getOperand(2).getImm(); if (ShAmt == 0 || ShAmt >= 4) return 0; + // LEA can't handle ESP. + if (TargetRegisterInfo::isVirtualRegister(Src) && + !MF.getRegInfo().constrainRegClass(Src, &X86::GR32_NOSPRegClass)) + return 0; + unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)) @@ -1288,6 +1324,14 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!"); unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r : (is64Bit ? X86::LEA64_32r : X86::LEA32r); + + // LEA can't handle RSP. + if (TargetRegisterInfo::isVirtualRegister(Src) && + !MF.getRegInfo().constrainRegClass(Src, + MIOpc == X86::INC64r ? X86::GR64_NOSPRegisterClass : + X86::GR32_NOSPRegisterClass)) + return 0; + NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)), @@ -1310,6 +1354,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!"); unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r : (is64Bit ? X86::LEA64_32r : X86::LEA32r); + // LEA can't handle RSP. + if (TargetRegisterInfo::isVirtualRegister(Src) && + !MF.getRegInfo().constrainRegClass(Src, + MIOpc == X86::DEC64r ? X86::GR64_NOSPRegisterClass : + X86::GR32_NOSPRegisterClass)) + return 0; + NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)), @@ -1327,12 +1378,29 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, Src, isKill, -1); break; case X86::ADD64rr: - case X86::ADD32rr: { + case X86::ADD64rr_DB: + case X86::ADD32rr: + case X86::ADD32rr_DB: { assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); - unsigned Opc = MIOpc == X86::ADD64rr ? X86::LEA64r - : (is64Bit ? X86::LEA64_32r : X86::LEA32r); + unsigned Opc; + TargetRegisterClass *RC; + if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB) { + Opc = X86::LEA64r; + RC = X86::GR64_NOSPRegisterClass; + } else { + Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; + RC = X86::GR32_NOSPRegisterClass; + } + + unsigned Src2 = MI->getOperand(2).getReg(); bool isKill2 = MI->getOperand(2).isKill(); + + // LEA can't handle RSP. 
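The recurring "LEA can't handle RSP/ESP" checks in this function constrain virtual source registers to the *_NOSP classes before an LEA is formed. For the two-register ADD case here, the second source lands in the SIB index field, and the x86 encoding reserves index value 0b100 (the ESP/RSP slot) to mean "no index", so the stack pointer cannot be expressed in that position. A tiny illustrative predicate (the enum and its values are invented for the example and ignore REX extensions):

#include <cstdint>

// In a SIB byte the index field value 0b100 does not name a register, it means
// "no index"; that slot is where ESP/RSP would otherwise be encoded.
enum SIBIndex : uint8_t { RAX = 0, RCX, RDX, RBX, NoIndex /* 0b100 */, RBP, RSI, RDI };

bool encodableAsLEAIndex(SIBIndex Reg) {
  return Reg != NoIndex;  // the stack pointer's encoding is taken by "no index"
}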
+ if (TargetRegisterInfo::isVirtualRegister(Src2) && + !MF.getRegInfo().constrainRegClass(Src2, RC)) + return 0; + NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)), @@ -1341,7 +1409,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, LV->replaceKillInstruction(Src2, MI, NewMI); break; } - case X86::ADD16rr: { + case X86::ADD16rr: + case X86::ADD16rr_DB: { if (DisableLEA16) return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); @@ -1357,6 +1426,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, } case X86::ADD64ri32: case X86::ADD64ri8: + case X86::ADD64ri32_DB: + case X86::ADD64ri8_DB: assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) .addReg(Dest, RegState::Define | @@ -1364,7 +1435,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, Src, isKill, MI->getOperand(2).getImm()); break; case X86::ADD32ri: - case X86::ADD32ri8: { + case X86::ADD32ri8: + case X86::ADD32ri_DB: + case X86::ADD32ri8_DB: { assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) @@ -1375,6 +1448,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, } case X86::ADD16ri: case X86::ADD16ri8: + case X86::ADD16ri_DB: + case X86::ADD16ri8_DB: if (DisableLEA16) return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); @@ -1396,7 +1471,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, LV->replaceKillInstruction(Dest, MI, NewMI); } - MFI->insert(MBBI, NewMI); // Insert the new inst + MFI->insert(MBBI, NewMI); // Insert the new inst return NewMI; } @@ -1617,7 +1692,7 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) { bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { const TargetInstrDesc &TID = MI->getDesc(); if (!TID.isTerminator()) return false; - + // Conditional branch is a special case. if (TID.isBranch() && !TID.isBarrier()) return true; @@ -1626,7 +1701,7 @@ bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { return !isPredicated(MI); } -bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, +bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, @@ -1787,7 +1862,7 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { I = MBB.end(); ++Count; } - + return Count; } @@ -1945,13 +2020,23 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, default: llvm_unreachable("Unknown regclass"); case X86::GR64RegClassID: + case X86::GR64_ABCDRegClassID: + case X86::GR64_NOREXRegClassID: + case X86::GR64_NOREX_NOSPRegClassID: case X86::GR64_NOSPRegClassID: + case X86::GR64_TCRegClassID: + case X86::GR64_TCW64RegClassID: return load ? X86::MOV64rm : X86::MOV64mr; case X86::GR32RegClassID: - case X86::GR32_NOSPRegClassID: + case X86::GR32_ABCDRegClassID: case X86::GR32_ADRegClassID: + case X86::GR32_NOREXRegClassID: + case X86::GR32_NOSPRegClassID: + case X86::GR32_TCRegClassID: return load ? 
X86::MOV32rm : X86::MOV32mr; case X86::GR16RegClassID: + case X86::GR16_ABCDRegClassID: + case X86::GR16_NOREXRegClassID: return load ? X86::MOV16rm : X86::MOV16mr; case X86::GR8RegClassID: // Copying to or from a physical H register on x86-64 requires a NOREX @@ -1961,32 +2046,14 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX; else return load ? X86::MOV8rm : X86::MOV8mr; - case X86::GR64_ABCDRegClassID: - return load ? X86::MOV64rm : X86::MOV64mr; - case X86::GR32_ABCDRegClassID: - return load ? X86::MOV32rm : X86::MOV32mr; - case X86::GR16_ABCDRegClassID: - return load ? X86::MOV16rm : X86::MOV16mr; case X86::GR8_ABCD_LRegClassID: + case X86::GR8_NOREXRegClassID: return load ? X86::MOV8rm :X86::MOV8mr; case X86::GR8_ABCD_HRegClassID: if (TM.getSubtarget<X86Subtarget>().is64Bit()) return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX; else return load ? X86::MOV8rm : X86::MOV8mr; - case X86::GR64_NOREXRegClassID: - case X86::GR64_NOREX_NOSPRegClassID: - return load ? X86::MOV64rm : X86::MOV64mr; - case X86::GR32_NOREXRegClassID: - return load ? X86::MOV32rm : X86::MOV32mr; - case X86::GR16_NOREXRegClassID: - return load ? X86::MOV16rm : X86::MOV16mr; - case X86::GR8_NOREXRegClassID: - return load ? X86::MOV8rm : X86::MOV8mr; - case X86::GR64_TCRegClassID: - return load ? X86::MOV64rm_TC : X86::MOV64mr_TC; - case X86::GR32_TCRegClassID: - return load ? X86::MOV32rm_TC : X86::MOV32mr_TC; case X86::RFP80RegClassID: return load ? X86::LD_Fp80m : X86::ST_FpP80m; case X86::RFP64RegClassID: @@ -2085,76 +2152,6 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, NewMIs.push_back(MIB); } -bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const { - if (CSI.empty()) - return false; - - DebugLoc DL = MBB.findDebugLoc(MI); - - bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit(); - bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64(); - unsigned SlotSize = is64Bit ? 8 : 4; - - MachineFunction &MF = *MBB.getParent(); - unsigned FPReg = RI.getFrameRegister(MF); - X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); - unsigned CalleeFrameSize = 0; - - unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r; - for (unsigned i = CSI.size(); i != 0; --i) { - unsigned Reg = CSI[i-1].getReg(); - // Add the callee-saved register as live-in. It's killed at the spill. - MBB.addLiveIn(Reg); - if (Reg == FPReg) - // X86RegisterInfo::emitPrologue will handle spilling of frame register. - continue; - if (!X86::VR128RegClass.contains(Reg) && !isWin64) { - CalleeFrameSize += SlotSize; - BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill); - } else { - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), - RC, &RI); - } - } - - X86FI->setCalleeSavedFrameSize(CalleeFrameSize); - return true; -} - -bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const { - if (CSI.empty()) - return false; - - DebugLoc DL = MBB.findDebugLoc(MI); - - MachineFunction &MF = *MBB.getParent(); - unsigned FPReg = RI.getFrameRegister(MF); - bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit(); - bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64(); - unsigned Opc = is64Bit ? 
X86::POP64r : X86::POP32r; - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - if (Reg == FPReg) - // X86RegisterInfo::emitEpilogue will handle restoring of frame register. - continue; - if (!X86::VR128RegClass.contains(Reg) && !isWin64) { - BuildMI(MBB, MI, DL, get(Opc), Reg); - } else { - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), - RC, &RI); - } - } - return true; -} - MachineInstr* X86InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, uint64_t Offset, @@ -2181,7 +2178,7 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, MIB.addOperand(MOs[i]); if (NumAddrOps < 4) // FrameIndex only addOffset(MIB, 0); - + // Loop over the rest of the ri operands, converting them over. unsigned NumOps = MI->getDesc().getNumOperands()-2; for (unsigned i = 0; i != NumOps; ++i) { @@ -2202,7 +2199,7 @@ static MachineInstr *FuseInst(MachineFunction &MF, MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), MI->getDebugLoc(), true); MachineInstrBuilder MIB(NewMI); - + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (i == OpNo) { @@ -2238,7 +2235,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, unsigned i, const SmallVectorImpl<MachineOperand> &MOs, unsigned Size, unsigned Align) const { - const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL; + const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0; bool isTwoAddrFold = false; unsigned NumOps = MI->getDesc().getNumOperands(); bool isTwoAddr = NumOps > 1 && @@ -2251,7 +2248,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, if (isTwoAddr && NumOps >= 2 && i < 2 && MI->getOperand(0).isReg() && MI->getOperand(1).isReg() && - MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) { + MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) { OpcodeTablePtr = &RegOp2MemOpTable2Addr; isTwoAddrFold = true; } else if (i == 0) { // If operand 0 @@ -2265,19 +2262,19 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI); if (NewMI) return NewMI; - + OpcodeTablePtr = &RegOp2MemOpTable0; } else if (i == 1) { OpcodeTablePtr = &RegOp2MemOpTable1; } else if (i == 2) { OpcodeTablePtr = &RegOp2MemOpTable2; } - + // If table selected... 
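A change threaded through the rest of this file: the fold/unfold tables are now DenseMaps keyed by the plain opcode value rather than by the opcode cast to an unsigned*, so the lookups below lose their (unsigned*) casts. The shape of such a lookup, modeled with a standard container instead of llvm::DenseMap:

#include <unordered_map>
#include <utility>

// Opcode -> (counterpart opcode, auxiliary info), standing in for
// RegOp2MemOpTable*/MemOp2RegOpTable in this sketch.
using OpcodeTable = std::unordered_map<unsigned, std::pair<unsigned, unsigned>>;

bool lookupFold(const OpcodeTable &Table, unsigned Opcode,
                unsigned &Counterpart, unsigned &AuxInfo) {
  auto It = Table.find(Opcode);   // no (unsigned*)Opcode cast needed any more
  if (It == Table.end())
    return false;
  Counterpart = It->second.first;
  AuxInfo = It->second.second;
  return true;
}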
if (OpcodeTablePtr) { // Find the Opcode to fuse - DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = - OpcodeTablePtr->find((unsigned*)MI->getOpcode()); + DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I = + OpcodeTablePtr->find(MI->getOpcode()); if (I != OpcodeTablePtr->end()) { unsigned Opcode = I->second.first; unsigned MinAlign = I->second.second; @@ -2320,8 +2317,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, return NewMI; } } - - // No fusion + + // No fusion if (PrintFailedFusing && !MI->isCopy()) dbgs() << "We failed to fuse operand " << i << " in " << *MI; return NULL; @@ -2332,7 +2329,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, const SmallVectorImpl<unsigned> &Ops, int FrameIndex) const { - // Check switch flag + // Check switch flag if (NoFusing) return NULL; if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) @@ -2343,8 +2340,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, case X86::Int_CVTSS2SDrr: case X86::RCPSSr: case X86::RCPSSr_Int: - case X86::ROUNDSDr_Int: - case X86::ROUNDSSr_Int: + case X86::ROUNDSDr: + case X86::ROUNDSSr: case X86::RSQRTSSr: case X86::RSQRTSSr_Int: case X86::SQRTSSr: @@ -2384,7 +2381,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, const SmallVectorImpl<unsigned> &Ops, MachineInstr *LoadMI) const { - // Check switch flag + // Check switch flag if (NoFusing) return NULL; if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) @@ -2395,8 +2392,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, case X86::Int_CVTSS2SDrr: case X86::RCPSSr: case X86::RCPSSr_Int: - case X86::ROUNDSDr_Int: - case X86::ROUNDSSr_Int: + case X86::ROUNDSDr: + case X86::ROUNDSSr: case X86::RSQRTSSr: case X86::RSQRTSSr_Int: case X86::SQRTSSr: @@ -2424,9 +2421,11 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, Alignment = 16; break; case X86::FsFLD0SD: + case X86::VFsFLD0SD: Alignment = 8; break; case X86::FsFLD0SS: + case X86::VFsFLD0SS: Alignment = 4; break; default: @@ -2490,9 +2489,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineConstantPool &MCP = *MF.getConstantPool(); const Type *Ty; unsigned Opc = LoadMI->getOpcode(); - if (Opc == X86::FsFLD0SS) + if (Opc == X86::FsFLD0SS || Opc == X86::VFsFLD0SS) Ty = Type::getFloatTy(MF.getFunction()->getContext()); - else if (Opc == X86::FsFLD0SD) + else if (Opc == X86::FsFLD0SD || Opc == X86::VFsFLD0SD) Ty = Type::getDoubleTy(MF.getFunction()->getContext()); else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY) Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8); @@ -2525,13 +2524,13 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, const SmallVectorImpl<unsigned> &Ops) const { - // Check switch flag + // Check switch flag if (NoFusing) return 0; if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { switch (MI->getOpcode()) { default: return false; - case X86::TEST8rr: + case X86::TEST8rr: case X86::TEST16rr: case X86::TEST32rr: case X86::TEST64rr: @@ -2551,16 +2550,15 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, // Folding a memory location into the two-address part of a two-address // instruction is different than folding it other places. It requires // replacing the *two* registers with the memory location. 
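The comment that closes this chunk is the defining property of the 2-addr table: in the register form the destination is tied to the first source, so folding a spill slot in replaces both tied operands with a single memory location that is then read and written. In C terms, using the ADD32rr/ADD32mr pair listed in OpTbl2Addr earlier in this constructor:

#include <cstdint>

// Register form: the destination is tied to the first source operand.
void add32rr(uint32_t &DstAndSrc1, uint32_t Src2) { DstAndSrc1 += Src2; }

// Folded (memory) form: one memory location stands in for both tied operands,
// so it is loaded, added to, and stored back.
void add32mr(uint32_t *Mem, uint32_t Src2) { *Mem += Src2; }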
- const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL; - if (isTwoAddr && NumOps >= 2 && OpNum < 2) { + const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0; + if (isTwoAddr && NumOps >= 2 && OpNum < 2) { OpcodeTablePtr = &RegOp2MemOpTable2Addr; } else if (OpNum == 0) { // If operand 0 switch (Opc) { case X86::MOV8r0: case X86::MOV16r0: case X86::MOV32r0: - case X86::MOV64r0: - return true; + case X86::MOV64r0: return true; default: break; } OpcodeTablePtr = &RegOp2MemOpTable0; @@ -2569,22 +2567,17 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, } else if (OpNum == 2) { OpcodeTablePtr = &RegOp2MemOpTable2; } - - if (OpcodeTablePtr) { - // Find the Opcode to fuse - DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = - OpcodeTablePtr->find((unsigned*)Opc); - if (I != OpcodeTablePtr->end()) - return true; - } + + if (OpcodeTablePtr && OpcodeTablePtr->count(Opc)) + return true; return TargetInstrInfoImpl::canFoldMemoryOperand(MI, Ops); } bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, unsigned Reg, bool UnfoldLoad, bool UnfoldStore, SmallVectorImpl<MachineInstr*> &NewMIs) const { - DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = - MemOp2RegOpTable.find((unsigned*)MI->getOpcode()); + DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I = + MemOp2RegOpTable.find(MI->getOpcode()); if (I == MemOp2RegOpTable.end()) return false; unsigned Opc = I->second.first; @@ -2644,7 +2637,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, // Emit the data processing instruction. MachineInstr *DataMI = MF.CreateMachineInstr(TID, MI->getDebugLoc(), true); MachineInstrBuilder MIB(DataMI); - + if (FoldedStore) MIB.addReg(Reg, RegState::Define); for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i) @@ -2712,8 +2705,8 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, if (!N->isMachineOpcode()) return false; - DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = - MemOp2RegOpTable.find((unsigned*)N->getMachineOpcode()); + DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I = + MemOp2RegOpTable.find(N->getMachineOpcode()); if (I == MemOp2RegOpTable.end()) return false; unsigned Opc = I->second.first; @@ -2813,8 +2806,8 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc, bool UnfoldLoad, bool UnfoldStore, unsigned *LoadRegIndex) const { - DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = - MemOp2RegOpTable.find((unsigned*)Opc); + DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I = + MemOp2RegOpTable.find(Opc); if (I == MemOp2RegOpTable.end()) return 0; bool FoldedLoad = I->second.second & (1 << 4); @@ -2993,6 +2986,8 @@ bool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) { case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15: case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11: case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15: + case X86::CR8: case X86::CR9: case X86::CR10: case X86::CR11: + case X86::CR12: case X86::CR13: case X86::CR14: case X86::CR15: return true; } return false; @@ -3090,6 +3085,41 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { NopInst.setOpcode(X86::NOOP); } +bool X86InstrInfo:: +hasHighOperandLatency(const InstrItineraryData *ItinData, + const MachineRegisterInfo *MRI, + const 
MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, unsigned UseIdx) const { + switch (DefMI->getOpcode()) { + default: return false; + case X86::DIVSDrm: + case X86::DIVSDrm_Int: + case X86::DIVSDrr: + case X86::DIVSDrr_Int: + case X86::DIVSSrm: + case X86::DIVSSrm_Int: + case X86::DIVSSrr: + case X86::DIVSSrr_Int: + case X86::SQRTPDm: + case X86::SQRTPDm_Int: + case X86::SQRTPDr: + case X86::SQRTPDr_Int: + case X86::SQRTPSm: + case X86::SQRTPSm_Int: + case X86::SQRTPSr: + case X86::SQRTPSr_Int: + case X86::SQRTSDm: + case X86::SQRTSDm_Int: + case X86::SQRTSDr: + case X86::SQRTSDr_Int: + case X86::SQRTSSm: + case X86::SQRTSSm_Int: + case X86::SQRTSSr: + case X86::SQRTSSr_Int: + return true; + } +} + namespace { /// CGBR - Create Global Base Reg pass. This initializes the PIC /// global base register for x86-32. @@ -3108,6 +3138,13 @@ namespace { if (TM->getRelocationModel() != Reloc::PIC_) return false; + X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); + unsigned GlobalBaseReg = X86FI->getGlobalBaseReg(); + + // If we didn't need a GlobalBaseReg, don't insert code. + if (GlobalBaseReg == 0) + return false; + // Insert the set of GlobalBaseReg into the first MBB of the function MachineBasicBlock &FirstMBB = MF.front(); MachineBasicBlock::iterator MBBI = FirstMBB.begin(); @@ -3119,16 +3156,15 @@ namespace { if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass); else - PC = TII->getGlobalBaseReg(&MF); - + PC = GlobalBaseReg; + // Operand of MovePCtoStack is completely ignored by asm printer. It's // only used in JIT code emission as displacement to pc. BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0); - + // If we're using vanilla 'GOT' PIC style, we should use relative addressing // not to pc, but to _GLOBAL_OFFSET_TABLE_ external. if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) { - unsigned GlobalBaseReg = TII->getGlobalBaseReg(&MF); // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg) .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_", diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index f336206..1d44207 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -174,7 +174,7 @@ namespace X86II { /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the /// reference is actually to the "FOO$stub" symbol. This is used for calls - /// and jumps to external functions on Tiger and before. + /// and jumps to external functions on Tiger and earlier. MO_DARWIN_STUB, /// MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the @@ -311,12 +311,17 @@ namespace X86II { MRM_F0 = 40, MRM_F8 = 41, MRM_F9 = 42, + + /// RawFrmImm8 - This is used for the ENTER instruction, which has two + /// immediates, the first of which is a 16-bit immediate (specified by + /// the imm encoding) and the second is a 8-bit fixed value. + RawFrmImm8 = 43, /// RawFrmImm16 - This is used for CALL FAR instructions, which have two /// immediates, the first of which is a 16 or 32-bit immediate (specified by /// the imm encoding) and the second is a 16-bit fixed value. 
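The new hasHighOperandLatency override added above is simply a switch over defining opcodes that reports divides and square roots as slow. The short standalone sketch below shows the same classification idea; the Op names are hypothetical placeholders for the instruction enum, not the X86 opcode list itself.

//===-- Illustrative sketch: classifying high-latency defining opcodes ----===//
#include <iostream>

// Hypothetical opcode identifiers standing in for the real instruction enum.
enum class Op { AddRR, MulRR, DivSDrr, DivSSrm, SqrtSDr, SqrtPSm };

// Treat divides and square roots as "high latency" definitions: a use that
// depends on one of these is a poor candidate for aggressive rescheduling.
bool hasHighLatencyDef(Op DefOp) {
  switch (DefOp) {
  case Op::DivSDrr:
  case Op::DivSSrm:
  case Op::SqrtSDr:
  case Op::SqrtPSm:
    return true;
  default:
    return false;
  }
}

int main() {
  std::cout << std::boolalpha
            << hasHighLatencyDef(Op::AddRR) << " "     // false
            << hasHighLatencyDef(Op::DivSDrr) << "\n"; // true
}
//===----------------------------------------------------------------------===//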
In the AMD /// manual, this operand is described as pntr16:32 and pntr16:16 - RawFrmImm16 = 43, + RawFrmImm16 = 44, FormMask = 63, @@ -444,28 +449,36 @@ namespace X86II { OpcodeMask = 0xFF << OpcodeShift, //===------------------------------------------------------------------===// - // VEX - The opcode prefix used by AVX instructions + /// VEX - The opcode prefix used by AVX instructions VEX = 1U << 0, - // VEX_W - Has a opcode specific functionality, but is used in the same - // way as REX_W is for regular SSE instructions. + /// VEX_W - Has a opcode specific functionality, but is used in the same + /// way as REX_W is for regular SSE instructions. VEX_W = 1U << 1, - // VEX_4V - Used to specify an additional AVX/SSE register. Several 2 - // address instructions in SSE are represented as 3 address ones in AVX - // and the additional register is encoded in VEX_VVVV prefix. + /// VEX_4V - Used to specify an additional AVX/SSE register. Several 2 + /// address instructions in SSE are represented as 3 address ones in AVX + /// and the additional register is encoded in VEX_VVVV prefix. VEX_4V = 1U << 2, - // VEX_I8IMM - Specifies that the last register used in a AVX instruction, - // must be encoded in the i8 immediate field. This usually happens in - // instructions with 4 operands. + /// VEX_I8IMM - Specifies that the last register used in a AVX instruction, + /// must be encoded in the i8 immediate field. This usually happens in + /// instructions with 4 operands. VEX_I8IMM = 1U << 3, - // VEX_L - Stands for a bit in the VEX opcode prefix meaning the current - // instruction uses 256-bit wide registers. This is usually auto detected if - // a VR256 register is used, but some AVX instructions also have this field - // marked when using a f256 memory references. - VEX_L = 1U << 4 + /// VEX_L - Stands for a bit in the VEX opcode prefix meaning the current + /// instruction uses 256-bit wide registers. This is usually auto detected + /// if a VR256 register is used, but some AVX instructions also have this + /// field marked when using a f256 memory references. + VEX_L = 1U << 4, + + /// Has3DNow0F0FOpcode - This flag indicates that the instruction uses the + /// wacky 0x0F 0x0F prefix for 3DNow! instructions. The manual documents + /// this as having a 0x0F prefix with a 0x0F opcode, and each instruction + /// storing a classifier in the imm8 field. To simplify our implementation, + /// we handle this by storeing the classifier in the opcode field and using + /// this flag to indicate that the encoder should do the wacky 3DNow! thing. + Has3DNow0F0FOpcode = 1U << 5 }; // getBaseOpcodeFor - This function returns the "base" X86 opcode for the @@ -528,6 +541,7 @@ namespace X86II { case X86II::AddRegFrm: case X86II::MRMDestReg: case X86II::MRMSrcReg: + case X86II::RawFrmImm8: case X86II::RawFrmImm16: return -1; case X86II::MRMDestMem: @@ -599,14 +613,14 @@ class X86InstrInfo : public TargetInstrInfoImpl { /// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1, /// RegOp2MemOpTable2 - Load / store folding opcode maps. 
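The X86II changes above add RawFrmImm8 (renumbering RawFrmImm16 to 44 within the 6-bit FormMask) and a new Has3DNow0F0FOpcode flag bit alongside the VEX bits. The sketch below illustrates the general packing style of such a flag word: a small form field in the low bits plus boolean attributes above it. The field offsets and names here are illustrative assumptions, not the actual X86II layout.

//===-- Illustrative sketch: packing a form field plus flag bits ----------===//
#include <cstdint>
#include <iostream>

namespace Fmt {
enum : uint64_t {
  FormMask     = 63,        // low 6 bits select the instruction form
  RawFrm       = 2,
  RawFrmImm8   = 43,        // e.g. ENTER: a 16-bit immediate plus an 8-bit one
  RawFrmImm16  = 44,

  FlagShift    = 6,         // attribute bits start above the form field
  HasWeirdPfx  = 1ULL << (FlagShift + 0),
  UsesWideRegs = 1ULL << (FlagShift + 1),
};
}

int main() {
  uint64_t TSFlags = Fmt::RawFrmImm8 | Fmt::HasWeirdPfx;

  std::cout << "form = " << (TSFlags & Fmt::FormMask) << "\n";          // 43
  std::cout << "weird prefix? "
            << ((TSFlags & Fmt::HasWeirdPfx) != 0) << "\n";             // 1
  std::cout << "wide regs? "
            << ((TSFlags & Fmt::UsesWideRegs) != 0) << "\n";            // 0
}
//===----------------------------------------------------------------------===//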
/// - DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable2Addr; - DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable0; - DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable1; - DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable2; + DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable2Addr; + DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable0; + DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable1; + DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable2; /// MemOp2RegOpTable - Load / store unfolding opcode map. /// - DenseMap<unsigned*, std::pair<unsigned, unsigned> > MemOp2RegOpTable; + DenseMap<unsigned, std::pair<unsigned, unsigned> > MemOp2RegOpTable; public: explicit X86InstrInfo(X86TargetMachine &tm); @@ -728,17 +742,6 @@ public: MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl<MachineInstr*> &NewMIs) const; - - virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const; - - virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const; - virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, uint64_t Offset, @@ -845,18 +848,23 @@ public: /// SetSSEDomain - Set the SSEDomain of MI. void SetSSEDomain(MachineInstr *MI, unsigned Domain) const; + MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + unsigned OpNum, + const SmallVectorImpl<MachineOperand> &MOs, + unsigned Size, unsigned Alignment) const; + + bool hasHighOperandLatency(const InstrItineraryData *ItinData, + const MachineRegisterInfo *MRI, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, unsigned UseIdx) const; + private: MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc, MachineFunction::iterator &MFI, MachineBasicBlock::iterator &MBBI, LiveVariables *LV) const; - MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - unsigned OpNum, - const SmallVectorImpl<MachineOperand> &MOs, - unsigned Size, unsigned Alignment) const; - /// isFrameOperand - Return true and the FrameIndex if the specified /// operand and follow operands form a reference to the stack frame. bool isFrameOperand(const MachineInstr *MI, unsigned int Op, diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 09b7721..87dc4be 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1,10 +1,10 @@ -//===----------------------------------------------------------------------===// -// +//===- X86InstrInfo.td - Main X86 Instruction Definition ---*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
-// +// //===----------------------------------------------------------------------===// // // This file describes the X86 instruction set, defining the instructions, and @@ -35,6 +35,20 @@ def SDTBinaryArithWithFlags : SDTypeProfile<2, 2, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0>, SDTCisVT<1, i32>]>; + +// SDTBinaryArithWithFlagsInOut - RES1, EFLAGS = op LHS, RHS, EFLAGS +def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, + [SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisInt<0>, + SDTCisVT<1, i32>, + SDTCisVT<4, i32>]>; +// RES1, RES2, FLAGS = op LHS, RHS +def SDT2ResultBinaryArithWithFlags : SDTypeProfile<3, 2, + [SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisInt<0>, SDTCisVT<1, i32>]>; def SDTX86BrCond : SDTypeProfile<0, 3, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i8>, SDTCisVT<2, i32>]>; @@ -46,7 +60,7 @@ def SDTX86SetCC_C : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVT<1, i8>, SDTCisVT<2, i32>]>; -def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>, +def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>, SDTCisVT<2, i8>]>; def SDTX86cas8 : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; @@ -64,6 +78,12 @@ def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>, SDTCisVT<1, iPTR>, SDTCisVT<2, iPTR>]>; +def SDT_X86VAARG_64 : SDTypeProfile<1, -1, [SDTCisPtrTy<0>, + SDTCisPtrTy<1>, + SDTCisVT<2, i32>, + SDTCisVT<3, i8>, + SDTCisVT<4, i32>]>; + def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>; def SDTX86Void : SDTypeProfile<0, 0, []>; @@ -72,9 +92,7 @@ def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; def SDT_X86TLSADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>; -def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; - -def SDT_X86SegmentBaseAddress : SDTypeProfile<1, 1, [SDTCisPtrTy<0>]>; +def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>; @@ -110,82 +128,85 @@ def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>; def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC_C>; def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas, - [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore, - SDNPMayLoad]>; + [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, + SDNPMayLoad, SDNPMemOperand]>; def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86cas8, - [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore, - SDNPMayLoad]>; + [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, + SDNPMayLoad, SDNPMemOperand]>; def X86AtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTX86atomicBinary, - [SDNPHasChain, SDNPMayStore, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def X86AtomSub64 : SDNode<"X86ISD::ATOMSUB64_DAG", SDTX86atomicBinary, - [SDNPHasChain, SDNPMayStore, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def X86AtomOr64 : SDNode<"X86ISD::ATOMOR64_DAG", SDTX86atomicBinary, - [SDNPHasChain, SDNPMayStore, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def X86AtomXor64 : SDNode<"X86ISD::ATOMXOR64_DAG", SDTX86atomicBinary, - [SDNPHasChain, SDNPMayStore, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def X86AtomAnd64 : SDNode<"X86ISD::ATOMAND64_DAG", SDTX86atomicBinary, - [SDNPHasChain, SDNPMayStore, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def X86AtomNand64 : SDNode<"X86ISD::ATOMNAND64_DAG", SDTX86atomicBinary, - [SDNPHasChain, SDNPMayStore, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def X86AtomSwap64 : 
SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary, - [SDNPHasChain, SDNPMayStore, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret, - [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>; + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def X86vastart_save_xmm_regs : SDNode<"X86ISD::VASTART_SAVE_XMM_REGS", SDT_X86VASTART_SAVE_XMM_REGS, [SDNPHasChain, SDNPVariadic]>; - +def X86vaarg64 : + SDNode<"X86ISD::VAARG_64", SDT_X86VAARG_64, + [SDNPHasChain, SDNPMayLoad, SDNPMayStore, + SDNPMemOperand]>; def X86callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart, - [SDNPHasChain, SDNPOutFlag]>; + [SDNPHasChain, SDNPOutGlue]>; def X86callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_X86CallSeqEnd, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def X86call : SDNode<"X86ISD::CALL", SDT_X86Call, - [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>; def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr, - [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore]>; + [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore]>; def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr, - [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore, + [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad]>; def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG", SDTX86Void, - [SDNPHasChain, SDNPOutFlag, SDNPSideEffect]>; + [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>; def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>; def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; -def X86SegmentBaseAddress : SDNode<"X86ISD::SegmentBaseAddress", - SDT_X86SegmentBaseAddress, []>; + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET, [SDNPHasChain]>; -def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET, - [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>; +def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags, [SDNPCommutative]>; def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>; def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags, [SDNPCommutative]>; -def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags, +def X86umul_flag : SDNode<"X86ISD::UMUL", SDT2ResultBinaryArithWithFlags, [SDNPCommutative]>; - +def X86adc_flag : SDNode<"X86ISD::ADC", SDTBinaryArithWithFlagsInOut>; +def X86sbb_flag : SDNode<"X86ISD::SBB", SDTBinaryArithWithFlagsInOut>; + def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>; def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>; def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags, @@ -197,11 +218,11 @@ def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags, def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; -def X86MingwAlloca : SDNode<"X86ISD::MINGW_ALLOCA", SDTX86Void, - [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>; - +def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDTX86Void, + [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; + def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL, - []>; + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; //===----------------------------------------------------------------------===// // X86 Operand Definitions. 
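The new X86adc_flag and X86sbb_flag nodes above use SDTBinaryArithWithFlagsInOut, i.e. "result, flags-out = op lhs, rhs, flags-in". A minimal arithmetic sketch of what that shape means for add-with-carry, in plain C++ with invented helper names:

//===-- Illustrative sketch: add-with-carry with flags in and out ---------===//
#include <cstdint>
#include <iostream>

// (result, carry_out) = lhs + rhs + carry_in, on 32-bit values.
struct AdcResult {
  uint32_t Value;
  bool CarryOut;
};

AdcResult adc32(uint32_t LHS, uint32_t RHS, bool CarryIn) {
  uint64_t Wide = uint64_t(LHS) + uint64_t(RHS) + (CarryIn ? 1 : 0);
  return {uint32_t(Wide), Wide > 0xFFFFFFFFull};
}

int main() {
  // 0xFFFFFFFF + 1 + carry 0 -> value 0, carry out 1.
  AdcResult R = adc32(0xFFFFFFFFu, 1u, false);
  std::cout << std::hex << R.Value << " carry=" << R.CarryOut << "\n";

  // Chaining: the carry out of the low word feeds the high word, which is
  // exactly what the extra in/out flags operand models.
  AdcResult Hi = adc32(0x00000001u, 0x00000002u, R.CarryOut);
  std::cout << Hi.Value << " carry=" << Hi.CarryOut << "\n";  // 4 carry=0
}
//===----------------------------------------------------------------------===//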
@@ -252,6 +273,10 @@ def i8mem_NOREX : Operand<i64> { let ParserMatchClass = X86MemAsmOperand; } +// GPRs available for tailcall. +// It represents GR64_TC or GR64_TCW64. +def ptr_rc_tailcall : PointerLikeRegClass<2>; + // Special i32mem for addresses of load folding tail calls. These are not // allowed to use callee-saved registers since they must be scheduled // after callee-saved register are popped. @@ -261,6 +286,15 @@ def i32mem_TC : Operand<i32> { let ParserMatchClass = X86MemAsmOperand; } +// Special i64mem for addresses of load folding tail calls. These are not +// allowed to use callee-saved registers since they must be scheduled +// after callee-saved register are popped. +def i64mem_TC : Operand<i64> { + let PrintMethod = "printi64mem"; + let MIOperandInfo = (ops ptr_rc_tailcall, i8imm, + ptr_rc_tailcall, i32imm, i8imm); + let ParserMatchClass = X86MemAsmOperand; +} let ParserMatchClass = X86AbsMemAsmOperand, PrintMethod = "print_pcrel_imm" in { @@ -332,43 +366,77 @@ def i32i8imm : Operand<i32> { let ParserMatchClass = ImmSExti32i8AsmOperand; } +// 64-bits but only 32 bits are significant. +def i64i32imm : Operand<i64> { + let ParserMatchClass = ImmSExti64i32AsmOperand; +} + +// 64-bits but only 32 bits are significant, and those bits are treated as being +// pc relative. +def i64i32imm_pcrel : Operand<i64> { + let PrintMethod = "print_pcrel_imm"; + let ParserMatchClass = X86AbsMemAsmOperand; +} + +// 64-bits but only 8 bits are significant. +def i64i8imm : Operand<i64> { + let ParserMatchClass = ImmSExti64i8AsmOperand; +} + +def lea64_32mem : Operand<i32> { + let PrintMethod = "printi32mem"; + let AsmOperandLowerMethod = "lower_lea64_32mem"; + let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm, i8imm); + let ParserMatchClass = X86MemAsmOperand; +} + + //===----------------------------------------------------------------------===// // X86 Complex Pattern Definitions. // // Define X86 specific addressing mode. -def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], []>; +def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], [SDNPWantParent]>; def lea32addr : ComplexPattern<i32, 5, "SelectLEAAddr", [add, sub, mul, X86mul_imm, shl, or, frameindex], []>; def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr", [tglobaltlsaddr], []>; +def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr", + [add, sub, mul, X86mul_imm, shl, or, frameindex, + X86WrapperRIP], []>; + +def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr", + [tglobaltlsaddr], []>; + //===----------------------------------------------------------------------===// // X86 Instruction Predicate Definitions. 
def HasCMov : Predicate<"Subtarget->hasCMov()">; def NoCMov : Predicate<"!Subtarget->hasCMov()">; -// FIXME: temporary hack to let codegen assert or generate poor code in case -// no AVX version of the desired intructions is present, this is better for -// incremental dev (without fallbacks it's easier to spot what's missing) -def HasMMX : Predicate<"Subtarget->hasMMX() && !Subtarget->hasAVX()">; -def HasSSE1 : Predicate<"Subtarget->hasSSE1() && !Subtarget->hasAVX()">; -def HasSSE2 : Predicate<"Subtarget->hasSSE2() && !Subtarget->hasAVX()">; -def HasSSE3 : Predicate<"Subtarget->hasSSE3() && !Subtarget->hasAVX()">; -def HasSSSE3 : Predicate<"Subtarget->hasSSSE3() && !Subtarget->hasAVX()">; -def HasSSE41 : Predicate<"Subtarget->hasSSE41() && !Subtarget->hasAVX()">; -def HasSSE42 : Predicate<"Subtarget->hasSSE42() && !Subtarget->hasAVX()">; -def HasSSE4A : Predicate<"Subtarget->hasSSE4A() && !Subtarget->hasAVX()">; +def HasMMX : Predicate<"Subtarget->hasMMX()">; +def Has3DNow : Predicate<"Subtarget->has3DNow()">; +def Has3DNowA : Predicate<"Subtarget->has3DNowA()">; +def HasSSE1 : Predicate<"Subtarget->hasSSE1()">; +def HasSSE2 : Predicate<"Subtarget->hasSSE2()">; +def HasSSE3 : Predicate<"Subtarget->hasSSE3()">; +def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">; +def HasSSE41 : Predicate<"Subtarget->hasSSE41()">; +def HasSSE42 : Predicate<"Subtarget->hasSSE42()">; +def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">; def HasAVX : Predicate<"Subtarget->hasAVX()">; +def HasXMMInt : Predicate<"Subtarget->hasXMMInt()">; + +def HasAES : Predicate<"Subtarget->hasAES()">; def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">; def HasFMA3 : Predicate<"Subtarget->hasFMA3()">; def HasFMA4 : Predicate<"Subtarget->hasFMA4()">; -def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">; -def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">; -def In32BitMode : Predicate<"!Subtarget->is64Bit()">; -def In64BitMode : Predicate<"Subtarget->is64Bit()">; +def FPStackf32 : Predicate<"!Subtarget->hasXMM()">; +def FPStackf64 : Predicate<"!Subtarget->hasXMMInt()">; +def In32BitMode : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate; +def In64BitMode : Predicate<"Subtarget->is64Bit()">, AssemblerPredicate; def IsWin64 : Predicate<"Subtarget->isTargetWin64()">; def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">; def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">; @@ -383,7 +451,6 @@ def OptForSize : Predicate<"OptForSize">; def OptForSpeed : Predicate<"!OptForSize">; def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">; def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">; -def HasAES : Predicate<"Subtarget->hasAES()">; //===----------------------------------------------------------------------===// // X86 Instruction Format Definitions. @@ -418,40 +485,24 @@ def immSext8 : PatLeaf<(imm), [{ return immSext8(N); }]>; def i16immSExt8 : PatLeaf<(i16 immSext8)>; def i32immSExt8 : PatLeaf<(i32 immSext8)>; - -/// Load patterns: these constraint the match to the right address space. 
-def dsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) - if (PT->getAddressSpace() > 255) - return false; - return true; -}]>; - -def gsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) - return PT->getAddressSpace() == 256; - return false; +def i64immSExt8 : PatLeaf<(i64 immSext8)>; +def i64immSExt32 : PatLeaf<(i64 imm), [{ return i64immSExt32(N); }]>; +def i64immZExt32 : PatLeaf<(i64 imm), [{ + // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit + // unsignedsign extended field. + return (uint64_t)N->getZExtValue() == (uint32_t)N->getZExtValue(); }]>; -def fsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) - return PT->getAddressSpace() == 257; - return false; +def i64immZExt32SExt8 : PatLeaf<(i64 imm), [{ + uint64_t v = N->getZExtValue(); + return v == (uint32_t)v && (int32_t)v == (int8_t)v; }]>; - // Helper fragments for loads. // It's always safe to treat a anyext i16 load as a i32 load if the i16 is // known to be 32-bit aligned or better. Ditto for i8 to i16. def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{ LoadSDNode *LD = cast<LoadSDNode>(N); - if (const Value *Src = LD->getSrcValue()) - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) - if (PT->getAddressSpace() > 255) - return false; ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) return true; @@ -462,10 +513,6 @@ def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{ def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)),[{ LoadSDNode *LD = cast<LoadSDNode>(N); - if (const Value *Src = LD->getSrcValue()) - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) - if (PT->getAddressSpace() > 255) - return false; ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::EXTLOAD) return LD->getAlignment() >= 2 && !LD->isVolatile(); @@ -474,10 +521,6 @@ def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)),[{ def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{ LoadSDNode *LD = cast<LoadSDNode>(N); - if (const Value *Src = LD->getSrcValue()) - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) - if (PT->getAddressSpace() > 255) - return false; ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) return true; @@ -486,15 +529,18 @@ def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{ return false; }]>; -def loadi8 : PatFrag<(ops node:$ptr), (i8 (dsload node:$ptr))>; -def loadi64 : PatFrag<(ops node:$ptr), (i64 (dsload node:$ptr))>; -def loadf32 : PatFrag<(ops node:$ptr), (f32 (dsload node:$ptr))>; -def loadf64 : PatFrag<(ops node:$ptr), (f64 (dsload node:$ptr))>; -def loadf80 : PatFrag<(ops node:$ptr), (f80 (dsload node:$ptr))>; +def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>; +def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>; +def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>; +def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>; +def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>; def 
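The new i64immSExt32, i64immZExt32 and i64immZExt32SExt8 leaves above classify 64-bit immediates by whether they survive sign or zero extension from a narrower width. A small self-contained sketch of those checks (function names invented for illustration):

//===-- Illustrative sketch: does a 64-bit immediate fit a narrow field? --===//
#include <cstdint>
#include <iostream>

// Survives sign-extension from 32 bits?
bool fitsSExt32(uint64_t V) { return int64_t(int32_t(V)) == int64_t(V); }

// Survives zero-extension from 32 bits (high 32 bits all zero)?
bool fitsZExt32(uint64_t V) { return V == uint64_t(uint32_t(V)); }

// Zero-extends from 32 bits *and* the low 32 bits sign-extend from 8 bits,
// mirroring the i64immZExt32SExt8 predicate.
bool fitsZExt32SExt8(uint64_t V) {
  return V == uint64_t(uint32_t(V)) && int32_t(V) == int32_t(int8_t(V));
}

int main() {
  std::cout << std::boolalpha;
  std::cout << fitsSExt32(0xFFFFFFFFFFFFFFF0ull) << "\n";      // true  (-16)
  std::cout << fitsZExt32(0xFFFFFFFFFFFFFFF0ull) << "\n";      // false (high bits set)
  std::cout << fitsZExt32(0x00000000FFFFFFF0ull) << "\n";      // true
  std::cout << fitsZExt32SExt8(0x0000000000000070ull) << "\n"; // true  (0x70)
  std::cout << fitsZExt32SExt8(0x00000000000000F0ull) << "\n"; // false (0xF0 not sext8)
}
//===----------------------------------------------------------------------===//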
sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>; def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>; def sextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (sextloadi16 node:$ptr))>; +def sextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>; +def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>; +def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>; def zextloadi8i1 : PatFrag<(ops node:$ptr), (i8 (zextloadi1 node:$ptr))>; def zextloadi16i1 : PatFrag<(ops node:$ptr), (i16 (zextloadi1 node:$ptr))>; @@ -502,6 +548,10 @@ def zextloadi32i1 : PatFrag<(ops node:$ptr), (i32 (zextloadi1 node:$ptr))>; def zextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>; def zextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (zextloadi8 node:$ptr))>; def zextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (zextloadi16 node:$ptr))>; +def zextloadi64i1 : PatFrag<(ops node:$ptr), (i64 (zextloadi1 node:$ptr))>; +def zextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (zextloadi8 node:$ptr))>; +def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>; +def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>; def extloadi8i1 : PatFrag<(ops node:$ptr), (i8 (extloadi1 node:$ptr))>; def extloadi16i1 : PatFrag<(ops node:$ptr), (i16 (extloadi1 node:$ptr))>; @@ -509,6 +559,10 @@ def extloadi32i1 : PatFrag<(ops node:$ptr), (i32 (extloadi1 node:$ptr))>; def extloadi16i8 : PatFrag<(ops node:$ptr), (i16 (extloadi8 node:$ptr))>; def extloadi32i8 : PatFrag<(ops node:$ptr), (i32 (extloadi8 node:$ptr))>; def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>; +def extloadi64i1 : PatFrag<(ops node:$ptr), (i64 (extloadi1 node:$ptr))>; +def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>; +def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>; +def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>; // An 'and' node with a single use. @@ -524,66 +578,10 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{ return N->hasOneUse(); }]>; -// Treat an 'or' node is as an 'add' if the or'ed bits are known to be zero. -def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ - if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1))) - return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue()); - - unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits(); - APInt Mask = APInt::getAllOnesValue(BitWidth); - APInt KnownZero0, KnownOne0; - CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0); - APInt KnownZero1, KnownOne1; - CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0); - return (~KnownZero0 & ~KnownZero1) == 0; -}]>; - //===----------------------------------------------------------------------===// -// Instruction list... +// Instruction list. // -// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into -// a stack adjustment and the codegen must know that they may modify the stack -// pointer before prolog-epilog rewriting occurs. -// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become -// sub / add which can clobber EFLAGS. 
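The sextloadi64i*/zextloadi64i*/extloadi64i* fragments added above describe narrow loads widened to 64 bits. The tiny sketch below shows the difference the fragments encode, using plain C++ conversions:

//===-- Illustrative sketch: sign- vs zero-extending a narrow load --------===//
#include <cstdint>
#include <iostream>

// Load one byte and widen it to 64 bits with sign or zero extension.
int64_t loadSExt8(const uint8_t *P)  { return int64_t(int8_t(*P)); }
uint64_t loadZExt8(const uint8_t *P) { return uint64_t(*P); }

int main() {
  uint8_t Byte = 0xF0;                       // 240 unsigned, -16 signed
  std::cout << loadSExt8(&Byte) << "\n";     // -16
  std::cout << loadZExt8(&Byte) << "\n";     // 240
}
//===----------------------------------------------------------------------===//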
-let Defs = [ESP, EFLAGS], Uses = [ESP] in { -def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt), - "#ADJCALLSTACKDOWN", - [(X86callseq_start timm:$amt)]>, - Requires<[In32BitMode]>; -def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), - "#ADJCALLSTACKUP", - [(X86callseq_end timm:$amt1, timm:$amt2)]>, - Requires<[In32BitMode]>; -} - -// x86-64 va_start lowering magic. -let usesCustomInserter = 1 in { -def VASTART_SAVE_XMM_REGS : I<0, Pseudo, - (outs), - (ins GR8:$al, - i64imm:$regsavefi, i64imm:$offset, - variable_ops), - "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset", - [(X86vastart_save_xmm_regs GR8:$al, - imm:$regsavefi, - imm:$offset)]>; - -// Dynamic stack allocation yields _alloca call for Cygwin/Mingw targets. Calls -// to _alloca is needed to probe the stack when allocating more than 4k bytes in -// one go. Touching the stack at 4K increments is necessary to ensure that the -// guard pages used by the OS virtual memory manager are allocated in correct -// sequence. -// The main point of having separate instruction are extra unmodelled effects -// (compared to ordinary calls) like stack pointer change. - -let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in - def MINGW_ALLOCA : I<0, Pseudo, (outs), (ins), - "# dynamic stack allocation", - [(X86MingwAlloca)]>; -} - // Nop let neverHasSideEffects = 1 in { def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>; @@ -593,206 +591,22 @@ let neverHasSideEffects = 1 in { "nop{l}\t$zero", []>, TB; } -// Trap -let Uses = [EFLAGS] in { - def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>; -} -def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", - [(int_x86_int (i8 3))]>; -// FIXME: need to make sure that "int $3" matches int3 -def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", - [(int_x86_int imm:$trap)]>; -def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", []>, OpSize; -def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l}", []>; - -// PIC base construction. This expands to code that looks like this: -// call $next_inst -// popl %destreg" -let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in - def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label), - "", []>; - -//===----------------------------------------------------------------------===// -// Control Flow Instructions. -// - -// Return instructions. -let isTerminator = 1, isReturn = 1, isBarrier = 1, - hasCtrlDep = 1, FPForm = SpecialFP in { - def RET : I <0xC3, RawFrm, (outs), (ins variable_ops), - "ret", - [(X86retflag 0)]>; - def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops), - "ret\t$amt", - [(X86retflag timm:$amt)]>; - def LRET : I <0xCB, RawFrm, (outs), (ins), - "lret", []>; - def LRETI : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt), - "lret\t$amt", []>; -} - -// Unconditional branches. -let isBarrier = 1, isBranch = 1, isTerminator = 1 in { - def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst), - "jmp\t$dst", [(br bb:$dst)]>; - def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst), - "jmp\t$dst", []>; -} - -// Conditional Branches. 
-let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in { - multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> { - def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, []>; - def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm, - [(X86brcond bb:$dst, Cond, EFLAGS)]>, TB; - } -} - -defm JO : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>; -defm JNO : ICBr<0x71, 0x81, "jno\t$dst" , X86_COND_NO>; -defm JB : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>; -defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>; -defm JE : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>; -defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>; -defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>; -defm JA : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>; -defm JS : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>; -defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>; -defm JP : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>; -defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>; -defm JL : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>; -defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>; -defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>; -defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>; - -// FIXME: What about the CX/RCX versions of this instruction? -let Uses = [ECX], isBranch = 1, isTerminator = 1 in - def JCXZ8 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), - "jcxz\t$dst", []>; - - -// Indirect branches -let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { - def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst", - [(brind GR32:$dst)]>, Requires<[In32BitMode]>; - def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst", - [(brind (loadi32 addr:$dst))]>, Requires<[In32BitMode]>; - - def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs), - (ins i16imm:$off, i16imm:$seg), - "ljmp{w}\t{$seg, $off|$off, $seg}", []>, OpSize; - def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs), - (ins i32imm:$off, i16imm:$seg), - "ljmp{l}\t{$seg, $off|$off, $seg}", []>; - - def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst), - "ljmp{w}\t{*}$dst", []>, OpSize; - def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst), - "ljmp{l}\t{*}$dst", []>; -} - - -// Loop instructions - -def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>; -def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>; -def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>; - -//===----------------------------------------------------------------------===// -// Call Instructions... -// -let isCall = 1 in - // All calls clobber the non-callee saved registers. ESP is marked as - // a use to prevent stack-pointer assignments that appear immediately - // before calls from potentially appearing dead. Uses for argument - // registers are added manually. 
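The ICBr multiclass being removed above pairs each condition with a one-byte short form (opcode 0x70 plus the condition code, rel8 operand) and a near form behind the 0x0F prefix (0x80 plus the condition code, rel32 operand). A hedged sketch of that encoding pairing, writing raw bytes; the emitJcc helper is invented for illustration and ignores displacement-range checking:

//===-- Illustrative sketch: short vs near conditional-branch encodings ---===//
#include <cstdint>
#include <iostream>
#include <vector>

// 'CC' is the 4-bit condition code (0 = JO ... 0xF = JG); the displacement is
// PC-relative. Short form: 0x70+cc, rel8. Near form: 0x0F, 0x80+cc, rel32 (LE).
std::vector<uint8_t> emitJcc(uint8_t CC, int32_t Disp, bool Short) {
  std::vector<uint8_t> Bytes;
  if (Short) {
    Bytes.push_back(0x70 + CC);
    Bytes.push_back(uint8_t(int8_t(Disp)));     // assumes Disp fits in 8 bits
  } else {
    Bytes.push_back(0x0F);
    Bytes.push_back(0x80 + CC);
    for (int i = 0; i < 4; ++i)
      Bytes.push_back(uint8_t(Disp >> (8 * i)));
  }
  return Bytes;
}

int main() {
  // JE (cc = 4) forward by 0x10 bytes, both encodings.
  for (uint8_t B : emitJcc(4, 0x10, true))  std::cout << std::hex << int(B) << ' ';
  std::cout << '\n';                         // 74 10
  for (uint8_t B : emitJcc(4, 0x10, false)) std::cout << std::hex << int(B) << ' ';
  std::cout << '\n';                         // f 84 10 0 0 0
}
//===----------------------------------------------------------------------===//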
- let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, - MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, - XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], - Uses = [ESP] in { - def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm, - (outs), (ins i32imm_pcrel:$dst,variable_ops), - "call\t$dst", []>; - def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops), - "call\t{*}$dst", [(X86call GR32:$dst)]>; - def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops), - "call\t{*}$dst", [(X86call (loadi32 addr:$dst))]>; - - def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs), - (ins i16imm:$off, i16imm:$seg), - "lcall{w}\t{$seg, $off|$off, $seg}", []>, OpSize; - def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs), - (ins i32imm:$off, i16imm:$seg), - "lcall{l}\t{$seg, $off|$off, $seg}", []>; - - def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst), - "lcall{w}\t{*}$dst", []>, OpSize; - def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst), - "lcall{l}\t{*}$dst", []>; - - // callw for 16 bit code for the assembler. - let isAsmParserOnly = 1 in - def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm, - (outs), (ins i16imm_pcrel:$dst, variable_ops), - "callw\t$dst", []>, OpSize; - } // Constructing a stack frame. +def ENTER : Ii16<0xC8, RawFrmImm8, (outs), (ins i16imm:$len, i8imm:$lvl), + "enter\t$len, $lvl", []>; -def ENTER : I<0xC8, RawFrm, (outs), (ins i16imm:$len, i8imm:$lvl), - "enter\t$len, $lvl", []>; - -// Tail call stuff. - -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, - isCodeGenOnly = 1 in - let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, - MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, - XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], - Uses = [ESP] in { - def TCRETURNdi : I<0, Pseudo, (outs), - (ins i32imm_pcrel:$dst, i32imm:$offset, variable_ops), - "#TC_RETURN $dst $offset", []>; - def TCRETURNri : I<0, Pseudo, (outs), - (ins GR32_TC:$dst, i32imm:$offset, variable_ops), - "#TC_RETURN $dst $offset", []>; - let mayLoad = 1 in - def TCRETURNmi : I<0, Pseudo, (outs), - (ins i32mem_TC:$dst, i32imm:$offset, variable_ops), - "#TC_RETURN $dst $offset", []>; - - // FIXME: The should be pseudo instructions that are lowered when going to - // mcinst. - def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs), - (ins i32imm_pcrel:$dst, variable_ops), - "jmp\t$dst # TAILCALL", - []>; - def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops), - "", []>; // FIXME: Remove encoding when JIT is dead. - let mayLoad = 1 in - def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops), - "jmp{l}\t{*}$dst # TAILCALL", []>; -} - -//===----------------------------------------------------------------------===// -// Miscellaneous Instructions... 
-// let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, neverHasSideEffects=1 in def LEAVE : I<0xC9, RawFrm, (outs), (ins), "leave", []>, Requires<[In32BitMode]>; -def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), - "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS; -let mayLoad = 1 in -def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), - "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS; -def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), - "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS; -let mayLoad = 1 in -def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), - "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS; +let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in +def LEAVE64 : I<0xC9, RawFrm, + (outs), (ins), "leave", []>, Requires<[In64BitMode]>; + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions. +// let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in { let mayLoad = 1 in { @@ -805,6 +619,10 @@ def POP16rmm: I<0x8F, MRM0m, (outs i16mem:$dst), (ins), "pop{w}\t$dst", []>, OpSize; def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>; def POP32rmm: I<0x8F, MRM0m, (outs i32mem:$dst), (ins), "pop{l}\t$dst", []>; + +def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize; +def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", []>, + Requires<[In32BitMode]>; } let mayStore = 1 in { @@ -817,29 +635,54 @@ def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[]>, OpSize; def PUSH32rmr: I<0xFF, MRM6r, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>; def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[]>; -} -} -let Defs = [ESP], Uses = [ESP], neverHasSideEffects = 1, mayStore = 1 in { -def PUSHi8 : Ii8<0x6a, RawFrm, (outs), (ins i32i8imm:$imm), +def PUSHi8 : Ii8<0x6a, RawFrm, (outs), (ins i32i8imm:$imm), "push{l}\t$imm", []>; -def PUSHi16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), +def PUSHi16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), "push{w}\t$imm", []>, OpSize; -def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), +def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), "push{l}\t$imm", []>; -} -let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, neverHasSideEffects=1 in { -def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize; -def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", []>, - Requires<[In32BitMode]>; -} -let Defs = [ESP], Uses = [ESP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in { def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", []>, OpSize; def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", []>, Requires<[In32BitMode]>; + +} +} + +let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in { +let mayLoad = 1 in { +def POP64r : I<0x58, AddRegFrm, + (outs GR64:$reg), (ins), "pop{q}\t$reg", []>; +def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>; +def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", []>; +} +let mayStore = 1 in { +def PUSH64r : I<0x50, AddRegFrm, + (outs), (ins GR64:$reg), "push{q}\t$reg", []>; +def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>; +def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>; +} } +let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in { +def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm), + "push{q}\t$imm", []>; 
+def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), + "push{q}\t$imm", []>; +def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm), + "push{q}\t$imm", []>; +} + +let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in +def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", []>, + Requires<[In64BitMode]>; +let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in +def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", []>, + Requires<[In64BitMode]>; + + + let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP], mayLoad=1, neverHasSideEffects=1 in { def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l}", []>, @@ -851,12 +694,16 @@ def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l}", []>, Requires<[In32BitMode]>; } -let Uses = [EFLAGS], Constraints = "$src = $dst" in // GR32 = bswap GR32 - def BSWAP32r : I<0xC8, AddRegFrm, - (outs GR32:$dst), (ins GR32:$src), - "bswap{l}\t$dst", - [(set GR32:$dst, (bswap GR32:$src))]>, TB; +let Constraints = "$src = $dst" in { // GR32 = bswap GR32 +def BSWAP32r : I<0xC8, AddRegFrm, + (outs GR32:$dst), (ins GR32:$src), + "bswap{l}\t$dst", + [(set GR32:$dst, (bswap GR32:$src))]>, TB; +def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src), + "bswap{q}\t$dst", + [(set GR64:$dst, (bswap GR64:$src))]>, TB; +} // Constraints = "$src = $dst" // Bit scan instructions. let Defs = [EFLAGS] in { @@ -873,6 +720,12 @@ def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "bsf{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))]>, TB; +def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "bsf{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))]>, TB; +def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "bsf{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))]>, TB; def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "bsr{w}\t{$src, $dst|$dst, $src}", @@ -887,44 +740,23 @@ def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "bsr{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))]>, TB; +def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "bsr{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))]>, TB; +def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "bsr{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))]>, TB; } // Defs = [EFLAGS] -let neverHasSideEffects = 1 in -def LEA16r : I<0x8D, MRMSrcMem, - (outs GR16:$dst), (ins i32mem:$src), - "lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize; -let isReMaterializable = 1 in -def LEA32r : I<0x8D, MRMSrcMem, - (outs GR32:$dst), (ins i32mem:$src), - "lea{l}\t{$src|$dst}, {$dst|$src}", - [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>; - -let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in { -def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}", - [(X86rep_movs i8)]>, REP; -def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}", - [(X86rep_movs i16)]>, REP, OpSize; -def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}", - [(X86rep_movs i32)]>, REP; -} // These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI 
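The BSWAP64r, BSF64 and BSR64 definitions added above compute a byte reversal and the indices of the lowest and highest set bits. The sketch below expresses the same operations with GCC/Clang builtins (assumed available; other compilers would need their own intrinsics):

//===-- Illustrative sketch: what BSWAP / BSF / BSR compute ---------------===//
#include <cstdint>
#include <iostream>

uint64_t byteSwap(uint64_t V)   { return __builtin_bswap64(V); }
unsigned bitScanFwd(uint64_t V) { return __builtin_ctzll(V); }       // lowest set bit
unsigned bitScanRev(uint64_t V) { return 63 - __builtin_clzll(V); }  // highest set bit

int main() {
  uint64_t V = 0x0000001200003400ull;
  std::cout << std::hex << byteSwap(V) << "\n";   // bytes reversed: 34000012000000
  // Like the hardware instructions, the scans are only meaningful for V != 0.
  std::cout << std::dec << bitScanFwd(V) << " " << bitScanRev(V) << "\n"; // 10 36
}
//===----------------------------------------------------------------------===//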
let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in { def MOVSB : I<0xA4, RawFrm, (outs), (ins), "{movsb}", []>; def MOVSW : I<0xA5, RawFrm, (outs), (ins), "{movsw}", []>, OpSize; def MOVSD : I<0xA5, RawFrm, (outs), (ins), "{movsl|movsd}", []>; +def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", []>; } -let Defs = [ECX,EDI], Uses = [AL,ECX,EDI], isCodeGenOnly = 1 in -def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}", - [(X86rep_stos i8)]>, REP; -let Defs = [ECX,EDI], Uses = [AX,ECX,EDI], isCodeGenOnly = 1 in -def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}", - [(X86rep_stos i16)]>, REP, OpSize; -let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI], isCodeGenOnly = 1 in -def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}", - [(X86rep_stos i32)]>, REP; - // These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI let Defs = [EDI], Uses = [AL,EDI,EFLAGS] in def STOSB : I<0xAA, RawFrm, (outs), (ins), "{stosb}", []>; @@ -932,91 +764,24 @@ let Defs = [EDI], Uses = [AX,EDI,EFLAGS] in def STOSW : I<0xAB, RawFrm, (outs), (ins), "{stosw}", []>, OpSize; let Defs = [EDI], Uses = [EAX,EDI,EFLAGS] in def STOSD : I<0xAB, RawFrm, (outs), (ins), "{stosl|stosd}", []>; +let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in +def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", []>; def SCAS8 : I<0xAE, RawFrm, (outs), (ins), "scas{b}", []>; def SCAS16 : I<0xAF, RawFrm, (outs), (ins), "scas{w}", []>, OpSize; def SCAS32 : I<0xAF, RawFrm, (outs), (ins), "scas{l}", []>; +def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", []>; def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmps{b}", []>; def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmps{w}", []>, OpSize; def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l}", []>; - -let Defs = [RAX, RDX] in -def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, - TB; - -let Defs = [RAX, RCX, RDX] in -def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB; - -let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in { -def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB; -} - -def SYSCALL : I<0x05, RawFrm, - (outs), (ins), "syscall", []>, TB; -def SYSRET : I<0x07, RawFrm, - (outs), (ins), "sysret", []>, TB; -def SYSENTER : I<0x34, RawFrm, - (outs), (ins), "sysenter", []>, TB; -def SYSEXIT : I<0x35, RawFrm, - (outs), (ins), "sysexit", []>, TB, Requires<[In32BitMode]>; - -def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>; +def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>; //===----------------------------------------------------------------------===// -// Input/Output Instructions... +// Move Instructions. 
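The string-instruction definitions around here (MOVS/STOS and the removed REP_* pseudos) model the repeated copy and fill loops driven by ECX, ESI and EDI. A rough, standalone sketch of that behavior with the direction flag clear, using invented helper names:

//===-- Illustrative sketch: approximate rep stos / rep movs semantics ----===//
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// "rep stosl" with DF clear: store EAX at [EDI], advance by 4, repeat ECX times.
void repStos32(uint32_t *Dst, uint32_t Value, size_t Count) {
  while (Count--) *Dst++ = Value;
}

// "rep movsb": copy ECX bytes from [ESI] to [EDI].
void repMovs8(uint8_t *Dst, const uint8_t *Src, size_t Count) {
  while (Count--) *Dst++ = *Src++;
}

int main() {
  std::vector<uint32_t> Buf(4);
  repStos32(Buf.data(), 0xDEADBEEF, Buf.size());
  std::cout << std::hex << Buf[3] << "\n";      // deadbeef

  uint8_t Src[4] = {1, 2, 3, 4}, Dst[4] = {};
  repMovs8(Dst, Src, 4);
  std::cout << std::dec << int(Dst[2]) << "\n"; // 3
}
//===----------------------------------------------------------------------===//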
// -let Defs = [AL], Uses = [DX] in -def IN8rr : I<0xEC, RawFrm, (outs), (ins), - "in{b}\t{%dx, %al|%AL, %DX}", []>; -let Defs = [AX], Uses = [DX] in -def IN16rr : I<0xED, RawFrm, (outs), (ins), - "in{w}\t{%dx, %ax|%AX, %DX}", []>, OpSize; -let Defs = [EAX], Uses = [DX] in -def IN32rr : I<0xED, RawFrm, (outs), (ins), - "in{l}\t{%dx, %eax|%EAX, %DX}", []>; - -let Defs = [AL] in -def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins i16i8imm:$port), - "in{b}\t{$port, %al|%AL, $port}", []>; -let Defs = [AX] in -def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins i16i8imm:$port), - "in{w}\t{$port, %ax|%AX, $port}", []>, OpSize; -let Defs = [EAX] in -def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins i16i8imm:$port), - "in{l}\t{$port, %eax|%EAX, $port}", []>; - -let Uses = [DX, AL] in -def OUT8rr : I<0xEE, RawFrm, (outs), (ins), - "out{b}\t{%al, %dx|%DX, %AL}", []>; -let Uses = [DX, AX] in -def OUT16rr : I<0xEF, RawFrm, (outs), (ins), - "out{w}\t{%ax, %dx|%DX, %AX}", []>, OpSize; -let Uses = [DX, EAX] in -def OUT32rr : I<0xEF, RawFrm, (outs), (ins), - "out{l}\t{%eax, %dx|%DX, %EAX}", []>; - -let Uses = [AL] in -def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins i16i8imm:$port), - "out{b}\t{%al, $port|$port, %AL}", []>; -let Uses = [AX] in -def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins i16i8imm:$port), - "out{w}\t{%ax, $port|$port, %AX}", []>, OpSize; -let Uses = [EAX] in -def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i16i8imm:$port), - "out{l}\t{%eax, $port|$port, %EAX}", []>; - -def IN8 : I<0x6C, RawFrm, (outs), (ins), - "ins{b}", []>; -def IN16 : I<0x6D, RawFrm, (outs), (ins), - "ins{w}", []>, OpSize; -def IN32 : I<0x6D, RawFrm, (outs), (ins), - "ins{l}", []>; -//===----------------------------------------------------------------------===// -// Move Instructions... -// let neverHasSideEffects = 1 in { def MOV8rr : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src), "mov{b}\t{$src, $dst|$dst, $src}", []>; @@ -1024,6 +789,8 @@ def MOV16rr : I<0x89, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", []>; +def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), + "mov{q}\t{$src, $dst|$dst, $src}", []>; } let isReMaterializable = 1, isAsCheapAsAMove = 1 in { def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src), @@ -1035,6 +802,12 @@ def MOV16ri : Ii16<0xB8, AddRegFrm, (outs GR16:$dst), (ins i16imm:$src), def MOV32ri : Ii32<0xB8, AddRegFrm, (outs GR32:$dst), (ins i32imm:$src), "mov{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, imm:$src)]>; +def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src), + "movabs{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, imm:$src)]>; +def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src), + "mov{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, i64immSExt32:$src)]>; } def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src), @@ -1046,6 +819,9 @@ def MOV16mi : Ii16<0xC7, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src), def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src), "mov{l}\t{$src, $dst|$dst, $src}", [(store (i32 imm:$src), addr:$dst)]>; +def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src), + "mov{q}\t{$src, $dst|$dst, $src}", + [(store i64immSExt32:$src, addr:$dst)]>; /// moffs8, moffs16 and moffs32 versions of moves. The immediate is a /// 32-bit offset from the PC. These are only valid in x86-32 mode. 
@@ -1067,24 +843,22 @@ def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins), def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins), "mov{l}\t{%eax, $dst|$dst, %eax}", []>, Requires<[In32BitMode]>; - -// Moves to and from segment registers -def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; -def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src), - "mov{l}\t{$src, $dst|$dst, $src}", []>; -def MOV16ms : I<0x8C, MRMDestMem, (outs i16mem:$dst), (ins SEGMENT_REG:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; -def MOV32ms : I<0x8C, MRMDestMem, (outs i32mem:$dst), (ins SEGMENT_REG:$src), - "mov{l}\t{$src, $dst|$dst, $src}", []>; -def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; -def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src), - "mov{l}\t{$src, $dst|$dst, $src}", []>; -def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; -def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src), - "mov{l}\t{$src, $dst|$dst, $src}", []>; + +// FIXME: These definitions are utterly broken +// Just leave them commented out for now because they're useless outside +// of the large code model, and most compilers won't generate the instructions +// in question. +/* +def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins offset8:$src), + "mov{q}\t{$src, %rax|%rax, $src}", []>; +def MOV64o64a : RIi32<0xA1, RawFrm, (outs), (ins offset64:$src), + "mov{q}\t{$src, %rax|%rax, $src}", []>; +def MOV64ao8 : RIi8<0xA2, RawFrm, (outs offset8:$dst), (ins), + "mov{q}\t{%rax, $dst|$dst, %rax}", []>; +def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins), + "mov{q}\t{%rax, $dst|$dst, %rax}", []>; +*/ + let isCodeGenOnly = 1 in { def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src), @@ -1093,6 +867,8 @@ def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", []>; +def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "mov{q}\t{$src, $dst|$dst, $src}", []>; } let canFoldAsLoad = 1, isReMaterializable = 1 in { @@ -1105,6 +881,9 @@ def MOV16rm : I<0x8B, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), def MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "mov{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (loadi32 addr:$src))]>; +def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "mov{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (load addr:$src))]>; } def MOV8mr : I<0x88, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src), @@ -1116,24 +895,9 @@ def MOV16mr : I<0x89, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", [(store GR32:$src, addr:$dst)]>; - -/// Versions of MOV32rr, MOV32rm, and MOV32mr for i32mem_TC and GR32_TC. 
-let isCodeGenOnly = 1 in { -let neverHasSideEffects = 1 in -def MOV32rr_TC : I<0x89, MRMDestReg, (outs GR32_TC:$dst), (ins GR32_TC:$src), - "mov{l}\t{$src, $dst|$dst, $src}", []>; - -let mayLoad = 1, - canFoldAsLoad = 1, isReMaterializable = 1 in -def MOV32rm_TC : I<0x8B, MRMSrcMem, (outs GR32_TC:$dst), (ins i32mem_TC:$src), - "mov{l}\t{$src, $dst|$dst, $src}", - []>; - -let mayStore = 1 in -def MOV32mr_TC : I<0x89, MRMDestMem, (outs), (ins i32mem_TC:$dst, GR32_TC:$src), - "mov{l}\t{$src, $dst|$dst, $src}", - []>; -} +def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), + "mov{q}\t{$src, $dst|$dst, $src}", + [(store GR64:$src, addr:$dst)]>; // Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so // that they can be used for copying and storing h registers, which can't be @@ -1154,2219 +918,6 @@ def MOV8rm_NOREX : I<0x8A, MRMSrcMem, "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>; } -// Moves to and from debug registers -def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src), - "mov{l}\t{$src, $dst|$dst, $src}", []>, TB; -def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src), - "mov{l}\t{$src, $dst|$dst, $src}", []>, TB; - -// Moves to and from control registers -def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src), - "mov{l}\t{$src, $dst|$dst, $src}", []>, TB; -def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src), - "mov{l}\t{$src, $dst|$dst, $src}", []>, TB; - -//===----------------------------------------------------------------------===// -// Fixed-Register Multiplication and Division Instructions... -// - -// Extra precision multiplication - -// AL is really implied by AX, but the registers in Defs must match the -// SDNode results (i8, i32). -let Defs = [AL,EFLAGS,AX], Uses = [AL] in -def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src", - // FIXME: Used for 8-bit mul, ignore result upper 8 bits. - // This probably ought to be moved to a def : Pat<> if the - // syntax can be accepted. - [(set AL, (mul AL, GR8:$src)), - (implicit EFLAGS)]>; // AL,AH = AL*GR8 - -let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in -def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src), - "mul{w}\t$src", - []>, OpSize; // AX,DX = AX*GR16 - -let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in -def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src), - "mul{l}\t$src", - []>; // EAX,EDX = EAX*GR32 - -let Defs = [AL,EFLAGS,AX], Uses = [AL] in -def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), - "mul{b}\t$src", - // FIXME: Used for 8-bit mul, ignore result upper 8 bits. - // This probably ought to be moved to a def : Pat<> if the - // syntax can be accepted. 
- [(set AL, (mul AL, (loadi8 addr:$src))), - (implicit EFLAGS)]>; // AL,AH = AL*[mem8] - -let mayLoad = 1, neverHasSideEffects = 1 in { -let Defs = [AX,DX,EFLAGS], Uses = [AX] in -def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src), - "mul{w}\t$src", - []>, OpSize; // AX,DX = AX*[mem16] - -let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in -def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src), - "mul{l}\t$src", - []>; // EAX,EDX = EAX*[mem32] -} - -let neverHasSideEffects = 1 in { -let Defs = [AL,EFLAGS,AX], Uses = [AL] in -def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>; - // AL,AH = AL*GR8 -let Defs = [AX,DX,EFLAGS], Uses = [AX] in -def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", []>, - OpSize; // AX,DX = AX*GR16 -let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in -def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>; - // EAX,EDX = EAX*GR32 -let mayLoad = 1 in { -let Defs = [AL,EFLAGS,AX], Uses = [AL] in -def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src), - "imul{b}\t$src", []>; // AL,AH = AL*[mem8] -let Defs = [AX,DX,EFLAGS], Uses = [AX] in -def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src), - "imul{w}\t$src", []>, OpSize; // AX,DX = AX*[mem16] -let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in -def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src), - "imul{l}\t$src", []>; // EAX,EDX = EAX*[mem32] -} -} // neverHasSideEffects - -// unsigned division/remainder -let Defs = [AL,EFLAGS,AX], Uses = [AX] in -def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH - "div{b}\t$src", []>; -let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in -def DIV16r : I<0xF7, MRM6r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX - "div{w}\t$src", []>, OpSize; -let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in -def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX - "div{l}\t$src", []>; -let mayLoad = 1 in { -let Defs = [AL,EFLAGS,AX], Uses = [AX] in -def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH - "div{b}\t$src", []>; -let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in -def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX - "div{w}\t$src", []>, OpSize; -let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in - // EDX:EAX/[mem32] = EAX,EDX -def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src), - "div{l}\t$src", []>; -} - -// Signed division/remainder. -let Defs = [AL,EFLAGS,AX], Uses = [AX] in -def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH - "idiv{b}\t$src", []>; -let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in -def IDIV16r: I<0xF7, MRM7r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX - "idiv{w}\t$src", []>, OpSize; -let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in -def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX - "idiv{l}\t$src", []>; -let mayLoad = 1, mayLoad = 1 in { -let Defs = [AL,EFLAGS,AX], Uses = [AX] in -def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH - "idiv{b}\t$src", []>; -let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in -def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX - "idiv{w}\t$src", []>, OpSize; -let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in -def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src), - // EDX:EAX/[mem32] = EAX,EDX - "idiv{l}\t$src", []>; -} - -//===----------------------------------------------------------------------===// -// Two address Instructions. 
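The fixed-register multiply and divide definitions above take their operands implicitly: an 8-bit mul reads AL and writes the widened product to AX, and the divides consume the double-width dividend from DX:AX (or EDX:EAX), leaving the quotient in AX/EAX and the remainder in DX/EDX. That is why the registers live in the Defs/Uses lists rather than in (outs)/(ins). A small behavioral sketch in plain C++ (not tied to any LLVM API; a real div also faults when the quotient overflows, which is omitted here):

// Implicit-register semantics modeled by the Defs/Uses lists above:
//   MUL r/m8  : AX = AL * src                            (widening)
//   DIV r/m16 : AX = (DX:AX) / src, DX = (DX:AX) % src   (unsigned)
#include <cstdint>
#include <cstdio>

struct Regs { uint16_t ax; uint16_t dx; };

static void mul8(Regs &R, uint8_t Src) {
  uint8_t AL = static_cast<uint8_t>(R.ax);
  R.ax = static_cast<uint16_t>(AL) * Src;        // AH:AL receives the product
}

static void div16(Regs &R, uint16_t Src) {
  uint32_t Dividend = (static_cast<uint32_t>(R.dx) << 16) | R.ax;
  R.ax = static_cast<uint16_t>(Dividend / Src);  // quotient  -> AX
  R.dx = static_cast<uint16_t>(Dividend % Src);  // remainder -> DX
}

int main() {
  Regs R{200, 0};
  mul8(R, 7);                                    // AL = 200, product 1400
  std::printf("AX=%u\n", R.ax);                  // 1400
  R.dx = 1; R.ax = 0;                            // DX:AX = 65536
  div16(R, 10);
  std::printf("AX=%u DX=%u\n", R.ax, R.dx);      // 6553 and 6
  return 0;
}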
-// -let Constraints = "$src1 = $dst" in { - -// Conditional moves -let Uses = [EFLAGS] in { - -let Predicates = [HasCMov] in { -let isCommutable = 1 in { -def CMOVB16rr : I<0x42, MRMSrcReg, // if <u, GR16 = GR16 - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovb{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_B, EFLAGS))]>, - TB, OpSize; -def CMOVB32rr : I<0x42, MRMSrcReg, // if <u, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovb{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_B, EFLAGS))]>, - TB; -def CMOVAE16rr: I<0x43, MRMSrcReg, // if >=u, GR16 = GR16 - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovae{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_AE, EFLAGS))]>, - TB, OpSize; -def CMOVAE32rr: I<0x43, MRMSrcReg, // if >=u, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovae{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_AE, EFLAGS))]>, - TB; -def CMOVE16rr : I<0x44, MRMSrcReg, // if ==, GR16 = GR16 - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmove{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_E, EFLAGS))]>, - TB, OpSize; -def CMOVE32rr : I<0x44, MRMSrcReg, // if ==, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmove{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_E, EFLAGS))]>, - TB; -def CMOVNE16rr: I<0x45, MRMSrcReg, // if !=, GR16 = GR16 - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovne{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_NE, EFLAGS))]>, - TB, OpSize; -def CMOVNE32rr: I<0x45, MRMSrcReg, // if !=, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovne{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_NE, EFLAGS))]>, - TB; -def CMOVBE16rr: I<0x46, MRMSrcReg, // if <=u, GR16 = GR16 - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovbe{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_BE, EFLAGS))]>, - TB, OpSize; -def CMOVBE32rr: I<0x46, MRMSrcReg, // if <=u, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovbe{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_BE, EFLAGS))]>, - TB; -def CMOVA16rr : I<0x47, MRMSrcReg, // if >u, GR16 = GR16 - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmova{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_A, EFLAGS))]>, - TB, OpSize; -def CMOVA32rr : I<0x47, MRMSrcReg, // if >u, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmova{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_A, EFLAGS))]>, - TB; -def CMOVL16rr : I<0x4C, MRMSrcReg, // if <s, GR16 = GR16 - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovl{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_L, EFLAGS))]>, - TB, OpSize; -def CMOVL32rr : I<0x4C, MRMSrcReg, // if <s, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovl{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_L, EFLAGS))]>, - TB; -def CMOVGE16rr: I<0x4D, MRMSrcReg, // if >=s, GR16 = GR16 - 
(outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovge{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_GE, EFLAGS))]>, - TB, OpSize; -def CMOVGE32rr: I<0x4D, MRMSrcReg, // if >=s, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovge{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_GE, EFLAGS))]>, - TB; -def CMOVLE16rr: I<0x4E, MRMSrcReg, // if <=s, GR16 = GR16 - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovle{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_LE, EFLAGS))]>, - TB, OpSize; -def CMOVLE32rr: I<0x4E, MRMSrcReg, // if <=s, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovle{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_LE, EFLAGS))]>, - TB; -def CMOVG16rr : I<0x4F, MRMSrcReg, // if >s, GR16 = GR16 - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovg{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_G, EFLAGS))]>, - TB, OpSize; -def CMOVG32rr : I<0x4F, MRMSrcReg, // if >s, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovg{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_G, EFLAGS))]>, - TB; -def CMOVS16rr : I<0x48, MRMSrcReg, // if signed, GR16 = GR16 - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovs{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_S, EFLAGS))]>, - TB, OpSize; -def CMOVS32rr : I<0x48, MRMSrcReg, // if signed, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovs{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_S, EFLAGS))]>, - TB; -def CMOVNS16rr: I<0x49, MRMSrcReg, // if !signed, GR16 = GR16 - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovns{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_NS, EFLAGS))]>, - TB, OpSize; -def CMOVNS32rr: I<0x49, MRMSrcReg, // if !signed, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovns{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_NS, EFLAGS))]>, - TB; -def CMOVP16rr : I<0x4A, MRMSrcReg, // if parity, GR16 = GR16 - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovp{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_P, EFLAGS))]>, - TB, OpSize; -def CMOVP32rr : I<0x4A, MRMSrcReg, // if parity, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovp{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_P, EFLAGS))]>, - TB; -def CMOVNP16rr : I<0x4B, MRMSrcReg, // if !parity, GR16 = GR16 - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovnp{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_NP, EFLAGS))]>, - TB, OpSize; -def CMOVNP32rr : I<0x4B, MRMSrcReg, // if !parity, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovnp{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_NP, EFLAGS))]>, - TB; -def CMOVO16rr : I<0x40, MRMSrcReg, // if overflow, GR16 = GR16 - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovo{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_O, EFLAGS))]>, - TB, 
OpSize; -def CMOVO32rr : I<0x40, MRMSrcReg, // if overflow, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovo{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_O, EFLAGS))]>, - TB; -def CMOVNO16rr : I<0x41, MRMSrcReg, // if !overflow, GR16 = GR16 - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovno{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_NO, EFLAGS))]>, - TB, OpSize; -def CMOVNO32rr : I<0x41, MRMSrcReg, // if !overflow, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovno{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_NO, EFLAGS))]>, - TB; -} // isCommutable = 1 - -def CMOVB16rm : I<0x42, MRMSrcMem, // if <u, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovb{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_B, EFLAGS))]>, - TB, OpSize; -def CMOVB32rm : I<0x42, MRMSrcMem, // if <u, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovb{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_B, EFLAGS))]>, - TB; -def CMOVAE16rm: I<0x43, MRMSrcMem, // if >=u, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovae{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_AE, EFLAGS))]>, - TB, OpSize; -def CMOVAE32rm: I<0x43, MRMSrcMem, // if >=u, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovae{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_AE, EFLAGS))]>, - TB; -def CMOVE16rm : I<0x44, MRMSrcMem, // if ==, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmove{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_E, EFLAGS))]>, - TB, OpSize; -def CMOVE32rm : I<0x44, MRMSrcMem, // if ==, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmove{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_E, EFLAGS))]>, - TB; -def CMOVNE16rm: I<0x45, MRMSrcMem, // if !=, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovne{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_NE, EFLAGS))]>, - TB, OpSize; -def CMOVNE32rm: I<0x45, MRMSrcMem, // if !=, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovne{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_NE, EFLAGS))]>, - TB; -def CMOVBE16rm: I<0x46, MRMSrcMem, // if <=u, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovbe{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_BE, EFLAGS))]>, - TB, OpSize; -def CMOVBE32rm: I<0x46, MRMSrcMem, // if <=u, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovbe{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_BE, EFLAGS))]>, - TB; -def CMOVA16rm : I<0x47, MRMSrcMem, // if >u, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmova{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_A, EFLAGS))]>, - TB, 
OpSize; -def CMOVA32rm : I<0x47, MRMSrcMem, // if >u, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmova{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_A, EFLAGS))]>, - TB; -def CMOVL16rm : I<0x4C, MRMSrcMem, // if <s, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovl{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_L, EFLAGS))]>, - TB, OpSize; -def CMOVL32rm : I<0x4C, MRMSrcMem, // if <s, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovl{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_L, EFLAGS))]>, - TB; -def CMOVGE16rm: I<0x4D, MRMSrcMem, // if >=s, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovge{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_GE, EFLAGS))]>, - TB, OpSize; -def CMOVGE32rm: I<0x4D, MRMSrcMem, // if >=s, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovge{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_GE, EFLAGS))]>, - TB; -def CMOVLE16rm: I<0x4E, MRMSrcMem, // if <=s, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovle{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_LE, EFLAGS))]>, - TB, OpSize; -def CMOVLE32rm: I<0x4E, MRMSrcMem, // if <=s, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovle{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_LE, EFLAGS))]>, - TB; -def CMOVG16rm : I<0x4F, MRMSrcMem, // if >s, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovg{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_G, EFLAGS))]>, - TB, OpSize; -def CMOVG32rm : I<0x4F, MRMSrcMem, // if >s, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovg{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_G, EFLAGS))]>, - TB; -def CMOVS16rm : I<0x48, MRMSrcMem, // if signed, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovs{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_S, EFLAGS))]>, - TB, OpSize; -def CMOVS32rm : I<0x48, MRMSrcMem, // if signed, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovs{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_S, EFLAGS))]>, - TB; -def CMOVNS16rm: I<0x49, MRMSrcMem, // if !signed, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovns{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_NS, EFLAGS))]>, - TB, OpSize; -def CMOVNS32rm: I<0x49, MRMSrcMem, // if !signed, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovns{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_NS, EFLAGS))]>, - TB; -def CMOVP16rm : I<0x4A, MRMSrcMem, // if parity, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovp{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_P, 
EFLAGS))]>, - TB, OpSize; -def CMOVP32rm : I<0x4A, MRMSrcMem, // if parity, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovp{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_P, EFLAGS))]>, - TB; -def CMOVNP16rm : I<0x4B, MRMSrcMem, // if !parity, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovnp{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_NP, EFLAGS))]>, - TB, OpSize; -def CMOVNP32rm : I<0x4B, MRMSrcMem, // if !parity, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovnp{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_NP, EFLAGS))]>, - TB; -def CMOVO16rm : I<0x40, MRMSrcMem, // if overflow, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovo{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_O, EFLAGS))]>, - TB, OpSize; -def CMOVO32rm : I<0x40, MRMSrcMem, // if overflow, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovo{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_O, EFLAGS))]>, - TB; -def CMOVNO16rm : I<0x41, MRMSrcMem, // if !overflow, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovno{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_NO, EFLAGS))]>, - TB, OpSize; -def CMOVNO32rm : I<0x41, MRMSrcMem, // if !overflow, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovno{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_NO, EFLAGS))]>, - TB; -} // Predicates = [HasCMov] - -// X86 doesn't have 8-bit conditional moves. Use a customInserter to -// emit control flow. An alternative to this is to mark i8 SELECT as Promote, -// however that requires promoting the operands, and can induce additional -// i8 register pressure. Note that CMOV_GR8 is conservatively considered to -// clobber EFLAGS, because if one of the operands is zero, the expansion -// could involve an xor. 
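Every X86cmov pattern above is a flag-driven select: with the destination tied to $src1, the result keeps $src1 when the condition computed into EFLAGS is false and becomes $src2 when it is true, with no branch involved. Since there is no 8-bit cmov encoding, the CMOV_GR8 pseudo defined next is expanded by its custom inserter into explicit control flow instead. A behavioral sketch (not the lowering code itself):

// Behavioral sketch of X86cmov and of what the CMOV_GR8 pseudo expands to.
#include <cstdint>
#include <cstdio>

// cmovcc dst, src  ==  dst = cond ? src : dst   (cond was put in EFLAGS earlier)
static uint32_t cmov(bool Cond, uint32_t Dst, uint32_t Src) {
  return Cond ? Src : Dst;
}

// Conceptual expansion of CMOV_GR8: the same select, emitted as a diamond of
// basic blocks because no 8-bit cmov exists.
static uint8_t cmov_gr8_expansion(bool Cond, uint8_t Dst, uint8_t Src) {
  if (Cond)        // conditional branch on the saved condition
    Dst = Src;     // copy block
  return Dst;      // join block (PHI of the two incoming values)
}

int main() {
  std::printf("%u\n", cmov(true, 1, 2));                  // 2
  std::printf("%u\n", cmov_gr8_expansion(false, 1, 2));   // 1
  return 0;
}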
-let usesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] in { -def CMOV_GR8 : I<0, Pseudo, - (outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond), - "#CMOV_GR8 PSEUDO!", - [(set GR8:$dst, (X86cmov GR8:$src1, GR8:$src2, - imm:$cond, EFLAGS))]>; - -let Predicates = [NoCMov] in { -def CMOV_GR32 : I<0, Pseudo, - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$cond), - "#CMOV_GR32* PSEUDO!", - [(set GR32:$dst, - (X86cmov GR32:$src1, GR32:$src2, imm:$cond, EFLAGS))]>; -def CMOV_GR16 : I<0, Pseudo, - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$cond), - "#CMOV_GR16* PSEUDO!", - [(set GR16:$dst, - (X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>; -def CMOV_RFP32 : I<0, Pseudo, - (outs RFP32:$dst), - (ins RFP32:$src1, RFP32:$src2, i8imm:$cond), - "#CMOV_RFP32 PSEUDO!", - [(set RFP32:$dst, - (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond, - EFLAGS))]>; -def CMOV_RFP64 : I<0, Pseudo, - (outs RFP64:$dst), - (ins RFP64:$src1, RFP64:$src2, i8imm:$cond), - "#CMOV_RFP64 PSEUDO!", - [(set RFP64:$dst, - (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond, - EFLAGS))]>; -def CMOV_RFP80 : I<0, Pseudo, - (outs RFP80:$dst), - (ins RFP80:$src1, RFP80:$src2, i8imm:$cond), - "#CMOV_RFP80 PSEUDO!", - [(set RFP80:$dst, - (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond, - EFLAGS))]>; -} // Predicates = [NoCMov] -} // UsesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] -} // Uses = [EFLAGS] - - -// unary instructions -let CodeSize = 2 in { -let Defs = [EFLAGS] in { -def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1), - "neg{b}\t$dst", - [(set GR8:$dst, (ineg GR8:$src1)), - (implicit EFLAGS)]>; -def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src1), - "neg{w}\t$dst", - [(set GR16:$dst, (ineg GR16:$src1)), - (implicit EFLAGS)]>, OpSize; -def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1), - "neg{l}\t$dst", - [(set GR32:$dst, (ineg GR32:$src1)), - (implicit EFLAGS)]>; - -let Constraints = "" in { - def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst), - "neg{b}\t$dst", - [(store (ineg (loadi8 addr:$dst)), addr:$dst), - (implicit EFLAGS)]>; - def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst), - "neg{w}\t$dst", - [(store (ineg (loadi16 addr:$dst)), addr:$dst), - (implicit EFLAGS)]>, OpSize; - def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst), - "neg{l}\t$dst", - [(store (ineg (loadi32 addr:$dst)), addr:$dst), - (implicit EFLAGS)]>; -} // Constraints = "" -} // Defs = [EFLAGS] - -// Match xor -1 to not. Favors these over a move imm + xor to save code size. -let AddedComplexity = 15 in { -def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1), - "not{b}\t$dst", - [(set GR8:$dst, (not GR8:$src1))]>; -def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src1), - "not{w}\t$dst", - [(set GR16:$dst, (not GR16:$src1))]>, OpSize; -def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1), - "not{l}\t$dst", - [(set GR32:$dst, (not GR32:$src1))]>; -} -let Constraints = "" in { - def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst), - "not{b}\t$dst", - [(store (not (loadi8 addr:$dst)), addr:$dst)]>; - def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst), - "not{w}\t$dst", - [(store (not (loadi16 addr:$dst)), addr:$dst)]>, OpSize; - def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst), - "not{l}\t$dst", - [(store (not (loadi32 addr:$dst)), addr:$dst)]>; -} // Constraints = "" -} // CodeSize - -// TODO: inc/dec is slow for P4, but fast for Pentium-M. 
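The NOT definitions above match (not x), which the DAG canonicalizes as xor x, -1; selecting not avoids materializing the all-ones immediate, and, unlike neg, the instruction leaves EFLAGS alone (note that only the NEG definitions sit inside the Defs = [EFLAGS] block). The identity being relied on:

// The identity behind matching "xor x, -1" to NOT: ~x == x ^ -1 at any width.
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <initializer_list>

int main() {
  for (uint32_t x : {0u, 1u, 0xDEADBEEFu, 0xFFFFFFFFu}) {
    assert(~x == (x ^ 0xFFFFFFFFu));
    uint8_t b = static_cast<uint8_t>(x);
    assert(static_cast<uint8_t>(~b) == static_cast<uint8_t>(b ^ 0xFFu));
  }
  std::printf("not == xor -1 holds\n");
  return 0;
}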
-let Defs = [EFLAGS] in { -let CodeSize = 2 in -def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), - "inc{b}\t$dst", - [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>; - -let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA. -def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), - "inc{w}\t$dst", - [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>, - OpSize, Requires<[In32BitMode]>; -def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), - "inc{l}\t$dst", - [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>, - Requires<[In32BitMode]>; -} -let Constraints = "", CodeSize = 2 in { - def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst", - [(store (add (loadi8 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)]>; - def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst", - [(store (add (loadi16 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)]>, - OpSize, Requires<[In32BitMode]>; - def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst", - [(store (add (loadi32 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)]>, - Requires<[In32BitMode]>; -} // Constraints = "", CodeSize = 2 - -let CodeSize = 2 in -def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), - "dec{b}\t$dst", - [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>; -let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA. -def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), - "dec{w}\t$dst", - [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>, - OpSize, Requires<[In32BitMode]>; -def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), - "dec{l}\t$dst", - [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>, - Requires<[In32BitMode]>; -} // CodeSize = 2 - -let Constraints = "", CodeSize = 2 in { - def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst", - [(store (add (loadi8 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)]>; - def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst", - [(store (add (loadi16 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)]>, - OpSize, Requires<[In32BitMode]>; - def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", - [(store (add (loadi32 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)]>, - Requires<[In32BitMode]>; -} // Constraints = "", CodeSize = 2 -} // Defs = [EFLAGS] - -// Logical operators... -let Defs = [EFLAGS] in { -let isCommutable = 1 in { // X = AND Y, Z --> X = AND Z, Y -def AND8rr : I<0x20, MRMDestReg, - (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2), - "and{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, EFLAGS, (X86and_flag GR8:$src1, GR8:$src2))]>; -def AND16rr : I<0x21, MRMDestReg, - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "and{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1, - GR16:$src2))]>, OpSize; -def AND32rr : I<0x21, MRMDestReg, - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "and{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1, - GR32:$src2))]>; -} - -// AND instructions with the destination register in REG and the source register -// in R/M. Included for the disassembler. 
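The increment/decrement patterns above use dedicated X86inc_flag/X86dec_flag nodes rather than the generic add-with-flags form, at least in part because inc and dec update SF/ZF/OF/AF/PF but leave CF exactly as it was; the one-byte 0x40+r/0x48+r register encodings are also guarded by Requires<[In32BitMode]>, since those opcode bytes are REX prefixes in 64-bit mode. A sketch of the flag behavior being modeled (plain C++ with a hypothetical Flags struct):

// INC leaves the carry flag untouched, unlike "add $1, r".
#include <cstdint>
#include <cstdio>

struct Flags { bool cf, zf, sf, of; };

static uint32_t inc32(uint32_t X, Flags &F) {
  uint32_t R = X + 1;
  F.zf = (R == 0);
  F.sf = (R >> 31) & 1;
  F.of = (X == 0x7FFFFFFFu);   // only incrementing INT_MAX overflows
  // F.cf deliberately not written: INC preserves the carry flag.
  return R;
}

int main() {
  Flags F{true, false, false, false};    // pretend an earlier op left CF set
  uint32_t R = inc32(0xFFFFFFFFu, F);    // wraps to 0; an ADD of 1 would have written CF here
  std::printf("R=%u ZF=%d CF=%d\n", R, F.zf, F.cf);   // R=0 ZF=1 CF=1 (still set)
  return 0;
}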
-let isCodeGenOnly = 1 in { -def AND8rr_REV : I<0x22, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), - "and{b}\t{$src2, $dst|$dst, $src2}", []>; -def AND16rr_REV : I<0x23, MRMSrcReg, (outs GR16:$dst), - (ins GR16:$src1, GR16:$src2), - "and{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize; -def AND32rr_REV : I<0x23, MRMSrcReg, (outs GR32:$dst), - (ins GR32:$src1, GR32:$src2), - "and{l}\t{$src2, $dst|$dst, $src2}", []>; -} - -def AND8rm : I<0x22, MRMSrcMem, - (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2), - "and{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, EFLAGS, (X86and_flag GR8:$src1, - (loadi8 addr:$src2)))]>; -def AND16rm : I<0x23, MRMSrcMem, - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "and{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1, - (loadi16 addr:$src2)))]>, - OpSize; -def AND32rm : I<0x23, MRMSrcMem, - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "and{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1, - (loadi32 addr:$src2)))]>; - -def AND8ri : Ii8<0x80, MRM4r, - (outs GR8 :$dst), (ins GR8 :$src1, i8imm :$src2), - "and{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, EFLAGS, (X86and_flag GR8:$src1, - imm:$src2))]>; -def AND16ri : Ii16<0x81, MRM4r, - (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), - "and{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1, - imm:$src2))]>, OpSize; -def AND32ri : Ii32<0x81, MRM4r, - (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), - "and{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1, - imm:$src2))]>; -def AND16ri8 : Ii8<0x83, MRM4r, - (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), - "and{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1, - i16immSExt8:$src2))]>, - OpSize; -def AND32ri8 : Ii8<0x83, MRM4r, - (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), - "and{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1, - i32immSExt8:$src2))]>; - -let Constraints = "" in { - def AND8mr : I<0x20, MRMDestMem, - (outs), (ins i8mem :$dst, GR8 :$src), - "and{b}\t{$src, $dst|$dst, $src}", - [(store (and (load addr:$dst), GR8:$src), addr:$dst), - (implicit EFLAGS)]>; - def AND16mr : I<0x21, MRMDestMem, - (outs), (ins i16mem:$dst, GR16:$src), - "and{w}\t{$src, $dst|$dst, $src}", - [(store (and (load addr:$dst), GR16:$src), addr:$dst), - (implicit EFLAGS)]>, - OpSize; - def AND32mr : I<0x21, MRMDestMem, - (outs), (ins i32mem:$dst, GR32:$src), - "and{l}\t{$src, $dst|$dst, $src}", - [(store (and (load addr:$dst), GR32:$src), addr:$dst), - (implicit EFLAGS)]>; - def AND8mi : Ii8<0x80, MRM4m, - (outs), (ins i8mem :$dst, i8imm :$src), - "and{b}\t{$src, $dst|$dst, $src}", - [(store (and (loadi8 addr:$dst), imm:$src), addr:$dst), - (implicit EFLAGS)]>; - def AND16mi : Ii16<0x81, MRM4m, - (outs), (ins i16mem:$dst, i16imm:$src), - "and{w}\t{$src, $dst|$dst, $src}", - [(store (and (loadi16 addr:$dst), imm:$src), addr:$dst), - (implicit EFLAGS)]>, - OpSize; - def AND32mi : Ii32<0x81, MRM4m, - (outs), (ins i32mem:$dst, i32imm:$src), - "and{l}\t{$src, $dst|$dst, $src}", - [(store (and (loadi32 addr:$dst), imm:$src), addr:$dst), - (implicit EFLAGS)]>; - def AND16mi8 : Ii8<0x83, MRM4m, - (outs), (ins i16mem:$dst, i16i8imm :$src), - "and{w}\t{$src, $dst|$dst, $src}", - [(store (and (load addr:$dst), i16immSExt8:$src), addr:$dst), - (implicit EFLAGS)]>, - OpSize; - def AND32mi8 : Ii8<0x83, MRM4m, - (outs), (ins i32mem:$dst, i32i8imm :$src), - 
"and{l}\t{$src, $dst|$dst, $src}", - [(store (and (load addr:$dst), i32immSExt8:$src), addr:$dst), - (implicit EFLAGS)]>; - - def AND8i8 : Ii8<0x24, RawFrm, (outs), (ins i8imm:$src), - "and{b}\t{$src, %al|%al, $src}", []>; - def AND16i16 : Ii16<0x25, RawFrm, (outs), (ins i16imm:$src), - "and{w}\t{$src, %ax|%ax, $src}", []>, OpSize; - def AND32i32 : Ii32<0x25, RawFrm, (outs), (ins i32imm:$src), - "and{l}\t{$src, %eax|%eax, $src}", []>; - -} // Constraints = "" - - -let isCommutable = 1 in { // X = OR Y, Z --> X = OR Z, Y -def OR8rr : I<0x08, MRMDestReg, (outs GR8 :$dst), - (ins GR8 :$src1, GR8 :$src2), - "or{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, EFLAGS, (X86or_flag GR8:$src1, GR8:$src2))]>; -def OR16rr : I<0x09, MRMDestReg, (outs GR16:$dst), - (ins GR16:$src1, GR16:$src2), - "or{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1,GR16:$src2))]>, - OpSize; -def OR32rr : I<0x09, MRMDestReg, (outs GR32:$dst), - (ins GR32:$src1, GR32:$src2), - "or{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,GR32:$src2))]>; -} - -// OR instructions with the destination register in REG and the source register -// in R/M. Included for the disassembler. -let isCodeGenOnly = 1 in { -def OR8rr_REV : I<0x0A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), - "or{b}\t{$src2, $dst|$dst, $src2}", []>; -def OR16rr_REV : I<0x0B, MRMSrcReg, (outs GR16:$dst), - (ins GR16:$src1, GR16:$src2), - "or{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize; -def OR32rr_REV : I<0x0B, MRMSrcReg, (outs GR32:$dst), - (ins GR32:$src1, GR32:$src2), - "or{l}\t{$src2, $dst|$dst, $src2}", []>; -} - -def OR8rm : I<0x0A, MRMSrcMem, (outs GR8 :$dst), - (ins GR8 :$src1, i8mem :$src2), - "or{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, EFLAGS, (X86or_flag GR8:$src1, - (load addr:$src2)))]>; -def OR16rm : I<0x0B, MRMSrcMem, (outs GR16:$dst), - (ins GR16:$src1, i16mem:$src2), - "or{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1, - (load addr:$src2)))]>, - OpSize; -def OR32rm : I<0x0B, MRMSrcMem, (outs GR32:$dst), - (ins GR32:$src1, i32mem:$src2), - "or{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1, - (load addr:$src2)))]>; - -def OR8ri : Ii8 <0x80, MRM1r, (outs GR8 :$dst), - (ins GR8 :$src1, i8imm:$src2), - "or{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst,EFLAGS, (X86or_flag GR8:$src1, imm:$src2))]>; -def OR16ri : Ii16<0x81, MRM1r, (outs GR16:$dst), - (ins GR16:$src1, i16imm:$src2), - "or{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1, - imm:$src2))]>, OpSize; -def OR32ri : Ii32<0x81, MRM1r, (outs GR32:$dst), - (ins GR32:$src1, i32imm:$src2), - "or{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1, - imm:$src2))]>; - -def OR16ri8 : Ii8<0x83, MRM1r, (outs GR16:$dst), - (ins GR16:$src1, i16i8imm:$src2), - "or{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1, - i16immSExt8:$src2))]>, OpSize; -def OR32ri8 : Ii8<0x83, MRM1r, (outs GR32:$dst), - (ins GR32:$src1, i32i8imm:$src2), - "or{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1, - i32immSExt8:$src2))]>; -let Constraints = "" in { - def OR8mr : I<0x08, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src), - "or{b}\t{$src, $dst|$dst, $src}", - [(store (or (load addr:$dst), GR8:$src), addr:$dst), - (implicit EFLAGS)]>; - def OR16mr : I<0x09, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), - "or{w}\t{$src, $dst|$dst, 
$src}", - [(store (or (load addr:$dst), GR16:$src), addr:$dst), - (implicit EFLAGS)]>, OpSize; - def OR32mr : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), - "or{l}\t{$src, $dst|$dst, $src}", - [(store (or (load addr:$dst), GR32:$src), addr:$dst), - (implicit EFLAGS)]>; - def OR8mi : Ii8<0x80, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src), - "or{b}\t{$src, $dst|$dst, $src}", - [(store (or (loadi8 addr:$dst), imm:$src), addr:$dst), - (implicit EFLAGS)]>; - def OR16mi : Ii16<0x81, MRM1m, (outs), (ins i16mem:$dst, i16imm:$src), - "or{w}\t{$src, $dst|$dst, $src}", - [(store (or (loadi16 addr:$dst), imm:$src), addr:$dst), - (implicit EFLAGS)]>, - OpSize; - def OR32mi : Ii32<0x81, MRM1m, (outs), (ins i32mem:$dst, i32imm:$src), - "or{l}\t{$src, $dst|$dst, $src}", - [(store (or (loadi32 addr:$dst), imm:$src), addr:$dst), - (implicit EFLAGS)]>; - def OR16mi8 : Ii8<0x83, MRM1m, (outs), (ins i16mem:$dst, i16i8imm:$src), - "or{w}\t{$src, $dst|$dst, $src}", - [(store (or (load addr:$dst), i16immSExt8:$src), addr:$dst), - (implicit EFLAGS)]>, - OpSize; - def OR32mi8 : Ii8<0x83, MRM1m, (outs), (ins i32mem:$dst, i32i8imm:$src), - "or{l}\t{$src, $dst|$dst, $src}", - [(store (or (load addr:$dst), i32immSExt8:$src), addr:$dst), - (implicit EFLAGS)]>; - - def OR8i8 : Ii8 <0x0C, RawFrm, (outs), (ins i8imm:$src), - "or{b}\t{$src, %al|%al, $src}", []>; - def OR16i16 : Ii16 <0x0D, RawFrm, (outs), (ins i16imm:$src), - "or{w}\t{$src, %ax|%ax, $src}", []>, OpSize; - def OR32i32 : Ii32 <0x0D, RawFrm, (outs), (ins i32imm:$src), - "or{l}\t{$src, %eax|%eax, $src}", []>; -} // Constraints = "" - - -let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y - def XOR8rr : I<0x30, MRMDestReg, - (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2), - "xor{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, EFLAGS, (X86xor_flag GR8:$src1, - GR8:$src2))]>; - def XOR16rr : I<0x31, MRMDestReg, - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "xor{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1, - GR16:$src2))]>, OpSize; - def XOR32rr : I<0x31, MRMDestReg, - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "xor{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1, - GR32:$src2))]>; -} // isCommutable = 1 - -// XOR instructions with the destination register in REG and the source register -// in R/M. Included for the disassembler. 
-let isCodeGenOnly = 1 in { -def XOR8rr_REV : I<0x32, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), - "xor{b}\t{$src2, $dst|$dst, $src2}", []>; -def XOR16rr_REV : I<0x33, MRMSrcReg, (outs GR16:$dst), - (ins GR16:$src1, GR16:$src2), - "xor{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize; -def XOR32rr_REV : I<0x33, MRMSrcReg, (outs GR32:$dst), - (ins GR32:$src1, GR32:$src2), - "xor{l}\t{$src2, $dst|$dst, $src2}", []>; -} - -def XOR8rm : I<0x32, MRMSrcMem, - (outs GR8 :$dst), (ins GR8:$src1, i8mem :$src2), - "xor{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, EFLAGS, (X86xor_flag GR8:$src1, - (load addr:$src2)))]>; -def XOR16rm : I<0x33, MRMSrcMem, - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "xor{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1, - (load addr:$src2)))]>, - OpSize; -def XOR32rm : I<0x33, MRMSrcMem, - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "xor{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1, - (load addr:$src2)))]>; - -def XOR8ri : Ii8<0x80, MRM6r, - (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), - "xor{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, EFLAGS, (X86xor_flag GR8:$src1, imm:$src2))]>; -def XOR16ri : Ii16<0x81, MRM6r, - (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), - "xor{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1, - imm:$src2))]>, OpSize; -def XOR32ri : Ii32<0x81, MRM6r, - (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), - "xor{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1, - imm:$src2))]>; -def XOR16ri8 : Ii8<0x83, MRM6r, - (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), - "xor{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1, - i16immSExt8:$src2))]>, - OpSize; -def XOR32ri8 : Ii8<0x83, MRM6r, - (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), - "xor{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1, - i32immSExt8:$src2))]>; - -let Constraints = "" in { - def XOR8mr : I<0x30, MRMDestMem, - (outs), (ins i8mem :$dst, GR8 :$src), - "xor{b}\t{$src, $dst|$dst, $src}", - [(store (xor (load addr:$dst), GR8:$src), addr:$dst), - (implicit EFLAGS)]>; - def XOR16mr : I<0x31, MRMDestMem, - (outs), (ins i16mem:$dst, GR16:$src), - "xor{w}\t{$src, $dst|$dst, $src}", - [(store (xor (load addr:$dst), GR16:$src), addr:$dst), - (implicit EFLAGS)]>, - OpSize; - def XOR32mr : I<0x31, MRMDestMem, - (outs), (ins i32mem:$dst, GR32:$src), - "xor{l}\t{$src, $dst|$dst, $src}", - [(store (xor (load addr:$dst), GR32:$src), addr:$dst), - (implicit EFLAGS)]>; - def XOR8mi : Ii8<0x80, MRM6m, - (outs), (ins i8mem :$dst, i8imm :$src), - "xor{b}\t{$src, $dst|$dst, $src}", - [(store (xor (loadi8 addr:$dst), imm:$src), addr:$dst), - (implicit EFLAGS)]>; - def XOR16mi : Ii16<0x81, MRM6m, - (outs), (ins i16mem:$dst, i16imm:$src), - "xor{w}\t{$src, $dst|$dst, $src}", - [(store (xor (loadi16 addr:$dst), imm:$src), addr:$dst), - (implicit EFLAGS)]>, - OpSize; - def XOR32mi : Ii32<0x81, MRM6m, - (outs), (ins i32mem:$dst, i32imm:$src), - "xor{l}\t{$src, $dst|$dst, $src}", - [(store (xor (loadi32 addr:$dst), imm:$src), addr:$dst), - (implicit EFLAGS)]>; - def XOR16mi8 : Ii8<0x83, MRM6m, - (outs), (ins i16mem:$dst, i16i8imm :$src), - "xor{w}\t{$src, $dst|$dst, $src}", - [(store (xor (load addr:$dst), i16immSExt8:$src), addr:$dst), - (implicit EFLAGS)]>, - OpSize; - def XOR32mi8 : Ii8<0x83, MRM6m, - (outs), (ins i32mem:$dst, i32i8imm :$src), - 
"xor{l}\t{$src, $dst|$dst, $src}", - [(store (xor (load addr:$dst), i32immSExt8:$src), addr:$dst), - (implicit EFLAGS)]>; - - def XOR8i8 : Ii8 <0x34, RawFrm, (outs), (ins i8imm:$src), - "xor{b}\t{$src, %al|%al, $src}", []>; - def XOR16i16 : Ii16<0x35, RawFrm, (outs), (ins i16imm:$src), - "xor{w}\t{$src, %ax|%ax, $src}", []>, OpSize; - def XOR32i32 : Ii32<0x35, RawFrm, (outs), (ins i32imm:$src), - "xor{l}\t{$src, %eax|%eax, $src}", []>; -} // Constraints = "" -} // Defs = [EFLAGS] - -// Shift instructions -let Defs = [EFLAGS] in { -let Uses = [CL] in { -def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1), - "shl{b}\t{%cl, $dst|$dst, CL}", - [(set GR8:$dst, (shl GR8:$src1, CL))]>; -def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src1), - "shl{w}\t{%cl, $dst|$dst, CL}", - [(set GR16:$dst, (shl GR16:$src1, CL))]>, OpSize; -def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src1), - "shl{l}\t{%cl, $dst|$dst, CL}", - [(set GR32:$dst, (shl GR32:$src1, CL))]>; -} // Uses = [CL] - -def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), - "shl{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>; - -let isConvertibleToThreeAddress = 1 in { // Can transform into LEA. -def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), - "shl{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))]>, OpSize; -def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), - "shl{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))]>; - -// NOTE: We don't include patterns for shifts of a register by one, because -// 'add reg,reg' is cheaper. - -def SHL8r1 : I<0xD0, MRM4r, (outs GR8:$dst), (ins GR8:$src1), - "shl{b}\t$dst", []>; -def SHL16r1 : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1), - "shl{w}\t$dst", []>, OpSize; -def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1), - "shl{l}\t$dst", []>; - -} // isConvertibleToThreeAddress = 1 - -let Constraints = "" in { - let Uses = [CL] in { - def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst), - "shl{b}\t{%cl, $dst|$dst, CL}", - [(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>; - def SHL16mCL : I<0xD3, MRM4m, (outs), (ins i16mem:$dst), - "shl{w}\t{%cl, $dst|$dst, CL}", - [(store (shl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; - def SHL32mCL : I<0xD3, MRM4m, (outs), (ins i32mem:$dst), - "shl{l}\t{%cl, $dst|$dst, CL}", - [(store (shl (loadi32 addr:$dst), CL), addr:$dst)]>; - } - def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src), - "shl{b}\t{$src, $dst|$dst, $src}", - [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; - def SHL16mi : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, i8imm:$src), - "shl{w}\t{$src, $dst|$dst, $src}", - [(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>, - OpSize; - def SHL32mi : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, i8imm:$src), - "shl{l}\t{$src, $dst|$dst, $src}", - [(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>; - - // Shift by 1 - def SHL8m1 : I<0xD0, MRM4m, (outs), (ins i8mem :$dst), - "shl{b}\t$dst", - [(store (shl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; - def SHL16m1 : I<0xD1, MRM4m, (outs), (ins i16mem:$dst), - "shl{w}\t$dst", - [(store (shl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>, - OpSize; - def SHL32m1 : I<0xD1, MRM4m, (outs), (ins i32mem:$dst), - "shl{l}\t$dst", - [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; -} // Constraints = "" - -let Uses = 
[CL] in { -def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1), - "shr{b}\t{%cl, $dst|$dst, CL}", - [(set GR8:$dst, (srl GR8:$src1, CL))]>; -def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src1), - "shr{w}\t{%cl, $dst|$dst, CL}", - [(set GR16:$dst, (srl GR16:$src1, CL))]>, OpSize; -def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src1), - "shr{l}\t{%cl, $dst|$dst, CL}", - [(set GR32:$dst, (srl GR32:$src1, CL))]>; -} - -def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), - "shr{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))]>; -def SHR16ri : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), - "shr{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))]>, OpSize; -def SHR32ri : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), - "shr{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))]>; - -// Shift by 1 -def SHR8r1 : I<0xD0, MRM5r, (outs GR8:$dst), (ins GR8:$src1), - "shr{b}\t$dst", - [(set GR8:$dst, (srl GR8:$src1, (i8 1)))]>; -def SHR16r1 : I<0xD1, MRM5r, (outs GR16:$dst), (ins GR16:$src1), - "shr{w}\t$dst", - [(set GR16:$dst, (srl GR16:$src1, (i8 1)))]>, OpSize; -def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1), - "shr{l}\t$dst", - [(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>; - -let Constraints = "" in { - let Uses = [CL] in { - def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst), - "shr{b}\t{%cl, $dst|$dst, CL}", - [(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>; - def SHR16mCL : I<0xD3, MRM5m, (outs), (ins i16mem:$dst), - "shr{w}\t{%cl, $dst|$dst, CL}", - [(store (srl (loadi16 addr:$dst), CL), addr:$dst)]>, - OpSize; - def SHR32mCL : I<0xD3, MRM5m, (outs), (ins i32mem:$dst), - "shr{l}\t{%cl, $dst|$dst, CL}", - [(store (srl (loadi32 addr:$dst), CL), addr:$dst)]>; - } - def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src), - "shr{b}\t{$src, $dst|$dst, $src}", - [(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; - def SHR16mi : Ii8<0xC1, MRM5m, (outs), (ins i16mem:$dst, i8imm:$src), - "shr{w}\t{$src, $dst|$dst, $src}", - [(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>, - OpSize; - def SHR32mi : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, i8imm:$src), - "shr{l}\t{$src, $dst|$dst, $src}", - [(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>; - - // Shift by 1 - def SHR8m1 : I<0xD0, MRM5m, (outs), (ins i8mem :$dst), - "shr{b}\t$dst", - [(store (srl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; - def SHR16m1 : I<0xD1, MRM5m, (outs), (ins i16mem:$dst), - "shr{w}\t$dst", - [(store (srl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,OpSize; - def SHR32m1 : I<0xD1, MRM5m, (outs), (ins i32mem:$dst), - "shr{l}\t$dst", - [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; -} // Constraints = "" - -let Uses = [CL] in { -def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1), - "sar{b}\t{%cl, $dst|$dst, CL}", - [(set GR8:$dst, (sra GR8:$src1, CL))]>; -def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src1), - "sar{w}\t{%cl, $dst|$dst, CL}", - [(set GR16:$dst, (sra GR16:$src1, CL))]>, OpSize; -def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src1), - "sar{l}\t{%cl, $dst|$dst, CL}", - [(set GR32:$dst, (sra GR32:$src1, CL))]>; -} - -def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), - "sar{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))]>; -def SAR16ri : 
Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), - "sar{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))]>, - OpSize; -def SAR32ri : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), - "sar{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))]>; - -// Shift by 1 -def SAR8r1 : I<0xD0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1), - "sar{b}\t$dst", - [(set GR8:$dst, (sra GR8:$src1, (i8 1)))]>; -def SAR16r1 : I<0xD1, MRM7r, (outs GR16:$dst), (ins GR16:$src1), - "sar{w}\t$dst", - [(set GR16:$dst, (sra GR16:$src1, (i8 1)))]>, OpSize; -def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1), - "sar{l}\t$dst", - [(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>; - -let Constraints = "" in { - let Uses = [CL] in { - def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst), - "sar{b}\t{%cl, $dst|$dst, CL}", - [(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>; - def SAR16mCL : I<0xD3, MRM7m, (outs), (ins i16mem:$dst), - "sar{w}\t{%cl, $dst|$dst, CL}", - [(store (sra (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; - def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst), - "sar{l}\t{%cl, $dst|$dst, CL}", - [(store (sra (loadi32 addr:$dst), CL), addr:$dst)]>; - } - def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src), - "sar{b}\t{$src, $dst|$dst, $src}", - [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; - def SAR16mi : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, i8imm:$src), - "sar{w}\t{$src, $dst|$dst, $src}", - [(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>, - OpSize; - def SAR32mi : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, i8imm:$src), - "sar{l}\t{$src, $dst|$dst, $src}", - [(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>; - - // Shift by 1 - def SAR8m1 : I<0xD0, MRM7m, (outs), (ins i8mem :$dst), - "sar{b}\t$dst", - [(store (sra (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; - def SAR16m1 : I<0xD1, MRM7m, (outs), (ins i16mem:$dst), - "sar{w}\t$dst", - [(store (sra (loadi16 addr:$dst), (i8 1)), addr:$dst)]>, - OpSize; - def SAR32m1 : I<0xD1, MRM7m, (outs), (ins i32mem:$dst), - "sar{l}\t$dst", - [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; -} // Constraints = "" - -// Rotate instructions - -def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1), - "rcl{b}\t{1, $dst|$dst, 1}", []>; -let Uses = [CL] in { -def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1), - "rcl{b}\t{%cl, $dst|$dst, CL}", []>; -} -def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt), - "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; - -def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1), - "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize; -let Uses = [CL] in { -def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1), - "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; -} -def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt), - "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; - -def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1), - "rcl{l}\t{1, $dst|$dst, 1}", []>; -let Uses = [CL] in { -def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1), - "rcl{l}\t{%cl, $dst|$dst, CL}", []>; -} -def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt), - "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; - -def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1), - "rcr{b}\t{1, $dst|$dst, 1}", []>; -let Uses = [CL] in { -def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins 
GR8:$src1), - "rcr{b}\t{%cl, $dst|$dst, CL}", []>; -} -def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt), - "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; - -def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1), - "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize; -let Uses = [CL] in { -def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1), - "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; -} -def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt), - "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; - -def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1), - "rcr{l}\t{1, $dst|$dst, 1}", []>; -let Uses = [CL] in { -def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1), - "rcr{l}\t{%cl, $dst|$dst, CL}", []>; -} -def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt), - "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>; - -let Constraints = "" in { -def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst), - "rcl{b}\t{1, $dst|$dst, 1}", []>; -def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt), - "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst), - "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize; -def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt), - "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; -def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst), - "rcl{l}\t{1, $dst|$dst, 1}", []>; -def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt), - "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst), - "rcr{b}\t{1, $dst|$dst, 1}", []>; -def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt), - "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst), - "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize; -def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt), - "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; -def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst), - "rcr{l}\t{1, $dst|$dst, 1}", []>; -def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt), - "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>; - -let Uses = [CL] in { -def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst), - "rcl{b}\t{%cl, $dst|$dst, CL}", []>; -def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst), - "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; -def RCL32mCL : I<0xD3, MRM2m, (outs), (ins i32mem:$dst), - "rcl{l}\t{%cl, $dst|$dst, CL}", []>; -def RCR8mCL : I<0xD2, MRM3m, (outs), (ins i8mem:$dst), - "rcr{b}\t{%cl, $dst|$dst, CL}", []>; -def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst), - "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; -def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst), - "rcr{l}\t{%cl, $dst|$dst, CL}", []>; -} -} // Constraints = "" - -// FIXME: provide shorter instructions when imm8 == 1 -let Uses = [CL] in { -def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), - "rol{b}\t{%cl, $dst|$dst, CL}", - [(set GR8:$dst, (rotl GR8:$src1, CL))]>; -def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src1), - "rol{w}\t{%cl, $dst|$dst, CL}", - [(set GR16:$dst, (rotl GR16:$src1, CL))]>, OpSize; -def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src1), - "rol{l}\t{%cl, $dst|$dst, CL}", - [(set GR32:$dst, (rotl GR32:$src1, CL))]>; -} - -def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), - "rol{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))]>; -def 
ROL16ri : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), - "rol{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))]>, - OpSize; -def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), - "rol{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))]>; - -// Rotate by 1 -def ROL8r1 : I<0xD0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), - "rol{b}\t$dst", - [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))]>; -def ROL16r1 : I<0xD1, MRM0r, (outs GR16:$dst), (ins GR16:$src1), - "rol{w}\t$dst", - [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))]>, OpSize; -def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1), - "rol{l}\t$dst", - [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>; - -let Constraints = "" in { - let Uses = [CL] in { - def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst), - "rol{b}\t{%cl, $dst|$dst, CL}", - [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>; - def ROL16mCL : I<0xD3, MRM0m, (outs), (ins i16mem:$dst), - "rol{w}\t{%cl, $dst|$dst, CL}", - [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; - def ROL32mCL : I<0xD3, MRM0m, (outs), (ins i32mem:$dst), - "rol{l}\t{%cl, $dst|$dst, CL}", - [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)]>; - } - def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src), - "rol{b}\t{$src, $dst|$dst, $src}", - [(store (rotl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; - def ROL16mi : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, i8imm:$src), - "rol{w}\t{$src, $dst|$dst, $src}", - [(store (rotl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>, - OpSize; - def ROL32mi : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, i8imm:$src), - "rol{l}\t{$src, $dst|$dst, $src}", - [(store (rotl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>; - - // Rotate by 1 - def ROL8m1 : I<0xD0, MRM0m, (outs), (ins i8mem :$dst), - "rol{b}\t$dst", - [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; - def ROL16m1 : I<0xD1, MRM0m, (outs), (ins i16mem:$dst), - "rol{w}\t$dst", - [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>, - OpSize; - def ROL32m1 : I<0xD1, MRM0m, (outs), (ins i32mem:$dst), - "rol{l}\t$dst", - [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; -} // Constraints = "" - -let Uses = [CL] in { -def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), - "ror{b}\t{%cl, $dst|$dst, CL}", - [(set GR8:$dst, (rotr GR8:$src1, CL))]>; -def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src1), - "ror{w}\t{%cl, $dst|$dst, CL}", - [(set GR16:$dst, (rotr GR16:$src1, CL))]>, OpSize; -def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src1), - "ror{l}\t{%cl, $dst|$dst, CL}", - [(set GR32:$dst, (rotr GR32:$src1, CL))]>; -} - -def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), - "ror{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))]>; -def ROR16ri : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), - "ror{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))]>, - OpSize; -def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), - "ror{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))]>; - -// Rotate by 1 -def ROR8r1 : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), - "ror{b}\t$dst", - [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))]>; -def ROR16r1 : I<0xD1, MRM1r, (outs GR16:$dst), (ins GR16:$src1), - "ror{w}\t$dst", - [(set GR16:$dst, (rotr 
GR16:$src1, (i8 1)))]>, OpSize; -def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1), - "ror{l}\t$dst", - [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>; - -let Constraints = "" in { - let Uses = [CL] in { - def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst), - "ror{b}\t{%cl, $dst|$dst, CL}", - [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>; - def ROR16mCL : I<0xD3, MRM1m, (outs), (ins i16mem:$dst), - "ror{w}\t{%cl, $dst|$dst, CL}", - [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; - def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst), - "ror{l}\t{%cl, $dst|$dst, CL}", - [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)]>; - } - def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src), - "ror{b}\t{$src, $dst|$dst, $src}", - [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; - def ROR16mi : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, i8imm:$src), - "ror{w}\t{$src, $dst|$dst, $src}", - [(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>, - OpSize; - def ROR32mi : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, i8imm:$src), - "ror{l}\t{$src, $dst|$dst, $src}", - [(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>; - - // Rotate by 1 - def ROR8m1 : I<0xD0, MRM1m, (outs), (ins i8mem :$dst), - "ror{b}\t$dst", - [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; - def ROR16m1 : I<0xD1, MRM1m, (outs), (ins i16mem:$dst), - "ror{w}\t$dst", - [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)]>, - OpSize; - def ROR32m1 : I<0xD1, MRM1m, (outs), (ins i32mem:$dst), - "ror{l}\t$dst", - [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; -} // Constraints = "" - - -// Double shift instructions (generalizations of rotate) -let Uses = [CL] in { -def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst), - (ins GR32:$src1, GR32:$src2), - "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", - [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>, TB; -def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst), - (ins GR32:$src1, GR32:$src2), - "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", - [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>, TB; -def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst), - (ins GR16:$src1, GR16:$src2), - "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", - [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>, - TB, OpSize; -def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst), - (ins GR16:$src1, GR16:$src2), - "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", - [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>, - TB, OpSize; -} - -let isCommutable = 1 in { // These instructions commute to each other. 
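As a rough illustration of the double-shift semantics the X86shld/X86shrd patterns above and below describe (SHLD/SHRD funnel bits of a second operand into the vacated end of the first), here is a minimal C++ sketch; it is not part of the imported sources, and the function names, 32-bit width, and zero-count handling are assumptions made for the example.

#include <cstdint>

// SHLD: shift src1 left, filling the low bits from the high end of src2.
// SHRD: shift src1 right, filling the high bits from the low end of src2.
// Hardware masks the count to the operand width; a count of 0 leaves src1
// unchanged, which the guard below models to avoid an undefined C++ shift.
uint32_t shld32(uint32_t src1, uint32_t src2, unsigned amt) {
  amt &= 31;
  if (amt == 0) return src1;
  return (src1 << amt) | (src2 >> (32 - amt));
}

uint32_t shrd32(uint32_t src1, uint32_t src2, unsigned amt) {
  amt &= 31;
  if (amt == 0) return src1;
  return (src1 >> amt) | (src2 << (32 - amt));
}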
-def SHLD32rri8 : Ii8<0xA4, MRMDestReg, - (outs GR32:$dst), - (ins GR32:$src1, GR32:$src2, i8imm:$src3), - "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, - (i8 imm:$src3)))]>, - TB; -def SHRD32rri8 : Ii8<0xAC, MRMDestReg, - (outs GR32:$dst), - (ins GR32:$src1, GR32:$src2, i8imm:$src3), - "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, - (i8 imm:$src3)))]>, - TB; -def SHLD16rri8 : Ii8<0xA4, MRMDestReg, - (outs GR16:$dst), - (ins GR16:$src1, GR16:$src2, i8imm:$src3), - "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, - (i8 imm:$src3)))]>, - TB, OpSize; -def SHRD16rri8 : Ii8<0xAC, MRMDestReg, - (outs GR16:$dst), - (ins GR16:$src1, GR16:$src2, i8imm:$src3), - "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, - (i8 imm:$src3)))]>, - TB, OpSize; -} - -let Constraints = "" in { - let Uses = [CL] in { - def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), - "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", - [(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL), - addr:$dst)]>, TB; - def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), - "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", - [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL), - addr:$dst)]>, TB; - } - def SHLD32mri8 : Ii8<0xA4, MRMDestMem, - (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3), - "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(store (X86shld (loadi32 addr:$dst), GR32:$src2, - (i8 imm:$src3)), addr:$dst)]>, - TB; - def SHRD32mri8 : Ii8<0xAC, MRMDestMem, - (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3), - "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, - (i8 imm:$src3)), addr:$dst)]>, - TB; - - let Uses = [CL] in { - def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), - "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", - [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL), - addr:$dst)]>, TB, OpSize; - def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), - "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", - [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL), - addr:$dst)]>, TB, OpSize; - } - def SHLD16mri8 : Ii8<0xA4, MRMDestMem, - (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3), - "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(store (X86shld (loadi16 addr:$dst), GR16:$src2, - (i8 imm:$src3)), addr:$dst)]>, - TB, OpSize; - def SHRD16mri8 : Ii8<0xAC, MRMDestMem, - (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3), - "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, - (i8 imm:$src3)), addr:$dst)]>, - TB, OpSize; -} // Constraints = "" -} // Defs = [EFLAGS] - - -// Arithmetic. -let Defs = [EFLAGS] in { -let isCommutable = 1 in { // X = ADD Y, Z --> X = ADD Z, Y -// Register-Register Addition -def ADD8rr : I<0x00, MRMDestReg, (outs GR8 :$dst), - (ins GR8 :$src1, GR8 :$src2), - "add{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, EFLAGS, (X86add_flag GR8:$src1, GR8:$src2))]>; - -let isConvertibleToThreeAddress = 1 in { // Can transform into LEA. 
-// Register-Register Addition -def ADD16rr : I<0x01, MRMDestReg, (outs GR16:$dst), - (ins GR16:$src1, GR16:$src2), - "add{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, (X86add_flag GR16:$src1, - GR16:$src2))]>, OpSize; -def ADD32rr : I<0x01, MRMDestReg, (outs GR32:$dst), - (ins GR32:$src1, GR32:$src2), - "add{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, (X86add_flag GR32:$src1, - GR32:$src2))]>; -} // end isConvertibleToThreeAddress -} // end isCommutable - -// These are alternate spellings for use by the disassembler, we mark them as -// code gen only to ensure they aren't matched by the assembler. -let isCodeGenOnly = 1 in { - def ADD8rr_alt: I<0x02, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), - "add{b}\t{$src2, $dst|$dst, $src2}", []>; - def ADD16rr_alt: I<0x03, MRMSrcReg,(outs GR16:$dst),(ins GR16:$src1, GR16:$src2), - "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize; - def ADD32rr_alt: I<0x03, MRMSrcReg,(outs GR32:$dst),(ins GR32:$src1, GR32:$src2), - "add{l}\t{$src2, $dst|$dst, $src2}", []>; -} - -// Register-Memory Addition -def ADD8rm : I<0x02, MRMSrcMem, (outs GR8 :$dst), - (ins GR8 :$src1, i8mem :$src2), - "add{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, EFLAGS, (X86add_flag GR8:$src1, - (load addr:$src2)))]>; -def ADD16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), - (ins GR16:$src1, i16mem:$src2), - "add{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, (X86add_flag GR16:$src1, - (load addr:$src2)))]>, OpSize; -def ADD32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), - (ins GR32:$src1, i32mem:$src2), - "add{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, (X86add_flag GR32:$src1, - (load addr:$src2)))]>; - -// Register-Integer Addition -def ADD8ri : Ii8<0x80, MRM0r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), - "add{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, EFLAGS, - (X86add_flag GR8:$src1, imm:$src2))]>; - -let isConvertibleToThreeAddress = 1 in { // Can transform into LEA. 
-// Register-Integer Addition -def ADD16ri : Ii16<0x81, MRM0r, (outs GR16:$dst), - (ins GR16:$src1, i16imm:$src2), - "add{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, - (X86add_flag GR16:$src1, imm:$src2))]>, OpSize; -def ADD32ri : Ii32<0x81, MRM0r, (outs GR32:$dst), - (ins GR32:$src1, i32imm:$src2), - "add{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, - (X86add_flag GR32:$src1, imm:$src2))]>; -def ADD16ri8 : Ii8<0x83, MRM0r, (outs GR16:$dst), - (ins GR16:$src1, i16i8imm:$src2), - "add{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, - (X86add_flag GR16:$src1, i16immSExt8:$src2))]>, OpSize; -def ADD32ri8 : Ii8<0x83, MRM0r, (outs GR32:$dst), - (ins GR32:$src1, i32i8imm:$src2), - "add{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, - (X86add_flag GR32:$src1, i32immSExt8:$src2))]>; -} - -let Constraints = "" in { - // Memory-Register Addition - def ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), - "add{b}\t{$src2, $dst|$dst, $src2}", - [(store (add (load addr:$dst), GR8:$src2), addr:$dst), - (implicit EFLAGS)]>; - def ADD16mr : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), - "add{w}\t{$src2, $dst|$dst, $src2}", - [(store (add (load addr:$dst), GR16:$src2), addr:$dst), - (implicit EFLAGS)]>, OpSize; - def ADD32mr : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), - "add{l}\t{$src2, $dst|$dst, $src2}", - [(store (add (load addr:$dst), GR32:$src2), addr:$dst), - (implicit EFLAGS)]>; - def ADD8mi : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2), - "add{b}\t{$src2, $dst|$dst, $src2}", - [(store (add (loadi8 addr:$dst), imm:$src2), addr:$dst), - (implicit EFLAGS)]>; - def ADD16mi : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2), - "add{w}\t{$src2, $dst|$dst, $src2}", - [(store (add (loadi16 addr:$dst), imm:$src2), addr:$dst), - (implicit EFLAGS)]>, OpSize; - def ADD32mi : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2), - "add{l}\t{$src2, $dst|$dst, $src2}", - [(store (add (loadi32 addr:$dst), imm:$src2), addr:$dst), - (implicit EFLAGS)]>; - def ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2), - "add{w}\t{$src2, $dst|$dst, $src2}", - [(store (add (load addr:$dst), i16immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)]>, OpSize; - def ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2), - "add{l}\t{$src2, $dst|$dst, $src2}", - [(store (add (load addr:$dst), i32immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)]>; - - // addition to rAX - def ADD8i8 : Ii8<0x04, RawFrm, (outs), (ins i8imm:$src), - "add{b}\t{$src, %al|%al, $src}", []>; - def ADD16i16 : Ii16<0x05, RawFrm, (outs), (ins i16imm:$src), - "add{w}\t{$src, %ax|%ax, $src}", []>, OpSize; - def ADD32i32 : Ii32<0x05, RawFrm, (outs), (ins i32imm:$src), - "add{l}\t{$src, %eax|%eax, $src}", []>; -} // Constraints = "" - -let Uses = [EFLAGS] in { -let isCommutable = 1 in { // X = ADC Y, Z --> X = ADC Z, Y -def ADC8rr : I<0x10, MRMDestReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), - "adc{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (adde GR8:$src1, GR8:$src2))]>; -def ADC16rr : I<0x11, MRMDestReg, (outs GR16:$dst), - (ins GR16:$src1, GR16:$src2), - "adc{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (adde GR16:$src1, GR16:$src2))]>, OpSize; -def ADC32rr : I<0x11, MRMDestReg, (outs GR32:$dst), - (ins GR32:$src1, GR32:$src2), - "adc{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (adde GR32:$src1, GR32:$src2))]>; -} - -let isCodeGenOnly = 1 in { -def ADC8rr_REV : I<0x12, 
MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), - "adc{b}\t{$src2, $dst|$dst, $src2}", []>; -def ADC16rr_REV : I<0x13, MRMSrcReg, (outs GR16:$dst), - (ins GR16:$src1, GR16:$src2), - "adc{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize; -def ADC32rr_REV : I<0x13, MRMSrcReg, (outs GR32:$dst), - (ins GR32:$src1, GR32:$src2), - "adc{l}\t{$src2, $dst|$dst, $src2}", []>; -} - -def ADC8rm : I<0x12, MRMSrcMem , (outs GR8:$dst), - (ins GR8:$src1, i8mem:$src2), - "adc{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (adde GR8:$src1, (load addr:$src2)))]>; -def ADC16rm : I<0x13, MRMSrcMem , (outs GR16:$dst), - (ins GR16:$src1, i16mem:$src2), - "adc{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (adde GR16:$src1, (load addr:$src2)))]>, - OpSize; -def ADC32rm : I<0x13, MRMSrcMem , (outs GR32:$dst), - (ins GR32:$src1, i32mem:$src2), - "adc{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (adde GR32:$src1, (load addr:$src2)))]>; -def ADC8ri : Ii8<0x80, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), - "adc{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (adde GR8:$src1, imm:$src2))]>; -def ADC16ri : Ii16<0x81, MRM2r, (outs GR16:$dst), - (ins GR16:$src1, i16imm:$src2), - "adc{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (adde GR16:$src1, imm:$src2))]>, OpSize; -def ADC16ri8 : Ii8<0x83, MRM2r, (outs GR16:$dst), - (ins GR16:$src1, i16i8imm:$src2), - "adc{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (adde GR16:$src1, i16immSExt8:$src2))]>, - OpSize; -def ADC32ri : Ii32<0x81, MRM2r, (outs GR32:$dst), - (ins GR32:$src1, i32imm:$src2), - "adc{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (adde GR32:$src1, imm:$src2))]>; -def ADC32ri8 : Ii8<0x83, MRM2r, (outs GR32:$dst), - (ins GR32:$src1, i32i8imm:$src2), - "adc{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (adde GR32:$src1, i32immSExt8:$src2))]>; - -let Constraints = "" in { - def ADC8mr : I<0x10, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), - "adc{b}\t{$src2, $dst|$dst, $src2}", - [(store (adde (load addr:$dst), GR8:$src2), addr:$dst)]>; - def ADC16mr : I<0x11, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), - "adc{w}\t{$src2, $dst|$dst, $src2}", - [(store (adde (load addr:$dst), GR16:$src2), addr:$dst)]>, - OpSize; - def ADC32mr : I<0x11, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), - "adc{l}\t{$src2, $dst|$dst, $src2}", - [(store (adde (load addr:$dst), GR32:$src2), addr:$dst)]>; - def ADC8mi : Ii8<0x80, MRM2m, (outs), (ins i8mem:$dst, i8imm:$src2), - "adc{b}\t{$src2, $dst|$dst, $src2}", - [(store (adde (loadi8 addr:$dst), imm:$src2), addr:$dst)]>; - def ADC16mi : Ii16<0x81, MRM2m, (outs), (ins i16mem:$dst, i16imm:$src2), - "adc{w}\t{$src2, $dst|$dst, $src2}", - [(store (adde (loadi16 addr:$dst), imm:$src2), addr:$dst)]>, - OpSize; - def ADC16mi8 : Ii8<0x83, MRM2m, (outs), (ins i16mem:$dst, i16i8imm :$src2), - "adc{w}\t{$src2, $dst|$dst, $src2}", - [(store (adde (load addr:$dst), i16immSExt8:$src2), addr:$dst)]>, - OpSize; - def ADC32mi : Ii32<0x81, MRM2m, (outs), (ins i32mem:$dst, i32imm:$src2), - "adc{l}\t{$src2, $dst|$dst, $src2}", - [(store (adde (loadi32 addr:$dst), imm:$src2), addr:$dst)]>; - def ADC32mi8 : Ii8<0x83, MRM2m, (outs), (ins i32mem:$dst, i32i8imm :$src2), - "adc{l}\t{$src2, $dst|$dst, $src2}", - [(store (adde (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>; - - def ADC8i8 : Ii8<0x14, RawFrm, (outs), (ins i8imm:$src), - "adc{b}\t{$src, %al|%al, $src}", []>; - def ADC16i16 : Ii16<0x15, RawFrm, (outs), (ins i16imm:$src), - "adc{w}\t{$src, %ax|%ax, $src}", []>, OpSize; - def ADC32i32 : 
Ii32<0x15, RawFrm, (outs), (ins i32imm:$src), - "adc{l}\t{$src, %eax|%eax, $src}", []>; -} // Constraints = "" -} // Uses = [EFLAGS] - -// Register-Register Subtraction -def SUB8rr : I<0x28, MRMDestReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), - "sub{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, EFLAGS, - (X86sub_flag GR8:$src1, GR8:$src2))]>; -def SUB16rr : I<0x29, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2), - "sub{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, - (X86sub_flag GR16:$src1, GR16:$src2))]>, OpSize; -def SUB32rr : I<0x29, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2), - "sub{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, - (X86sub_flag GR32:$src1, GR32:$src2))]>; - -let isCodeGenOnly = 1 in { -def SUB8rr_REV : I<0x2A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), - "sub{b}\t{$src2, $dst|$dst, $src2}", []>; -def SUB16rr_REV : I<0x2B, MRMSrcReg, (outs GR16:$dst), - (ins GR16:$src1, GR16:$src2), - "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize; -def SUB32rr_REV : I<0x2B, MRMSrcReg, (outs GR32:$dst), - (ins GR32:$src1, GR32:$src2), - "sub{l}\t{$src2, $dst|$dst, $src2}", []>; -} - -// Register-Memory Subtraction -def SUB8rm : I<0x2A, MRMSrcMem, (outs GR8 :$dst), - (ins GR8 :$src1, i8mem :$src2), - "sub{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, EFLAGS, - (X86sub_flag GR8:$src1, (load addr:$src2)))]>; -def SUB16rm : I<0x2B, MRMSrcMem, (outs GR16:$dst), - (ins GR16:$src1, i16mem:$src2), - "sub{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, - (X86sub_flag GR16:$src1, (load addr:$src2)))]>, OpSize; -def SUB32rm : I<0x2B, MRMSrcMem, (outs GR32:$dst), - (ins GR32:$src1, i32mem:$src2), - "sub{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, - (X86sub_flag GR32:$src1, (load addr:$src2)))]>; - -// Register-Integer Subtraction -def SUB8ri : Ii8 <0x80, MRM5r, (outs GR8:$dst), - (ins GR8:$src1, i8imm:$src2), - "sub{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, EFLAGS, - (X86sub_flag GR8:$src1, imm:$src2))]>; -def SUB16ri : Ii16<0x81, MRM5r, (outs GR16:$dst), - (ins GR16:$src1, i16imm:$src2), - "sub{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, - (X86sub_flag GR16:$src1, imm:$src2))]>, OpSize; -def SUB32ri : Ii32<0x81, MRM5r, (outs GR32:$dst), - (ins GR32:$src1, i32imm:$src2), - "sub{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, - (X86sub_flag GR32:$src1, imm:$src2))]>; -def SUB16ri8 : Ii8<0x83, MRM5r, (outs GR16:$dst), - (ins GR16:$src1, i16i8imm:$src2), - "sub{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, - (X86sub_flag GR16:$src1, i16immSExt8:$src2))]>, OpSize; -def SUB32ri8 : Ii8<0x83, MRM5r, (outs GR32:$dst), - (ins GR32:$src1, i32i8imm:$src2), - "sub{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, - (X86sub_flag GR32:$src1, i32immSExt8:$src2))]>; - -let Constraints = "" in { - // Memory-Register Subtraction - def SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2), - "sub{b}\t{$src2, $dst|$dst, $src2}", - [(store (sub (load addr:$dst), GR8:$src2), addr:$dst), - (implicit EFLAGS)]>; - def SUB16mr : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), - "sub{w}\t{$src2, $dst|$dst, $src2}", - [(store (sub (load addr:$dst), GR16:$src2), addr:$dst), - (implicit EFLAGS)]>, OpSize; - def SUB32mr : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), - "sub{l}\t{$src2, $dst|$dst, $src2}", - [(store (sub (load addr:$dst), GR32:$src2), addr:$dst), - (implicit EFLAGS)]>; - - // Memory-Integer Subtraction - def SUB8mi : 
Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2), - "sub{b}\t{$src2, $dst|$dst, $src2}", - [(store (sub (loadi8 addr:$dst), imm:$src2), addr:$dst), - (implicit EFLAGS)]>; - def SUB16mi : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2), - "sub{w}\t{$src2, $dst|$dst, $src2}", - [(store (sub (loadi16 addr:$dst), imm:$src2),addr:$dst), - (implicit EFLAGS)]>, OpSize; - def SUB32mi : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2), - "sub{l}\t{$src2, $dst|$dst, $src2}", - [(store (sub (loadi32 addr:$dst), imm:$src2),addr:$dst), - (implicit EFLAGS)]>; - def SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2), - "sub{w}\t{$src2, $dst|$dst, $src2}", - [(store (sub (load addr:$dst), i16immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)]>, OpSize; - def SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2), - "sub{l}\t{$src2, $dst|$dst, $src2}", - [(store (sub (load addr:$dst), i32immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)]>; - - def SUB8i8 : Ii8<0x2C, RawFrm, (outs), (ins i8imm:$src), - "sub{b}\t{$src, %al|%al, $src}", []>; - def SUB16i16 : Ii16<0x2D, RawFrm, (outs), (ins i16imm:$src), - "sub{w}\t{$src, %ax|%ax, $src}", []>, OpSize; - def SUB32i32 : Ii32<0x2D, RawFrm, (outs), (ins i32imm:$src), - "sub{l}\t{$src, %eax|%eax, $src}", []>; -} // Constraints = "" - -let Uses = [EFLAGS] in { -def SBB8rr : I<0x18, MRMDestReg, (outs GR8:$dst), - (ins GR8:$src1, GR8:$src2), - "sbb{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (sube GR8:$src1, GR8:$src2))]>; -def SBB16rr : I<0x19, MRMDestReg, (outs GR16:$dst), - (ins GR16:$src1, GR16:$src2), - "sbb{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (sube GR16:$src1, GR16:$src2))]>, OpSize; -def SBB32rr : I<0x19, MRMDestReg, (outs GR32:$dst), - (ins GR32:$src1, GR32:$src2), - "sbb{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (sube GR32:$src1, GR32:$src2))]>; - -let Constraints = "" in { - def SBB8mr : I<0x18, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), - "sbb{b}\t{$src2, $dst|$dst, $src2}", - [(store (sube (load addr:$dst), GR8:$src2), addr:$dst)]>; - def SBB16mr : I<0x19, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), - "sbb{w}\t{$src2, $dst|$dst, $src2}", - [(store (sube (load addr:$dst), GR16:$src2), addr:$dst)]>, - OpSize; - def SBB32mr : I<0x19, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), - "sbb{l}\t{$src2, $dst|$dst, $src2}", - [(store (sube (load addr:$dst), GR32:$src2), addr:$dst)]>; - def SBB8mi : Ii8<0x80, MRM3m, (outs), (ins i8mem:$dst, i8imm:$src2), - "sbb{b}\t{$src2, $dst|$dst, $src2}", - [(store (sube (loadi8 addr:$dst), imm:$src2), addr:$dst)]>; - def SBB16mi : Ii16<0x81, MRM3m, (outs), (ins i16mem:$dst, i16imm:$src2), - "sbb{w}\t{$src2, $dst|$dst, $src2}", - [(store (sube (loadi16 addr:$dst), imm:$src2), addr:$dst)]>, - OpSize; - def SBB16mi8 : Ii8<0x83, MRM3m, (outs), (ins i16mem:$dst, i16i8imm :$src2), - "sbb{w}\t{$src2, $dst|$dst, $src2}", - [(store (sube (load addr:$dst), i16immSExt8:$src2), addr:$dst)]>, - OpSize; - def SBB32mi : Ii32<0x81, MRM3m, (outs), (ins i32mem:$dst, i32imm:$src2), - "sbb{l}\t{$src2, $dst|$dst, $src2}", - [(store (sube (loadi32 addr:$dst), imm:$src2), addr:$dst)]>; - def SBB32mi8 : Ii8<0x83, MRM3m, (outs), (ins i32mem:$dst, i32i8imm :$src2), - "sbb{l}\t{$src2, $dst|$dst, $src2}", - [(store (sube (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>; - - def SBB8i8 : Ii8<0x1C, RawFrm, (outs), (ins i8imm:$src), - "sbb{b}\t{$src, %al|%al, $src}", []>; - def SBB16i16 : Ii16<0x1D, RawFrm, (outs), (ins i16imm:$src), - 
"sbb{w}\t{$src, %ax|%ax, $src}", []>, OpSize; - def SBB32i32 : Ii32<0x1D, RawFrm, (outs), (ins i32imm:$src), - "sbb{l}\t{$src, %eax|%eax, $src}", []>; -} // Constraints = "" - -let isCodeGenOnly = 1 in { -def SBB8rr_REV : I<0x1A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), - "sbb{b}\t{$src2, $dst|$dst, $src2}", []>; -def SBB16rr_REV : I<0x1B, MRMSrcReg, (outs GR16:$dst), - (ins GR16:$src1, GR16:$src2), - "sbb{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize; -def SBB32rr_REV : I<0x1B, MRMSrcReg, (outs GR32:$dst), - (ins GR32:$src1, GR32:$src2), - "sbb{l}\t{$src2, $dst|$dst, $src2}", []>; -} - -def SBB8rm : I<0x1A, MRMSrcMem, (outs GR8:$dst), (ins GR8:$src1, i8mem:$src2), - "sbb{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (sube GR8:$src1, (load addr:$src2)))]>; -def SBB16rm : I<0x1B, MRMSrcMem, (outs GR16:$dst), - (ins GR16:$src1, i16mem:$src2), - "sbb{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (sube GR16:$src1, (load addr:$src2)))]>, - OpSize; -def SBB32rm : I<0x1B, MRMSrcMem, (outs GR32:$dst), - (ins GR32:$src1, i32mem:$src2), - "sbb{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (sube GR32:$src1, (load addr:$src2)))]>; -def SBB8ri : Ii8<0x80, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), - "sbb{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (sube GR8:$src1, imm:$src2))]>; -def SBB16ri : Ii16<0x81, MRM3r, (outs GR16:$dst), - (ins GR16:$src1, i16imm:$src2), - "sbb{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (sube GR16:$src1, imm:$src2))]>, OpSize; -def SBB16ri8 : Ii8<0x83, MRM3r, (outs GR16:$dst), - (ins GR16:$src1, i16i8imm:$src2), - "sbb{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (sube GR16:$src1, i16immSExt8:$src2))]>, - OpSize; -def SBB32ri : Ii32<0x81, MRM3r, (outs GR32:$dst), - (ins GR32:$src1, i32imm:$src2), - "sbb{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (sube GR32:$src1, imm:$src2))]>; -def SBB32ri8 : Ii8<0x83, MRM3r, (outs GR32:$dst), - (ins GR32:$src1, i32i8imm:$src2), - "sbb{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (sube GR32:$src1, i32immSExt8:$src2))]>; -} // Uses = [EFLAGS] -} // Defs = [EFLAGS] - -let Defs = [EFLAGS] in { -let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y -// Register-Register Signed Integer Multiply -def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2), - "imul{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, - (X86smul_flag GR16:$src1, GR16:$src2))]>, TB, OpSize; -def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2), - "imul{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, - (X86smul_flag GR32:$src1, GR32:$src2))]>, TB; -} - -// Register-Memory Signed Integer Multiply -def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst), - (ins GR16:$src1, i16mem:$src2), - "imul{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, - (X86smul_flag GR16:$src1, (load addr:$src2)))]>, - TB, OpSize; -def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst), - (ins GR32:$src1, i32mem:$src2), - "imul{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, - (X86smul_flag GR32:$src1, (load addr:$src2)))]>, TB; -} // Defs = [EFLAGS] -} // end Two Address instructions - -// Suprisingly enough, these are not two address instructions! 
-let Defs = [EFLAGS] in { -// Register-Integer Signed Integer Multiply -def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16 - (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), - "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR16:$dst, EFLAGS, - (X86smul_flag GR16:$src1, imm:$src2))]>, OpSize; -def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32 - (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), - "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR32:$dst, EFLAGS, - (X86smul_flag GR32:$src1, imm:$src2))]>; -def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8 - (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), - "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR16:$dst, EFLAGS, - (X86smul_flag GR16:$src1, i16immSExt8:$src2))]>, - OpSize; -def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8 - (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), - "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR32:$dst, EFLAGS, - (X86smul_flag GR32:$src1, i32immSExt8:$src2))]>; - -// Memory-Integer Signed Integer Multiply -def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16 - (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2), - "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR16:$dst, EFLAGS, - (X86smul_flag (load addr:$src1), imm:$src2))]>, - OpSize; -def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32 - (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2), - "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR32:$dst, EFLAGS, - (X86smul_flag (load addr:$src1), imm:$src2))]>; -def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8 - (outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2), - "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR16:$dst, EFLAGS, - (X86smul_flag (load addr:$src1), - i16immSExt8:$src2))]>, OpSize; -def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8 - (outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2), - "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR32:$dst, EFLAGS, - (X86smul_flag (load addr:$src1), - i32immSExt8:$src2))]>; -} // Defs = [EFLAGS] - -//===----------------------------------------------------------------------===// -// Test instructions are just like AND, except they don't generate a result. 
-// -let Defs = [EFLAGS] in { -let isCommutable = 1 in { // TEST X, Y --> TEST Y, X -def TEST8rr : I<0x84, MRMSrcReg, (outs), (ins GR8:$src1, GR8:$src2), - "test{b}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (and_su GR8:$src1, GR8:$src2), 0))]>; -def TEST16rr : I<0x85, MRMSrcReg, (outs), (ins GR16:$src1, GR16:$src2), - "test{w}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (and_su GR16:$src1, GR16:$src2), - 0))]>, - OpSize; -def TEST32rr : I<0x85, MRMSrcReg, (outs), (ins GR32:$src1, GR32:$src2), - "test{l}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (and_su GR32:$src1, GR32:$src2), - 0))]>; -} - -def TEST8i8 : Ii8<0xA8, RawFrm, (outs), (ins i8imm:$src), - "test{b}\t{$src, %al|%al, $src}", []>; -def TEST16i16 : Ii16<0xA9, RawFrm, (outs), (ins i16imm:$src), - "test{w}\t{$src, %ax|%ax, $src}", []>, OpSize; -def TEST32i32 : Ii32<0xA9, RawFrm, (outs), (ins i32imm:$src), - "test{l}\t{$src, %eax|%eax, $src}", []>; - -def TEST8rm : I<0x84, MRMSrcMem, (outs), (ins GR8 :$src1, i8mem :$src2), - "test{b}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (and GR8:$src1, (loadi8 addr:$src2)), - 0))]>; -def TEST16rm : I<0x85, MRMSrcMem, (outs), (ins GR16:$src1, i16mem:$src2), - "test{w}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (and GR16:$src1, - (loadi16 addr:$src2)), 0))]>, OpSize; -def TEST32rm : I<0x85, MRMSrcMem, (outs), (ins GR32:$src1, i32mem:$src2), - "test{l}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (and GR32:$src1, - (loadi32 addr:$src2)), 0))]>; - -def TEST8ri : Ii8 <0xF6, MRM0r, // flags = GR8 & imm8 - (outs), (ins GR8:$src1, i8imm:$src2), - "test{b}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (and_su GR8:$src1, imm:$src2), 0))]>; -def TEST16ri : Ii16<0xF7, MRM0r, // flags = GR16 & imm16 - (outs), (ins GR16:$src1, i16imm:$src2), - "test{w}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (and_su GR16:$src1, imm:$src2), 0))]>, - OpSize; -def TEST32ri : Ii32<0xF7, MRM0r, // flags = GR32 & imm32 - (outs), (ins GR32:$src1, i32imm:$src2), - "test{l}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (and_su GR32:$src1, imm:$src2), 0))]>; - -def TEST8mi : Ii8 <0xF6, MRM0m, // flags = [mem8] & imm8 - (outs), (ins i8mem:$src1, i8imm:$src2), - "test{b}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (and (loadi8 addr:$src1), imm:$src2), - 0))]>; -def TEST16mi : Ii16<0xF7, MRM0m, // flags = [mem16] & imm16 - (outs), (ins i16mem:$src1, i16imm:$src2), - "test{w}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (and (loadi16 addr:$src1), imm:$src2), - 0))]>, OpSize; -def TEST32mi : Ii32<0xF7, MRM0m, // flags = [mem32] & imm32 - (outs), (ins i32mem:$src1, i32imm:$src2), - "test{l}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (and (loadi32 addr:$src1), imm:$src2), - 0))]>; -} // Defs = [EFLAGS] - // Condition code ops, incl. set if equal/not equal/... let Defs = [EFLAGS], Uses = [AH], neverHasSideEffects = 1 in @@ -3374,305 +925,10 @@ def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", []>; // flags = AH let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>; // AH = flags -let Uses = [EFLAGS] in { -// Use sbb to materialize carry bit. -let Defs = [EFLAGS], isCodeGenOnly = 1 in { -// FIXME: These are pseudo ops that should be replaced with Pat<> patterns. -// However, Pat<> can't replicate the destination reg into the inputs of the -// result. -// FIXME: Change these to have encoding Pseudo when X86MCCodeEmitter replaces -// X86CodeEmitter. 
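The SETB_C pseudo-instructions that follow rely on the "use sbb to materialize carry bit" idiom named in the comment above: subtracting a register from itself with borrow yields 0 or all-ones depending on CF. A short C++ sketch, with the function name and the bool-for-CF parameter assumed for the example:

#include <cstdint>

// sbb %eax, %eax computes eax - eax - CF, i.e. 0 - CF: all-ones when the
// carry flag is set, zero otherwise, matching X86setcc_c(X86_COND_B).
uint32_t setb_c32(bool cf) {
  return 0u - (cf ? 1u : 0u);   // 0x00000000 or 0xFFFFFFFF
}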
-def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), "", - [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; -def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), "", - [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>, - OpSize; -def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), "", - [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; -} // isCodeGenOnly - -def SETEr : I<0x94, MRM0r, - (outs GR8 :$dst), (ins), - "sete\t$dst", - [(set GR8:$dst, (X86setcc X86_COND_E, EFLAGS))]>, - TB; // GR8 = == -def SETEm : I<0x94, MRM0m, - (outs), (ins i8mem:$dst), - "sete\t$dst", - [(store (X86setcc X86_COND_E, EFLAGS), addr:$dst)]>, - TB; // [mem8] = == - -def SETNEr : I<0x95, MRM0r, - (outs GR8 :$dst), (ins), - "setne\t$dst", - [(set GR8:$dst, (X86setcc X86_COND_NE, EFLAGS))]>, - TB; // GR8 = != -def SETNEm : I<0x95, MRM0m, - (outs), (ins i8mem:$dst), - "setne\t$dst", - [(store (X86setcc X86_COND_NE, EFLAGS), addr:$dst)]>, - TB; // [mem8] = != - -def SETLr : I<0x9C, MRM0r, - (outs GR8 :$dst), (ins), - "setl\t$dst", - [(set GR8:$dst, (X86setcc X86_COND_L, EFLAGS))]>, - TB; // GR8 = < signed -def SETLm : I<0x9C, MRM0m, - (outs), (ins i8mem:$dst), - "setl\t$dst", - [(store (X86setcc X86_COND_L, EFLAGS), addr:$dst)]>, - TB; // [mem8] = < signed - -def SETGEr : I<0x9D, MRM0r, - (outs GR8 :$dst), (ins), - "setge\t$dst", - [(set GR8:$dst, (X86setcc X86_COND_GE, EFLAGS))]>, - TB; // GR8 = >= signed -def SETGEm : I<0x9D, MRM0m, - (outs), (ins i8mem:$dst), - "setge\t$dst", - [(store (X86setcc X86_COND_GE, EFLAGS), addr:$dst)]>, - TB; // [mem8] = >= signed - -def SETLEr : I<0x9E, MRM0r, - (outs GR8 :$dst), (ins), - "setle\t$dst", - [(set GR8:$dst, (X86setcc X86_COND_LE, EFLAGS))]>, - TB; // GR8 = <= signed -def SETLEm : I<0x9E, MRM0m, - (outs), (ins i8mem:$dst), - "setle\t$dst", - [(store (X86setcc X86_COND_LE, EFLAGS), addr:$dst)]>, - TB; // [mem8] = <= signed - -def SETGr : I<0x9F, MRM0r, - (outs GR8 :$dst), (ins), - "setg\t$dst", - [(set GR8:$dst, (X86setcc X86_COND_G, EFLAGS))]>, - TB; // GR8 = > signed -def SETGm : I<0x9F, MRM0m, - (outs), (ins i8mem:$dst), - "setg\t$dst", - [(store (X86setcc X86_COND_G, EFLAGS), addr:$dst)]>, - TB; // [mem8] = > signed - -def SETBr : I<0x92, MRM0r, - (outs GR8 :$dst), (ins), - "setb\t$dst", - [(set GR8:$dst, (X86setcc X86_COND_B, EFLAGS))]>, - TB; // GR8 = < unsign -def SETBm : I<0x92, MRM0m, - (outs), (ins i8mem:$dst), - "setb\t$dst", - [(store (X86setcc X86_COND_B, EFLAGS), addr:$dst)]>, - TB; // [mem8] = < unsign - -def SETAEr : I<0x93, MRM0r, - (outs GR8 :$dst), (ins), - "setae\t$dst", - [(set GR8:$dst, (X86setcc X86_COND_AE, EFLAGS))]>, - TB; // GR8 = >= unsign -def SETAEm : I<0x93, MRM0m, - (outs), (ins i8mem:$dst), - "setae\t$dst", - [(store (X86setcc X86_COND_AE, EFLAGS), addr:$dst)]>, - TB; // [mem8] = >= unsign - -def SETBEr : I<0x96, MRM0r, - (outs GR8 :$dst), (ins), - "setbe\t$dst", - [(set GR8:$dst, (X86setcc X86_COND_BE, EFLAGS))]>, - TB; // GR8 = <= unsign -def SETBEm : I<0x96, MRM0m, - (outs), (ins i8mem:$dst), - "setbe\t$dst", - [(store (X86setcc X86_COND_BE, EFLAGS), addr:$dst)]>, - TB; // [mem8] = <= unsign - -def SETAr : I<0x97, MRM0r, - (outs GR8 :$dst), (ins), - "seta\t$dst", - [(set GR8:$dst, (X86setcc X86_COND_A, EFLAGS))]>, - TB; // GR8 = > signed -def SETAm : I<0x97, MRM0m, - (outs), (ins i8mem:$dst), - "seta\t$dst", - [(store (X86setcc X86_COND_A, EFLAGS), addr:$dst)]>, - TB; // [mem8] = > signed - -def SETSr : I<0x98, MRM0r, - (outs GR8 :$dst), (ins), - "sets\t$dst", - [(set GR8:$dst, (X86setcc X86_COND_S, 
EFLAGS))]>, - TB; // GR8 = <sign bit> -def SETSm : I<0x98, MRM0m, - (outs), (ins i8mem:$dst), - "sets\t$dst", - [(store (X86setcc X86_COND_S, EFLAGS), addr:$dst)]>, - TB; // [mem8] = <sign bit> -def SETNSr : I<0x99, MRM0r, - (outs GR8 :$dst), (ins), - "setns\t$dst", - [(set GR8:$dst, (X86setcc X86_COND_NS, EFLAGS))]>, - TB; // GR8 = !<sign bit> -def SETNSm : I<0x99, MRM0m, - (outs), (ins i8mem:$dst), - "setns\t$dst", - [(store (X86setcc X86_COND_NS, EFLAGS), addr:$dst)]>, - TB; // [mem8] = !<sign bit> - -def SETPr : I<0x9A, MRM0r, - (outs GR8 :$dst), (ins), - "setp\t$dst", - [(set GR8:$dst, (X86setcc X86_COND_P, EFLAGS))]>, - TB; // GR8 = parity -def SETPm : I<0x9A, MRM0m, - (outs), (ins i8mem:$dst), - "setp\t$dst", - [(store (X86setcc X86_COND_P, EFLAGS), addr:$dst)]>, - TB; // [mem8] = parity -def SETNPr : I<0x9B, MRM0r, - (outs GR8 :$dst), (ins), - "setnp\t$dst", - [(set GR8:$dst, (X86setcc X86_COND_NP, EFLAGS))]>, - TB; // GR8 = not parity -def SETNPm : I<0x9B, MRM0m, - (outs), (ins i8mem:$dst), - "setnp\t$dst", - [(store (X86setcc X86_COND_NP, EFLAGS), addr:$dst)]>, - TB; // [mem8] = not parity - -def SETOr : I<0x90, MRM0r, - (outs GR8 :$dst), (ins), - "seto\t$dst", - [(set GR8:$dst, (X86setcc X86_COND_O, EFLAGS))]>, - TB; // GR8 = overflow -def SETOm : I<0x90, MRM0m, - (outs), (ins i8mem:$dst), - "seto\t$dst", - [(store (X86setcc X86_COND_O, EFLAGS), addr:$dst)]>, - TB; // [mem8] = overflow -def SETNOr : I<0x91, MRM0r, - (outs GR8 :$dst), (ins), - "setno\t$dst", - [(set GR8:$dst, (X86setcc X86_COND_NO, EFLAGS))]>, - TB; // GR8 = not overflow -def SETNOm : I<0x91, MRM0m, - (outs), (ins i8mem:$dst), - "setno\t$dst", - [(store (X86setcc X86_COND_NO, EFLAGS), addr:$dst)]>, - TB; // [mem8] = not overflow -} // Uses = [EFLAGS] - - -// Integer comparisons -let Defs = [EFLAGS] in { -def CMP8i8 : Ii8<0x3C, RawFrm, (outs), (ins i8imm:$src), - "cmp{b}\t{$src, %al|%al, $src}", []>; -def CMP16i16 : Ii16<0x3D, RawFrm, (outs), (ins i16imm:$src), - "cmp{w}\t{$src, %ax|%ax, $src}", []>, OpSize; -def CMP32i32 : Ii32<0x3D, RawFrm, (outs), (ins i32imm:$src), - "cmp{l}\t{$src, %eax|%eax, $src}", []>; - -def CMP8rr : I<0x38, MRMDestReg, - (outs), (ins GR8 :$src1, GR8 :$src2), - "cmp{b}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp GR8:$src1, GR8:$src2))]>; -def CMP16rr : I<0x39, MRMDestReg, - (outs), (ins GR16:$src1, GR16:$src2), - "cmp{w}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp GR16:$src1, GR16:$src2))]>, OpSize; -def CMP32rr : I<0x39, MRMDestReg, - (outs), (ins GR32:$src1, GR32:$src2), - "cmp{l}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp GR32:$src1, GR32:$src2))]>; -def CMP8mr : I<0x38, MRMDestMem, - (outs), (ins i8mem :$src1, GR8 :$src2), - "cmp{b}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (loadi8 addr:$src1), GR8:$src2))]>; -def CMP16mr : I<0x39, MRMDestMem, - (outs), (ins i16mem:$src1, GR16:$src2), - "cmp{w}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (loadi16 addr:$src1), GR16:$src2))]>, - OpSize; -def CMP32mr : I<0x39, MRMDestMem, - (outs), (ins i32mem:$src1, GR32:$src2), - "cmp{l}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (loadi32 addr:$src1), GR32:$src2))]>; -def CMP8rm : I<0x3A, MRMSrcMem, - (outs), (ins GR8 :$src1, i8mem :$src2), - "cmp{b}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp GR8:$src1, (loadi8 addr:$src2)))]>; -def CMP16rm : I<0x3B, MRMSrcMem, - (outs), (ins GR16:$src1, i16mem:$src2), - "cmp{w}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp GR16:$src1, (loadi16 addr:$src2)))]>, - OpSize; -def 
CMP32rm : I<0x3B, MRMSrcMem, - (outs), (ins GR32:$src1, i32mem:$src2), - "cmp{l}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp GR32:$src1, (loadi32 addr:$src2)))]>; - -// These are alternate spellings for use by the disassembler, we mark them as -// code gen only to ensure they aren't matched by the assembler. -let isCodeGenOnly = 1 in { - def CMP8rr_alt : I<0x3A, MRMSrcReg, (outs), (ins GR8:$src1, GR8:$src2), - "cmp{b}\t{$src2, $src1|$src1, $src2}", []>; - def CMP16rr_alt : I<0x3B, MRMSrcReg, (outs), (ins GR16:$src1, GR16:$src2), - "cmp{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize; - def CMP32rr_alt : I<0x3B, MRMSrcReg, (outs), (ins GR32:$src1, GR32:$src2), - "cmp{l}\t{$src2, $src1|$src1, $src2}", []>; -} -def CMP8ri : Ii8<0x80, MRM7r, - (outs), (ins GR8:$src1, i8imm:$src2), - "cmp{b}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp GR8:$src1, imm:$src2))]>; -def CMP16ri : Ii16<0x81, MRM7r, - (outs), (ins GR16:$src1, i16imm:$src2), - "cmp{w}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp GR16:$src1, imm:$src2))]>, OpSize; -def CMP32ri : Ii32<0x81, MRM7r, - (outs), (ins GR32:$src1, i32imm:$src2), - "cmp{l}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp GR32:$src1, imm:$src2))]>; -def CMP8mi : Ii8 <0x80, MRM7m, - (outs), (ins i8mem :$src1, i8imm :$src2), - "cmp{b}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (loadi8 addr:$src1), imm:$src2))]>; -def CMP16mi : Ii16<0x81, MRM7m, - (outs), (ins i16mem:$src1, i16imm:$src2), - "cmp{w}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (loadi16 addr:$src1), imm:$src2))]>, - OpSize; -def CMP32mi : Ii32<0x81, MRM7m, - (outs), (ins i32mem:$src1, i32imm:$src2), - "cmp{l}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (loadi32 addr:$src1), imm:$src2))]>; -def CMP16ri8 : Ii8<0x83, MRM7r, - (outs), (ins GR16:$src1, i16i8imm:$src2), - "cmp{w}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp GR16:$src1, i16immSExt8:$src2))]>, - OpSize; -def CMP16mi8 : Ii8<0x83, MRM7m, - (outs), (ins i16mem:$src1, i16i8imm:$src2), - "cmp{w}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (loadi16 addr:$src1), - i16immSExt8:$src2))]>, OpSize; -def CMP32mi8 : Ii8<0x83, MRM7m, - (outs), (ins i32mem:$src1, i32i8imm:$src2), - "cmp{l}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (loadi32 addr:$src1), - i32immSExt8:$src2))]>; -def CMP32ri8 : Ii8<0x83, MRM7r, - (outs), (ins GR32:$src1, i32i8imm:$src2), - "cmp{l}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp GR32:$src1, i32immSExt8:$src2))]>; -} // Defs = [EFLAGS] +//===----------------------------------------------------------------------===// +// Bit tests instructions: BT, BTS, BTR, BTC. -// Bit tests. -// TODO: BTC, BTR, and BTS let Defs = [EFLAGS] in { def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", @@ -3680,6 +936,9 @@ def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), "bt{l}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR32:$src1, GR32:$src2))]>, TB; +def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), + "bt{q}\t{$src2, $src1|$src1, $src2}", + [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))]>, TB; // Unlike with the register+register form, the memory+register form of the // bt instruction does not ignore the high bits of the index. From ISel's @@ -3687,17 +946,23 @@ def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), // only for now. 
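The new 64-bit BT forms added in the hunks below mirror the 16/32-bit ones, and the comment above notes that the memory+register form does not ignore the high bits of the index the way the register form does. A hedged C++ sketch of the two behaviors, with helper names and a non-negative index assumed for the example (this is not the actual lowering):

#include <cstdint>

// Register form (BT64rr): the bit index is taken modulo the operand width,
// so only idx % 64 matters.
bool bt64_reg(uint64_t src1, uint64_t idx) {
  return (src1 >> (idx & 63)) & 1;
}

// Memory form (BT64mr): the index is not masked; the addressed bit lives at
// base + idx/8 and can fall well outside the nominal operand, which is why
// the load patterns stay commented out and gated on FastBTMem.
bool bt64_mem(const uint8_t *base, uint64_t idx) {
  return (base[idx / 8] >> (idx % 8)) & 1;
}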
def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
-               "bt{w}\t{$src2, $src1|$src1, $src2}", 
+               "bt{w}\t{$src2, $src1|$src1, $src2}",
//               [(X86bt (loadi16 addr:$src1), GR16:$src2),
//                (implicit EFLAGS)]
                []
                >, OpSize, TB, Requires<[FastBTMem]>;
def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
-               "bt{l}\t{$src2, $src1|$src1, $src2}", 
+               "bt{l}\t{$src2, $src1|$src1, $src2}",
//               [(X86bt (loadi32 addr:$src1), GR32:$src2),
//                (implicit EFLAGS)]
                []
                >, TB, Requires<[FastBTMem]>;
+def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+               "bt{q}\t{$src2, $src1|$src1, $src2}",
+//               [(X86bt (loadi64 addr:$src1), GR64:$src2),
+//                (implicit EFLAGS)]
+                []
+                >, TB;
def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
                "bt{w}\t{$src2, $src1|$src1, $src2}",
@@ -3706,6 +971,10 @@ def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
def BT32ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR32:$src1, i32i8imm:$src2),
                "bt{l}\t{$src2, $src1|$src1, $src2}",
                [(set EFLAGS, (X86bt GR32:$src1, i32immSExt8:$src2))]>, TB;
+def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+                "bt{q}\t{$src2, $src1|$src1, $src2}",
+                [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB;
+
// Note that these instructions don't need FastBTMem because that
// only applies when the other operand is in a register. When it's
// an immediate, bt is still fast.
@@ -3717,307 +986,129 @@ def BT32mi8 : Ii8<0xBA, MRM4m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
                "bt{l}\t{$src2, $src1|$src1, $src2}",
                [(set EFLAGS, (X86bt (loadi32 addr:$src1), i32immSExt8:$src2))
                ]>, TB;
+def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+                "bt{q}\t{$src2, $src1|$src1, $src2}",
+                [(set EFLAGS, (X86bt (loadi64 addr:$src1),
+                                     i64immSExt8:$src2))]>, TB;
+
def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
                "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTC32rr : I<0xBB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
                "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+                 "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
                "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTC32mr : I<0xBB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
                "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+                 "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTC16ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR16:$src1, i16i8imm:$src2),
                    "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTC32ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR32:$src1, i32i8imm:$src2),
                    "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+                    "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
                    "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
                    "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+                    "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTR16rr : I<0xB3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
                "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTR32rr : I<0xB3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
                "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+                 "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
                "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTR32mr : I<0xB3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
                "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+                 "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTR16ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR16:$src1, i16i8imm:$src2),
                    "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTR32ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR32:$src1, i32i8imm:$src2),
                    "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+                    "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
                    "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTR32mi8 : Ii8<0xBA, MRM6m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
                    "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+                    "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTS16rr : I<0xAB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
                "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTS32rr : I<0xAB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
                "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+                 "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
                "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTS32mr : I<0xAB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
                "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+                 "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTS16ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR16:$src1, i16i8imm:$src2),
                    "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTS32ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR32:$src1, i32i8imm:$src2),
                    "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+                    "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
                    "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
                    "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+                    "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
} // Defs = [EFLAGS]
-// Sign/Zero extenders
-// Use movsbl intead of movsbw; we don't care about the high 16 bits
-// of the register here. This has a smaller encoding and avoids a
-// partial-register update. Actual movsbw included for the disassembler.
-def MOVSX16rr8W : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), - "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; -def MOVSX16rm8W : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), - "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; -def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src), - "", [(set GR16:$dst, (sext GR8:$src))]>, TB; -def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src), - "", [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB; -def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src), - "movs{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (sext GR8:$src))]>, TB; -def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src), - "movs{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (sextloadi32i8 addr:$src))]>, TB; -def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src), - "movs{wl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (sext GR16:$src))]>, TB; -def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), - "movs{wl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB; - -// Use movzbl intead of movzbw; we don't care about the high 16 bits -// of the register here. This has a smaller encoding and avoids a -// partial-register update. Actual movzbw included for the disassembler. -def MOVZX16rr8W : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), - "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; -def MOVZX16rm8W : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), - "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; -def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src), - "", [(set GR16:$dst, (zext GR8:$src))]>, TB; -def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src), - "", [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB; -def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src), - "movz{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (zext GR8:$src))]>, TB; -def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src), - "movz{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (zextloadi32i8 addr:$src))]>, TB; -def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src), - "movz{wl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (zext GR16:$src))]>, TB; -def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), - "movz{wl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB; - -// These are the same as the regular MOVZX32rr8 and MOVZX32rm8 -// except that they use GR32_NOREX for the output operand register class -// instead of GR32. This allows them to operate on h registers on x86-64. 
-def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg, - (outs GR32_NOREX:$dst), (ins GR8:$src), - "movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX", - []>, TB; -let mayLoad = 1 in -def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem, - (outs GR32_NOREX:$dst), (ins i8mem:$src), - "movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX", - []>, TB; - -let neverHasSideEffects = 1 in { - let Defs = [AX], Uses = [AL] in - def CBW : I<0x98, RawFrm, (outs), (ins), - "{cbtw|cbw}", []>, OpSize; // AX = signext(AL) - let Defs = [EAX], Uses = [AX] in - def CWDE : I<0x98, RawFrm, (outs), (ins), - "{cwtl|cwde}", []>; // EAX = signext(AX) - - let Defs = [AX,DX], Uses = [AX] in - def CWD : I<0x99, RawFrm, (outs), (ins), - "{cwtd|cwd}", []>, OpSize; // DX:AX = signext(AX) - let Defs = [EAX,EDX], Uses = [EAX] in - def CDQ : I<0x99, RawFrm, (outs), (ins), - "{cltd|cdq}", []>; // EDX:EAX = signext(EAX) -} - -//===----------------------------------------------------------------------===// -// Alias Instructions -//===----------------------------------------------------------------------===// - -// Alias instructions that map movr0 to xor. -// FIXME: remove when we can teach regalloc that xor reg, reg is ok. -// FIXME: Set encoding to pseudo. -let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1, - isCodeGenOnly = 1 in { -def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "", - [(set GR8:$dst, 0)]>; - -// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller -// encoding and avoids a partial-register update sometimes, but doing so -// at isel time interferes with rematerialization in the current register -// allocator. For now, this is rewritten when the instruction is lowered -// to an MCInst. -def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins), - "", - [(set GR16:$dst, 0)]>, OpSize; - -// FIXME: Set encoding to pseudo. -def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "", - [(set GR32:$dst, 0)]>; -} - -//===----------------------------------------------------------------------===// -// Thread Local Storage Instructions -// - -// ELF TLS Support -// All calls clobber the non-callee saved registers. ESP is marked as -// a use to prevent stack-pointer assignments that appear immediately -// before calls from potentially appearing dead. -let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, - MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, - XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], - Uses = [ESP] in -def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), - "leal\t$sym, %eax; " - "call\t___tls_get_addr@PLT", - [(X86tlsaddr tls32addr:$sym)]>, - Requires<[In32BitMode]>; - -// Darwin TLS Support -// For i386, the address of the thunk is passed on the stack, on return the -// address of the variable is in %eax. %ecx is trashed during the function -// call. All other registers are preserved. 
-let Defs = [EAX, ECX], - Uses = [ESP], - usesCustomInserter = 1 in -def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym), - "# TLSCall_32", - [(X86TLSCall addr:$sym)]>, - Requires<[In32BitMode]>; - -let AddedComplexity = 5, isCodeGenOnly = 1 in -def GS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), - "movl\t%gs:$src, $dst", - [(set GR32:$dst, (gsload addr:$src))]>, SegGS; - -let AddedComplexity = 5, isCodeGenOnly = 1 in -def FS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), - "movl\t%fs:$src, $dst", - [(set GR32:$dst, (fsload addr:$src))]>, SegFS; - -//===----------------------------------------------------------------------===// -// EH Pseudo Instructions -// -let isTerminator = 1, isReturn = 1, isBarrier = 1, - hasCtrlDep = 1, isCodeGenOnly = 1 in { -def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr), - "ret\t#eh_return, addr: $addr", - [(X86ehret GR32:$addr)]>; - -} //===----------------------------------------------------------------------===// // Atomic support // -// Memory barriers - -// TODO: Get this to fold the constant into the instruction. -def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero), - "lock\n\t" - "or{l}\t{$zero, $dst|$dst, $zero}", - []>, Requires<[In32BitMode]>, LOCK; - -let hasSideEffects = 1 in { -def Int_MemBarrier : I<0, Pseudo, (outs), (ins), - "#MEMBARRIER", - [(X86MemBarrier)]>, Requires<[HasSSE2]>; -} // Atomic swap. These are just normal xchg instructions. But since a memory // operand is referenced, the atomicity is ensured. let Constraints = "$val = $dst" in { -def XCHG32rm : I<0x87, MRMSrcMem, (outs GR32:$dst), - (ins GR32:$val, i32mem:$ptr), - "xchg{l}\t{$val, $ptr|$ptr, $val}", - [(set GR32:$dst, (atomic_swap_32 addr:$ptr, GR32:$val))]>; -def XCHG16rm : I<0x87, MRMSrcMem, (outs GR16:$dst), - (ins GR16:$val, i16mem:$ptr), - "xchg{w}\t{$val, $ptr|$ptr, $val}", - [(set GR16:$dst, (atomic_swap_16 addr:$ptr, GR16:$val))]>, - OpSize; def XCHG8rm : I<0x86, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr), - "xchg{b}\t{$val, $ptr|$ptr, $val}", + "xchg{b}\t{$val, $ptr|$ptr, $val}", [(set GR8:$dst, (atomic_swap_8 addr:$ptr, GR8:$val))]>; +def XCHG16rm : I<0x87, MRMSrcMem, (outs GR16:$dst),(ins GR16:$val, i16mem:$ptr), + "xchg{w}\t{$val, $ptr|$ptr, $val}", + [(set GR16:$dst, (atomic_swap_16 addr:$ptr, GR16:$val))]>, + OpSize; +def XCHG32rm : I<0x87, MRMSrcMem, (outs GR32:$dst),(ins GR32:$val, i32mem:$ptr), + "xchg{l}\t{$val, $ptr|$ptr, $val}", + [(set GR32:$dst, (atomic_swap_32 addr:$ptr, GR32:$val))]>; +def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst),(ins GR64:$val,i64mem:$ptr), + "xchg{q}\t{$val, $ptr|$ptr, $val}", + [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))]>; -def XCHG32rr : I<0x87, MRMSrcReg, (outs GR32:$dst), (ins GR32:$val, GR32:$src), - "xchg{l}\t{$val, $src|$src, $val}", []>; -def XCHG16rr : I<0x87, MRMSrcReg, (outs GR16:$dst), (ins GR16:$val, GR16:$src), - "xchg{w}\t{$val, $src|$src, $val}", []>, OpSize; def XCHG8rr : I<0x86, MRMSrcReg, (outs GR8:$dst), (ins GR8:$val, GR8:$src), "xchg{b}\t{$val, $src|$src, $val}", []>; +def XCHG16rr : I<0x87, MRMSrcReg, (outs GR16:$dst), (ins GR16:$val, GR16:$src), + "xchg{w}\t{$val, $src|$src, $val}", []>, OpSize; +def XCHG32rr : I<0x87, MRMSrcReg, (outs GR32:$dst), (ins GR32:$val, GR32:$src), + "xchg{l}\t{$val, $src|$src, $val}", []>; +def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src), + "xchg{q}\t{$val, $src|$src, $val}", []>; } def XCHG16ar : I<0x90, AddRegFrm, (outs), (ins GR16:$src), 
"xchg{w}\t{$src, %ax|%ax, $src}", []>, OpSize; def XCHG32ar : I<0x90, AddRegFrm, (outs), (ins GR32:$src), "xchg{l}\t{$src, %eax|%eax, $src}", []>; +def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src), + "xchg{q}\t{$src, %rax|%rax, $src}", []>; -// Atomic compare and swap. -let Defs = [EAX, EFLAGS], Uses = [EAX] in { -def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap), - "lock\n\t" - "cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}", - [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK; -} -let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in { -def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr), - "lock\n\t" - "cmpxchg8b\t$ptr", - [(X86cas8 addr:$ptr)]>, TB, LOCK; -} - -let Defs = [AX, EFLAGS], Uses = [AX] in { -def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap), - "lock\n\t" - "cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}", - [(X86cas addr:$ptr, GR16:$swap, 2)]>, TB, OpSize, LOCK; -} -let Defs = [AL, EFLAGS], Uses = [AL] in { -def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap), - "lock\n\t" - "cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}", - [(X86cas addr:$ptr, GR8:$swap, 1)]>, TB, LOCK; -} -// Atomic exchange and add -let Constraints = "$val = $dst", Defs = [EFLAGS] in { -def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr), - "lock\n\t" - "xadd{l}\t{$val, $ptr|$ptr, $val}", - [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>, - TB, LOCK; -def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins GR16:$val, i16mem:$ptr), - "lock\n\t" - "xadd{w}\t{$val, $ptr|$ptr, $val}", - [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>, - TB, OpSize, LOCK; -def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr), - "lock\n\t" - "xadd{b}\t{$val, $ptr|$ptr, $val}", - [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>, - TB, LOCK; -} def XADD8rr : I<0xC0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src), "xadd{b}\t{$src, $dst|$dst, $src}", []>, TB; @@ -4025,6 +1116,8 @@ def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), "xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; def XADD32rr : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB; +def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), + "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB; let mayLoad = 1, mayStore = 1 in { def XADD8rm : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src), @@ -4033,6 +1126,9 @@ def XADD16rm : I<0xC1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), "xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; def XADD32rm : I<0xC1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB; +def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), + "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB; + } def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src), @@ -4041,6 +1137,8 @@ def CMPXCHG16rr : I<0xB1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), "cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; def CMPXCHG32rr : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB; +def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), + "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB; let mayLoad = 1, mayStore = 1 in { def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src), @@ -4049,284 +1147,29 @@ def CMPXCHG16rm : I<0xB1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), 
"cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; def CMPXCHG32rm : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB; +def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), + "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB; } let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst), "cmpxchg8b\t$dst", []>, TB; -// Optimized codegen when the non-memory output is not used. -// FIXME: Use normal add / sub instructions and add lock prefix dynamically. -let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in { -def LOCK_ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), - "lock\n\t" - "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK; -def LOCK_ADD16mr : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), - "lock\n\t" - "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK; -def LOCK_ADD32mr : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), - "lock\n\t" - "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; -def LOCK_ADD8mi : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2), - "lock\n\t" - "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK; -def LOCK_ADD16mi : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2), - "lock\n\t" - "add{w}\t{$src2, $dst|$dst, $src2}", []>, LOCK; -def LOCK_ADD32mi : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2), - "lock\n\t" - "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; -def LOCK_ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2), - "lock\n\t" - "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK; -def LOCK_ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2), - "lock\n\t" - "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; - -def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), - "lock\n\t" - "inc{b}\t$dst", []>, LOCK; -def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), - "lock\n\t" - "inc{w}\t$dst", []>, OpSize, LOCK; -def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), - "lock\n\t" - "inc{l}\t$dst", []>, LOCK; - -def LOCK_SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2), - "lock\n\t" - "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK; -def LOCK_SUB16mr : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), - "lock\n\t" - "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK; -def LOCK_SUB32mr : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), - "lock\n\t" - "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; -def LOCK_SUB8mi : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2), - "lock\n\t" - "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK; -def LOCK_SUB16mi : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2), - "lock\n\t" - "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK; -def LOCK_SUB32mi : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2), - "lock\n\t" - "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; -def LOCK_SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2), - "lock\n\t" - "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK; -def LOCK_SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2), - "lock\n\t" - "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; - -def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), - "lock\n\t" - "dec{b}\t$dst", []>, LOCK; -def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), - "lock\n\t" - "dec{w}\t$dst", []>, OpSize, LOCK; -def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins 
i32mem:$dst), - "lock\n\t" - "dec{l}\t$dst", []>, LOCK; -} +let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in +def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst), + "cmpxchg16b\t$dst", []>, TB; -// Atomic exchange, and, or, xor -let Constraints = "$val = $dst", Defs = [EFLAGS], - usesCustomInserter = 1 in { -def ATOMAND32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), - "#ATOMAND32 PSEUDO!", - [(set GR32:$dst, (atomic_load_and_32 addr:$ptr, GR32:$val))]>; -def ATOMOR32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), - "#ATOMOR32 PSEUDO!", - [(set GR32:$dst, (atomic_load_or_32 addr:$ptr, GR32:$val))]>; -def ATOMXOR32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), - "#ATOMXOR32 PSEUDO!", - [(set GR32:$dst, (atomic_load_xor_32 addr:$ptr, GR32:$val))]>; -def ATOMNAND32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), - "#ATOMNAND32 PSEUDO!", - [(set GR32:$dst, (atomic_load_nand_32 addr:$ptr, GR32:$val))]>; -def ATOMMIN32: I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val), - "#ATOMMIN32 PSEUDO!", - [(set GR32:$dst, (atomic_load_min_32 addr:$ptr, GR32:$val))]>; -def ATOMMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), - "#ATOMMAX32 PSEUDO!", - [(set GR32:$dst, (atomic_load_max_32 addr:$ptr, GR32:$val))]>; -def ATOMUMIN32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), - "#ATOMUMIN32 PSEUDO!", - [(set GR32:$dst, (atomic_load_umin_32 addr:$ptr, GR32:$val))]>; -def ATOMUMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), - "#ATOMUMAX32 PSEUDO!", - [(set GR32:$dst, (atomic_load_umax_32 addr:$ptr, GR32:$val))]>; - -def ATOMAND16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), - "#ATOMAND16 PSEUDO!", - [(set GR16:$dst, (atomic_load_and_16 addr:$ptr, GR16:$val))]>; -def ATOMOR16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), - "#ATOMOR16 PSEUDO!", - [(set GR16:$dst, (atomic_load_or_16 addr:$ptr, GR16:$val))]>; -def ATOMXOR16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), - "#ATOMXOR16 PSEUDO!", - [(set GR16:$dst, (atomic_load_xor_16 addr:$ptr, GR16:$val))]>; -def ATOMNAND16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), - "#ATOMNAND16 PSEUDO!", - [(set GR16:$dst, (atomic_load_nand_16 addr:$ptr, GR16:$val))]>; -def ATOMMIN16: I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val), - "#ATOMMIN16 PSEUDO!", - [(set GR16:$dst, (atomic_load_min_16 addr:$ptr, GR16:$val))]>; -def ATOMMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), - "#ATOMMAX16 PSEUDO!", - [(set GR16:$dst, (atomic_load_max_16 addr:$ptr, GR16:$val))]>; -def ATOMUMIN16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), - "#ATOMUMIN16 PSEUDO!", - [(set GR16:$dst, (atomic_load_umin_16 addr:$ptr, GR16:$val))]>; -def ATOMUMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), - "#ATOMUMAX16 PSEUDO!", - [(set GR16:$dst, (atomic_load_umax_16 addr:$ptr, GR16:$val))]>; - -def ATOMAND8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val), - "#ATOMAND8 PSEUDO!", - [(set GR8:$dst, (atomic_load_and_8 addr:$ptr, GR8:$val))]>; -def ATOMOR8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val), - "#ATOMOR8 PSEUDO!", - [(set GR8:$dst, (atomic_load_or_8 addr:$ptr, GR8:$val))]>; -def ATOMXOR8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val), - "#ATOMXOR8 PSEUDO!", - [(set GR8:$dst, (atomic_load_xor_8 addr:$ptr, GR8:$val))]>; -def ATOMNAND8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val), - "#ATOMNAND8 
PSEUDO!", - [(set GR8:$dst, (atomic_load_nand_8 addr:$ptr, GR8:$val))]>; -} -let Constraints = "$val1 = $dst1, $val2 = $dst2", - Defs = [EFLAGS, EAX, EBX, ECX, EDX], - Uses = [EAX, EBX, ECX, EDX], - mayLoad = 1, mayStore = 1, - usesCustomInserter = 1 in { -def ATOMAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - "#ATOMAND6432 PSEUDO!", []>; -def ATOMOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - "#ATOMOR6432 PSEUDO!", []>; -def ATOMXOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - "#ATOMXOR6432 PSEUDO!", []>; -def ATOMNAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - "#ATOMNAND6432 PSEUDO!", []>; -def ATOMADD6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - "#ATOMADD6432 PSEUDO!", []>; -def ATOMSUB6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - "#ATOMSUB6432 PSEUDO!", []>; -def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - "#ATOMSWAP6432 PSEUDO!", []>; -} -// Segmentation support instructions. - -def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), - "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; -def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), - "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; - -// i16mem operand in LAR32rm and GR32 operand in LAR32rr is not a typo. -def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), - "lar{l}\t{$src, $dst|$dst, $src}", []>, TB; -def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), - "lar{l}\t{$src, $dst|$dst, $src}", []>, TB; - -def LSL16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), - "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; -def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), - "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; -def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), - "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB; -def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), - "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB; - -def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB; - -def STRr : I<0x00, MRM1r, (outs GR16:$dst), (ins), - "str{w}\t{$dst}", []>, TB; -def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins), - "str{w}\t{$dst}", []>, TB; -def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src), - "ltr{w}\t{$src}", []>, TB; -def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src), - "ltr{w}\t{$src}", []>, TB; - -def PUSHFS16 : I<0xa0, RawFrm, (outs), (ins), - "push{w}\t%fs", []>, OpSize, TB; -def PUSHFS32 : I<0xa0, RawFrm, (outs), (ins), - "push{l}\t%fs", []>, TB; -def PUSHGS16 : I<0xa8, RawFrm, (outs), (ins), - "push{w}\t%gs", []>, OpSize, TB; -def PUSHGS32 : I<0xa8, RawFrm, (outs), (ins), - "push{l}\t%gs", []>, TB; - -def POPFS16 : I<0xa1, RawFrm, (outs), (ins), - "pop{w}\t%fs", []>, OpSize, TB; -def POPFS32 : I<0xa1, RawFrm, (outs), (ins), - "pop{l}\t%fs", []>, TB; -def POPGS16 : I<0xa9, RawFrm, (outs), (ins), - "pop{w}\t%gs", []>, OpSize, TB; -def POPGS32 : I<0xa9, RawFrm, (outs), (ins), - "pop{l}\t%gs", []>, TB; - -def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), - "lds{w}\t{$src, $dst|$dst, $src}", []>, OpSize; -def LDS32rm : I<0xc5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), - 
"lds{l}\t{$src, $dst|$dst, $src}", []>; -def LSS16rm : I<0xb2, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), - "lss{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; -def LSS32rm : I<0xb2, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), - "lss{l}\t{$src, $dst|$dst, $src}", []>, TB; -def LES16rm : I<0xc4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), - "les{w}\t{$src, $dst|$dst, $src}", []>, OpSize; -def LES32rm : I<0xc4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), - "les{l}\t{$src, $dst|$dst, $src}", []>; -def LFS16rm : I<0xb4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), - "lfs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; -def LFS32rm : I<0xb4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), - "lfs{l}\t{$src, $dst|$dst, $src}", []>, TB; -def LGS16rm : I<0xb5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), - "lgs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; -def LGS32rm : I<0xb5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), - "lgs{l}\t{$src, $dst|$dst, $src}", []>, TB; - -def VERRr : I<0x00, MRM4r, (outs), (ins GR16:$seg), - "verr\t$seg", []>, TB; -def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg), - "verr\t$seg", []>, TB; -def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg), - "verw\t$seg", []>, TB; -def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg), - "verw\t$seg", []>, TB; - -// Descriptor-table support instructions - -def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins), - "sgdt\t$dst", []>, TB; -def SIDTm : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins), - "sidt\t$dst", []>, TB; -def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins), - "sldt{w}\t$dst", []>, TB; -def SLDT16m : I<0x00, MRM0m, (outs i16mem:$dst), (ins), - "sldt{w}\t$dst", []>, TB; -def LGDTm : I<0x01, MRM2m, (outs), (ins opaque48mem:$src), - "lgdt\t$src", []>, TB; -def LIDTm : I<0x01, MRM3m, (outs), (ins opaque48mem:$src), - "lidt\t$src", []>, TB; -def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src), - "lldt{w}\t$src", []>, TB; -def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src), - "lldt{w}\t$src", []>, TB; - // Lock instruction prefix def LOCK_PREFIX : I<0xF0, RawFrm, (outs), (ins), "lock", []>; +// Rex64 instruction prefix +def REX64_PREFIX : I<0x48, RawFrm, (outs), (ins), "rex64", []>; + +// Data16 instruction prefix +def DATA16_PREFIX : I<0x66, RawFrm, (outs), (ins), "data16", []>; + // Repeat string operation instruction prefixes // These uses the DF flag in the EFLAGS register to inc or dec ECX let Defs = [ECX], Uses = [ECX,EFLAGS] in { @@ -4336,35 +1179,19 @@ def REP_PREFIX : I<0xF3, RawFrm, (outs), (ins), "rep", []>; def REPNE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "repne", []>; } -// Segment override instruction prefixes -def CS_PREFIX : I<0x2E, RawFrm, (outs), (ins), "cs", []>; -def SS_PREFIX : I<0x36, RawFrm, (outs), (ins), "ss", []>; -def DS_PREFIX : I<0x3E, RawFrm, (outs), (ins), "ds", []>; -def ES_PREFIX : I<0x26, RawFrm, (outs), (ins), "es", []>; -def FS_PREFIX : I<0x64, RawFrm, (outs), (ins), "fs", []>; -def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>; // String manipulation instructions - def LODSB : I<0xAC, RawFrm, (outs), (ins), "lodsb", []>; def LODSW : I<0xAD, RawFrm, (outs), (ins), "lodsw", []>, OpSize; def LODSD : I<0xAD, RawFrm, (outs), (ins), "lods{l|d}", []>; +def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", []>; def OUTSB : I<0x6E, RawFrm, (outs), (ins), "outsb", []>; def OUTSW : I<0x6F, RawFrm, (outs), (ins), "outsw", []>, OpSize; def OUTSD : I<0x6F, RawFrm, (outs), (ins), "outs{l|d}", []>; -// CPU flow control 
instructions - -def HLT : I<0xF4, RawFrm, (outs), (ins), "hlt", []>; -def RSM : I<0xAA, RawFrm, (outs), (ins), "rsm", []>, TB; - -// FPU control instructions - -def FNINIT : I<0xE3, RawFrm, (outs), (ins), "fninit", []>, DB; // Flag instructions - def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", []>; def STC : I<0xF9, RawFrm, (outs), (ins), "stc", []>; def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", []>; @@ -4376,620 +1203,423 @@ def CMC : I<0xF5, RawFrm, (outs), (ins), "cmc", []>; def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", []>, TB; // Table lookup instructions - def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", []>; -// Specialized register support - -def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", []>, TB; -def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", []>, TB; -def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", []>, TB; - -def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins), - "smsw{w}\t$dst", []>, OpSize, TB; -def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins), - "smsw{l}\t$dst", []>, TB; -// For memory operands, there is only a 16-bit form -def SMSW16m : I<0x01, MRM4m, (outs i16mem:$dst), (ins), - "smsw{w}\t$dst", []>, TB; - -def LMSW16r : I<0x01, MRM6r, (outs), (ins GR16:$src), - "lmsw{w}\t$src", []>, TB; -def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src), - "lmsw{w}\t$src", []>, TB; - -def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB; - -// Cache instructions - -def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB; -def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB; - -// VMX instructions - -// 66 0F 38 80 -def INVEPT : I<0x80, RawFrm, (outs), (ins), "invept", []>, OpSize, T8; -// 66 0F 38 81 -def INVVPID : I<0x81, RawFrm, (outs), (ins), "invvpid", []>, OpSize, T8; -// 0F 01 C1 -def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB; -def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), - "vmclear\t$vmcs", []>, OpSize, TB; -// 0F 01 C2 -def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB; -// 0F 01 C3 -def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB; -def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), - "vmptrld\t$vmcs", []>, TB; -def VMPTRSTm : I<0xC7, MRM7m, (outs i64mem:$vmcs), (ins), - "vmptrst\t$vmcs", []>, TB; -def VMREAD64rm : I<0x78, MRMDestMem, (outs i64mem:$dst), (ins GR64:$src), - "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB; -def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), - "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB; -def VMREAD32rm : I<0x78, MRMDestMem, (outs i32mem:$dst), (ins GR32:$src), - "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB; -def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), - "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB; -def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), - "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB; -def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), - "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB; -def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), - "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB; -def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), - "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB; -// 0F 01 C4 -def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB; -def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon), - "vmxon\t{$vmxon}", []>, XS; +// ASCII Adjust After Addition +// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS +def AAA : I<0x37, RawFrm, (outs), 
(ins), "aaa", []>, Requires<[In32BitMode]>; -//===----------------------------------------------------------------------===// -// Non-Instruction Patterns -//===----------------------------------------------------------------------===// +// ASCII Adjust AX Before Division +// sets AL, AH and EFLAGS and uses AL and AH +def AAD8i8 : Ii8<0xD5, RawFrm, (outs), (ins i8imm:$src), + "aad\t$src", []>, Requires<[In32BitMode]>; -// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable -def : Pat<(i32 (X86Wrapper tconstpool :$dst)), (MOV32ri tconstpool :$dst)>; -def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>; -def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>; -def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>; -def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>; -def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>; - -def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)), - (ADD32ri GR32:$src1, tconstpool:$src2)>; -def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)), - (ADD32ri GR32:$src1, tjumptable:$src2)>; -def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)), - (ADD32ri GR32:$src1, tglobaladdr:$src2)>; -def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)), - (ADD32ri GR32:$src1, texternalsym:$src2)>; -def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)), - (ADD32ri GR32:$src1, tblockaddress:$src2)>; - -def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst), - (MOV32mi addr:$dst, tglobaladdr:$src)>; -def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst), - (MOV32mi addr:$dst, texternalsym:$src)>; -def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst), - (MOV32mi addr:$dst, tblockaddress:$src)>; - -// Calls -// tailcall stuff -def : Pat<(X86tcret GR32_TC:$dst, imm:$off), - (TCRETURNri GR32_TC:$dst, imm:$off)>, - Requires<[In32BitMode]>; - -// FIXME: This is disabled for 32-bit PIC mode because the global base -// register which is part of the address mode may be assigned a -// callee-saved register. -def : Pat<(X86tcret (load addr:$dst), imm:$off), - (TCRETURNmi addr:$dst, imm:$off)>, - Requires<[In32BitMode, IsNotPIC]>; - -def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off), - (TCRETURNdi texternalsym:$dst, imm:$off)>, - Requires<[In32BitMode]>; - -def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off), - (TCRETURNdi texternalsym:$dst, imm:$off)>, - Requires<[In32BitMode]>; - -// Normal calls, with various flavors of addresses. -def : Pat<(X86call (i32 tglobaladdr:$dst)), - (CALLpcrel32 tglobaladdr:$dst)>; -def : Pat<(X86call (i32 texternalsym:$dst)), - (CALLpcrel32 texternalsym:$dst)>; -def : Pat<(X86call (i32 imm:$dst)), - (CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>; - -// X86 specific add which produces a flag. 
-def : Pat<(addc GR32:$src1, GR32:$src2), - (ADD32rr GR32:$src1, GR32:$src2)>; -def : Pat<(addc GR32:$src1, (load addr:$src2)), - (ADD32rm GR32:$src1, addr:$src2)>; -def : Pat<(addc GR32:$src1, imm:$src2), - (ADD32ri GR32:$src1, imm:$src2)>; -def : Pat<(addc GR32:$src1, i32immSExt8:$src2), - (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>; - -def : Pat<(subc GR32:$src1, GR32:$src2), - (SUB32rr GR32:$src1, GR32:$src2)>; -def : Pat<(subc GR32:$src1, (load addr:$src2)), - (SUB32rm GR32:$src1, addr:$src2)>; -def : Pat<(subc GR32:$src1, imm:$src2), - (SUB32ri GR32:$src1, imm:$src2)>; -def : Pat<(subc GR32:$src1, i32immSExt8:$src2), - (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>; - -// Comparisons. - -// TEST R,R is smaller than CMP R,0 -def : Pat<(X86cmp GR8:$src1, 0), - (TEST8rr GR8:$src1, GR8:$src1)>; -def : Pat<(X86cmp GR16:$src1, 0), - (TEST16rr GR16:$src1, GR16:$src1)>; -def : Pat<(X86cmp GR32:$src1, 0), - (TEST32rr GR32:$src1, GR32:$src1)>; - -// Conditional moves with folded loads with operands swapped and conditions -// inverted. -def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_B, EFLAGS), - (CMOVAE16rm GR16:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_B, EFLAGS), - (CMOVAE32rm GR32:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_AE, EFLAGS), - (CMOVB16rm GR16:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_AE, EFLAGS), - (CMOVB32rm GR32:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_E, EFLAGS), - (CMOVNE16rm GR16:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_E, EFLAGS), - (CMOVNE32rm GR32:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NE, EFLAGS), - (CMOVE16rm GR16:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NE, EFLAGS), - (CMOVE32rm GR32:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_BE, EFLAGS), - (CMOVA16rm GR16:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_BE, EFLAGS), - (CMOVA32rm GR32:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_A, EFLAGS), - (CMOVBE16rm GR16:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_A, EFLAGS), - (CMOVBE32rm GR32:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_L, EFLAGS), - (CMOVGE16rm GR16:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_L, EFLAGS), - (CMOVGE32rm GR32:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_GE, EFLAGS), - (CMOVL16rm GR16:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_GE, EFLAGS), - (CMOVL32rm GR32:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_LE, EFLAGS), - (CMOVG16rm GR16:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_LE, EFLAGS), - (CMOVG32rm GR32:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_G, EFLAGS), - (CMOVLE16rm GR16:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_G, EFLAGS), - (CMOVLE32rm GR32:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_P, EFLAGS), - (CMOVNP16rm GR16:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_P, EFLAGS), - (CMOVNP32rm 
GR32:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NP, EFLAGS), - (CMOVP16rm GR16:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NP, EFLAGS), - (CMOVP32rm GR32:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_S, EFLAGS), - (CMOVNS16rm GR16:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_S, EFLAGS), - (CMOVNS32rm GR32:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NS, EFLAGS), - (CMOVS16rm GR16:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NS, EFLAGS), - (CMOVS32rm GR32:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_O, EFLAGS), - (CMOVNO16rm GR16:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_O, EFLAGS), - (CMOVNO32rm GR32:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NO, EFLAGS), - (CMOVO16rm GR16:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NO, EFLAGS), - (CMOVO32rm GR32:$src2, addr:$src1)>; - -// zextload bool -> zextload byte -def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>; -def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>; -def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>; - -// extload bool -> extload byte -def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>; -def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>; -def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>; -def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>; -def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>; -def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>; - -// anyext. Define these to do an explicit zero-extend to -// avoid partial-register updates. -def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>; -def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>; - -// Except for i16 -> i32 since isel expect i16 ops to be promoted to i32. -def : Pat<(i32 (anyext GR16:$src)), - (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>; +// ASCII Adjust AX After Multiply +// sets AL, AH and EFLAGS and uses AL +def AAM8i8 : Ii8<0xD4, RawFrm, (outs), (ins i8imm:$src), + "aam\t$src", []>, Requires<[In32BitMode]>; +// ASCII Adjust AL After Subtraction - sets +// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS +def AAS : I<0x3F, RawFrm, (outs), (ins), "aas", []>, Requires<[In32BitMode]>; -//===----------------------------------------------------------------------===// -// Some peepholes -//===----------------------------------------------------------------------===// - -// Odd encoding trick: -128 fits into an 8-bit immediate field while -// +128 doesn't, so in this special case use a sub instead of an add. 
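To spell out the arithmetic behind the patterns that follow: the ri8 instruction forms take an immediate that is sign-extended from 8 bits, i.e. a value in [-128, 127]. +128 is just outside that range while -128 is inside it, and x + 128 == x - (-128), so the add can be selected as the shorter sub form. A restated sketch of the 32-bit register case:

// x + 128  ==  x - (-128); -128 fits the sign-extended 8-bit immediate
// range [-128, 127], +128 does not, so the compact SUB32ri8 form is usable.
def : Pat<(add GR32:$src1, 128),
          (SUB32ri8 GR32:$src1, -128)>;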
-def : Pat<(add GR16:$src1, 128), - (SUB16ri8 GR16:$src1, -128)>; -def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst), - (SUB16mi8 addr:$dst, -128)>; -def : Pat<(add GR32:$src1, 128), - (SUB32ri8 GR32:$src1, -128)>; -def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst), - (SUB32mi8 addr:$dst, -128)>; - -// r & (2^16-1) ==> movz -def : Pat<(and GR32:$src1, 0xffff), - (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>; -// r & (2^8-1) ==> movz -def : Pat<(and GR32:$src1, 0xff), - (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1, - GR32_ABCD)), - sub_8bit))>, - Requires<[In32BitMode]>; -// r & (2^8-1) ==> movz -def : Pat<(and GR16:$src1, 0xff), - (MOVZX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src1, - GR16_ABCD)), - sub_8bit))>, - Requires<[In32BitMode]>; - -// sext_inreg patterns -def : Pat<(sext_inreg GR32:$src, i16), - (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>; -def : Pat<(sext_inreg GR32:$src, i8), - (MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, - GR32_ABCD)), - sub_8bit))>, - Requires<[In32BitMode]>; -def : Pat<(sext_inreg GR16:$src, i8), - (MOVSX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, - GR16_ABCD)), - sub_8bit))>, - Requires<[In32BitMode]>; - -// trunc patterns -def : Pat<(i16 (trunc GR32:$src)), - (EXTRACT_SUBREG GR32:$src, sub_16bit)>; -def : Pat<(i8 (trunc GR32:$src)), - (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - sub_8bit)>, - Requires<[In32BitMode]>; -def : Pat<(i8 (trunc GR16:$src)), - (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - sub_8bit)>, - Requires<[In32BitMode]>; - -// h-register tricks -def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))), - (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - sub_8bit_hi)>, - Requires<[In32BitMode]>; -def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))), - (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - sub_8bit_hi)>, - Requires<[In32BitMode]>; -def : Pat<(srl GR16:$src, (i8 8)), - (EXTRACT_SUBREG - (MOVZX32rr8 - (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - sub_8bit_hi)), - sub_16bit)>, - Requires<[In32BitMode]>; -def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))), - (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, - GR16_ABCD)), - sub_8bit_hi))>, - Requires<[In32BitMode]>; -def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))), - (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, - GR16_ABCD)), - sub_8bit_hi))>, - Requires<[In32BitMode]>; -def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), - (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, - GR32_ABCD)), - sub_8bit_hi))>, - Requires<[In32BitMode]>; -def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)), - (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, - GR32_ABCD)), - sub_8bit_hi))>, - Requires<[In32BitMode]>; - -// (shl x, 1) ==> (add x, x) -def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>; -def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>; -def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>; - -// (shl x (and y, 31)) ==> (shl x, y) -def : Pat<(shl GR8:$src1, (and CL, 31)), - (SHL8rCL GR8:$src1)>; -def : Pat<(shl GR16:$src1, (and CL, 31)), - (SHL16rCL GR16:$src1)>; -def : Pat<(shl GR32:$src1, (and CL, 31)), - (SHL32rCL GR32:$src1)>; -def : Pat<(store (shl (loadi8 addr:$dst), (and CL, 31)), addr:$dst), - (SHL8mCL addr:$dst)>; -def : Pat<(store (shl (loadi16 addr:$dst), (and CL, 31)), addr:$dst), - (SHL16mCL 
addr:$dst)>; -def : Pat<(store (shl (loadi32 addr:$dst), (and CL, 31)), addr:$dst), - (SHL32mCL addr:$dst)>; - -def : Pat<(srl GR8:$src1, (and CL, 31)), - (SHR8rCL GR8:$src1)>; -def : Pat<(srl GR16:$src1, (and CL, 31)), - (SHR16rCL GR16:$src1)>; -def : Pat<(srl GR32:$src1, (and CL, 31)), - (SHR32rCL GR32:$src1)>; -def : Pat<(store (srl (loadi8 addr:$dst), (and CL, 31)), addr:$dst), - (SHR8mCL addr:$dst)>; -def : Pat<(store (srl (loadi16 addr:$dst), (and CL, 31)), addr:$dst), - (SHR16mCL addr:$dst)>; -def : Pat<(store (srl (loadi32 addr:$dst), (and CL, 31)), addr:$dst), - (SHR32mCL addr:$dst)>; - -def : Pat<(sra GR8:$src1, (and CL, 31)), - (SAR8rCL GR8:$src1)>; -def : Pat<(sra GR16:$src1, (and CL, 31)), - (SAR16rCL GR16:$src1)>; -def : Pat<(sra GR32:$src1, (and CL, 31)), - (SAR32rCL GR32:$src1)>; -def : Pat<(store (sra (loadi8 addr:$dst), (and CL, 31)), addr:$dst), - (SAR8mCL addr:$dst)>; -def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst), - (SAR16mCL addr:$dst)>; -def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst), - (SAR32mCL addr:$dst)>; - -// (anyext (setcc_carry)) -> (setcc_carry) -def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), - (SETB_C16r)>; -def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), - (SETB_C32r)>; -def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))), - (SETB_C32r)>; - -// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. -let AddedComplexity = 5 in { // Try this before the selecting to OR -def : Pat<(or_is_add GR16:$src1, imm:$src2), - (ADD16ri GR16:$src1, imm:$src2)>; -def : Pat<(or_is_add GR32:$src1, imm:$src2), - (ADD32ri GR32:$src1, imm:$src2)>; -def : Pat<(or_is_add GR16:$src1, i16immSExt8:$src2), - (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>; -def : Pat<(or_is_add GR32:$src1, i32immSExt8:$src2), - (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>; -def : Pat<(or_is_add GR16:$src1, GR16:$src2), - (ADD16rr GR16:$src1, GR16:$src2)>; -def : Pat<(or_is_add GR32:$src1, GR32:$src2), - (ADD32rr GR32:$src1, GR32:$src2)>; -} // AddedComplexity +// Decimal Adjust AL after Addition +// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS +def DAA : I<0x27, RawFrm, (outs), (ins), "daa", []>, Requires<[In32BitMode]>; -//===----------------------------------------------------------------------===// -// EFLAGS-defining Patterns -//===----------------------------------------------------------------------===// +// Decimal Adjust AL after Subtraction +// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS +def DAS : I<0x2F, RawFrm, (outs), (ins), "das", []>, Requires<[In32BitMode]>; -// add reg, reg -def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr GR8 :$src1, GR8 :$src2)>; -def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>; -def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>; - -// add reg, mem -def : Pat<(add GR8:$src1, (loadi8 addr:$src2)), - (ADD8rm GR8:$src1, addr:$src2)>; -def : Pat<(add GR16:$src1, (loadi16 addr:$src2)), - (ADD16rm GR16:$src1, addr:$src2)>; -def : Pat<(add GR32:$src1, (loadi32 addr:$src2)), - (ADD32rm GR32:$src1, addr:$src2)>; - -// add reg, imm -def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri GR8:$src1 , imm:$src2)>; -def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>; -def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>; -def : Pat<(add GR16:$src1, i16immSExt8:$src2), - (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>; -def : Pat<(add GR32:$src1, i32immSExt8:$src2), - 
(ADD32ri8 GR32:$src1, i32immSExt8:$src2)>; - -// sub reg, reg -def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr GR8 :$src1, GR8 :$src2)>; -def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>; -def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>; - -// sub reg, mem -def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)), - (SUB8rm GR8:$src1, addr:$src2)>; -def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)), - (SUB16rm GR16:$src1, addr:$src2)>; -def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)), - (SUB32rm GR32:$src1, addr:$src2)>; - -// sub reg, imm -def : Pat<(sub GR8:$src1, imm:$src2), - (SUB8ri GR8:$src1, imm:$src2)>; -def : Pat<(sub GR16:$src1, imm:$src2), - (SUB16ri GR16:$src1, imm:$src2)>; -def : Pat<(sub GR32:$src1, imm:$src2), - (SUB32ri GR32:$src1, imm:$src2)>; -def : Pat<(sub GR16:$src1, i16immSExt8:$src2), - (SUB16ri8 GR16:$src1, i16immSExt8:$src2)>; -def : Pat<(sub GR32:$src1, i32immSExt8:$src2), - (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>; - -// mul reg, reg -def : Pat<(mul GR16:$src1, GR16:$src2), - (IMUL16rr GR16:$src1, GR16:$src2)>; -def : Pat<(mul GR32:$src1, GR32:$src2), - (IMUL32rr GR32:$src1, GR32:$src2)>; - -// mul reg, mem -def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)), - (IMUL16rm GR16:$src1, addr:$src2)>; -def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)), - (IMUL32rm GR32:$src1, addr:$src2)>; - -// mul reg, imm -def : Pat<(mul GR16:$src1, imm:$src2), - (IMUL16rri GR16:$src1, imm:$src2)>; -def : Pat<(mul GR32:$src1, imm:$src2), - (IMUL32rri GR32:$src1, imm:$src2)>; -def : Pat<(mul GR16:$src1, i16immSExt8:$src2), - (IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>; -def : Pat<(mul GR32:$src1, i32immSExt8:$src2), - (IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>; - -// reg = mul mem, imm -def : Pat<(mul (loadi16 addr:$src1), imm:$src2), - (IMUL16rmi addr:$src1, imm:$src2)>; -def : Pat<(mul (loadi32 addr:$src1), imm:$src2), - (IMUL32rmi addr:$src1, imm:$src2)>; -def : Pat<(mul (loadi16 addr:$src1), i16immSExt8:$src2), - (IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>; -def : Pat<(mul (loadi32 addr:$src1), i32immSExt8:$src2), - (IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>; - -// Optimize multiply by 2 with EFLAGS result. -let AddedComplexity = 2 in { -def : Pat<(X86smul_flag GR16:$src1, 2), (ADD16rr GR16:$src1, GR16:$src1)>; -def : Pat<(X86smul_flag GR32:$src1, 2), (ADD32rr GR32:$src1, GR32:$src1)>; -} +// Check Array Index Against Bounds +def BOUNDS16rm : I<0x62, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), + "bound\t{$src, $dst|$dst, $src}", []>, OpSize, + Requires<[In32BitMode]>; +def BOUNDS32rm : I<0x62, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "bound\t{$src, $dst|$dst, $src}", []>, + Requires<[In32BitMode]>; -// Patterns for nodes that do not produce flags, for instructions that do. - -// Increment reg. -def : Pat<(add GR8:$src1 , 1), (INC8r GR8:$src1)>; -def : Pat<(add GR16:$src1, 1), (INC16r GR16:$src1)>, Requires<[In32BitMode]>; -def : Pat<(add GR32:$src1, 1), (INC32r GR32:$src1)>, Requires<[In32BitMode]>; - -// Decrement reg. -def : Pat<(add GR8:$src1 , -1), (DEC8r GR8:$src1)>; -def : Pat<(add GR16:$src1, -1), (DEC16r GR16:$src1)>, Requires<[In32BitMode]>; -def : Pat<(add GR32:$src1, -1), (DEC32r GR32:$src1)>, Requires<[In32BitMode]>; - -// or reg/reg. 
-def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>; -def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>; -def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>; - -// or reg/mem -def : Pat<(or GR8:$src1, (loadi8 addr:$src2)), - (OR8rm GR8:$src1, addr:$src2)>; -def : Pat<(or GR16:$src1, (loadi16 addr:$src2)), - (OR16rm GR16:$src1, addr:$src2)>; -def : Pat<(or GR32:$src1, (loadi32 addr:$src2)), - (OR32rm GR32:$src1, addr:$src2)>; - -// or reg/imm -def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri GR8 :$src1, imm:$src2)>; -def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>; -def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>; -def : Pat<(or GR16:$src1, i16immSExt8:$src2), - (OR16ri8 GR16:$src1, i16immSExt8:$src2)>; -def : Pat<(or GR32:$src1, i32immSExt8:$src2), - (OR32ri8 GR32:$src1, i32immSExt8:$src2)>; - -// xor reg/reg -def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr GR8 :$src1, GR8 :$src2)>; -def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>; -def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>; - -// xor reg/mem -def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)), - (XOR8rm GR8:$src1, addr:$src2)>; -def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)), - (XOR16rm GR16:$src1, addr:$src2)>; -def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)), - (XOR32rm GR32:$src1, addr:$src2)>; - -// xor reg/imm -def : Pat<(xor GR8:$src1, imm:$src2), - (XOR8ri GR8:$src1, imm:$src2)>; -def : Pat<(xor GR16:$src1, imm:$src2), - (XOR16ri GR16:$src1, imm:$src2)>; -def : Pat<(xor GR32:$src1, imm:$src2), - (XOR32ri GR32:$src1, imm:$src2)>; -def : Pat<(xor GR16:$src1, i16immSExt8:$src2), - (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>; -def : Pat<(xor GR32:$src1, i32immSExt8:$src2), - (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>; - -// and reg/reg -def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr GR8 :$src1, GR8 :$src2)>; -def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>; -def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>; - -// and reg/mem -def : Pat<(and GR8:$src1, (loadi8 addr:$src2)), - (AND8rm GR8:$src1, addr:$src2)>; -def : Pat<(and GR16:$src1, (loadi16 addr:$src2)), - (AND16rm GR16:$src1, addr:$src2)>; -def : Pat<(and GR32:$src1, (loadi32 addr:$src2)), - (AND32rm GR32:$src1, addr:$src2)>; - -// and reg/imm -def : Pat<(and GR8:$src1, imm:$src2), - (AND8ri GR8:$src1, imm:$src2)>; -def : Pat<(and GR16:$src1, imm:$src2), - (AND16ri GR16:$src1, imm:$src2)>; -def : Pat<(and GR32:$src1, imm:$src2), - (AND32ri GR32:$src1, imm:$src2)>; -def : Pat<(and GR16:$src1, i16immSExt8:$src2), - (AND16ri8 GR16:$src1, i16immSExt8:$src2)>; -def : Pat<(and GR32:$src1, i32immSExt8:$src2), - (AND32ri8 GR32:$src1, i32immSExt8:$src2)>; +// Adjust RPL Field of Segment Selector +def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$src), (ins GR16:$dst), + "arpl\t{$src, $dst|$dst, $src}", []>, Requires<[In32BitMode]>; +def ARPL16mr : I<0x63, MRMSrcMem, (outs GR16:$src), (ins i16mem:$dst), + "arpl\t{$src, $dst|$dst, $src}", []>, Requires<[In32BitMode]>; //===----------------------------------------------------------------------===// -// Floating Point Stack Support +// Subsystems. 
//===----------------------------------------------------------------------===// -include "X86InstrFPStack.td" - -//===----------------------------------------------------------------------===// -// X86-64 Support -//===----------------------------------------------------------------------===// +include "X86InstrArithmetic.td" +include "X86InstrCMovSetCC.td" +include "X86InstrExtension.td" +include "X86InstrControl.td" +include "X86InstrShiftRotate.td" -include "X86Instr64bit.td" +// X87 Floating Point Stack. +include "X86InstrFPStack.td" -//===----------------------------------------------------------------------===// // SIMD support (SSE, MMX and AVX) -//===----------------------------------------------------------------------===// - include "X86InstrFragmentsSIMD.td" -//===----------------------------------------------------------------------===// // FMA - Fused Multiply-Add support (requires FMA) -//===----------------------------------------------------------------------===// - include "X86InstrFMA.td" +// SSE, MMX and 3DNow! vector support. +include "X86InstrSSE.td" +include "X86InstrMMX.td" +include "X86Instr3DNow.td" + +include "X86InstrVMX.td" + +// System instructions. +include "X86InstrSystem.td" + +// Compiler Pseudo Instructions and Pat Patterns +include "X86InstrCompiler.td" + //===----------------------------------------------------------------------===// -// XMM Floating point support (requires SSE / SSE2) +// Assembler Mnemonic Aliases //===----------------------------------------------------------------------===// -include "X86InstrSSE.td" +def : MnemonicAlias<"call", "calll">, Requires<[In32BitMode]>; +def : MnemonicAlias<"call", "callq">, Requires<[In64BitMode]>; + +def : MnemonicAlias<"cbw", "cbtw">; +def : MnemonicAlias<"cwd", "cwtd">; +def : MnemonicAlias<"cdq", "cltd">; +def : MnemonicAlias<"cwde", "cwtl">; +def : MnemonicAlias<"cdqe", "cltq">; + +// lret maps to lretl, it is not ambiguous with lretq. +def : MnemonicAlias<"lret", "lretl">; + +def : MnemonicAlias<"leavel", "leave">, Requires<[In32BitMode]>; +def : MnemonicAlias<"leaveq", "leave">, Requires<[In64BitMode]>; + +def : MnemonicAlias<"pop", "popl">, Requires<[In32BitMode]>; +def : MnemonicAlias<"pop", "popq">, Requires<[In64BitMode]>; +def : MnemonicAlias<"popf", "popfl">, Requires<[In32BitMode]>; +def : MnemonicAlias<"popf", "popfq">, Requires<[In64BitMode]>; +def : MnemonicAlias<"popfd", "popfl">; + +// FIXME: This is wrong for "push reg". "push %bx" should turn into pushw in +// all modes. However: "push (addr)" and "push $42" should default to +// pushl/pushq depending on the current mode. 
Similar for "pop %bx" +def : MnemonicAlias<"push", "pushl">, Requires<[In32BitMode]>; +def : MnemonicAlias<"push", "pushq">, Requires<[In64BitMode]>; +def : MnemonicAlias<"pushf", "pushfl">, Requires<[In32BitMode]>; +def : MnemonicAlias<"pushf", "pushfq">, Requires<[In64BitMode]>; +def : MnemonicAlias<"pushfd", "pushfl">; + +def : MnemonicAlias<"repe", "rep">; +def : MnemonicAlias<"repz", "rep">; +def : MnemonicAlias<"repnz", "repne">; + +def : MnemonicAlias<"retl", "ret">, Requires<[In32BitMode]>; +def : MnemonicAlias<"retq", "ret">, Requires<[In64BitMode]>; + +def : MnemonicAlias<"salb", "shlb">; +def : MnemonicAlias<"salw", "shlw">; +def : MnemonicAlias<"sall", "shll">; +def : MnemonicAlias<"salq", "shlq">; + +def : MnemonicAlias<"smovb", "movsb">; +def : MnemonicAlias<"smovw", "movsw">; +def : MnemonicAlias<"smovl", "movsl">; +def : MnemonicAlias<"smovq", "movsq">; + +def : MnemonicAlias<"ud2a", "ud2">; +def : MnemonicAlias<"verrw", "verr">; + +// System instruction aliases. +def : MnemonicAlias<"iret", "iretl">; +def : MnemonicAlias<"sysret", "sysretl">; + +def : MnemonicAlias<"lgdtl", "lgdt">, Requires<[In32BitMode]>; +def : MnemonicAlias<"lgdtq", "lgdt">, Requires<[In64BitMode]>; +def : MnemonicAlias<"lidtl", "lidt">, Requires<[In32BitMode]>; +def : MnemonicAlias<"lidtq", "lidt">, Requires<[In64BitMode]>; +def : MnemonicAlias<"sgdtl", "sgdt">, Requires<[In32BitMode]>; +def : MnemonicAlias<"sgdtq", "sgdt">, Requires<[In64BitMode]>; +def : MnemonicAlias<"sidtl", "sidt">, Requires<[In32BitMode]>; +def : MnemonicAlias<"sidtq", "sidt">, Requires<[In64BitMode]>; + + +// Floating point stack aliases. +def : MnemonicAlias<"fcmovz", "fcmove">; +def : MnemonicAlias<"fcmova", "fcmovnbe">; +def : MnemonicAlias<"fcmovnae", "fcmovb">; +def : MnemonicAlias<"fcmovna", "fcmovbe">; +def : MnemonicAlias<"fcmovae", "fcmovnb">; +def : MnemonicAlias<"fcomip", "fcompi">; +def : MnemonicAlias<"fildq", "fildll">; +def : MnemonicAlias<"fldcww", "fldcw">; +def : MnemonicAlias<"fnstcww", "fnstcw">; +def : MnemonicAlias<"fnstsww", "fnstsw">; +def : MnemonicAlias<"fucomip", "fucompi">; +def : MnemonicAlias<"fwait", "wait">; + + +class CondCodeAlias<string Prefix,string Suffix, string OldCond, string NewCond> + : MnemonicAlias<!strconcat(Prefix, OldCond, Suffix), + !strconcat(Prefix, NewCond, Suffix)>; + +/// IntegerCondCodeMnemonicAlias - This multiclass defines a bunch of +/// MnemonicAlias's that canonicalize the condition code in a mnemonic, for +/// example "setz" -> "sete". 
+multiclass IntegerCondCodeMnemonicAlias<string Prefix, string Suffix> { + def C : CondCodeAlias<Prefix, Suffix, "c", "b">; // setc -> setb + def Z : CondCodeAlias<Prefix, Suffix, "z" , "e">; // setz -> sete + def NA : CondCodeAlias<Prefix, Suffix, "na", "be">; // setna -> setbe + def NB : CondCodeAlias<Prefix, Suffix, "nb", "ae">; // setnb -> setae + def NC : CondCodeAlias<Prefix, Suffix, "nc", "ae">; // setnc -> setae + def NG : CondCodeAlias<Prefix, Suffix, "ng", "le">; // setng -> setle + def NL : CondCodeAlias<Prefix, Suffix, "nl", "ge">; // setnl -> setge + def NZ : CondCodeAlias<Prefix, Suffix, "nz", "ne">; // setnz -> setne + def PE : CondCodeAlias<Prefix, Suffix, "pe", "p">; // setpe -> setp + def PO : CondCodeAlias<Prefix, Suffix, "po", "np">; // setpo -> setnp + + def NAE : CondCodeAlias<Prefix, Suffix, "nae", "b">; // setnae -> setb + def NBE : CondCodeAlias<Prefix, Suffix, "nbe", "a">; // setnbe -> seta + def NGE : CondCodeAlias<Prefix, Suffix, "nge", "l">; // setnge -> setl + def NLE : CondCodeAlias<Prefix, Suffix, "nle", "g">; // setnle -> setg +} + +// Aliases for set<CC> +defm : IntegerCondCodeMnemonicAlias<"set", "">; +// Aliases for j<CC> +defm : IntegerCondCodeMnemonicAlias<"j", "">; +// Aliases for cmov<CC>{w,l,q} +defm : IntegerCondCodeMnemonicAlias<"cmov", "w">; +defm : IntegerCondCodeMnemonicAlias<"cmov", "l">; +defm : IntegerCondCodeMnemonicAlias<"cmov", "q">; + //===----------------------------------------------------------------------===// -// MMX and XMM Packed Integer support (requires MMX, SSE, and SSE2) +// Assembler Instruction Aliases //===----------------------------------------------------------------------===// -include "X86InstrMMX.td" +// aad/aam default to base 10 if no operand is specified. +def : InstAlias<"aad", (AAD8i8 10)>; +def : InstAlias<"aam", (AAM8i8 10)>; + +// Disambiguate the mem/imm form of bt-without-a-suffix as btl. +def : InstAlias<"bt $imm, $mem", (BT32mi8 i32mem:$mem, i32i8imm:$imm)>; + +// clr aliases. +def : InstAlias<"clrb $reg", (XOR8rr GR8 :$reg, GR8 :$reg)>; +def : InstAlias<"clrw $reg", (XOR16rr GR16:$reg, GR16:$reg)>; +def : InstAlias<"clrl $reg", (XOR32rr GR32:$reg, GR32:$reg)>; +def : InstAlias<"clrq $reg", (XOR64rr GR64:$reg, GR64:$reg)>; + +// div and idiv aliases for explicit A register. +def : InstAlias<"divb $src, %al", (DIV8r GR8 :$src)>; +def : InstAlias<"divw $src, %ax", (DIV16r GR16:$src)>; +def : InstAlias<"divl $src, %eax", (DIV32r GR32:$src)>; +def : InstAlias<"divq $src, %rax", (DIV64r GR64:$src)>; +def : InstAlias<"divb $src, %al", (DIV8m i8mem :$src)>; +def : InstAlias<"divw $src, %ax", (DIV16m i16mem:$src)>; +def : InstAlias<"divl $src, %eax", (DIV32m i32mem:$src)>; +def : InstAlias<"divq $src, %rax", (DIV64m i64mem:$src)>; +def : InstAlias<"idivb $src, %al", (IDIV8r GR8 :$src)>; +def : InstAlias<"idivw $src, %ax", (IDIV16r GR16:$src)>; +def : InstAlias<"idivl $src, %eax", (IDIV32r GR32:$src)>; +def : InstAlias<"idivq $src, %rax", (IDIV64r GR64:$src)>; +def : InstAlias<"idivb $src, %al", (IDIV8m i8mem :$src)>; +def : InstAlias<"idivw $src, %ax", (IDIV16m i16mem:$src)>; +def : InstAlias<"idivl $src, %eax", (IDIV32m i32mem:$src)>; +def : InstAlias<"idivq $src, %rax", (IDIV64m i64mem:$src)>; + + + +// Various unary fpstack operations default to operating on on ST1. 
+// For example, "fxch" -> "fxch %st(1)" +def : InstAlias<"faddp", (ADD_FPrST0 ST1)>; +def : InstAlias<"fsubp", (SUBR_FPrST0 ST1)>; +def : InstAlias<"fsubrp", (SUB_FPrST0 ST1)>; +def : InstAlias<"fmulp", (MUL_FPrST0 ST1)>; +def : InstAlias<"fdivp", (DIVR_FPrST0 ST1)>; +def : InstAlias<"fdivrp", (DIV_FPrST0 ST1)>; +def : InstAlias<"fxch", (XCH_F ST1)>; +def : InstAlias<"fcomi", (COM_FIr ST1)>; +def : InstAlias<"fcompi", (COM_FIPr ST1)>; +def : InstAlias<"fucom", (UCOM_Fr ST1)>; +def : InstAlias<"fucomp", (UCOM_FPr ST1)>; +def : InstAlias<"fucomi", (UCOM_FIr ST1)>; +def : InstAlias<"fucompi", (UCOM_FIPr ST1)>; + +// Handle fmul/fadd/fsub/fdiv instructions with explicitly written st(0) op. +// For example, "fadd %st(4), %st(0)" -> "fadd %st(4)". We also disambiguate +// instructions like "fadd %st(0), %st(0)" as "fadd %st(0)" for consistency with +// gas. +multiclass FpUnaryAlias<string Mnemonic, Instruction Inst> { + def : InstAlias<!strconcat(Mnemonic, " $op, %st(0)"), (Inst RST:$op)>; + def : InstAlias<!strconcat(Mnemonic, " %st(0), %st(0)"), (Inst ST0)>; +} + +defm : FpUnaryAlias<"fadd", ADD_FST0r>; +defm : FpUnaryAlias<"faddp", ADD_FPrST0>; +defm : FpUnaryAlias<"fsub", SUB_FST0r>; +defm : FpUnaryAlias<"fsubp", SUBR_FPrST0>; +defm : FpUnaryAlias<"fsubr", SUBR_FST0r>; +defm : FpUnaryAlias<"fsubrp", SUB_FPrST0>; +defm : FpUnaryAlias<"fmul", MUL_FST0r>; +defm : FpUnaryAlias<"fmulp", MUL_FPrST0>; +defm : FpUnaryAlias<"fdiv", DIV_FST0r>; +defm : FpUnaryAlias<"fdivp", DIVR_FPrST0>; +defm : FpUnaryAlias<"fdivr", DIVR_FST0r>; +defm : FpUnaryAlias<"fdivrp", DIV_FPrST0>; +defm : FpUnaryAlias<"fcomi", COM_FIr>; +defm : FpUnaryAlias<"fucomi", UCOM_FIr>; +defm : FpUnaryAlias<"fcompi", COM_FIPr>; +defm : FpUnaryAlias<"fucompi", UCOM_FIPr>; + + +// Handle "f{mulp,addp} st(0), $op" the same as "f{mulp,addp} $op", since they +// commute. We also allow fdiv[r]p/fsubrp even though they don't commute, +// solely because gas supports it. +def : InstAlias<"faddp %st(0), $op", (ADD_FPrST0 RST:$op)>; +def : InstAlias<"fmulp %st(0), $op", (MUL_FPrST0 RST:$op)>; +def : InstAlias<"fsubrp %st(0), $op", (SUB_FPrST0 RST:$op)>; +def : InstAlias<"fdivp %st(0), $op", (DIVR_FPrST0 RST:$op)>; +def : InstAlias<"fdivrp %st(0), $op", (DIV_FPrST0 RST:$op)>; + +// We accept "fnstsw %eax" even though it only writes %ax. +def : InstAlias<"fnstsw %eax", (FNSTSW8r)>; +def : InstAlias<"fnstsw %al" , (FNSTSW8r)>; +def : InstAlias<"fnstsw" , (FNSTSW8r)>; + +// lcall and ljmp aliases. This seems to be an odd mapping in 64-bit mode, but +// this is compatible with what GAS does. +def : InstAlias<"lcall $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>; +def : InstAlias<"ljmp $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>; +def : InstAlias<"lcall *$dst", (FARCALL32m opaque48mem:$dst)>; +def : InstAlias<"ljmp *$dst", (FARJMP32m opaque48mem:$dst)>; + +// "imul <imm>, B" is an alias for "imul <imm>, B, B". 
+def : InstAlias<"imulw $imm, $r", (IMUL16rri GR16:$r, GR16:$r, i16imm:$imm)>; +def : InstAlias<"imulw $imm, $r", (IMUL16rri8 GR16:$r, GR16:$r, i16i8imm:$imm)>; +def : InstAlias<"imull $imm, $r", (IMUL32rri GR32:$r, GR32:$r, i32imm:$imm)>; +def : InstAlias<"imull $imm, $r", (IMUL32rri8 GR32:$r, GR32:$r, i32i8imm:$imm)>; +def : InstAlias<"imulq $imm, $r",(IMUL64rri32 GR64:$r, GR64:$r,i64i32imm:$imm)>; +def : InstAlias<"imulq $imm, $r", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm)>; + +// inb %dx -> inb %al, %dx +def : InstAlias<"inb %dx", (IN8rr)>; +def : InstAlias<"inw %dx", (IN16rr)>; +def : InstAlias<"inl %dx", (IN32rr)>; +def : InstAlias<"inb $port", (IN8ri i8imm:$port)>; +def : InstAlias<"inw $port", (IN16ri i8imm:$port)>; +def : InstAlias<"inl $port", (IN32ri i8imm:$port)>; + + +// jmp and call aliases for lcall and ljmp. jmp $42,$5 -> ljmp +def : InstAlias<"call $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>; +def : InstAlias<"jmp $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>; +def : InstAlias<"callw $seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>; +def : InstAlias<"jmpw $seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>; +def : InstAlias<"calll $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>; +def : InstAlias<"jmpl $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>; + +// Force mov without a suffix with a segment and mem to prefer the 'l' form of +// the move. All segment/mem forms are equivalent, this has the shortest +// encoding. +def : InstAlias<"mov $mem, $seg", (MOV32sm SEGMENT_REG:$seg, i32mem:$mem)>; +def : InstAlias<"mov $seg, $mem", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg)>; + +// Match 'movq <largeimm>, <reg>' as an alias for movabsq. +def : InstAlias<"movq $imm, $reg", (MOV64ri GR64:$reg, i64imm:$imm)>; + +// Match 'movq GR64, MMX' as an alias for movd. +def : InstAlias<"movq $src, $dst", (MMX_MOVD64to64rr VR64:$dst, GR64:$src)>; +def : InstAlias<"movq $src, $dst", (MMX_MOVD64from64rr GR64:$dst, VR64:$src)>; + +// movsd with no operands (as opposed to the SSE scalar move of a double) is an +// alias for movsl. (as in rep; movsd) +def : InstAlias<"movsd", (MOVSD)>; + +// movsx aliases +def : InstAlias<"movsx $src, $dst", (MOVSX16rr8W GR16:$dst, GR8:$src)>; +def : InstAlias<"movsx $src, $dst", (MOVSX16rm8W GR16:$dst, i8mem:$src)>; +def : InstAlias<"movsx $src, $dst", (MOVSX32rr8 GR32:$dst, GR8:$src)>; +def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16:$src)>; +def : InstAlias<"movsx $src, $dst", (MOVSX64rr8 GR64:$dst, GR8:$src)>; +def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16:$src)>; +def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32:$src)>; + +// movzx aliases +def : InstAlias<"movzx $src, $dst", (MOVZX16rr8W GR16:$dst, GR8:$src)>; +def : InstAlias<"movzx $src, $dst", (MOVZX16rm8W GR16:$dst, i8mem:$src)>; +def : InstAlias<"movzx $src, $dst", (MOVZX32rr8 GR32:$dst, GR8:$src)>; +def : InstAlias<"movzx $src, $dst", (MOVZX32rr16 GR32:$dst, GR16:$src)>; +def : InstAlias<"movzx $src, $dst", (MOVZX64rr8_Q GR64:$dst, GR8:$src)>; +def : InstAlias<"movzx $src, $dst", (MOVZX64rr16_Q GR64:$dst, GR16:$src)>; +// Note: No GR32->GR64 movzx form. 
+ +// outb %dx -> outb %al, %dx +def : InstAlias<"outb %dx", (OUT8rr)>; +def : InstAlias<"outw %dx", (OUT16rr)>; +def : InstAlias<"outl %dx", (OUT32rr)>; +def : InstAlias<"outb $port", (OUT8ir i8imm:$port)>; +def : InstAlias<"outw $port", (OUT16ir i8imm:$port)>; +def : InstAlias<"outl $port", (OUT32ir i8imm:$port)>; + +// 'sldt <mem>' can be encoded with either sldtw or sldtq with the same +// effect (both store to a 16-bit mem). Force to sldtw to avoid ambiguity +// errors, since its encoding is the most compact. +def : InstAlias<"sldt $mem", (SLDT16m i16mem:$mem)>; + +// shld/shrd op,op -> shld op, op, 1 +def : InstAlias<"shldw $r1, $r2", (SHLD16rri8 GR16:$r1, GR16:$r2, 1)>; +def : InstAlias<"shldl $r1, $r2", (SHLD32rri8 GR32:$r1, GR32:$r2, 1)>; +def : InstAlias<"shldq $r1, $r2", (SHLD64rri8 GR64:$r1, GR64:$r2, 1)>; +def : InstAlias<"shrdw $r1, $r2", (SHRD16rri8 GR16:$r1, GR16:$r2, 1)>; +def : InstAlias<"shrdl $r1, $r2", (SHRD32rri8 GR32:$r1, GR32:$r2, 1)>; +def : InstAlias<"shrdq $r1, $r2", (SHRD64rri8 GR64:$r1, GR64:$r2, 1)>; + +def : InstAlias<"shldw $mem, $reg", (SHLD16mri8 i16mem:$mem, GR16:$reg, 1)>; +def : InstAlias<"shldl $mem, $reg", (SHLD32mri8 i32mem:$mem, GR32:$reg, 1)>; +def : InstAlias<"shldq $mem, $reg", (SHLD64mri8 i64mem:$mem, GR64:$reg, 1)>; +def : InstAlias<"shrdw $mem, $reg", (SHRD16mri8 i16mem:$mem, GR16:$reg, 1)>; +def : InstAlias<"shrdl $mem, $reg", (SHRD32mri8 i32mem:$mem, GR32:$reg, 1)>; +def : InstAlias<"shrdq $mem, $reg", (SHRD64mri8 i64mem:$mem, GR64:$reg, 1)>; + +/* FIXME: This is disabled because the asm matcher is currently incapable of + * matching a fixed immediate like $1. +// "shl X, $1" is an alias for "shl X". +multiclass ShiftRotateByOneAlias<string Mnemonic, string Opc> { + def : InstAlias<!strconcat(Mnemonic, "b $op, $$1"), + (!cast<Instruction>(!strconcat(Opc, "8r1")) GR8:$op)>; + def : InstAlias<!strconcat(Mnemonic, "w $op, $$1"), + (!cast<Instruction>(!strconcat(Opc, "16r1")) GR16:$op)>; + def : InstAlias<!strconcat(Mnemonic, "l $op, $$1"), + (!cast<Instruction>(!strconcat(Opc, "32r1")) GR32:$op)>; + def : InstAlias<!strconcat(Mnemonic, "q $op, $$1"), + (!cast<Instruction>(!strconcat(Opc, "64r1")) GR64:$op)>; + def : InstAlias<!strconcat(Mnemonic, "b $op, $$1"), + (!cast<Instruction>(!strconcat(Opc, "8m1")) i8mem:$op)>; + def : InstAlias<!strconcat(Mnemonic, "w $op, $$1"), + (!cast<Instruction>(!strconcat(Opc, "16m1")) i16mem:$op)>; + def : InstAlias<!strconcat(Mnemonic, "l $op, $$1"), + (!cast<Instruction>(!strconcat(Opc, "32m1")) i32mem:$op)>; + def : InstAlias<!strconcat(Mnemonic, "q $op, $$1"), + (!cast<Instruction>(!strconcat(Opc, "64m1")) i64mem:$op)>; +} + +defm : ShiftRotateByOneAlias<"rcl", "RCL">; +defm : ShiftRotateByOneAlias<"rcr", "RCR">; +defm : ShiftRotateByOneAlias<"rol", "ROL">; +defm : ShiftRotateByOneAlias<"ror", "ROR">; +FIXME */ + +// test: We accept "testX <reg>, <mem>" and "testX <mem>, <reg>" as synonyms. +def : InstAlias<"testb $val, $mem", (TEST8rm GR8 :$val, i8mem :$mem)>; +def : InstAlias<"testw $val, $mem", (TEST16rm GR16:$val, i16mem:$mem)>; +def : InstAlias<"testl $val, $mem", (TEST32rm GR32:$val, i32mem:$mem)>; +def : InstAlias<"testq $val, $mem", (TEST64rm GR64:$val, i64mem:$mem)>; + +// xchg: We accept "xchgX <reg>, <mem>" and "xchgX <mem>, <reg>" as synonyms. 
+def : InstAlias<"xchgb $mem, $val", (XCHG8rm GR8 :$val, i8mem :$mem)>; +def : InstAlias<"xchgw $mem, $val", (XCHG16rm GR16:$val, i16mem:$mem)>; +def : InstAlias<"xchgl $mem, $val", (XCHG32rm GR32:$val, i32mem:$mem)>; +def : InstAlias<"xchgq $mem, $val", (XCHG64rm GR64:$val, i64mem:$mem)>; diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 11d4179..bb2165a 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -1,4 +1,4 @@ -//====- X86InstrMMX.td - Describe the X86 Instruction Set --*- tablegen -*-===// +//====- X86InstrMMX.td - Describe the MMX Instruction Set --*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -11,6 +11,9 @@ // and properties of the instructions which are needed for code generation, // machine code emission, and analysis. // +// All instructions that use MMX should be in this file, even if they also use +// SSE. +// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -18,58 +21,23 @@ //===----------------------------------------------------------------------===// let Constraints = "$src1 = $dst" in { - // MMXI_binop_rm - Simple MMX binary operator. - multiclass MMXI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, - ValueType OpVT, bit Commutable = 0> { - def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), - (ins VR64:$src1, VR64:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, (OpVT (OpNode VR64:$src1, VR64:$src2)))]> { - let isCommutable = Commutable; - } - def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst), - (ins VR64:$src1, i64mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, (OpVT (OpNode VR64:$src1, - (bitconvert - (load_mmx addr:$src2)))))]>; - } - + // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic. + // When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp. multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId, bit Commutable = 0> { - def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), + def irr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]> { let isCommutable = Commutable; } - def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst), + def irm : MMXI<opc, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR64:$dst, (IntId VR64:$src1, (bitconvert (load_mmx addr:$src2))))]>; } - // MMXI_binop_rm_v1i64 - Simple MMX binary operator whose type is v1i64. - // - // FIXME: we could eliminate this and use MMXI_binop_rm instead if tblgen knew - // to collapse (bitconvert VT to VT) into its operand. 
- // - multiclass MMXI_binop_rm_v1i64<bits<8> opc, string OpcodeStr, SDNode OpNode, - bit Commutable = 0> { - def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), - (ins VR64:$src1, VR64:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, (v1i64 (OpNode VR64:$src1, VR64:$src2)))]> { - let isCommutable = Commutable; - } - def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst), - (ins VR64:$src1, i64mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, - (OpNode VR64:$src1,(load_mmx addr:$src2)))]>; - } - multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm, string OpcodeStr, Intrinsic IntId, Intrinsic IntId2> { @@ -89,14 +57,75 @@ let Constraints = "$src1 = $dst" in { } } +/// Unary MMX instructions requiring SSSE3. +multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr, + Intrinsic IntId64> { + def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set VR64:$dst, (IntId64 VR64:$src))]>; + + def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set VR64:$dst, + (IntId64 (bitconvert (memopmmx addr:$src))))]>; +} + +/// Binary MMX instructions requiring SSSE3. +let ImmT = NoImm, Constraints = "$src1 = $dst" in { +multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr, + Intrinsic IntId64> { + let isCommutable = 0 in + def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), + (ins VR64:$src1, VR64:$src2), + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>; + def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), + (ins VR64:$src1, i64mem:$src2), + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + [(set VR64:$dst, + (IntId64 VR64:$src1, + (bitconvert (memopmmx addr:$src2))))]>; +} +} + +/// PALIGN MMX instructions (require SSSE3). 
+multiclass ssse3_palign_mm<string asm, Intrinsic IntId> { + def R64irr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), + (ins VR64:$src1, VR64:$src2, i8imm:$src3), + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>; + def R64irm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst), + (ins VR64:$src1, i64mem:$src2, i8imm:$src3), + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set VR64:$dst, (IntId VR64:$src1, + (bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>; +} + +multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, + Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, + string asm, Domain d> { + def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, + [(set DstRC:$dst, (Int SrcRC:$src))], d>; + def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, + [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>; +} + +multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC, + RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, + PatFrag ld_frag, string asm, Domain d> { + def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst),(ins DstRC:$src1, SrcRC:$src2), + asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>; + def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), + (ins DstRC:$src1, x86memop:$src2), asm, + [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>; +} + //===----------------------------------------------------------------------===// -// MMX EMMS & FEMMS Instructions +// MMX EMMS Instruction //===----------------------------------------------------------------------===// def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms", [(int_x86_mmx_emms)]>; -def MMX_FEMMS : MMXI<0x0E, RawFrm, (outs), (ins), "femms", - [(int_x86_mmx_femms)]>; //===----------------------------------------------------------------------===// // MMX Scalar Instructions @@ -106,12 +135,12 @@ def MMX_FEMMS : MMXI<0x0E, RawFrm, (outs), (ins), "femms", def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, - (v2i32 (scalar_to_vector GR32:$src)))]>; -let canFoldAsLoad = 1, isReMaterializable = 1 in + (x86mmx (scalar_to_vector GR32:$src)))]>; +let canFoldAsLoad = 1 in def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, - (v2i32 (scalar_to_vector (loadi32 addr:$src))))]>; + (x86mmx (scalar_to_vector (loadi32 addr:$src))))]>; let mayStore = 1 in def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src), "movd\t{$src, $dst|$dst, $src}", []>; @@ -123,42 +152,41 @@ def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), "movd\t{$src, $dst|$dst, $src}", []>; -let neverHasSideEffects = 1 in // These are 64 bit moves, but since the OS X assembler doesn't // recognize a register-register movq, we write them as // movd. 
def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR64:$src), - "movd\t{$src, $dst|$dst, $src}", []>; + "movd\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, + (bitconvert VR64:$src))]>; def MMX_MOVD64rrv164 : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, - (v1i64 (scalar_to_vector GR64:$src)))]>; - + (bitconvert GR64:$src))]>; let neverHasSideEffects = 1 in def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), "movq\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1 in +let canFoldAsLoad = 1 in def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (load_mmx addr:$src))]>; def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), "movq\t{$src, $dst|$dst, $src}", - [(store (v1i64 VR64:$src), addr:$dst)]>; + [(store (x86mmx VR64:$src), addr:$dst)]>; def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}", [(set VR64:$dst, - (v1i64 (bitconvert + (x86mmx (bitconvert (i64 (vector_extract (v2i64 VR128:$src), (iPTR 0))))))]>; def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (movl immAllZerosV, - (v2i64 (scalar_to_vector - (i64 (bitconvert (v1i64 VR64:$src)))))))]>; + (v2i64 (scalar_to_vector + (i64 (bitconvert (x86mmx VR64:$src))))))]>; let neverHasSideEffects = 1 in def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src), @@ -176,34 +204,40 @@ let AddedComplexity = 15 in def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, - (v2i32 (X86vzmovl (v2i32 (scalar_to_vector GR32:$src)))))]>; + (x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))))]>; let AddedComplexity = 20 in def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, - (v2i32 (X86vzmovl (v2i32 + (x86mmx (X86vzmovl (x86mmx (scalar_to_vector (loadi32 addr:$src))))))]>; // Arithmetic Instructions - +defm MMX_PABSB : SS3I_unop_rm_int_mm<0x1C, "pabsb", int_x86_ssse3_pabs_b>; +defm MMX_PABSW : SS3I_unop_rm_int_mm<0x1D, "pabsw", int_x86_ssse3_pabs_w>; +defm MMX_PABSD : SS3I_unop_rm_int_mm<0x1E, "pabsd", int_x86_ssse3_pabs_d>; // -- Addition -defm MMX_PADDB : MMXI_binop_rm<0xFC, "paddb", add, v8i8, 1>; -defm MMX_PADDW : MMXI_binop_rm<0xFD, "paddw", add, v4i16, 1>; -defm MMX_PADDD : MMXI_binop_rm<0xFE, "paddd", add, v2i32, 1>; -defm MMX_PADDQ : MMXI_binop_rm<0xD4, "paddq", add, v1i64, 1>; - +defm MMX_PADDB : MMXI_binop_rm_int<0xFC, "paddb", int_x86_mmx_padd_b, 1>; +defm MMX_PADDW : MMXI_binop_rm_int<0xFD, "paddw", int_x86_mmx_padd_w, 1>; +defm MMX_PADDD : MMXI_binop_rm_int<0xFE, "paddd", int_x86_mmx_padd_d, 1>; +defm MMX_PADDQ : MMXI_binop_rm_int<0xD4, "paddq", int_x86_mmx_padd_q, 1>; defm MMX_PADDSB : MMXI_binop_rm_int<0xEC, "paddsb" , int_x86_mmx_padds_b, 1>; defm MMX_PADDSW : MMXI_binop_rm_int<0xED, "paddsw" , int_x86_mmx_padds_w, 1>; defm MMX_PADDUSB : MMXI_binop_rm_int<0xDC, "paddusb", int_x86_mmx_paddus_b, 1>; defm MMX_PADDUSW : MMXI_binop_rm_int<0xDD, "paddusw", int_x86_mmx_paddus_w, 1>; +defm MMX_PHADDW : SS3I_binop_rm_int_mm<0x01, "phaddw", int_x86_ssse3_phadd_w>; +defm MMX_PHADD : SS3I_binop_rm_int_mm<0x02, "phaddd", int_x86_ssse3_phadd_d>; +defm MMX_PHADDSW : SS3I_binop_rm_int_mm<0x03, 
"phaddsw",int_x86_ssse3_phadd_sw>; + + // -- Subtraction -defm MMX_PSUBB : MMXI_binop_rm<0xF8, "psubb", sub, v8i8>; -defm MMX_PSUBW : MMXI_binop_rm<0xF9, "psubw", sub, v4i16>; -defm MMX_PSUBD : MMXI_binop_rm<0xFA, "psubd", sub, v2i32>; -defm MMX_PSUBQ : MMXI_binop_rm<0xFB, "psubq", sub, v1i64>; +defm MMX_PSUBB : MMXI_binop_rm_int<0xF8, "psubb", int_x86_mmx_psub_b>; +defm MMX_PSUBW : MMXI_binop_rm_int<0xF9, "psubw", int_x86_mmx_psub_w>; +defm MMX_PSUBD : MMXI_binop_rm_int<0xFA, "psubd", int_x86_mmx_psub_d>; +defm MMX_PSUBQ : MMXI_binop_rm_int<0xFB, "psubq", int_x86_mmx_psub_q>; defm MMX_PSUBSB : MMXI_binop_rm_int<0xE8, "psubsb" , int_x86_mmx_psubs_b>; defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w>; @@ -211,16 +245,25 @@ defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w>; defm MMX_PSUBUSB : MMXI_binop_rm_int<0xD8, "psubusb", int_x86_mmx_psubus_b>; defm MMX_PSUBUSW : MMXI_binop_rm_int<0xD9, "psubusw", int_x86_mmx_psubus_w>; +defm MMX_PHSUBW : SS3I_binop_rm_int_mm<0x05, "phsubw", int_x86_ssse3_phsub_w>; +defm MMX_PHSUBD : SS3I_binop_rm_int_mm<0x06, "phsubd", int_x86_ssse3_phsub_d>; +defm MMX_PHSUBSW : SS3I_binop_rm_int_mm<0x07, "phsubsw",int_x86_ssse3_phsub_sw>; + // -- Multiplication -defm MMX_PMULLW : MMXI_binop_rm<0xD5, "pmullw", mul, v4i16, 1>; +defm MMX_PMULLW : MMXI_binop_rm_int<0xD5, "pmullw", int_x86_mmx_pmull_w, 1>; defm MMX_PMULHW : MMXI_binop_rm_int<0xE5, "pmulhw", int_x86_mmx_pmulh_w, 1>; defm MMX_PMULHUW : MMXI_binop_rm_int<0xE4, "pmulhuw", int_x86_mmx_pmulhu_w, 1>; defm MMX_PMULUDQ : MMXI_binop_rm_int<0xF4, "pmuludq", int_x86_mmx_pmulu_dq, 1>; +let isCommutable = 1 in +defm MMX_PMULHRSW : SS3I_binop_rm_int_mm<0x0B, "pmulhrsw", + int_x86_ssse3_pmul_hr_sw>; // -- Miscellanea defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd, 1>; +defm MMX_PMADDUBSW : SS3I_binop_rm_int_mm<0x04, "pmaddubsw", + int_x86_ssse3_pmadd_ub_sw>; defm MMX_PAVGB : MMXI_binop_rm_int<0xE0, "pavgb", int_x86_mmx_pavg_b, 1>; defm MMX_PAVGW : MMXI_binop_rm_int<0xE3, "pavgw", int_x86_mmx_pavg_w, 1>; @@ -232,23 +275,17 @@ defm MMX_PMAXSW : MMXI_binop_rm_int<0xEE, "pmaxsw", int_x86_mmx_pmaxs_w, 1>; defm MMX_PSADBW : MMXI_binop_rm_int<0xF6, "psadbw", int_x86_mmx_psad_bw, 1>; -// Logical Instructions -defm MMX_PAND : MMXI_binop_rm_v1i64<0xDB, "pand", and, 1>; -defm MMX_POR : MMXI_binop_rm_v1i64<0xEB, "por" , or, 1>; -defm MMX_PXOR : MMXI_binop_rm_v1i64<0xEF, "pxor", xor, 1>; +defm MMX_PSIGNB : SS3I_binop_rm_int_mm<0x08, "psignb", int_x86_ssse3_psign_b>; +defm MMX_PSIGNW : SS3I_binop_rm_int_mm<0x09, "psignw", int_x86_ssse3_psign_w>; +defm MMX_PSIGND : SS3I_binop_rm_int_mm<0x0A, "psignd", int_x86_ssse3_psign_d>; +let Constraints = "$src1 = $dst" in + defm MMX_PALIGN : ssse3_palign_mm<"palignr", int_x86_mmx_palignr_b>; -let Constraints = "$src1 = $dst" in { - def MMX_PANDNrr : MMXI<0xDF, MRMSrcReg, - (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), - "pandn\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1), - VR64:$src2)))]>; - def MMX_PANDNrm : MMXI<0xDF, MRMSrcMem, - (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), - "pandn\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1), - (load addr:$src2))))]>; -} +// Logical Instructions +defm MMX_PAND : MMXI_binop_rm_int<0xDB, "pand", int_x86_mmx_pand, 1>; +defm MMX_POR : MMXI_binop_rm_int<0xEB, "por" , int_x86_mmx_por, 1>; +defm MMX_PXOR : MMXI_binop_rm_int<0xEF, "pxor", int_x86_mmx_pxor, 1>; +defm MMX_PANDN : MMXI_binop_rm_int<0xDF, "pandn", 
int_x86_mmx_pandn, 1>; // Shift Instructions defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw", @@ -270,12 +307,6 @@ defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw", defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", int_x86_mmx_psra_d, int_x86_mmx_psrai_d>; -// Shift up / down and insert zero's. -def : Pat<(v1i64 (X86vshl VR64:$src, (i8 imm:$amt))), - (MMX_PSLLQri VR64:$src, (GetLo32XForm imm:$amt))>; -def : Pat<(v1i64 (X86vshr VR64:$src, (i8 imm:$amt))), - (MMX_PSRLQri VR64:$src, (GetLo32XForm imm:$amt))>; - // Comparison Instructions defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b>; defm MMX_PCMPEQW : MMXI_binop_rm_int<0x75, "pcmpeqw", int_x86_mmx_pcmpeq_w>; @@ -285,84 +316,19 @@ defm MMX_PCMPGTB : MMXI_binop_rm_int<0x64, "pcmpgtb", int_x86_mmx_pcmpgt_b>; defm MMX_PCMPGTW : MMXI_binop_rm_int<0x65, "pcmpgtw", int_x86_mmx_pcmpgt_w>; defm MMX_PCMPGTD : MMXI_binop_rm_int<0x66, "pcmpgtd", int_x86_mmx_pcmpgt_d>; -// Conversion Instructions - // -- Unpack Instructions -let Constraints = "$src1 = $dst" in { - // Unpack High Packed Data Instructions - def MMX_PUNPCKHBWrr : MMXI<0x68, MRMSrcReg, - (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), - "punpckhbw\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v8i8 (mmx_unpckh VR64:$src1, VR64:$src2)))]>; - def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem, - (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), - "punpckhbw\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v8i8 (mmx_unpckh VR64:$src1, - (bc_v8i8 (load_mmx addr:$src2)))))]>; - - def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg, - (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), - "punpckhwd\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v4i16 (mmx_unpckh VR64:$src1, VR64:$src2)))]>; - def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem, - (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), - "punpckhwd\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v4i16 (mmx_unpckh VR64:$src1, - (bc_v4i16 (load_mmx addr:$src2)))))]>; - - def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg, - (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), - "punpckhdq\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v2i32 (mmx_unpckh VR64:$src1, VR64:$src2)))]>; - def MMX_PUNPCKHDQrm : MMXI<0x6A, MRMSrcMem, - (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), - "punpckhdq\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v2i32 (mmx_unpckh VR64:$src1, - (bc_v2i32 (load_mmx addr:$src2)))))]>; - - // Unpack Low Packed Data Instructions - def MMX_PUNPCKLBWrr : MMXI<0x60, MRMSrcReg, - (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), - "punpcklbw\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v8i8 (mmx_unpckl VR64:$src1, VR64:$src2)))]>; - def MMX_PUNPCKLBWrm : MMXI<0x60, MRMSrcMem, - (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), - "punpcklbw\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v8i8 (mmx_unpckl VR64:$src1, - (bc_v8i8 (load_mmx addr:$src2)))))]>; - - def MMX_PUNPCKLWDrr : MMXI<0x61, MRMSrcReg, - (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), - "punpcklwd\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v4i16 (mmx_unpckl VR64:$src1, VR64:$src2)))]>; - def MMX_PUNPCKLWDrm : MMXI<0x61, MRMSrcMem, - (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), - "punpcklwd\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v4i16 (mmx_unpckl VR64:$src1, - (bc_v4i16 (load_mmx addr:$src2)))))]>; - - def MMX_PUNPCKLDQrr : MMXI<0x62, MRMSrcReg, - (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), - "punpckldq\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v2i32 (mmx_unpckl 
VR64:$src1, VR64:$src2)))]>; - def MMX_PUNPCKLDQrm : MMXI<0x62, MRMSrcMem, - (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), - "punpckldq\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v2i32 (mmx_unpckl VR64:$src1, - (bc_v2i32 (load_mmx addr:$src2)))))]>; -} +defm MMX_PUNPCKHBW : MMXI_binop_rm_int<0x68, "punpckhbw", + int_x86_mmx_punpckhbw>; +defm MMX_PUNPCKHWD : MMXI_binop_rm_int<0x69, "punpckhwd", + int_x86_mmx_punpckhwd>; +defm MMX_PUNPCKHDQ : MMXI_binop_rm_int<0x6A, "punpckhdq", + int_x86_mmx_punpckhdq>; +defm MMX_PUNPCKLBW : MMXI_binop_rm_int<0x60, "punpcklbw", + int_x86_mmx_punpcklbw>; +defm MMX_PUNPCKLWD : MMXI_binop_rm_int<0x61, "punpcklwd", + int_x86_mmx_punpcklwd>; +defm MMX_PUNPCKLDQ : MMXI_binop_rm_int<0x62, "punpckldq", + int_x86_mmx_punpckldq>; // -- Pack Instructions defm MMX_PACKSSWB : MMXI_binop_rm_int<0x63, "packsswb", int_x86_mmx_packsswb>; @@ -370,93 +336,80 @@ defm MMX_PACKSSDW : MMXI_binop_rm_int<0x6B, "packssdw", int_x86_mmx_packssdw>; defm MMX_PACKUSWB : MMXI_binop_rm_int<0x67, "packuswb", int_x86_mmx_packuswb>; // -- Shuffle Instructions +defm MMX_PSHUFB : SS3I_binop_rm_int_mm<0x00, "pshufb", int_x86_ssse3_pshuf_b>; + def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, i8imm:$src2), "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR64:$dst, - (v4i16 (mmx_pshufw:$src2 VR64:$src1, (undef))))]>; + (int_x86_sse_pshuf_w VR64:$src1, imm:$src2))]>; def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2), "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR64:$dst, - (mmx_pshufw:$src2 (bc_v4i16 (load_mmx addr:$src1)), - (undef)))]>; + (int_x86_sse_pshuf_w (load_mmx addr:$src1), + imm:$src2))]>; -// -- Conversion Instructions -let neverHasSideEffects = 1 in { -def MMX_CVTPD2PIrr : MMX2I<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), - "cvtpd2pi\t{$src, $dst|$dst, $src}", []>; -let mayLoad = 1 in -def MMX_CVTPD2PIrm : MMX2I<0x2D, MRMSrcMem, (outs VR64:$dst), - (ins f128mem:$src), - "cvtpd2pi\t{$src, $dst|$dst, $src}", []>; - -def MMX_CVTPI2PDrr : MMX2I<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src), - "cvtpi2pd\t{$src, $dst|$dst, $src}", []>; -let mayLoad = 1 in -def MMX_CVTPI2PDrm : MMX2I<0x2A, MRMSrcMem, (outs VR128:$dst), - (ins i64mem:$src), - "cvtpi2pd\t{$src, $dst|$dst, $src}", []>; - -def MMX_CVTPI2PSrr : MMXI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src), - "cvtpi2ps\t{$src, $dst|$dst, $src}", []>; -let mayLoad = 1 in -def MMX_CVTPI2PSrm : MMXI<0x2A, MRMSrcMem, (outs VR128:$dst), - (ins i64mem:$src), - "cvtpi2ps\t{$src, $dst|$dst, $src}", []>; - -def MMX_CVTPS2PIrr : MMXI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), - "cvtps2pi\t{$src, $dst|$dst, $src}", []>; -let mayLoad = 1 in -def MMX_CVTPS2PIrm : MMXI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src), - "cvtps2pi\t{$src, $dst|$dst, $src}", []>; - -def MMX_CVTTPD2PIrr : MMX2I<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), - "cvttpd2pi\t{$src, $dst|$dst, $src}", []>; -let mayLoad = 1 in -def MMX_CVTTPD2PIrm : MMX2I<0x2C, MRMSrcMem, (outs VR64:$dst), - (ins f128mem:$src), - "cvttpd2pi\t{$src, $dst|$dst, $src}", []>; - -def MMX_CVTTPS2PIrr : MMXI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), - "cvttps2pi\t{$src, $dst|$dst, $src}", []>; -let mayLoad = 1 in -def MMX_CVTTPS2PIrm : MMXI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src), - "cvttps2pi\t{$src, $dst|$dst, $src}", []>; -} // end neverHasSideEffects -// Extract / Insert -def MMX_X86pinsrw : SDNode<"X86ISD::MMX_PINSRW", - 
SDTypeProfile<1, 3, [SDTCisVT<0, v4i16>, SDTCisSameAs<0,1>, - SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>; -def MMX_PEXTRWri : MMXIi8<0xC5, MRMSrcReg, - (outs GR32:$dst), (ins VR64:$src1, i16i8imm:$src2), +// -- Conversion Instructions +defm MMX_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi, + f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}", + SSEPackedSingle>, TB; +defm MMX_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi, + f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}", + SSEPackedDouble>, TB, OpSize; +defm MMX_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi, + f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}", + SSEPackedSingle>, TB; +defm MMX_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi, + f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}", + SSEPackedDouble>, TB, OpSize; +defm MMX_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd, + i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}", + SSEPackedDouble>, TB, OpSize; +let Constraints = "$src1 = $dst" in { + defm MMX_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128, + int_x86_sse_cvtpi2ps, + i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}", + SSEPackedSingle>, TB; +} + +// Extract / Insert +def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg, + (outs GR32:$dst), (ins VR64:$src1, i32i8imm:$src2), "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR32:$dst, (X86pextrw (v4i16 VR64:$src1), + [(set GR32:$dst, (int_x86_mmx_pextr_w VR64:$src1, (iPTR imm:$src2)))]>; let Constraints = "$src1 = $dst" in { - def MMX_PINSRWrri : MMXIi8<0xC4, MRMSrcReg, + def MMX_PINSRWirri : MMXIi8<0xC4, MRMSrcReg, (outs VR64:$dst), - (ins VR64:$src1, GR32:$src2,i16i8imm:$src3), + (ins VR64:$src1, GR32:$src2, i32i8imm:$src3), "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR64:$dst, (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1), - GR32:$src2,(iPTR imm:$src3))))]>; - def MMX_PINSRWrmi : MMXIi8<0xC4, MRMSrcMem, + [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1, + GR32:$src2, (iPTR imm:$src3)))]>; + + def MMX_PINSRWirmi : MMXIi8<0xC4, MRMSrcMem, (outs VR64:$dst), - (ins VR64:$src1, i16mem:$src2, i16i8imm:$src3), + (ins VR64:$src1, i16mem:$src2, i32i8imm:$src3), "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR64:$dst, - (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1), - (i32 (anyext (loadi16 addr:$src2))), - (iPTR imm:$src3))))]>; + [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1, + (i32 (anyext (loadi16 addr:$src2))), + (iPTR imm:$src3)))]>; } +// Mask creation +def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src), + "pmovmskb\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, + (int_x86_mmx_pmovmskb VR64:$src))]>; + + // MMX to XMM for vector types def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1, - [SDTCisVT<0, v2i64>, SDTCisVT<1, v1i64>]>>; + [SDTCisVT<0, v2i64>, SDTCisVT<1, x86mmx>]>>; def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)), (v2i64 (MMX_MOVQ2DQrr VR64:$src))>; @@ -464,14 +417,19 @@ def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)), def : Pat<(v2i64 (MMX_X86movq2dq (load_mmx addr:$src))), (v2i64 (MOVQI2PQIrm addr:$src))>; -def : Pat<(v2i64 (MMX_X86movq2dq (v1i64 (bitconvert - (v2i32 (scalar_to_vector (loadi32 addr:$src))))))), +def : Pat<(v2i64 (MMX_X86movq2dq + (x86mmx (scalar_to_vector (loadi32 addr:$src))))), (v2i64 (MOVDI2PDIrm addr:$src))>; -// Mask creation -def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src), - "pmovmskb\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, 
(int_x86_mmx_pmovmskb VR64:$src))]>; +// Low word of XMM to MMX. +def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1, + [SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>; + +def : Pat<(x86mmx (MMX_X86movdq2q VR128:$src)), + (x86mmx (MMX_MOVDQ2Qrr VR128:$src))>; + +def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))), + (x86mmx (MMX_MOVQ64rm addr:$src))>; // Misc. let Uses = [EDI] in @@ -483,181 +441,14 @@ def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask), "maskmovq\t{$mask, $src|$src, $mask}", [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)]>; -//===----------------------------------------------------------------------===// -// Alias Instructions -//===----------------------------------------------------------------------===// - -// Alias instructions that map zero vector to pxor. -let isReMaterializable = 1, isCodeGenOnly = 1 in { - // FIXME: Change encoding to pseudo. - def MMX_V_SET0 : MMXI<0xEF, MRMInitReg, (outs VR64:$dst), (ins), "", - [(set VR64:$dst, (v2i32 immAllZerosV))]>; - def MMX_V_SETALLONES : MMXI<0x76, MRMInitReg, (outs VR64:$dst), (ins), "", - [(set VR64:$dst, (v2i32 immAllOnesV))]>; -} - -let Predicates = [HasMMX] in { - def : Pat<(v1i64 immAllZerosV), (MMX_V_SET0)>; - def : Pat<(v4i16 immAllZerosV), (MMX_V_SET0)>; - def : Pat<(v8i8 immAllZerosV), (MMX_V_SET0)>; -} - -//===----------------------------------------------------------------------===// -// Non-Instruction Patterns -//===----------------------------------------------------------------------===// - -// Store 64-bit integer vector values. -def : Pat<(store (v8i8 VR64:$src), addr:$dst), - (MMX_MOVQ64mr addr:$dst, VR64:$src)>; -def : Pat<(store (v4i16 VR64:$src), addr:$dst), - (MMX_MOVQ64mr addr:$dst, VR64:$src)>; -def : Pat<(store (v2i32 VR64:$src), addr:$dst), - (MMX_MOVQ64mr addr:$dst, VR64:$src)>; -def : Pat<(store (v1i64 VR64:$src), addr:$dst), - (MMX_MOVQ64mr addr:$dst, VR64:$src)>; - -// Bit convert. -def : Pat<(v8i8 (bitconvert (v1i64 VR64:$src))), (v8i8 VR64:$src)>; -def : Pat<(v8i8 (bitconvert (v2i32 VR64:$src))), (v8i8 VR64:$src)>; -def : Pat<(v8i8 (bitconvert (v4i16 VR64:$src))), (v8i8 VR64:$src)>; -def : Pat<(v4i16 (bitconvert (v1i64 VR64:$src))), (v4i16 VR64:$src)>; -def : Pat<(v4i16 (bitconvert (v2i32 VR64:$src))), (v4i16 VR64:$src)>; -def : Pat<(v4i16 (bitconvert (v8i8 VR64:$src))), (v4i16 VR64:$src)>; -def : Pat<(v2i32 (bitconvert (v1i64 VR64:$src))), (v2i32 VR64:$src)>; -def : Pat<(v2i32 (bitconvert (v4i16 VR64:$src))), (v2i32 VR64:$src)>; -def : Pat<(v2i32 (bitconvert (v8i8 VR64:$src))), (v2i32 VR64:$src)>; -def : Pat<(v1i64 (bitconvert (v2i32 VR64:$src))), (v1i64 VR64:$src)>; -def : Pat<(v1i64 (bitconvert (v4i16 VR64:$src))), (v1i64 VR64:$src)>; -def : Pat<(v1i64 (bitconvert (v8i8 VR64:$src))), (v1i64 VR64:$src)>; - // 64-bit bit convert. 
-def : Pat<(v1i64 (bitconvert (i64 GR64:$src))), +def : Pat<(x86mmx (bitconvert (i64 GR64:$src))), (MMX_MOVD64to64rr GR64:$src)>; -def : Pat<(v2i32 (bitconvert (i64 GR64:$src))), - (MMX_MOVD64to64rr GR64:$src)>; -def : Pat<(v4i16 (bitconvert (i64 GR64:$src))), - (MMX_MOVD64to64rr GR64:$src)>; -def : Pat<(v8i8 (bitconvert (i64 GR64:$src))), - (MMX_MOVD64to64rr GR64:$src)>; -def : Pat<(i64 (bitconvert (v1i64 VR64:$src))), - (MMX_MOVD64from64rr VR64:$src)>; -def : Pat<(i64 (bitconvert (v2i32 VR64:$src))), +def : Pat<(i64 (bitconvert (x86mmx VR64:$src))), (MMX_MOVD64from64rr VR64:$src)>; -def : Pat<(i64 (bitconvert (v4i16 VR64:$src))), - (MMX_MOVD64from64rr VR64:$src)>; -def : Pat<(i64 (bitconvert (v8i8 VR64:$src))), - (MMX_MOVD64from64rr VR64:$src)>; -def : Pat<(f64 (bitconvert (v1i64 VR64:$src))), - (MMX_MOVQ2FR64rr VR64:$src)>; -def : Pat<(f64 (bitconvert (v2i32 VR64:$src))), - (MMX_MOVQ2FR64rr VR64:$src)>; -def : Pat<(f64 (bitconvert (v4i16 VR64:$src))), +def : Pat<(f64 (bitconvert (x86mmx VR64:$src))), (MMX_MOVQ2FR64rr VR64:$src)>; -def : Pat<(f64 (bitconvert (v8i8 VR64:$src))), - (MMX_MOVQ2FR64rr VR64:$src)>; -def : Pat<(v1i64 (bitconvert (f64 FR64:$src))), - (MMX_MOVFR642Qrr FR64:$src)>; -def : Pat<(v2i32 (bitconvert (f64 FR64:$src))), - (MMX_MOVFR642Qrr FR64:$src)>; -def : Pat<(v4i16 (bitconvert (f64 FR64:$src))), +def : Pat<(x86mmx (bitconvert (f64 FR64:$src))), (MMX_MOVFR642Qrr FR64:$src)>; -def : Pat<(v8i8 (bitconvert (f64 FR64:$src))), - (MMX_MOVFR642Qrr FR64:$src)>; - -let AddedComplexity = 20 in { - def : Pat<(v2i32 (X86vzmovl (bc_v2i32 (load_mmx addr:$src)))), - (MMX_MOVZDI2PDIrm addr:$src)>; -} - -// Clear top half. -let AddedComplexity = 15 in { - def : Pat<(v2i32 (X86vzmovl VR64:$src)), - (MMX_PUNPCKLDQrr VR64:$src, (v2i32 (MMX_V_SET0)))>; -} - -// Patterns to perform canonical versions of vector shuffling. -let AddedComplexity = 10 in { - def : Pat<(v8i8 (mmx_unpckl_undef VR64:$src, (undef))), - (MMX_PUNPCKLBWrr VR64:$src, VR64:$src)>; - def : Pat<(v4i16 (mmx_unpckl_undef VR64:$src, (undef))), - (MMX_PUNPCKLWDrr VR64:$src, VR64:$src)>; - def : Pat<(v2i32 (mmx_unpckl_undef VR64:$src, (undef))), - (MMX_PUNPCKLDQrr VR64:$src, VR64:$src)>; -} -let AddedComplexity = 10 in { - def : Pat<(v8i8 (mmx_unpckh_undef VR64:$src, (undef))), - (MMX_PUNPCKHBWrr VR64:$src, VR64:$src)>; - def : Pat<(v4i16 (mmx_unpckh_undef VR64:$src, (undef))), - (MMX_PUNPCKHWDrr VR64:$src, VR64:$src)>; - def : Pat<(v2i32 (mmx_unpckh_undef VR64:$src, (undef))), - (MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>; -} -// Some special case PANDN patterns. -// FIXME: Get rid of these. -def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))), - VR64:$src2)), - (MMX_PANDNrr VR64:$src1, VR64:$src2)>; -def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))), - (load addr:$src2))), - (MMX_PANDNrm VR64:$src1, addr:$src2)>; - -// Move MMX to lower 64-bit of XMM -def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v8i8 VR64:$src))))), - (v2i64 (MMX_MOVQ2DQrr VR64:$src))>; -def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v4i16 VR64:$src))))), - (v2i64 (MMX_MOVQ2DQrr VR64:$src))>; -def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v2i32 VR64:$src))))), - (v2i64 (MMX_MOVQ2DQrr VR64:$src))>; -def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v1i64 VR64:$src))))), - (v2i64 (MMX_MOVQ2DQrr VR64:$src))>; - -// Move lower 64-bit of XMM to MMX. 
-def : Pat<(v2i32 (bitconvert (i64 (vector_extract (v2i64 VR128:$src), - (iPTR 0))))), - (v2i32 (MMX_MOVDQ2Qrr VR128:$src))>; -def : Pat<(v4i16 (bitconvert (i64 (vector_extract (v2i64 VR128:$src), - (iPTR 0))))), - (v4i16 (MMX_MOVDQ2Qrr VR128:$src))>; -def : Pat<(v8i8 (bitconvert (i64 (vector_extract (v2i64 VR128:$src), - (iPTR 0))))), - (v8i8 (MMX_MOVDQ2Qrr VR128:$src))>; - -// Patterns for vector comparisons -def : Pat<(v8i8 (X86pcmpeqb VR64:$src1, VR64:$src2)), - (MMX_PCMPEQBrr VR64:$src1, VR64:$src2)>; -def : Pat<(v8i8 (X86pcmpeqb VR64:$src1, (bitconvert (load_mmx addr:$src2)))), - (MMX_PCMPEQBrm VR64:$src1, addr:$src2)>; -def : Pat<(v4i16 (X86pcmpeqw VR64:$src1, VR64:$src2)), - (MMX_PCMPEQWrr VR64:$src1, VR64:$src2)>; -def : Pat<(v4i16 (X86pcmpeqw VR64:$src1, (bitconvert (load_mmx addr:$src2)))), - (MMX_PCMPEQWrm VR64:$src1, addr:$src2)>; -def : Pat<(v2i32 (X86pcmpeqd VR64:$src1, VR64:$src2)), - (MMX_PCMPEQDrr VR64:$src1, VR64:$src2)>; -def : Pat<(v2i32 (X86pcmpeqd VR64:$src1, (bitconvert (load_mmx addr:$src2)))), - (MMX_PCMPEQDrm VR64:$src1, addr:$src2)>; - -def : Pat<(v8i8 (X86pcmpgtb VR64:$src1, VR64:$src2)), - (MMX_PCMPGTBrr VR64:$src1, VR64:$src2)>; -def : Pat<(v8i8 (X86pcmpgtb VR64:$src1, (bitconvert (load_mmx addr:$src2)))), - (MMX_PCMPGTBrm VR64:$src1, addr:$src2)>; -def : Pat<(v4i16 (X86pcmpgtw VR64:$src1, VR64:$src2)), - (MMX_PCMPGTWrr VR64:$src1, VR64:$src2)>; -def : Pat<(v4i16 (X86pcmpgtw VR64:$src1, (bitconvert (load_mmx addr:$src2)))), - (MMX_PCMPGTWrm VR64:$src1, addr:$src2)>; -def : Pat<(v2i32 (X86pcmpgtd VR64:$src1, VR64:$src2)), - (MMX_PCMPGTDrr VR64:$src1, VR64:$src2)>; -def : Pat<(v2i32 (X86pcmpgtd VR64:$src1, (bitconvert (load_mmx addr:$src2)))), - (MMX_PCMPGTDrm VR64:$src1, addr:$src2)>; - -// CMOV* - Used to implement the SELECT DAG operation. Expanded after -// instruction selection into a branch sequence. -let Uses = [EFLAGS], usesCustomInserter = 1 in { - def CMOV_V1I64 : I<0, Pseudo, - (outs VR64:$dst), (ins VR64:$t, VR64:$f, i8imm:$cond), - "#CMOV_V1I64 PSEUDO!", - [(set VR64:$dst, - (v1i64 (X86cmov VR64:$t, VR64:$f, imm:$cond, - EFLAGS)))]>; -} diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index f5466f8..b912949 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -15,43 +15,6 @@ //===----------------------------------------------------------------------===// -// SSE scalar FP Instructions -//===----------------------------------------------------------------------===// - -// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded after -// instruction selection into a branch sequence. 
-let Uses = [EFLAGS], usesCustomInserter = 1 in { - def CMOV_FR32 : I<0, Pseudo, - (outs FR32:$dst), (ins FR32:$t, FR32:$f, i8imm:$cond), - "#CMOV_FR32 PSEUDO!", - [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond, - EFLAGS))]>; - def CMOV_FR64 : I<0, Pseudo, - (outs FR64:$dst), (ins FR64:$t, FR64:$f, i8imm:$cond), - "#CMOV_FR64 PSEUDO!", - [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond, - EFLAGS))]>; - def CMOV_V4F32 : I<0, Pseudo, - (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond), - "#CMOV_V4F32 PSEUDO!", - [(set VR128:$dst, - (v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond, - EFLAGS)))]>; - def CMOV_V2F64 : I<0, Pseudo, - (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond), - "#CMOV_V2F64 PSEUDO!", - [(set VR128:$dst, - (v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond, - EFLAGS)))]>; - def CMOV_V2I64 : I<0, Pseudo, - (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond), - "#CMOV_V2I64 PSEUDO!", - [(set VR128:$dst, - (v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond, - EFLAGS)))]>; -} - -//===----------------------------------------------------------------------===// // SSE 1 & 2 Instructions Classes //===----------------------------------------------------------------------===// @@ -82,17 +45,15 @@ multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC, !if(Is2Addr, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse", - !strconcat(SSEVer, !strconcat("_", - !strconcat(OpcodeStr, FPSizeStr)))) + [(set RC:$dst, (!cast<Intrinsic>( + !strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr)) RC:$src1, RC:$src2))]>; def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2), !if(Is2Addr, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse", - !strconcat(SSEVer, !strconcat("_", - !strconcat(OpcodeStr, FPSizeStr)))) + [(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse", + SSEVer, "_", OpcodeStr, FPSizeStr)) RC:$src1, mem_cpat:$src2))]>; } @@ -142,17 +103,15 @@ multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC, !if(Is2Addr, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_", - !strconcat(SSEVer, !strconcat("_", - !strconcat(OpcodeStr, FPSizeStr)))) + [(set RC:$dst, (!cast<Intrinsic>( + !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr)) RC:$src1, RC:$src2))], d>; def rm_Int : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1,x86memop:$src2), !if(Is2Addr, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_", - !strconcat(SSEVer, !strconcat("_", - !strconcat(OpcodeStr, FPSizeStr)))) + [(set RC:$dst, (!cast<Intrinsic>( + !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr)) RC:$src1, (mem_frag addr:$src2)))], d>; } @@ -221,6 +180,12 @@ def : Pat<(v4f32 (scalar_to_vector FR32:$src)), // Implicitly promote a 64-bit scalar to a vector. def : Pat<(v2f64 (scalar_to_vector FR64:$src)), (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>; +// Implicitly promote a 32-bit scalar to a vector. +def : Pat<(v8f32 (scalar_to_vector FR32:$src)), + (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>; +// Implicitly promote a 64-bit scalar to a vector. 
+def : Pat<(v4f64 (scalar_to_vector FR64:$src)), + (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>; let AddedComplexity = 20 in { // MOVSSrm zeros the high parts of the register; represent this @@ -403,7 +368,7 @@ multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC, string asm_opr> { def PSrm : PI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), - !strconcat(!strconcat(base_opc,"s"), asm_opr), + !strconcat(base_opc, "s", asm_opr), [(set RC:$dst, (mov_frag RC:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))], @@ -411,7 +376,7 @@ multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC, def PDrm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, f64mem:$src2), - !strconcat(!strconcat(base_opc,"d"), asm_opr), + !strconcat(base_opc, "d", asm_opr), [(set RC:$dst, (v2f64 (mov_frag RC:$src1, (scalar_to_vector (loadf64 addr:$src2)))))], SSEPackedDouble>, TB, OpSize; @@ -598,14 +563,6 @@ defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64, // Conversion Instructions Intrinsics - Match intrinsics which expect MM // and/or XMM operand(s). -multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, - Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, - string asm, Domain d> { - def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, - [(set DstRC:$dst, (Int SrcRC:$src))], d>; - def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, - [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>; -} multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, @@ -618,16 +575,6 @@ multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, [(set DstRC:$dst, (Int (ld_frag addr:$src)))]>; } -multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC, - RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, - PatFrag ld_frag, string asm, Domain d> { - def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2), - asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>; - def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), - (ins DstRC:$src1, x86memop:$src2), asm, - [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>; -} - multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, string asm, bit Is2Addr = 1> { @@ -669,13 +616,11 @@ defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, f32mem, load, "cvtss2si">, XS; defm Int_CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64, f32mem, load, "cvtss2si{q}">, XS, REX_W; -defm Int_CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, - f128mem, load, "cvtsd2si">, XD; -defm Int_CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64, - f128mem, load, "cvtsd2si">, XD, REX_W; +defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, + f128mem, load, "cvtsd2si{l}">, XD; +defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64, + f128mem, load, "cvtsd2si{q}">, XD, REX_W; -defm CVTSD2SI64 : sse12_cvt_s_np<0x2D, VR128, GR64, f64mem, "cvtsd2si{q}">, XD, - REX_W; let isAsmParserOnly = 1 in { defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, @@ -705,29 +650,6 @@ let Constraints = "$src1 = $dst" in { "cvtsi2sd">, XD, REX_W; } -// Instructions below don't have an AVX form. 
-defm Int_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi, - f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}", - SSEPackedSingle>, TB; -defm Int_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi, - f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}", - SSEPackedDouble>, TB, OpSize; -defm Int_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi, - f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}", - SSEPackedSingle>, TB; -defm Int_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi, - f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}", - SSEPackedDouble>, TB, OpSize; -defm Int_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd, - i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}", - SSEPackedDouble>, TB, OpSize; -let Constraints = "$src1 = $dst" in { - defm Int_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128, - int_x86_sse_cvtpi2ps, - i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}", - SSEPackedSingle>, TB; -} - /// SSE 1 Only // Aliases for intrinsics @@ -738,10 +660,10 @@ defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, int_x86_sse_cvttss2si64, f32mem, load, "cvttss2si">, XS, VEX, VEX_W; defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si, - f128mem, load, "cvttss2si">, XD, VEX; + f128mem, load, "cvttsd2si">, XD, VEX; defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, int_x86_sse2_cvttsd2si64, f128mem, load, - "cvttss2si">, XD, VEX, VEX_W; + "cvttsd2si">, XD, VEX, VEX_W; } defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, f32mem, load, "cvttss2si">, XS; @@ -749,10 +671,10 @@ defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, int_x86_sse_cvttss2si64, f32mem, load, "cvttss2si{q}">, XS, REX_W; defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si, - f128mem, load, "cvttss2si">, XD; + f128mem, load, "cvttsd2si">, XD; defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, int_x86_sse2_cvttsd2si64, f128mem, load, - "cvttss2si{q}">, XD, REX_W; + "cvttsd2si{q}">, XD, REX_W; let isAsmParserOnly = 1, Pattern = []<dag> in { defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load, @@ -790,6 +712,9 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V; } +def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>, + Requires<[HasAVX]>; + def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (fround FR64:$src))]>; @@ -817,6 +742,9 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, XS, VEX_4V, Requires<[HasAVX, OptForSize]>; } +def : Pat<(f64 (fextend FR32:$src)), (VCVTSS2SDrr FR32:$src, FR32:$src)>, + Requires<[HasAVX]>; + def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (fextend FR32:$src))]>, XS, @@ -973,9 +901,13 @@ def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; } def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvttps2dq\t{$src, $dst|$dst, $src}", []>; + "cvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvttps2dq VR128:$src))]>; def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvttps2dq\t{$src, $dst|$dst, 
$src}", []>; + "cvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvttps2dq (memop addr:$src)))]>; let isAsmParserOnly = 1 in { @@ -990,16 +922,6 @@ def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), (memop addr:$src)))]>, XS, VEX, Requires<[HasAVX]>; } -def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (int_x86_sse2_cvttps2dq VR128:$src))]>, - XS, Requires<[HasSSE2]>; -def Int_CVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttps2dq - (memop addr:$src)))]>, - XS, Requires<[HasSSE2]>; let isAsmParserOnly = 1 in { def Int_VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), @@ -1013,13 +935,13 @@ def Int_VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), [(set VR128:$dst, (int_x86_sse2_cvttpd2dq (memop addr:$src)))]>, VEX; } -def Int_CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvttpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>; -def Int_CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), - "cvttpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttpd2dq - (memop addr:$src)))]>; +def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvttpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>; +def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), + "cvttpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttpd2dq + (memop addr:$src)))]>; let isAsmParserOnly = 1 in { // The assembler can recognize rr 256-bit instructions by seeing a ymm @@ -1469,9 +1391,11 @@ let AddedComplexity = 10 in { /// sse12_extr_sign_mask - sse 1 & 2 unpack and interleave multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm, Domain d> { - def rr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src), - !strconcat(asm, "\t{$src, $dst|$dst, $src}"), + def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [(set GR32:$dst, (Int RC:$src))], d>; + def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>, REX_W; } // Mask creation @@ -1522,6 +1446,12 @@ def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "", def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "", [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2]>, TB, OpSize; +def VFsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "", + [(set FR32:$dst, fp32imm0)]>, + Requires<[HasAVX]>, TB, OpSize, VEX_4V; +def VFsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "", + [(set FR64:$dst, fpimm0)]>, + Requires<[HasAVX]>, TB, OpSize, VEX_4V; } // Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. 
Upper @@ -1654,19 +1584,13 @@ defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>; let isCommutable = 0 in defm ANDN : sse12_fp_packed_logical<0x55, "andn", undef /* dummy */, 1, [ // single r+r - [(set VR128:$dst, (v2i64 (and (xor VR128:$src1, - (bc_v2i64 (v4i32 immAllOnesV))), - VR128:$src2)))], + [(set VR128:$dst, (X86pandn VR128:$src1, VR128:$src2))], // double r+r - [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))), - (bc_v2i64 (v2f64 VR128:$src2))))], + [], // single r+m - [(set VR128:$dst, (v2i64 (and (xor (bc_v2i64 (v4f32 VR128:$src1)), - (bc_v2i64 (v4i32 immAllOnesV))), - (memopv2i64 addr:$src2))))], + [(set VR128:$dst, (X86pandn VR128:$src1, (memopv2i64 addr:$src2)))], // double r+m - [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))), - (memopv2i64 addr:$src2)))]]>; + []]>; //===----------------------------------------------------------------------===// // SSE 1 & 2 - Arithmetic Instructions @@ -2170,7 +2094,7 @@ def : Pat<(X86SFence), (SFENCE)>; // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-zeros value if folding it would be beneficial. // FIXME: Change encoding to pseudo! This is blocked right now by the x86 -// JIT implementatioan, it does not expand the instructions below like +// JIT implementation, it does not expand the instructions below like // X86MCInstLower does. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, isCodeGenOnly = 1 in { @@ -2277,6 +2201,10 @@ let neverHasSideEffects = 1 in def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", []>; +def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "movdqu\t{$src, $dst|$dst, $src}", + []>, XS, Requires<[HasSSE2]>; + let canFoldAsLoad = 1, mayLoad = 1 in { def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqa\t{$src, $dst|$dst, $src}", @@ -2606,15 +2534,11 @@ let ExeDomain = SSEPackedInt in { } def PANDNrr : PDI<0xDF, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "pandn\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1), - VR128:$src2)))]>; + "pandn\t{$src2, $dst|$dst, $src2}", []>; def PANDNrm : PDI<0xDF, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "pandn\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1), - (memopv2i64 addr:$src2))))]>; + "pandn\t{$src2, $dst|$dst, $src2}", []>; } } // Constraints = "$src1 = $dst" @@ -3009,6 +2933,13 @@ def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>; +def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), + "mov{d|q}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v2i64 (scalar_to_vector GR64:$src)))]>; +def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), + "mov{d|q}\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (bitconvert GR64:$src))]>; // Move Int Doubleword to Single Scalar @@ -3051,6 +2982,21 @@ def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), [(store (i32 (vector_extract (v4i32 VR128:$src), (iPTR 0))), addr:$dst)]>; +def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), + "mov{d|q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), + (iPTR 0)))]>; +def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), + 
"movq\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>; + +def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), + "mov{d|q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (bitconvert FR64:$src))]>; +def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), + "movq\t{$src, $dst|$dst, $src}", + [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>; + // Move Scalar Single to Double Int let isAsmParserOnly = 1 in { def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), @@ -3532,18 +3478,6 @@ let Constraints = "$src1 = $dst" in { // SSSE3 - Packed Absolute Instructions //===---------------------------------------------------------------------===// -/// SS3I_unop_rm_int_mm - Simple SSSE3 unary whose type can be v*{i8,i16,i32}. -multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr, - PatFrag mem_frag64, Intrinsic IntId64> { - def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR64:$dst, (IntId64 VR64:$src))]>; - - def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR64:$dst, - (IntId64 (bitconvert (mem_frag64 addr:$src))))]>; -} /// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, @@ -3572,19 +3506,11 @@ let isAsmParserOnly = 1, Predicates = [HasAVX] in { } defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv16i8, - int_x86_ssse3_pabs_b_128>, - SS3I_unop_rm_int_mm<0x1C, "pabsb", memopv8i8, - int_x86_ssse3_pabs_b>; - + int_x86_ssse3_pabs_b_128>; defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv8i16, - int_x86_ssse3_pabs_w_128>, - SS3I_unop_rm_int_mm<0x1D, "pabsw", memopv4i16, - int_x86_ssse3_pabs_w>; - + int_x86_ssse3_pabs_w_128>; defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv4i32, - int_x86_ssse3_pabs_d_128>, - SS3I_unop_rm_int_mm<0x1E, "pabsd", memopv2i32, - int_x86_ssse3_pabs_d>; + int_x86_ssse3_pabs_d_128>; //===---------------------------------------------------------------------===// // SSSE3 - Packed Binary Operator Instructions @@ -3611,20 +3537,6 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, (IntId128 VR128:$src1, (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; } -multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr, - PatFrag mem_frag64, Intrinsic IntId64> { - let isCommutable = 1 in - def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), - (ins VR64:$src1, VR64:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>; - def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), - (ins VR64:$src1, i64mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, - (IntId64 VR64:$src1, - (bitconvert (memopv8i8 addr:$src2))))]>; -} let isAsmParserOnly = 1, Predicates = [HasAVX] in { let isCommutable = 0 in { @@ -3659,54 +3571,30 @@ defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv8i16, let ImmT = NoImm, Constraints = "$src1 = $dst" in { let isCommutable = 0 in { defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv8i16, - int_x86_ssse3_phadd_w_128>, - SS3I_binop_rm_int_mm<0x01, "phaddw", memopv4i16, - int_x86_ssse3_phadd_w>; + int_x86_ssse3_phadd_w_128>; defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv4i32, - int_x86_ssse3_phadd_d_128>, - SS3I_binop_rm_int_mm<0x02, "phaddd", memopv2i32, - int_x86_ssse3_phadd_d>; + 
int_x86_ssse3_phadd_d_128>; defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv8i16, - int_x86_ssse3_phadd_sw_128>, - SS3I_binop_rm_int_mm<0x03, "phaddsw", memopv4i16, - int_x86_ssse3_phadd_sw>; + int_x86_ssse3_phadd_sw_128>; defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv8i16, - int_x86_ssse3_phsub_w_128>, - SS3I_binop_rm_int_mm<0x05, "phsubw", memopv4i16, - int_x86_ssse3_phsub_w>; + int_x86_ssse3_phsub_w_128>; defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv4i32, - int_x86_ssse3_phsub_d_128>, - SS3I_binop_rm_int_mm<0x06, "phsubd", memopv2i32, - int_x86_ssse3_phsub_d>; + int_x86_ssse3_phsub_d_128>; defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv8i16, - int_x86_ssse3_phsub_sw_128>, - SS3I_binop_rm_int_mm<0x07, "phsubsw", memopv4i16, - int_x86_ssse3_phsub_sw>; + int_x86_ssse3_phsub_sw_128>; defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv16i8, - int_x86_ssse3_pmadd_ub_sw_128>, - SS3I_binop_rm_int_mm<0x04, "pmaddubsw", memopv8i8, - int_x86_ssse3_pmadd_ub_sw>; - defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv8i8, - int_x86_ssse3_pshuf_b_128>, - SS3I_binop_rm_int_mm<0x00, "pshufb", memopv8i8, - int_x86_ssse3_pshuf_b>; + int_x86_ssse3_pmadd_ub_sw_128>; + defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv16i8, + int_x86_ssse3_pshuf_b_128>; defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv16i8, - int_x86_ssse3_psign_b_128>, - SS3I_binop_rm_int_mm<0x08, "psignb", memopv8i8, - int_x86_ssse3_psign_b>; + int_x86_ssse3_psign_b_128>; defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv8i16, - int_x86_ssse3_psign_w_128>, - SS3I_binop_rm_int_mm<0x09, "psignw", memopv4i16, - int_x86_ssse3_psign_w>; + int_x86_ssse3_psign_w_128>; defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv4i32, - int_x86_ssse3_psign_d_128>, - SS3I_binop_rm_int_mm<0x0A, "psignd", memopv2i32, - int_x86_ssse3_psign_d>; + int_x86_ssse3_psign_d_128>; } defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv8i16, - int_x86_ssse3_pmul_hr_sw_128>, - SS3I_binop_rm_int_mm<0x0B, "pmulhrsw", memopv4i16, - int_x86_ssse3_pmul_hr_sw>; + int_x86_ssse3_pmul_hr_sw_128>; } def : Pat<(X86pshufb VR128:$src, VR128:$mask), @@ -3714,19 +3602,17 @@ def : Pat<(X86pshufb VR128:$src, VR128:$mask), def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), (PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>; +def : Pat<(X86psignb VR128:$src1, VR128:$src2), + (PSIGNBrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>; +def : Pat<(X86psignw VR128:$src1, VR128:$src2), + (PSIGNWrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>; +def : Pat<(X86psignd VR128:$src1, VR128:$src2), + (PSIGNDrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>; + //===---------------------------------------------------------------------===// // SSSE3 - Packed Align Instruction Patterns //===---------------------------------------------------------------------===// -multiclass ssse3_palign_mm<string asm> { - def R64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), - (ins VR64:$src1, VR64:$src2, i8imm:$src3), - !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>; - def R64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst), - (ins VR64:$src1, i64mem:$src2, i8imm:$src3), - !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>; -} - multiclass ssse3_palign<string asm, bit Is2Addr = 1> { def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), @@ -3747,28 +3633,9 @@ multiclass ssse3_palign<string asm, bit Is2Addr = 1> { let isAsmParserOnly = 1, 
Predicates = [HasAVX] in defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V; let Constraints = "$src1 = $dst" in - defm PALIGN : ssse3_palign<"palignr">, - ssse3_palign_mm<"palignr">; + defm PALIGN : ssse3_palign<"palignr">; let AddedComplexity = 5 in { - -def : Pat<(v1i64 (palign:$src3 VR64:$src1, VR64:$src2)), - (PALIGNR64rr VR64:$src2, VR64:$src1, - (SHUFFLE_get_palign_imm VR64:$src3))>, - Requires<[HasSSSE3]>; -def : Pat<(v2i32 (palign:$src3 VR64:$src1, VR64:$src2)), - (PALIGNR64rr VR64:$src2, VR64:$src1, - (SHUFFLE_get_palign_imm VR64:$src3))>, - Requires<[HasSSSE3]>; -def : Pat<(v4i16 (palign:$src3 VR64:$src1, VR64:$src2)), - (PALIGNR64rr VR64:$src2, VR64:$src1, - (SHUFFLE_get_palign_imm VR64:$src3))>, - Requires<[HasSSSE3]>; -def : Pat<(v8i8 (palign:$src3 VR64:$src1, VR64:$src2)), - (PALIGNR64rr VR64:$src2, VR64:$src1, - (SHUFFLE_get_palign_imm VR64:$src3))>, - Requires<[HasSSSE3]>; - def : Pat<(v4i32 (palign:$src3 VR128:$src1, VR128:$src2)), (PALIGNR128rr VR128:$src2, VR128:$src1, (SHUFFLE_get_palign_imm VR128:$src3))>, @@ -3792,10 +3659,27 @@ def : Pat<(v16i8 (palign:$src3 VR128:$src1, VR128:$src2)), //===---------------------------------------------------------------------===// // Thread synchronization -def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor", - [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>; -def MWAIT : I<0x01, MRM_C9, (outs), (ins), "mwait", - [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>; +let usesCustomInserter = 1 in { +def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3), + [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>; +def MWAIT : PseudoI<(outs), (ins GR32:$src1, GR32:$src2), + [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>; +} + +let Uses = [EAX, ECX, EDX] in +def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, TB, + Requires<[HasSSE3]>; +let Uses = [ECX, EAX] in +def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", []>, TB, + Requires<[HasSSE3]>; + +def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>; +def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>; + +def : InstAlias<"monitor %eax, %ecx, %edx", (MONITORrrr)>, + Requires<[In32BitMode]>; +def : InstAlias<"monitor %rax, %rcx, %rdx", (MONITORrrr)>, + Requires<[In64BitMode]>; //===---------------------------------------------------------------------===// // Non-Instruction Patterns @@ -3811,7 +3695,7 @@ let Predicates = [HasSSE2] in (CVTSS2SDrm addr:$src)>; // bit_convert -let Predicates = [HasSSE2] in { +let Predicates = [HasXMMInt] in { def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>; @@ -3844,6 +3728,10 @@ let Predicates = [HasSSE2] in { def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>; } +let Predicates = [HasAVX] in { + def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>; +} + // Move scalar to XMM zero-extended // movd to XMM register zero-extends let AddedComplexity = 15 in { @@ -4017,36 +3905,11 @@ def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))), def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>; -// Some special case pandn patterns. 
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))), - VR128:$src2)), - (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))), - VR128:$src2)), - (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))), - VR128:$src2)), - (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; - -def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))), - (memop addr:$src2))), - (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))), - (memop addr:$src2))), - (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))), - (memop addr:$src2))), - (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; - // vector -> vector casts def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), (Int_CVTDQ2PSrr VR128:$src)>, Requires<[HasSSE2]>; def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), - (Int_CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(v2f64 (sint_to_fp (v2i32 VR64:$src))), - (Int_CVTPI2PDrr VR64:$src)>, Requires<[HasSSE2]>; -def : Pat<(v2i32 (fp_to_sint (v2f64 VR128:$src))), - (Int_CVTTPD2PIrr VR128:$src)>, Requires<[HasSSE2]>; + (CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>; // Use movaps / movups for SSE integer load / store (one byte shorter). let Predicates = [HasSSE1] in { @@ -4504,7 +4367,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr, Intrinsic V4F32Int, Intrinsic V2F64Int> { // Intrinsic operation, reg. // Vector intrinsic operation, reg - def PSr_Int : SS4AIi8<opcps, MRMSrcReg, + def PSr : SS4AIi8<opcps, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), @@ -4512,7 +4375,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr, OpSize; // Vector intrinsic operation, mem - def PSm_Int : Ii8<opcps, MRMSrcMem, + def PSm : Ii8<opcps, MRMSrcMem, (outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), @@ -4522,7 +4385,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr, Requires<[HasSSE41]>; // Vector intrinsic operation, reg - def PDr_Int : SS4AIi8<opcpd, MRMSrcReg, + def PDr : SS4AIi8<opcpd, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), @@ -4530,7 +4393,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr, OpSize; // Vector intrinsic operation, mem - def PDm_Int : SS4AIi8<opcpd, MRMSrcMem, + def PDm : SS4AIi8<opcpd, MRMSrcMem, (outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), @@ -4543,28 +4406,28 @@ multiclass sse41_fp_unop_rm_avx_p<bits<8> opcps, bits<8> opcpd, RegisterClass RC, X86MemOperand x86memop, string OpcodeStr> { // Intrinsic operation, reg. 
// Vector intrinsic operation, reg - def PSr : SS4AIi8<opcps, MRMSrcReg, + def PSr_AVX : SS4AIi8<opcps, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, OpSize; // Vector intrinsic operation, mem - def PSm : Ii8<opcps, MRMSrcMem, + def PSm_AVX : Ii8<opcps, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, TA, OpSize, Requires<[HasSSE41]>; // Vector intrinsic operation, reg - def PDr : SS4AIi8<opcpd, MRMSrcReg, + def PDr_AVX : SS4AIi8<opcpd, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, OpSize; // Vector intrinsic operation, mem - def PDm : SS4AIi8<opcpd, MRMSrcMem, + def PDm_AVX : SS4AIi8<opcpd, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), @@ -4576,7 +4439,7 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd, Intrinsic F32Int, Intrinsic F64Int, bit Is2Addr = 1> { // Intrinsic operation, reg. - def SSr_Int : SS4AIi8<opcss, MRMSrcReg, + def SSr : SS4AIi8<opcss, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), !if(Is2Addr, !strconcat(OpcodeStr, @@ -4587,7 +4450,7 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd, OpSize; // Intrinsic operation, mem. - def SSm_Int : SS4AIi8<opcss, MRMSrcMem, + def SSm : SS4AIi8<opcss, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3), !if(Is2Addr, !strconcat(OpcodeStr, @@ -4599,7 +4462,7 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd, OpSize; // Intrinsic operation, reg. - def SDr_Int : SS4AIi8<opcsd, MRMSrcReg, + def SDr : SS4AIi8<opcsd, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), !if(Is2Addr, !strconcat(OpcodeStr, @@ -4610,7 +4473,7 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd, OpSize; // Intrinsic operation, mem. - def SDm_Int : SS4AIi8<opcsd, MRMSrcMem, + def SDm : SS4AIi8<opcsd, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3), !if(Is2Addr, !strconcat(OpcodeStr, @@ -4625,28 +4488,28 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd, multiclass sse41_fp_binop_rm_avx_s<bits<8> opcss, bits<8> opcsd, string OpcodeStr> { // Intrinsic operation, reg. - def SSr : SS4AIi8<opcss, MRMSrcReg, + def SSr_AVX : SS4AIi8<opcss, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), !strconcat(OpcodeStr, "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>, OpSize; // Intrinsic operation, mem. - def SSm : SS4AIi8<opcss, MRMSrcMem, + def SSm_AVX : SS4AIi8<opcss, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3), !strconcat(OpcodeStr, "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>, OpSize; // Intrinsic operation, reg. - def SDr : SS4AIi8<opcsd, MRMSrcReg, + def SDr_AVX : SS4AIi8<opcsd, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), !strconcat(OpcodeStr, "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>, OpSize; // Intrinsic operation, mem. 
- def SDm : SS4AIi8<opcsd, MRMSrcMem, + def SDm_AVX : SS4AIi8<opcsd, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3), !strconcat(OpcodeStr, "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), @@ -4743,6 +4606,29 @@ defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>; // SSE4.1 - Misc Instructions //===----------------------------------------------------------------------===// +def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), + "popcnt{w}\t{$src, $dst|$dst, $src}", + [(set GR16:$dst, (ctpop GR16:$src))]>, OpSize, XS; +def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), + "popcnt{w}\t{$src, $dst|$dst, $src}", + [(set GR16:$dst, (ctpop (loadi16 addr:$src)))]>, OpSize, XS; + +def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), + "popcnt{l}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (ctpop GR32:$src))]>, XS; +def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "popcnt{l}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (ctpop (loadi32 addr:$src)))]>, XS; + +def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "popcnt{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (ctpop GR64:$src))]>, XS; +def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "popcnt{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (ctpop (loadi64 addr:$src)))]>, XS; + + + // SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16. multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr, Intrinsic IntId128> { @@ -4981,6 +4867,9 @@ defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>; defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>; defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>; +def : Pat<(X86pblendv VR128:$src1, VR128:$src2, XMM0), + (PBLENDVBrr0 VR128:$src1, VR128:$src2)>; + let isAsmParserOnly = 1, Predicates = [HasAVX] in def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", @@ -5032,12 +4921,12 @@ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))), // Packed Compare Implicit Length Strings, Return Mask multiclass pseudo_pcmpistrm<string asm> { - def REG : Ii8<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), !strconcat(asm, "rr PSEUDO"), + def REG : PseudoI<(outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2, imm:$src3))]>; - def MEM : Ii8<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2, i8imm:$src3), !strconcat(asm, "rm PSEUDO"), + def MEM : PseudoI<(outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, (load addr:$src2), imm:$src3))]>; } @@ -5068,12 +4957,12 @@ let Defs = [XMM0, EFLAGS] in { // Packed Compare Explicit Length Strings, Return Mask multiclass pseudo_pcmpestrm<string asm> { - def REG : Ii8<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src3, i8imm:$src5), !strconcat(asm, "rr PSEUDO"), + def REG : PseudoI<(outs VR128:$dst), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>; - def MEM : Ii8<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src3, i8imm:$src5), !strconcat(asm, "rm PSEUDO"), + def MEM : PseudoI<(outs VR128:$dst), + (ins 
VR128:$src1, i128mem:$src3, i8imm:$src5), [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>; } @@ -5555,6 +5444,23 @@ def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3), def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3), (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; +def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; + def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2), (VEXTRACTF128rr VR256:$src1, imm:$src2)>; def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2), @@ -5562,6 +5468,23 @@ def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2), def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2), (VEXTRACTF128rr VR256:$src1, imm:$src2)>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v4f32 (VEXTRACTF128rr + (v8f32 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v2f64 (VEXTRACTF128rr + (v4f64 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v4i32 (VEXTRACTF128rr + (v8i32 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v2i64 (VEXTRACTF128rr + (v4i64 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; + def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src), (VBROADCASTF128 addr:$src)>; @@ -5673,19 +5596,14 @@ def : Pat<(X86Movddup (memopv2f64 addr:$src)), def : Pat<(X86Movddup (memopv2f64 addr:$src)), (MOVDDUPrm addr:$src)>; -def : Pat<(X86Movddup (bc_v4f32 (memopv2f64 addr:$src))), - (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; -def : Pat<(X86Movddup (bc_v4f32 (memopv2f64 addr:$src))), - (MOVDDUPrm addr:$src)>; - -def : Pat<(X86Movddup (memopv2i64 addr:$src)), +def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))), (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; -def : Pat<(X86Movddup (memopv2i64 addr:$src)), +def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))), (MOVDDUPrm addr:$src)>; -def : Pat<(X86Movddup (bc_v4i32 (memopv2i64 addr:$src))), +def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))), (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; -def : Pat<(X86Movddup (bc_v4i32 (memopv2i64 addr:$src))), +def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))), (MOVDDUPrm addr:$src)>; def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))), @@ -5700,6 +5618,7 @@ def : Pat<(X86Movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src))))), (MOVDDUPrm addr:$src)>; + // Shuffle with UNPCKLPS def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))), (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; @@ -5724,9 +5643,9 @@ def : Pat<(v4f32 (X86Unpckhps VR128:$src1, 
VR128:$src2)), // Shuffle with UNPCKLPD def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), - (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; + (VUNPCKLPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), - (UNPCKLPSrm VR128:$src1, addr:$src2)>; + (UNPCKLPDrm VR128:$src1, addr:$src2)>; def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), (VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>; @@ -5735,9 +5654,9 @@ def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), // Shuffle with UNPCKHPD def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))), - (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; + (VUNPCKHPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))), - (UNPCKLPSrm VR128:$src1, addr:$src2)>; + (UNPCKHPDrm VR128:$src1, addr:$src2)>; def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)), (VUNPCKHPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>; @@ -5812,10 +5731,18 @@ def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)), def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)), (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>; +// FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the problem +// is during lowering, where it's not possible to recognize the load fold cause +// it has two uses through a bitcast. One use disappears at isel time and the +// fold opportunity reappears. +def : Pat<(v2f64 (X86Movddup VR128:$src)), + (UNPCKLPDrr VR128:$src, VR128:$src)>; + // Shuffle with MOVLHPD def : Pat<(v2f64 (X86Movlhpd VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))), (MOVHPDrm VR128:$src1, addr:$src2)>; + // FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem // is during lowering, where it's not possible to recognize the load fold cause // it has two uses through a bitcast. 
One use disappears at isel time and the @@ -5878,31 +5805,18 @@ def : Pat<(X86Movsldup (memopv4f32 addr:$src)), (MOVSLDUPrm addr:$src)>; // Shuffle with PSHUFHW -def : Pat<(v8i16 (X86PShufhwLd addr:$src, (i8 imm:$imm))), - (PSHUFHWmi addr:$src, imm:$imm)>; def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))), (PSHUFHWri VR128:$src, imm:$imm)>; def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))), (PSHUFHWmi addr:$src, imm:$imm)>; // Shuffle with PSHUFLW -def : Pat<(v8i16 (X86PShuflwLd addr:$src, (i8 imm:$imm))), - (PSHUFLWmi addr:$src, imm:$imm)>; def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))), (PSHUFLWri VR128:$src, imm:$imm)>; def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))), (PSHUFLWmi addr:$src, imm:$imm)>; // Shuffle with PALIGN -def : Pat<(v1i64 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))), - (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>; -def : Pat<(v2i32 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))), - (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>; -def : Pat<(v4i16 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))), - (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>; -def : Pat<(v8i8 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))), - (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>; - def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), @@ -5920,6 +5834,15 @@ def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))), def : Pat<(X86Movlps VR128:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), (MOVLPSrm VR128:$src1, addr:$src2)>; +// FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem +// is during lowering, where it's not possible to recognize the load fold cause +// it has two uses through a bitcast. One use disappears at isel time and the +// fold opportunity reappears. +def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>; + +def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>; // Shuffle with MOVLPD def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))), diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td new file mode 100644 index 0000000..8278568 --- /dev/null +++ b/lib/Target/X86/X86InstrShiftRotate.td @@ -0,0 +1,746 @@ +//===- X86InstrShiftRotate.td - Shift and Rotate Instrs ----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the shift and rotate instructions. +// +//===----------------------------------------------------------------------===// + +// FIXME: Someone needs to smear multipattern goodness all over this file. 
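As a minimal illustration (not part of the imported sources; the function name is made up), the SHL/SHR/SAR register forms defined just below are what ordinary C++ shifts select into once they reach the DAG as shl/srl/sra nodes: a constant count becomes the i8 immediate of the *ri forms, a variable count is placed in CL for the *rCL forms, and signed right shifts go to SAR.

#include <cstdint>

// Hypothetical example, compiled with clang -O2 for x86-64.
uint32_t shift_demo(uint32_t x, uint32_t n) {
  uint32_t a = x << 3;            // constant count  -> SHL32ri  (shll $3, ...)
  uint32_t b = x >> n;            // variable count  -> SHR32rCL (count in %cl)
  int32_t  c = (int32_t)x >> 1;   // signed shift    -> typically SAR32r1/SAR32ri
  return a + b + (uint32_t)c;
}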
+ +let Defs = [EFLAGS] in { + +let Constraints = "$src1 = $dst" in { +let Uses = [CL] in { +def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1), + "shl{b}\t{%cl, $dst|$dst, CL}", + [(set GR8:$dst, (shl GR8:$src1, CL))]>; +def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src1), + "shl{w}\t{%cl, $dst|$dst, CL}", + [(set GR16:$dst, (shl GR16:$src1, CL))]>, OpSize; +def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src1), + "shl{l}\t{%cl, $dst|$dst, CL}", + [(set GR32:$dst, (shl GR32:$src1, CL))]>; +def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1), + "shl{q}\t{%cl, $dst|$dst, %CL}", + [(set GR64:$dst, (shl GR64:$src1, CL))]>; +} // Uses = [CL] + +def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), + "shl{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>; + +let isConvertibleToThreeAddress = 1 in { // Can transform into LEA. +def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), + "shl{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))]>, OpSize; +def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), + "shl{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))]>; +def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), + (ins GR64:$src1, i8imm:$src2), + "shl{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>; + +// NOTE: We don't include patterns for shifts of a register by one, because +// 'add reg,reg' is cheaper (and we have a Pat pattern for shift-by-one). +def SHL8r1 : I<0xD0, MRM4r, (outs GR8:$dst), (ins GR8:$src1), + "shl{b}\t$dst", []>; +def SHL16r1 : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1), + "shl{w}\t$dst", []>, OpSize; +def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1), + "shl{l}\t$dst", []>; +def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1), + "shl{q}\t$dst", []>; +} // isConvertibleToThreeAddress = 1 +} // Constraints = "$src = $dst" + + +// FIXME: Why do we need an explicit "Uses = [CL]" when the instr has a pattern +// using CL? 
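The memory forms that follow fold the load, shift, and store into a single instruction. A minimal sketch of C++ that can select them (function names are illustrative, not from the import):

#include <cstdint>

// Read-modify-write shifts on memory; with optimization enabled these can
// match the SHL32mCL / SHR32mi style patterns below instead of an explicit
// load, register shift, and store.
void shl_in_place(uint32_t *p, uint32_t n) { *p <<= n; }  // shll %cl, (%rdi)
void shr_in_place(uint32_t *p)             { *p >>= 4; }  // shrl $4, (%rdi)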
+let Uses = [CL] in { +def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst), + "shl{b}\t{%cl, $dst|$dst, CL}", + [(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>; +def SHL16mCL : I<0xD3, MRM4m, (outs), (ins i16mem:$dst), + "shl{w}\t{%cl, $dst|$dst, CL}", + [(store (shl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; +def SHL32mCL : I<0xD3, MRM4m, (outs), (ins i32mem:$dst), + "shl{l}\t{%cl, $dst|$dst, CL}", + [(store (shl (loadi32 addr:$dst), CL), addr:$dst)]>; +def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst), + "shl{q}\t{%cl, $dst|$dst, %CL}", + [(store (shl (loadi64 addr:$dst), CL), addr:$dst)]>; +} +def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src), + "shl{b}\t{$src, $dst|$dst, $src}", + [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; +def SHL16mi : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, i8imm:$src), + "shl{w}\t{$src, $dst|$dst, $src}", + [(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>, + OpSize; +def SHL32mi : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, i8imm:$src), + "shl{l}\t{$src, $dst|$dst, $src}", + [(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>; +def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, i8imm:$src), + "shl{q}\t{$src, $dst|$dst, $src}", + [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + +// Shift by 1 +def SHL8m1 : I<0xD0, MRM4m, (outs), (ins i8mem :$dst), + "shl{b}\t$dst", + [(store (shl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; +def SHL16m1 : I<0xD1, MRM4m, (outs), (ins i16mem:$dst), + "shl{w}\t$dst", + [(store (shl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>, + OpSize; +def SHL32m1 : I<0xD1, MRM4m, (outs), (ins i32mem:$dst), + "shl{l}\t$dst", + [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; +def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst), + "shl{q}\t$dst", + [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; + +let Constraints = "$src1 = $dst" in { +let Uses = [CL] in { +def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1), + "shr{b}\t{%cl, $dst|$dst, CL}", + [(set GR8:$dst, (srl GR8:$src1, CL))]>; +def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src1), + "shr{w}\t{%cl, $dst|$dst, CL}", + [(set GR16:$dst, (srl GR16:$src1, CL))]>, OpSize; +def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src1), + "shr{l}\t{%cl, $dst|$dst, CL}", + [(set GR32:$dst, (srl GR32:$src1, CL))]>; +def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src1), + "shr{q}\t{%cl, $dst|$dst, %CL}", + [(set GR64:$dst, (srl GR64:$src1, CL))]>; +} + +def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + "shr{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))]>; +def SHR16ri : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), + "shr{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))]>, OpSize; +def SHR32ri : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), + "shr{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))]>; +def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), + "shr{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))]>; + +// Shift right by 1 +def SHR8r1 : I<0xD0, MRM5r, (outs GR8:$dst), (ins GR8:$src1), + "shr{b}\t$dst", + [(set GR8:$dst, (srl GR8:$src1, (i8 1)))]>; +def SHR16r1 : I<0xD1, MRM5r, (outs GR16:$dst), (ins GR16:$src1), + "shr{w}\t$dst", + [(set GR16:$dst, (srl GR16:$src1, (i8 1)))]>, OpSize; +def SHR32r1 
: I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1), + "shr{l}\t$dst", + [(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>; +def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1), + "shr{q}\t$dst", + [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>; +} // Constraints = "$src = $dst" + + +let Uses = [CL] in { +def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst), + "shr{b}\t{%cl, $dst|$dst, CL}", + [(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>; +def SHR16mCL : I<0xD3, MRM5m, (outs), (ins i16mem:$dst), + "shr{w}\t{%cl, $dst|$dst, CL}", + [(store (srl (loadi16 addr:$dst), CL), addr:$dst)]>, + OpSize; +def SHR32mCL : I<0xD3, MRM5m, (outs), (ins i32mem:$dst), + "shr{l}\t{%cl, $dst|$dst, CL}", + [(store (srl (loadi32 addr:$dst), CL), addr:$dst)]>; +def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst), + "shr{q}\t{%cl, $dst|$dst, %CL}", + [(store (srl (loadi64 addr:$dst), CL), addr:$dst)]>; +} +def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src), + "shr{b}\t{$src, $dst|$dst, $src}", + [(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; +def SHR16mi : Ii8<0xC1, MRM5m, (outs), (ins i16mem:$dst, i8imm:$src), + "shr{w}\t{$src, $dst|$dst, $src}", + [(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>, + OpSize; +def SHR32mi : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, i8imm:$src), + "shr{l}\t{$src, $dst|$dst, $src}", + [(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>; +def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, i8imm:$src), + "shr{q}\t{$src, $dst|$dst, $src}", + [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + +// Shift by 1 +def SHR8m1 : I<0xD0, MRM5m, (outs), (ins i8mem :$dst), + "shr{b}\t$dst", + [(store (srl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; +def SHR16m1 : I<0xD1, MRM5m, (outs), (ins i16mem:$dst), + "shr{w}\t$dst", + [(store (srl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,OpSize; +def SHR32m1 : I<0xD1, MRM5m, (outs), (ins i32mem:$dst), + "shr{l}\t$dst", + [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; +def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst), + "shr{q}\t$dst", + [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; + +let Constraints = "$src1 = $dst" in { +let Uses = [CL] in { +def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1), + "sar{b}\t{%cl, $dst|$dst, CL}", + [(set GR8:$dst, (sra GR8:$src1, CL))]>; +def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src1), + "sar{w}\t{%cl, $dst|$dst, CL}", + [(set GR16:$dst, (sra GR16:$src1, CL))]>, OpSize; +def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src1), + "sar{l}\t{%cl, $dst|$dst, CL}", + [(set GR32:$dst, (sra GR32:$src1, CL))]>; +def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src1), + "sar{q}\t{%cl, $dst|$dst, %CL}", + [(set GR64:$dst, (sra GR64:$src1, CL))]>; +} + +def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), + "sar{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))]>; +def SAR16ri : Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), + "sar{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))]>, + OpSize; +def SAR32ri : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), + "sar{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))]>; +def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst), + (ins GR64:$src1, i8imm:$src2), + "sar{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))]>; + +// 
Shift by 1 +def SAR8r1 : I<0xD0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1), + "sar{b}\t$dst", + [(set GR8:$dst, (sra GR8:$src1, (i8 1)))]>; +def SAR16r1 : I<0xD1, MRM7r, (outs GR16:$dst), (ins GR16:$src1), + "sar{w}\t$dst", + [(set GR16:$dst, (sra GR16:$src1, (i8 1)))]>, OpSize; +def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1), + "sar{l}\t$dst", + [(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>; +def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1), + "sar{q}\t$dst", + [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>; +} // Constraints = "$src = $dst" + + +let Uses = [CL] in { +def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst), + "sar{b}\t{%cl, $dst|$dst, CL}", + [(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>; +def SAR16mCL : I<0xD3, MRM7m, (outs), (ins i16mem:$dst), + "sar{w}\t{%cl, $dst|$dst, CL}", + [(store (sra (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; +def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst), + "sar{l}\t{%cl, $dst|$dst, CL}", + [(store (sra (loadi32 addr:$dst), CL), addr:$dst)]>; +def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst), + "sar{q}\t{%cl, $dst|$dst, %CL}", + [(store (sra (loadi64 addr:$dst), CL), addr:$dst)]>; +} +def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src), + "sar{b}\t{$src, $dst|$dst, $src}", + [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; +def SAR16mi : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, i8imm:$src), + "sar{w}\t{$src, $dst|$dst, $src}", + [(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>, + OpSize; +def SAR32mi : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, i8imm:$src), + "sar{l}\t{$src, $dst|$dst, $src}", + [(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>; +def SAR64mi : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, i8imm:$src), + "sar{q}\t{$src, $dst|$dst, $src}", + [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + +// Shift by 1 +def SAR8m1 : I<0xD0, MRM7m, (outs), (ins i8mem :$dst), + "sar{b}\t$dst", + [(store (sra (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; +def SAR16m1 : I<0xD1, MRM7m, (outs), (ins i16mem:$dst), + "sar{w}\t$dst", + [(store (sra (loadi16 addr:$dst), (i8 1)), addr:$dst)]>, + OpSize; +def SAR32m1 : I<0xD1, MRM7m, (outs), (ins i32mem:$dst), + "sar{l}\t$dst", + [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; +def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst), + "sar{q}\t$dst", + [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; + +//===----------------------------------------------------------------------===// +// Rotate instructions +//===----------------------------------------------------------------------===// + +let Constraints = "$src1 = $dst" in { +def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1), + "rcl{b}\t$dst", []>; +def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt), + "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; +let Uses = [CL] in +def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1), + "rcl{b}\t{%cl, $dst|$dst, CL}", []>; + +def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1), + "rcl{w}\t$dst", []>, OpSize; +def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt), + "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; +let Uses = [CL] in +def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1), + "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; + +def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1), + "rcl{l}\t$dst", []>; +def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, 
i8imm:$cnt), + "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; +let Uses = [CL] in +def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1), + "rcl{l}\t{%cl, $dst|$dst, CL}", []>; + + +def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src1), + "rcl{q}\t$dst", []>; +def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt), + "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; +let Uses = [CL] in +def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src1), + "rcl{q}\t{%cl, $dst|$dst, CL}", []>; + + +def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1), + "rcr{b}\t$dst", []>; +def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt), + "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; +let Uses = [CL] in +def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1), + "rcr{b}\t{%cl, $dst|$dst, CL}", []>; + +def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1), + "rcr{w}\t$dst", []>, OpSize; +def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt), + "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; +let Uses = [CL] in +def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1), + "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; + +def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1), + "rcr{l}\t$dst", []>; +def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt), + "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>; +let Uses = [CL] in +def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1), + "rcr{l}\t{%cl, $dst|$dst, CL}", []>; + +def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src1), + "rcr{q}\t$dst", []>; +def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt), + "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; +let Uses = [CL] in +def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1), + "rcr{q}\t{%cl, $dst|$dst, CL}", []>; + +} // Constraints = "$src = $dst" + +def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst), + "rcl{b}\t$dst", []>; +def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt), + "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst), + "rcl{w}\t$dst", []>, OpSize; +def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt), + "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; +def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst), + "rcl{l}\t$dst", []>; +def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt), + "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst), + "rcl{q}\t$dst", []>; +def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt), + "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; + +def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst), + "rcr{b}\t$dst", []>; +def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt), + "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst), + "rcr{w}\t$dst", []>, OpSize; +def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt), + "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; +def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst), + "rcr{l}\t$dst", []>; +def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt), + "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst), + "rcr{q}\t$dst", []>; +def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, i8imm:$cnt), + "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; + +let Uses = [CL] in { +def RCL8mCL : 
I<0xD2, MRM2m, (outs), (ins i8mem:$dst), + "rcl{b}\t{%cl, $dst|$dst, CL}", []>; +def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst), + "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; +def RCL32mCL : I<0xD3, MRM2m, (outs), (ins i32mem:$dst), + "rcl{l}\t{%cl, $dst|$dst, CL}", []>; +def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst), + "rcl{q}\t{%cl, $dst|$dst, CL}", []>; + +def RCR8mCL : I<0xD2, MRM3m, (outs), (ins i8mem:$dst), + "rcr{b}\t{%cl, $dst|$dst, CL}", []>; +def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst), + "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; +def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst), + "rcr{l}\t{%cl, $dst|$dst, CL}", []>; +def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst), + "rcr{q}\t{%cl, $dst|$dst, CL}", []>; +} + +let Constraints = "$src1 = $dst" in { +// FIXME: provide shorter instructions when imm8 == 1 +let Uses = [CL] in { +def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), + "rol{b}\t{%cl, $dst|$dst, CL}", + [(set GR8:$dst, (rotl GR8:$src1, CL))]>; +def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src1), + "rol{w}\t{%cl, $dst|$dst, CL}", + [(set GR16:$dst, (rotl GR16:$src1, CL))]>, OpSize; +def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src1), + "rol{l}\t{%cl, $dst|$dst, CL}", + [(set GR32:$dst, (rotl GR32:$src1, CL))]>; +def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1), + "rol{q}\t{%cl, $dst|$dst, %CL}", + [(set GR64:$dst, (rotl GR64:$src1, CL))]>; +} + +def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), + "rol{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))]>; +def ROL16ri : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), + "rol{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))]>, + OpSize; +def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), + "rol{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))]>; +def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst), + (ins GR64:$src1, i8imm:$src2), + "rol{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))]>; + +// Rotate by 1 +def ROL8r1 : I<0xD0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), + "rol{b}\t$dst", + [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))]>; +def ROL16r1 : I<0xD1, MRM0r, (outs GR16:$dst), (ins GR16:$src1), + "rol{w}\t$dst", + [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))]>, OpSize; +def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1), + "rol{l}\t$dst", + [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>; +def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1), + "rol{q}\t$dst", + [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>; +} // Constraints = "$src = $dst" + +let Uses = [CL] in { +def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst), + "rol{b}\t{%cl, $dst|$dst, CL}", + [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>; +def ROL16mCL : I<0xD3, MRM0m, (outs), (ins i16mem:$dst), + "rol{w}\t{%cl, $dst|$dst, CL}", + [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; +def ROL32mCL : I<0xD3, MRM0m, (outs), (ins i32mem:$dst), + "rol{l}\t{%cl, $dst|$dst, CL}", + [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)]>; +def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst), + "rol{q}\t{%cl, $dst|$dst, %CL}", + [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>; +} +def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src1), + "rol{b}\t{$src1, $dst|$dst, $src1}", + [(store (rotl 
(loadi8 addr:$dst), (i8 imm:$src1)), addr:$dst)]>; +def ROL16mi : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, i8imm:$src1), + "rol{w}\t{$src1, $dst|$dst, $src1}", + [(store (rotl (loadi16 addr:$dst), (i8 imm:$src1)), addr:$dst)]>, + OpSize; +def ROL32mi : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, i8imm:$src1), + "rol{l}\t{$src1, $dst|$dst, $src1}", + [(store (rotl (loadi32 addr:$dst), (i8 imm:$src1)), addr:$dst)]>; +def ROL64mi : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, i8imm:$src1), + "rol{q}\t{$src1, $dst|$dst, $src1}", + [(store (rotl (loadi64 addr:$dst), (i8 imm:$src1)), addr:$dst)]>; + +// Rotate by 1 +def ROL8m1 : I<0xD0, MRM0m, (outs), (ins i8mem :$dst), + "rol{b}\t$dst", + [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; +def ROL16m1 : I<0xD1, MRM0m, (outs), (ins i16mem:$dst), + "rol{w}\t$dst", + [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>, + OpSize; +def ROL32m1 : I<0xD1, MRM0m, (outs), (ins i32mem:$dst), + "rol{l}\t$dst", + [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; +def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst), + "rol{q}\t$dst", + [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; + +let Constraints = "$src1 = $dst" in { +let Uses = [CL] in { +def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), + "ror{b}\t{%cl, $dst|$dst, CL}", + [(set GR8:$dst, (rotr GR8:$src1, CL))]>; +def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src1), + "ror{w}\t{%cl, $dst|$dst, CL}", + [(set GR16:$dst, (rotr GR16:$src1, CL))]>, OpSize; +def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src1), + "ror{l}\t{%cl, $dst|$dst, CL}", + [(set GR32:$dst, (rotr GR32:$src1, CL))]>; +def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1), + "ror{q}\t{%cl, $dst|$dst, %CL}", + [(set GR64:$dst, (rotr GR64:$src1, CL))]>; +} + +def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), + "ror{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))]>; +def ROR16ri : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), + "ror{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))]>, + OpSize; +def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), + "ror{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))]>; +def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst), + (ins GR64:$src1, i8imm:$src2), + "ror{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))]>; + +// Rotate by 1 +def ROR8r1 : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), + "ror{b}\t$dst", + [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))]>; +def ROR16r1 : I<0xD1, MRM1r, (outs GR16:$dst), (ins GR16:$src1), + "ror{w}\t$dst", + [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))]>, OpSize; +def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1), + "ror{l}\t$dst", + [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>; +def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1), + "ror{q}\t$dst", + [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>; +} // Constraints = "$src = $dst" + +let Uses = [CL] in { +def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst), + "ror{b}\t{%cl, $dst|$dst, CL}", + [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>; +def ROR16mCL : I<0xD3, MRM1m, (outs), (ins i16mem:$dst), + "ror{w}\t{%cl, $dst|$dst, CL}", + [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; +def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst), + "ror{l}\t{%cl, $dst|$dst, CL}", + 
[(store (rotr (loadi32 addr:$dst), CL), addr:$dst)]>; +def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst), + "ror{q}\t{%cl, $dst|$dst, %CL}", + [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)]>; +} +def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src), + "ror{b}\t{$src, $dst|$dst, $src}", + [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; +def ROR16mi : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, i8imm:$src), + "ror{w}\t{$src, $dst|$dst, $src}", + [(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>, + OpSize; +def ROR32mi : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, i8imm:$src), + "ror{l}\t{$src, $dst|$dst, $src}", + [(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>; +def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, i8imm:$src), + "ror{q}\t{$src, $dst|$dst, $src}", + [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + +// Rotate by 1 +def ROR8m1 : I<0xD0, MRM1m, (outs), (ins i8mem :$dst), + "ror{b}\t$dst", + [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; +def ROR16m1 : I<0xD1, MRM1m, (outs), (ins i16mem:$dst), + "ror{w}\t$dst", + [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)]>, + OpSize; +def ROR32m1 : I<0xD1, MRM1m, (outs), (ins i32mem:$dst), + "ror{l}\t$dst", + [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; +def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst), + "ror{q}\t$dst", + [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; + + +//===----------------------------------------------------------------------===// +// Double shift instructions (generalizations of rotate) +//===----------------------------------------------------------------------===// + +let Constraints = "$src1 = $dst" in { + +let Uses = [CL] in { +def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst), + (ins GR16:$src1, GR16:$src2), + "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", + [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>, + TB, OpSize; +def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst), + (ins GR16:$src1, GR16:$src2), + "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", + [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>, + TB, OpSize; +def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst), + (ins GR32:$src1, GR32:$src2), + "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", + [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>, TB; +def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst), + (ins GR32:$src1, GR32:$src2), + "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", + [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>, TB; +def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2), + "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))]>, + TB; +def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2), + "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))]>, + TB; +} + +let isCommutable = 1 in { // These instructions commute to each other. 
+def SHLD16rri8 : Ii8<0xA4, MRMDestReg, + (outs GR16:$dst), + (ins GR16:$src1, GR16:$src2, i8imm:$src3), + "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, + (i8 imm:$src3)))]>, + TB, OpSize; +def SHRD16rri8 : Ii8<0xAC, MRMDestReg, + (outs GR16:$dst), + (ins GR16:$src1, GR16:$src2, i8imm:$src3), + "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, + (i8 imm:$src3)))]>, + TB, OpSize; +def SHLD32rri8 : Ii8<0xA4, MRMDestReg, + (outs GR32:$dst), + (ins GR32:$src1, GR32:$src2, i8imm:$src3), + "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, + (i8 imm:$src3)))]>, + TB; +def SHRD32rri8 : Ii8<0xAC, MRMDestReg, + (outs GR32:$dst), + (ins GR32:$src1, GR32:$src2, i8imm:$src3), + "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, + (i8 imm:$src3)))]>, + TB; +def SHLD64rri8 : RIi8<0xA4, MRMDestReg, + (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2, i8imm:$src3), + "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, + (i8 imm:$src3)))]>, + TB; +def SHRD64rri8 : RIi8<0xAC, MRMDestReg, + (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2, i8imm:$src3), + "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, + (i8 imm:$src3)))]>, + TB; +} +} // Constraints = "$src = $dst" + +let Uses = [CL] in { +def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), + "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", + [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL), + addr:$dst)]>, TB, OpSize; +def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), + "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", + [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL), + addr:$dst)]>, TB, OpSize; + +def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), + "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", + [(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL), + addr:$dst)]>, TB; +def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), + "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", + [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL), + addr:$dst)]>, TB; + +def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), + "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + [(store (X86shld (loadi64 addr:$dst), GR64:$src2, CL), + addr:$dst)]>, TB; +def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), + "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, CL), + addr:$dst)]>, TB; +} + +def SHLD16mri8 : Ii8<0xA4, MRMDestMem, + (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3), + "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(store (X86shld (loadi16 addr:$dst), GR16:$src2, + (i8 imm:$src3)), addr:$dst)]>, + TB, OpSize; +def SHRD16mri8 : Ii8<0xAC, MRMDestMem, + (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3), + "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, + (i8 imm:$src3)), addr:$dst)]>, + TB, OpSize; + +def SHLD32mri8 : Ii8<0xA4, MRMDestMem, + (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3), + "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(store (X86shld (loadi32 addr:$dst), GR32:$src2, + (i8 imm:$src3)), addr:$dst)]>, + TB; +def SHRD32mri8 : Ii8<0xAC, MRMDestMem, + (outs), (ins 
i32mem:$dst, GR32:$src2, i8imm:$src3), + "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, + (i8 imm:$src3)), addr:$dst)]>, + TB; + +def SHLD64mri8 : RIi8<0xA4, MRMDestMem, + (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3), + "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(store (X86shld (loadi64 addr:$dst), GR64:$src2, + (i8 imm:$src3)), addr:$dst)]>, + TB; +def SHRD64mri8 : RIi8<0xAC, MRMDestMem, + (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3), + "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, + (i8 imm:$src3)), addr:$dst)]>, + TB; + +} // Defs = [EFLAGS] + diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td new file mode 100644 index 0000000..1a58ba0 --- /dev/null +++ b/lib/Target/X86/X86InstrSystem.td @@ -0,0 +1,390 @@ +//===- X86InstrSystem.td - System Instructions -------------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the X86 instructions that are generally used in +// privileged modes. These are not typically used by the compiler, but are +// supported for the assembler and disassembler. +// +//===----------------------------------------------------------------------===// + +let Defs = [RAX, RDX] in + def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, TB; + +let Defs = [RAX, RCX, RDX] in + def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB; + +// CPU flow control instructions + +let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in { + def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB; + def UD2B : I<0xB9, RawFrm, (outs), (ins), "ud2b", []>, TB; +} + +def HLT : I<0xF4, RawFrm, (outs), (ins), "hlt", []>; +def RSM : I<0xAA, RawFrm, (outs), (ins), "rsm", []>, TB; + +// Interrupt and SysCall Instructions. +let Uses = [EFLAGS] in + def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>; +def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", + [(int_x86_int (i8 3))]>; +def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", + [(int_x86_int imm:$trap)]>; + +def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB; +def SYSRETL : I<0x07, RawFrm, (outs), (ins), "sysretl", []>, TB; +def SYSRETQ :RI<0x07, RawFrm, (outs), (ins), "sysretq", []>, TB, + Requires<[In64BitMode]>; + +def SYSENTER : I<0x34, RawFrm, (outs), (ins), "sysenter", []>, TB; + +def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexit", []>, TB, + Requires<[In32BitMode]>; +def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexit", []>, TB, + Requires<[In64BitMode]>; + +def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iretw", []>, OpSize; +def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", []>; +def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", []>, + Requires<[In64BitMode]>; + + +//===----------------------------------------------------------------------===// +// Input/Output Instructions. 
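The port I/O forms defined just below carry no selection patterns; per the file header they exist for the assembler and disassembler. A minimal sketch of how they are typically reached from C++, via GNU inline assembly in freestanding or kernel code (helper names are illustrative, not from the import):

#include <cstdint>

// Byte-wide port I/O wrappers; "a" pins the data to AL, and "Nd" allows either
// an 8-bit immediate port (IN8ri/OUT8ir) or a port number in DX (IN8rr/OUT8rr).
static inline uint8_t inb(uint16_t port) {
  uint8_t value;
  asm volatile("inb %1, %0" : "=a"(value) : "Nd"(port));
  return value;
}

static inline void outb(uint16_t port, uint8_t value) {
  asm volatile("outb %0, %1" : : "a"(value), "Nd"(port));
}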
+// +let Defs = [AL], Uses = [DX] in +def IN8rr : I<0xEC, RawFrm, (outs), (ins), + "in{b}\t{%dx, %al|%AL, %DX}", []>; +let Defs = [AX], Uses = [DX] in +def IN16rr : I<0xED, RawFrm, (outs), (ins), + "in{w}\t{%dx, %ax|%AX, %DX}", []>, OpSize; +let Defs = [EAX], Uses = [DX] in +def IN32rr : I<0xED, RawFrm, (outs), (ins), + "in{l}\t{%dx, %eax|%EAX, %DX}", []>; + +let Defs = [AL] in +def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins i8imm:$port), + "in{b}\t{$port, %al|%AL, $port}", []>; +let Defs = [AX] in +def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port), + "in{w}\t{$port, %ax|%AX, $port}", []>, OpSize; +let Defs = [EAX] in +def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port), + "in{l}\t{$port, %eax|%EAX, $port}", []>; + +let Uses = [DX, AL] in +def OUT8rr : I<0xEE, RawFrm, (outs), (ins), + "out{b}\t{%al, %dx|%DX, %AL}", []>; +let Uses = [DX, AX] in +def OUT16rr : I<0xEF, RawFrm, (outs), (ins), + "out{w}\t{%ax, %dx|%DX, %AX}", []>, OpSize; +let Uses = [DX, EAX] in +def OUT32rr : I<0xEF, RawFrm, (outs), (ins), + "out{l}\t{%eax, %dx|%DX, %EAX}", []>; + +let Uses = [AL] in +def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins i8imm:$port), + "out{b}\t{%al, $port|$port, %AL}", []>; +let Uses = [AX] in +def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port), + "out{w}\t{%ax, $port|$port, %AX}", []>, OpSize; +let Uses = [EAX] in +def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port), + "out{l}\t{%eax, $port|$port, %EAX}", []>; + +def IN8 : I<0x6C, RawFrm, (outs), (ins), "ins{b}", []>; +def IN16 : I<0x6D, RawFrm, (outs), (ins), "ins{w}", []>, OpSize; +def IN32 : I<0x6D, RawFrm, (outs), (ins), "ins{l}", []>; + +//===----------------------------------------------------------------------===// +// Moves to and from debug registers + +def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>, TB; +def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src), + "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; + +def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>, TB; +def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src), + "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; + +//===----------------------------------------------------------------------===// +// Moves to and from control registers + +def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>, TB; +def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src), + "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; + +def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>, TB; +def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src), + "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; + +//===----------------------------------------------------------------------===// +// Segment override instruction prefixes + +def CS_PREFIX : I<0x2E, RawFrm, (outs), (ins), "cs", []>; +def SS_PREFIX : I<0x36, RawFrm, (outs), (ins), "ss", []>; +def DS_PREFIX : I<0x3E, RawFrm, (outs), (ins), "ds", []>; +def ES_PREFIX : I<0x26, RawFrm, (outs), (ins), "es", []>; +def FS_PREFIX : I<0x64, RawFrm, (outs), (ins), "fs", []>; +def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>; + + +//===----------------------------------------------------------------------===// +// Moves to and from segment registers. 
+// + +def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src), + "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; +def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>; +def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src), + "mov{q}\t{$src, $dst|$dst, $src}", []>; + +def MOV16ms : I<0x8C, MRMDestMem, (outs i16mem:$dst), (ins SEGMENT_REG:$src), + "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; +def MOV32ms : I<0x8C, MRMDestMem, (outs i32mem:$dst), (ins SEGMENT_REG:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>; +def MOV64ms : RI<0x8C, MRMDestMem, (outs i64mem:$dst), (ins SEGMENT_REG:$src), + "mov{q}\t{$src, $dst|$dst, $src}", []>; + +def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src), + "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; +def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>; +def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src), + "mov{q}\t{$src, $dst|$dst, $src}", []>; + +def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src), + "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; +def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>; +def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src), + "mov{q}\t{$src, $dst|$dst, $src}", []>; + +//===----------------------------------------------------------------------===// +// Segmentation support instructions. + +def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", []>, TB; + +def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), + "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; +def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), + "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; + +// i16mem operand in LAR32rm and GR32 operand in LAR32rr is not a typo. +def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), + "lar{l}\t{$src, $dst|$dst, $src}", []>, TB; +def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), + "lar{l}\t{$src, $dst|$dst, $src}", []>, TB; +// i16mem operand in LAR64rm and GR32 operand in LAR32rr is not a typo. 
+def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), + "lar{q}\t{$src, $dst|$dst, $src}", []>, TB; +def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src), + "lar{q}\t{$src, $dst|$dst, $src}", []>, TB; + +def LSL16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), + "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; +def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), + "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; +def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB; +def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), + "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB; +def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB; +def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB; + +def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB; + +def STRr : I<0x00, MRM1r, (outs GR16:$dst), (ins), + "str{w}\t{$dst}", []>, TB; +def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins), + "str{w}\t{$dst}", []>, TB; +def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src), + "ltr{w}\t{$src}", []>, TB; +def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src), + "ltr{w}\t{$src}", []>, TB; + +def PUSHCS16 : I<0x0E, RawFrm, (outs), (ins), + "push{w}\t%cs", []>, Requires<[In32BitMode]>, OpSize; +def PUSHCS32 : I<0x0E, RawFrm, (outs), (ins), + "push{l}\t%cs", []>, Requires<[In32BitMode]>; +def PUSHSS16 : I<0x16, RawFrm, (outs), (ins), + "push{w}\t%ss", []>, Requires<[In32BitMode]>, OpSize; +def PUSHSS32 : I<0x16, RawFrm, (outs), (ins), + "push{l}\t%ss", []>, Requires<[In32BitMode]>; +def PUSHDS16 : I<0x1E, RawFrm, (outs), (ins), + "push{w}\t%ds", []>, Requires<[In32BitMode]>, OpSize; +def PUSHDS32 : I<0x1E, RawFrm, (outs), (ins), + "push{l}\t%ds", []>, Requires<[In32BitMode]>; +def PUSHES16 : I<0x06, RawFrm, (outs), (ins), + "push{w}\t%es", []>, Requires<[In32BitMode]>, OpSize; +def PUSHES32 : I<0x06, RawFrm, (outs), (ins), + "push{l}\t%es", []>, Requires<[In32BitMode]>; + +def PUSHFS16 : I<0xa0, RawFrm, (outs), (ins), + "push{w}\t%fs", []>, OpSize, TB; +def PUSHFS32 : I<0xa0, RawFrm, (outs), (ins), + "push{l}\t%fs", []>, TB, Requires<[In32BitMode]>; +def PUSHGS16 : I<0xa8, RawFrm, (outs), (ins), + "push{w}\t%gs", []>, OpSize, TB; +def PUSHGS32 : I<0xa8, RawFrm, (outs), (ins), + "push{l}\t%gs", []>, TB, Requires<[In32BitMode]>; + +def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins), + "push{q}\t%fs", []>, TB; +def PUSHGS64 : I<0xa8, RawFrm, (outs), (ins), + "push{q}\t%gs", []>, TB; + +// No "pop cs" instruction. 
+def POPSS16 : I<0x17, RawFrm, (outs), (ins), + "pop{w}\t%ss", []>, OpSize, Requires<[In32BitMode]>; +def POPSS32 : I<0x17, RawFrm, (outs), (ins), + "pop{l}\t%ss", []> , Requires<[In32BitMode]>; + +def POPDS16 : I<0x1F, RawFrm, (outs), (ins), + "pop{w}\t%ds", []>, OpSize, Requires<[In32BitMode]>; +def POPDS32 : I<0x1F, RawFrm, (outs), (ins), + "pop{l}\t%ds", []> , Requires<[In32BitMode]>; + +def POPES16 : I<0x07, RawFrm, (outs), (ins), + "pop{w}\t%es", []>, OpSize, Requires<[In32BitMode]>; +def POPES32 : I<0x07, RawFrm, (outs), (ins), + "pop{l}\t%es", []> , Requires<[In32BitMode]>; + +def POPFS16 : I<0xa1, RawFrm, (outs), (ins), + "pop{w}\t%fs", []>, OpSize, TB; +def POPFS32 : I<0xa1, RawFrm, (outs), (ins), + "pop{l}\t%fs", []>, TB , Requires<[In32BitMode]>; +def POPFS64 : I<0xa1, RawFrm, (outs), (ins), + "pop{q}\t%fs", []>, TB; + +def POPGS16 : I<0xa9, RawFrm, (outs), (ins), + "pop{w}\t%gs", []>, OpSize, TB; +def POPGS32 : I<0xa9, RawFrm, (outs), (ins), + "pop{l}\t%gs", []>, TB , Requires<[In32BitMode]>; +def POPGS64 : I<0xa9, RawFrm, (outs), (ins), + "pop{q}\t%gs", []>, TB; + + +def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), + "lds{w}\t{$src, $dst|$dst, $src}", []>, OpSize; +def LDS32rm : I<0xc5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), + "lds{l}\t{$src, $dst|$dst, $src}", []>; + +def LSS16rm : I<0xb2, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), + "lss{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; +def LSS32rm : I<0xb2, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), + "lss{l}\t{$src, $dst|$dst, $src}", []>, TB; +def LSS64rm : RI<0xb2, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src), + "lss{q}\t{$src, $dst|$dst, $src}", []>, TB; + +def LES16rm : I<0xc4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), + "les{w}\t{$src, $dst|$dst, $src}", []>, OpSize; +def LES32rm : I<0xc4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), + "les{l}\t{$src, $dst|$dst, $src}", []>; + +def LFS16rm : I<0xb4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), + "lfs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; +def LFS32rm : I<0xb4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), + "lfs{l}\t{$src, $dst|$dst, $src}", []>, TB; +def LFS64rm : RI<0xb4, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src), + "lfs{q}\t{$src, $dst|$dst, $src}", []>, TB; + +def LGS16rm : I<0xb5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), + "lgs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; +def LGS32rm : I<0xb5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), + "lgs{l}\t{$src, $dst|$dst, $src}", []>, TB; + +def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src), + "lgs{q}\t{$src, $dst|$dst, $src}", []>, TB; + + +def VERRr : I<0x00, MRM4r, (outs), (ins GR16:$seg), + "verr\t$seg", []>, TB; +def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg), + "verr\t$seg", []>, TB; +def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg), + "verw\t$seg", []>, TB; +def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg), + "verw\t$seg", []>, TB; + +//===----------------------------------------------------------------------===// +// Descriptor-table support instructions + +def SGDT16m : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins), + "sgdtw\t$dst", []>, TB, OpSize, Requires<[In32BitMode]>; +def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins), + "sgdt\t$dst", []>, TB; +def SIDT16m : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins), + "sidtw\t$dst", []>, TB, OpSize, Requires<[In32BitMode]>; +def SIDTm : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins), 
+ "sidt\t$dst", []>, TB; +def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins), + "sldt{w}\t$dst", []>, TB, OpSize; +def SLDT16m : I<0x00, MRM0m, (outs i16mem:$dst), (ins), + "sldt{w}\t$dst", []>, TB; +def SLDT32r : I<0x00, MRM0r, (outs GR32:$dst), (ins), + "sldt{l}\t$dst", []>, TB; + +// LLDT is not interpreted specially in 64-bit mode because there is no sign +// extension. +def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins), + "sldt{q}\t$dst", []>, TB; +def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins), + "sldt{q}\t$dst", []>, TB; + +def LGDT16m : I<0x01, MRM2m, (outs), (ins opaque48mem:$src), + "lgdtw\t$src", []>, TB, OpSize, Requires<[In32BitMode]>; +def LGDTm : I<0x01, MRM2m, (outs), (ins opaque48mem:$src), + "lgdt\t$src", []>, TB; +def LIDT16m : I<0x01, MRM3m, (outs), (ins opaque48mem:$src), + "lidtw\t$src", []>, TB, OpSize, Requires<[In32BitMode]>; +def LIDTm : I<0x01, MRM3m, (outs), (ins opaque48mem:$src), + "lidt\t$src", []>, TB; +def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src), + "lldt{w}\t$src", []>, TB; +def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src), + "lldt{w}\t$src", []>, TB; + +//===----------------------------------------------------------------------===// +// Specialized register support +def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", []>, TB; +def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", []>, TB; +def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", []>, TB; + +def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins), + "smsw{w}\t$dst", []>, OpSize, TB; +def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins), + "smsw{l}\t$dst", []>, TB; +// no m form encodable; use SMSW16m +def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins), + "smsw{q}\t$dst", []>, TB; + +// For memory operands, there is only a 16-bit form +def SMSW16m : I<0x01, MRM4m, (outs i16mem:$dst), (ins), + "smsw{w}\t$dst", []>, TB; + +def LMSW16r : I<0x01, MRM6r, (outs), (ins GR16:$src), + "lmsw{w}\t$src", []>, TB; +def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src), + "lmsw{w}\t$src", []>, TB; + +def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB; + +//===----------------------------------------------------------------------===// +// Cache instructions +def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB; +def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB; + diff --git a/lib/Target/X86/X86InstrVMX.td b/lib/Target/X86/X86InstrVMX.td new file mode 100644 index 0000000..daf61e4 --- /dev/null +++ b/lib/Target/X86/X86InstrVMX.td @@ -0,0 +1,54 @@ +//===- X86InstrVMX.td - VMX Instruction Set Extension ------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the instructions that make up the Intel VMX instruction +// set. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// VMX instructions + +// 66 0F 38 80 +def INVEPT : I<0x80, RawFrm, (outs), (ins), "invept", []>, OpSize, T8; +// 66 0F 38 81 +def INVVPID : I<0x81, RawFrm, (outs), (ins), "invvpid", []>, OpSize, T8; +// 0F 01 C1 +def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB; +def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), + "vmclear\t$vmcs", []>, OpSize, TB; +// 0F 01 C2 +def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB; +// 0F 01 C3 +def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB; +def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), + "vmptrld\t$vmcs", []>, TB; +def VMPTRSTm : I<0xC7, MRM7m, (outs i64mem:$vmcs), (ins), + "vmptrst\t$vmcs", []>, TB; +def VMREAD64rm : I<0x78, MRMDestMem, (outs i64mem:$dst), (ins GR64:$src), + "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB; +def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), + "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB; +def VMREAD32rm : I<0x78, MRMDestMem, (outs i32mem:$dst), (ins GR32:$src), + "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB; +def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), + "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB; +def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB; +def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB; +def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB; +def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), + "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB; +// 0F 01 C4 +def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB; +def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon), + "vmxon\t{$vmxon}", []>, XS; + diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index 6f0a8d9..3f88fa6 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -19,7 +19,7 @@ #include "llvm/Function.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/System/Valgrind.h" +#include "llvm/Support/Valgrind.h" #include <cstdlib> #include <cstring> using namespace llvm; @@ -127,9 +127,17 @@ extern "C" { "movaps %xmm6, 96(%rsp)\n" "movaps %xmm7, 112(%rsp)\n" // JIT callee +#ifdef _WIN64 + "subq $32, %rsp\n" + "movq %rbp, %rcx\n" // Pass prev frame and return address + "movq 8(%rbp), %rdx\n" + "call " ASMPREFIX "X86CompilationCallback2\n" + "addq $32, %rsp\n" +#else "movq %rbp, %rdi\n" // Pass prev frame and return address "movq 8(%rbp), %rsi\n" "call " ASMPREFIX "X86CompilationCallback2\n" +#endif // Restore all XMM arg registers "movaps 112(%rsp), %xmm7\n" "movaps 96(%rsp), %xmm6\n" @@ -333,11 +341,11 @@ extern "C" { extern "C" { #if !(defined (X86_64_JIT) && defined(_MSC_VER)) // the following function is called only from this translation unit, - // unless we are under 64bit Windows with MSC, where there is + // unless we are under 64bit Windows with MSC, where there is // no support for inline assembly static #endif -void ATTRIBUTE_USED +void LLVM_ATTRIBUTE_USED X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { intptr_t *RetAddrLoc = &StackPtr[1]; assert(*RetAddrLoc == RetAddr && @@ -462,7 +470,7 @@ 
TargetJITInfo::StubLayout X86JITInfo::getStubLayout() { void *X86JITInfo::emitFunctionStub(const Function* F, void *Target, JITCodeEmitter &JCE) { - // Note, we cast to intptr_t here to silence a -pedantic warning that + // Note, we cast to intptr_t here to silence a -pedantic warning that // complains about casting a function pointer to a normal pointer. #if defined (X86_32_JIT) && !defined (_MSC_VER) bool NotCC = (Target != (void*)(intptr_t)X86CompilationCallback && diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp index 36badb4..6686214 100644 --- a/lib/Target/X86/X86MCAsmInfo.cpp +++ b/lib/Target/X86/X86MCAsmInfo.cpp @@ -17,6 +17,7 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ELF.h" using namespace llvm; enum AsmWriterFlavorTy { @@ -68,7 +69,7 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) { DwarfUsesInlineInfoSection = true; // Exceptions handling - ExceptionsType = ExceptionHandling::Dwarf; + ExceptionsType = ExceptionHandling::DwarfTable; } X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { @@ -88,8 +89,8 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { SupportsDebugInformation = true; // Exceptions handling - ExceptionsType = ExceptionHandling::Dwarf; - + ExceptionsType = ExceptionHandling::DwarfTable; + // OpenBSD has buggy support for .quad in 32-bit mode, just split into two // .words. if (T.getOS() == Triple::OpenBSD && T.getArch() == Triple::x86) @@ -98,13 +99,15 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { const MCSection *X86ELFMCAsmInfo:: getNonexecutableStackSection(MCContext &Ctx) const { - return Ctx.getELFSection(".note.GNU-stack", MCSectionELF::SHT_PROGBITS, - 0, SectionKind::getMetadata(), false); + return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS, + 0, SectionKind::getMetadata()); } X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) { - if (Triple.getArch() == Triple::x86_64) + if (Triple.getArch() == Triple::x86_64) { GlobalPrefix = ""; + PrivateGlobalPrefix = ".L"; + } AsmTransCBE = x86_asm_table; AssemblerDialect = AsmWriterFlavor; diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp index 9564fe0..e6dc74e 100644 --- a/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/X86MCCodeEmitter.cpp @@ -11,13 +11,14 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "x86-emitter" +#define DEBUG_TYPE "mccodeemitter" #include "X86.h" #include "X86InstrInfo.h" #include "X86FixupKinds.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -37,27 +38,6 @@ public: ~X86MCCodeEmitter() {} - unsigned getNumFixupKinds() const { - return 5; - } - - const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { - const static MCFixupKindInfo Infos[] = { - { "reloc_pcrel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel }, - { "reloc_pcrel_1byte", 0, 1 * 8, MCFixupKindInfo::FKF_IsPCRel }, - { "reloc_pcrel_2byte", 0, 2 * 8, MCFixupKindInfo::FKF_IsPCRel }, - { "reloc_riprel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel }, - { "reloc_riprel_4byte_movq_load", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel } - }; - - if (Kind < FirstTargetFixupKind) - return MCCodeEmitter::getFixupKindInfo(Kind); - - assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && - "Invalid kind!"); - return Infos[Kind - 
FirstTargetFixupKind]; - } - static unsigned GetX86RegNum(const MCOperand &MO) { return X86RegisterInfo::getX86RegNum(MO.getReg()); } @@ -170,41 +150,77 @@ static MCFixupKind getImmFixupKind(uint64_t TSFlags) { unsigned Size = X86II::getSizeOfImm(TSFlags); bool isPCRel = X86II::isImmPCRel(TSFlags); - switch (Size) { - default: assert(0 && "Unknown immediate size"); - case 1: return isPCRel ? MCFixupKind(X86::reloc_pcrel_1byte) : FK_Data_1; - case 2: return isPCRel ? MCFixupKind(X86::reloc_pcrel_2byte) : FK_Data_2; - case 4: return isPCRel ? MCFixupKind(X86::reloc_pcrel_4byte) : FK_Data_4; - case 8: assert(!isPCRel); return FK_Data_8; - } + return MCFixup::getKindForSize(Size, isPCRel); +} + +/// Is32BitMemOperand - Return true if the specified instruction with a memory +/// operand should emit the 0x67 prefix byte in 64-bit mode due to a 32-bit +/// memory operand. Op specifies the operand # of the memoperand. +static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) { + const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg); + const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg); + + if ((BaseReg.getReg() != 0 && X86::GR32RegClass.contains(BaseReg.getReg())) || (IndexReg.getReg() != 0 && X86::GR32RegClass.contains(IndexReg.getReg()))) + return true; + return false; } +/// StartsWithGlobalOffsetTable - Return true for the simple cases where this +/// expression starts with _GLOBAL_OFFSET_TABLE_. This is needed to support +/// PIC on ELF i386 as that symbol is magic. We check only the simple cases that +/// are known to be used: _GLOBAL_OFFSET_TABLE_ by itself or at the start +/// of a binary expression. +static bool StartsWithGlobalOffsetTable(const MCExpr *Expr) { + if (Expr->getKind() == MCExpr::Binary) { + const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(Expr); + Expr = BE->getLHS(); + } + + if (Expr->getKind() != MCExpr::SymbolRef) + return false; + + const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr); + const MCSymbol &S = Ref->getSymbol(); + return S.getName() == "_GLOBAL_OFFSET_TABLE_"; +} void X86MCCodeEmitter:: EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const { - // If this is a simple integer displacement that doesn't require a relocation, - // emit it now. + const MCExpr *Expr = NULL; if (DispOp.isImm()) { - // FIXME: is this right for pc-rel encoding?? Probably need to emit this as - // a fixup if so. - EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS); - return; + // If this is a simple integer displacement that doesn't require a relocation, + // emit it now. + if (FixupKind != FK_PCRel_1 && + FixupKind != FK_PCRel_2 && + FixupKind != FK_PCRel_4) { + EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS); + return; + } + Expr = MCConstantExpr::Create(DispOp.getImm(), Ctx); + } else { + Expr = DispOp.getExpr(); } // If we have an immoffset, add it to the expression. - const MCExpr *Expr = DispOp.getExpr(); + if (FixupKind == FK_Data_4 && StartsWithGlobalOffsetTable(Expr)) { + assert(ImmOffset == 0); + + FixupKind = MCFixupKind(X86::reloc_global_offset_table); + ImmOffset = CurByte; + } // If the fixup is pc-relative, we need to bias the value to be relative to // the start of the field, not the end of the field.
- if (FixupKind == MCFixupKind(X86::reloc_pcrel_4byte) || + if (FixupKind == FK_PCRel_4 || FixupKind == MCFixupKind(X86::reloc_riprel_4byte) || FixupKind == MCFixupKind(X86::reloc_riprel_4byte_movq_load)) ImmOffset -= 4; - if (FixupKind == MCFixupKind(X86::reloc_pcrel_2byte)) + if (FixupKind == FK_PCRel_2) ImmOffset -= 2; - if (FixupKind == MCFixupKind(X86::reloc_pcrel_1byte)) + if (FixupKind == FK_PCRel_1) ImmOffset -= 1; if (ImmOffset) @@ -221,10 +237,10 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, uint64_t TSFlags, unsigned &CurByte, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const{ - const MCOperand &Disp = MI.getOperand(Op+3); - const MCOperand &Base = MI.getOperand(Op); - const MCOperand &Scale = MI.getOperand(Op+1); - const MCOperand &IndexReg = MI.getOperand(Op+2); + const MCOperand &Disp = MI.getOperand(Op+X86::AddrDisp); + const MCOperand &Base = MI.getOperand(Op+X86::AddrBaseReg); + const MCOperand &Scale = MI.getOperand(Op+X86::AddrScaleAmt); + const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg); unsigned BaseReg = Base.getReg(); // Handle %rip relative addressing. @@ -238,8 +254,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, // movq loads are handled with a special relocation form which allows the // linker to eliminate some loads for GOT references which end up in the // same linkage unit. - if (MI.getOpcode() == X86::MOV64rm || - MI.getOpcode() == X86::MOV64rm_TC) + if (MI.getOpcode() == X86::MOV64rm) FixupKind = X86::reloc_riprel_4byte_movq_load; // rip-relative addressing is actually relative to the *next* instruction. @@ -295,7 +310,8 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, // Otherwise, emit the most general non-SIB encoding: [REG+disp32] EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS); - EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); + EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS, + Fixups); return; } @@ -355,7 +371,8 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, if (ForceDisp8) EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups); else if (ForceDisp32 || Disp.getImm() != 0) - EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); + EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS, + Fixups); } /// EmitVEXOpcodePrefix - AVX instructions are encoded using a opcode prefix @@ -708,14 +725,15 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, if ((TSFlags & X86II::Op0Mask) == X86II::REP) EmitByte(0xF3, CurByte, OS); + // Emit the address size opcode prefix as needed. + if ((TSFlags & X86II::AdSize) || + (MemOperand != -1 && Is64BitMode && Is32BitMemOperand(MI, MemOperand))) + EmitByte(0x67, CurByte, OS); + // Emit the operand size opcode prefix as needed. if (TSFlags & X86II::OpSize) EmitByte(0x66, CurByte, OS); - // Emit the address size opcode prefix as needed. - if (TSFlags & X86II::AdSize) - EmitByte(0x67, CurByte, OS); - bool Need0FPrefix = false; switch (TSFlags & X86II::Op0Mask) { default: assert(0 && "Invalid prefix!"); @@ -806,6 +824,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, if ((TSFlags >> 32) & X86II::VEX_4V) HasVEX_4V = true; + // Determine where the memory operand starts, if present. 
int MemoryOperand = X86II::getMemoryOperandNo(TSFlags); if (MemoryOperand != -1) MemoryOperand += CurOp; @@ -815,7 +834,12 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, else EmitVEXOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS); + unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags); + + if ((TSFlags >> 32) & X86II::Has3DNow0F0FOpcode) + BaseOpcode = 0x0F; // Weird 3DNow! encoding. + unsigned SrcRegNum = 0; switch (TSFlags & X86II::FormMask) { case X86II::MRMInitReg: @@ -828,6 +852,13 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, EmitByte(BaseOpcode, CurByte, OS); break; + case X86II::RawFrmImm8: + EmitByte(BaseOpcode, CurByte, OS); + EmitImmediate(MI.getOperand(CurOp++), + X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags), + CurByte, OS, Fixups); + EmitImmediate(MI.getOperand(CurOp++), 1, FK_Data_1, CurByte, OS, Fixups); + break; case X86II::RawFrmImm16: EmitByte(BaseOpcode, CurByte, OS); EmitImmediate(MI.getOperand(CurOp++), @@ -963,12 +994,24 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, RegNum |= GetX86RegNum(MO) << 4; EmitImmediate(MCOperand::CreateImm(RegNum), 1, FK_Data_1, CurByte, OS, Fixups); - } else + } else { + unsigned FixupKind; + // FIXME: Is there a better way to know that we need a signed relocation? + if (MI.getOpcode() == X86::MOV64ri32 || + MI.getOpcode() == X86::MOV64mi32 || + MI.getOpcode() == X86::PUSH64i32) + FixupKind = X86::reloc_signed_4byte; + else + FixupKind = getImmFixupKind(TSFlags); EmitImmediate(MI.getOperand(CurOp++), - X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags), + X86II::getSizeOfImm(TSFlags), MCFixupKind(FixupKind), CurByte, OS, Fixups); + } } + if ((TSFlags >> 32) & X86II::Has3DNow0F0FOpcode) + EmitByte(X86II::getBaseOpcodeFor(TSFlags), CurByte, OS); + #ifndef NDEBUG // FIXME: Verify. diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 8c4620f..cbe6db2 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "InstPrinter/X86ATTInstPrinter.h" #include "X86MCInstLower.h" #include "X86AsmPrinter.h" #include "X86COFFMachineModuleInfo.h" @@ -38,11 +39,6 @@ MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const { } -MCSymbol *X86MCInstLower::GetPICBaseSymbol() const { - return static_cast<const X86TargetLowering*>(TM.getTargetLowering())-> - getPICBaseSymbol(&MF, Ctx); -} - /// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol /// operand to an MCSymbol. MCSymbol *X86MCInstLower:: @@ -154,7 +150,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx); // Subtract the pic base. Expr = MCBinaryExpr::CreateSub(Expr, - MCSymbolRefExpr::Create(GetPICBaseSymbol(), + MCSymbolRefExpr::Create(MF.getPICBaseSymbol(), Ctx), Ctx); break; @@ -173,7 +169,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, Expr = MCSymbolRefExpr::Create(Sym, Ctx); // Subtract the pic base. 
Expr = MCBinaryExpr::CreateSub(Expr, - MCSymbolRefExpr::Create(GetPICBaseSymbol(), Ctx), + MCSymbolRefExpr::Create(MF.getPICBaseSymbol(), Ctx), Ctx); if (MO.isJTI() && MAI.hasSetDirective()) { // If .set directive is supported, use it to reduce the number of @@ -326,8 +322,6 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { MO.getMBB()->getSymbol(), Ctx)); break; case MachineOperand::MO_GlobalAddress: - MCOp = LowerSymbolOperand(MO, GetSymbolFromOperand(MO)); - break; case MachineOperand::MO_ExternalSymbol: MCOp = LowerSymbolOperand(MO, GetSymbolFromOperand(MO)); break; @@ -347,6 +341,7 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { } // Handle a few special cases to eliminate operand modifiers. +ReSimplify: switch (OutMI.getOpcode()) { case X86::LEA64_32r: // Handle 'subreg rewriting' for the lea64_32mem operand. lower_lea64_32mem(&OutMI, 1); @@ -377,11 +372,10 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { case X86::SETB_C64r: LowerUnaryToTwoAddr(OutMI, X86::SBB64rr); break; case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break; case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break; - case X86::MMX_V_SET0: LowerUnaryToTwoAddr(OutMI, X86::MMX_PXORrr); break; - case X86::MMX_V_SETALLONES: - LowerUnaryToTwoAddr(OutMI, X86::MMX_PCMPEQDrr); break; case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; + case X86::VFsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break; + case X86::VFsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break; case X86::V_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break; case X86::V_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break; case X86::V_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; @@ -417,6 +411,13 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { break; } + case X86::EH_RETURN: + case X86::EH_RETURN64: { + OutMI = MCInst(); + OutMI.setOpcode(X86::RET); + break; + } + // TAILJMPd, TAILJMPd64 - Lower to the correct jump instructions. case X86::TAILJMPr: case X86::TAILJMPd: @@ -436,6 +437,19 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { break; } + // These are pseudo-ops for OR to help with the OR->ADD transformation. We do + // this with an ugly goto in case the resultant OR uses EAX and needs the + // short form. + case X86::ADD16rr_DB: OutMI.setOpcode(X86::OR16rr); goto ReSimplify; + case X86::ADD32rr_DB: OutMI.setOpcode(X86::OR32rr); goto ReSimplify; + case X86::ADD64rr_DB: OutMI.setOpcode(X86::OR64rr); goto ReSimplify; + case X86::ADD16ri_DB: OutMI.setOpcode(X86::OR16ri); goto ReSimplify; + case X86::ADD32ri_DB: OutMI.setOpcode(X86::OR32ri); goto ReSimplify; + case X86::ADD64ri32_DB: OutMI.setOpcode(X86::OR64ri32); goto ReSimplify; + case X86::ADD16ri8_DB: OutMI.setOpcode(X86::OR16ri8); goto ReSimplify; + case X86::ADD32ri8_DB: OutMI.setOpcode(X86::OR32ri8); goto ReSimplify; + case X86::ADD64ri8_DB: OutMI.setOpcode(X86::OR64ri8); goto ReSimplify; + // The assembler backend wants to see branches in their small form and relax // them to their large form. The JIT can only handle the large form because // it does not do relaxation. 
For now, translate the large form to the @@ -513,6 +527,66 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { } } +static void LowerTlsAddr(MCStreamer &OutStreamer, + X86MCInstLower &MCInstLowering, + const MachineInstr &MI) { + bool is64Bits = MI.getOpcode() == X86::TLS_addr64; + MCContext &context = OutStreamer.getContext(); + + if (is64Bits) { + MCInst prefix; + prefix.setOpcode(X86::DATA16_PREFIX); + OutStreamer.EmitInstruction(prefix); + } + MCSymbol *sym = MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)); + const MCSymbolRefExpr *symRef = + MCSymbolRefExpr::Create(sym, MCSymbolRefExpr::VK_TLSGD, context); + + MCInst LEA; + if (is64Bits) { + LEA.setOpcode(X86::LEA64r); + LEA.addOperand(MCOperand::CreateReg(X86::RDI)); // dest + LEA.addOperand(MCOperand::CreateReg(X86::RIP)); // base + LEA.addOperand(MCOperand::CreateImm(1)); // scale + LEA.addOperand(MCOperand::CreateReg(0)); // index + LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp + LEA.addOperand(MCOperand::CreateReg(0)); // seg + } else { + LEA.setOpcode(X86::LEA32r); + LEA.addOperand(MCOperand::CreateReg(X86::EAX)); // dest + LEA.addOperand(MCOperand::CreateReg(0)); // base + LEA.addOperand(MCOperand::CreateImm(1)); // scale + LEA.addOperand(MCOperand::CreateReg(X86::EBX)); // index + LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp + LEA.addOperand(MCOperand::CreateReg(0)); // seg + } + OutStreamer.EmitInstruction(LEA); + + if (is64Bits) { + MCInst prefix; + prefix.setOpcode(X86::DATA16_PREFIX); + OutStreamer.EmitInstruction(prefix); + prefix.setOpcode(X86::DATA16_PREFIX); + OutStreamer.EmitInstruction(prefix); + prefix.setOpcode(X86::REX64_PREFIX); + OutStreamer.EmitInstruction(prefix); + } + + MCInst call; + if (is64Bits) + call.setOpcode(X86::CALL64pcrel32); + else + call.setOpcode(X86::CALLpcrel32); + StringRef name = is64Bits ? "__tls_get_addr" : "___tls_get_addr"; + MCSymbol *tlsGetAddr = context.GetOrCreateSymbol(name); + const MCSymbolRefExpr *tlsRef = + MCSymbolRefExpr::Create(tlsGetAddr, + MCSymbolRefExpr::VK_PLT, + context); + + call.addOperand(MCOperand::CreateExpr(tlsRef)); + OutStreamer.EmitInstruction(call); +} void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { X86MCInstLower MCInstLowering(Mang, *MF, *this); @@ -532,13 +606,26 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitRawText(StringRef("\t#MEMBARRIER")); return; + + case X86::EH_RETURN: + case X86::EH_RETURN64: { + // Lower these as normal, but add some comments. + unsigned Reg = MI->getOperand(0).getReg(); + OutStreamer.AddComment(StringRef("eh_return, addr: %") + + X86ATTInstPrinter::getRegisterName(Reg)); + break; + } case X86::TAILJMPr: case X86::TAILJMPd: case X86::TAILJMPd64: // Lower these as normal, but add some comments. OutStreamer.AddComment("TAILCALL"); break; - + + case X86::TLS_addr32: + case X86::TLS_addr64: + return LowerTlsAddr(OutStreamer, MCInstLowering, *MI); + case X86::MOVPC32r: { MCInst TmpInst; // This is a pseudo op for a two instruction sequence with a label, which @@ -548,7 +635,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { // popl %esi // Emit the call. - MCSymbol *PICBase = MCInstLowering.GetPICBaseSymbol(); + MCSymbol *PICBase = MF->getPICBaseSymbol(); TmpInst.setOpcode(X86::CALLpcrel32); // FIXME: We would like an efficient form for this, so we don't have to do a // lot of extra uniquing. 
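As an aside, the `goto ReSimplify` used in X86MCInstLower::Lower above (where the ADD*_DB pseudo-ops are remapped to plain OR opcodes and then re-run through the same switch, in case the resulting OR can take a shorter encoding) is easier to see in isolation. The sketch below is a minimal, self-contained C++ illustration of that control flow only; the opcode enum and the lowerOpcode helper are invented for this example and are not part of the LLVM sources.

#include <cstdio>

// Invented stand-ins for the MCInst opcodes used in the diff above.
enum Opcode { ADD32ri_DB, OR32ri, OR32i32_EAX };

// Sketch of the "lower, then goto ReSimplify" idea: once a pseudo opcode has
// been rewritten to a real one, jump back so the rewritten opcode can still
// hit a later case in the same switch (for example a short form when EAX is
// the destination).
static Opcode lowerOpcode(Opcode Op, bool DestIsEAX) {
ReSimplify:
  switch (Op) {
  case ADD32ri_DB:       // pseudo: an ADD known to behave like an OR
    Op = OR32ri;
    goto ReSimplify;     // re-run: the OR may still want its short form
  case OR32ri:
    if (DestIsEAX) {
      Op = OR32i32_EAX;  // shorter encoding with implicit EAX destination
      goto ReSimplify;
    }
    return Op;
  default:
    return Op;
  }
}

int main() {
  // An ADD32ri_DB with an EAX destination ends up as the short-form OR.
  std::printf("%d\n", lowerOpcode(ADD32ri_DB, /*DestIsEAX=*/true));
  return 0;
}

The backward goto simply gives a remapped opcode another pass over the same set of simplifications without duplicating the cases.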
@@ -586,7 +673,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext); const MCExpr *PICBase = - MCSymbolRefExpr::Create(MCInstLowering.GetPICBaseSymbol(), OutContext); + MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), OutContext); DotExpr = MCBinaryExpr::CreateSub(DotExpr, PICBase, OutContext); DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext), diff --git a/lib/Target/X86/X86MCInstLower.h b/lib/Target/X86/X86MCInstLower.h index 539b09b..0210072 100644 --- a/lib/Target/X86/X86MCInstLower.h +++ b/lib/Target/X86/X86MCInstLower.h @@ -40,8 +40,6 @@ public: void Lower(const MachineInstr *MI, MCInst &OutMI) const; - MCSymbol *GetPICBaseSymbol() const; - MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const; MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; diff --git a/lib/Target/X86/X86MachObjectWriter.cpp b/lib/Target/X86/X86MachObjectWriter.cpp new file mode 100644 index 0000000..8f3dd32 --- /dev/null +++ b/lib/Target/X86/X86MachObjectWriter.cpp @@ -0,0 +1,32 @@ +//===-- X86MachObjectWriter.cpp - X86 Mach-O Writer -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "llvm/MC/MCMachObjectWriter.h" +using namespace llvm; + +namespace { +class X86MachObjectWriter : public MCMachObjectTargetWriter { +public: + X86MachObjectWriter(bool Is64Bit, uint32_t CPUType, + uint32_t CPUSubtype) + : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype, + /*UseAggressiveSymbolFolding=*/Is64Bit) {} +}; +} + +MCObjectWriter *llvm::createX86MachObjectWriter(raw_ostream &OS, + bool Is64Bit, + uint32_t CPUType, + uint32_t CPUSubtype) { + return createMachObjectWriter(new X86MachObjectWriter(Is64Bit, + CPUType, + CPUSubtype), + OS, /*IsLittleEndian=*/true); +} diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index fedd49e..2f6bd88 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -31,7 +31,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -41,7 +41,7 @@ #include "llvm/Support/CommandLine.h" using namespace llvm; -static cl::opt<bool> +cl::opt<bool> ForceStackAlign("force-align-stack", cl::desc("Force align the stack to the minimum alignment" " needed for the function."), @@ -60,7 +60,7 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>(); Is64Bit = Subtarget->is64Bit(); IsWin64 = Subtarget->isTargetWin64(); - StackAlign = TM.getFrameInfo()->getStackAlignment(); + StackAlign = TM.getFrameLowering()->getStackAlignment(); if (Is64Bit) { SlotSize = 8; @@ -159,46 +159,21 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) { case X86::YMM7: case X86::YMM15: case X86::MM7: return 7; - case X86::ES: - return 0; - case X86::CS: - return 1; - case X86::SS: - return 2; - case X86::DS: - return 3; - case X86::FS: - return 4; - case X86::GS: - return 5; - - case X86::CR0: - return 0; - case X86::CR1: - 
return 1; - case X86::CR2: - return 2; - case X86::CR3: - return 3; - case X86::CR4: - return 4; - - case X86::DR0: - return 0; - case X86::DR1: - return 1; - case X86::DR2: - return 2; - case X86::DR3: - return 3; - case X86::DR4: - return 4; - case X86::DR5: - return 5; - case X86::DR6: - return 6; - case X86::DR7: - return 7; + case X86::ES: return 0; + case X86::CS: return 1; + case X86::SS: return 2; + case X86::DS: return 3; + case X86::FS: return 4; + case X86::GS: return 5; + + case X86::CR0: case X86::CR8 : case X86::DR0: return 0; + case X86::CR1: case X86::CR9 : case X86::DR1: return 1; + case X86::CR2: case X86::CR10: case X86::DR2: return 2; + case X86::CR3: case X86::CR11: case X86::DR3: return 3; + case X86::CR4: case X86::CR12: case X86::DR4: return 4; + case X86::CR5: case X86::CR13: case X86::DR5: return 5; + case X86::CR6: case X86::CR14: case X86::DR6: return 6; + case X86::CR7: case X86::CR15: case X86::DR7: return 7; // Pseudo index registers are equivalent to a "none" // scaled index (See Intel Manual 2A, table 2-3) @@ -295,9 +270,14 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, } break; case X86::sub_32bit: - if (B == &X86::GR32RegClass || B == &X86::GR32_NOSPRegClass) { + if (B == &X86::GR32RegClass) { if (A->getSize() == 8) return A; + } else if (B == &X86::GR32_NOSPRegClass) { + if (A == &X86::GR64RegClass || A == &X86::GR64_NOSPRegClass) + return &X86::GR64_NOSPRegClass; + if (A->getSize() == 8) + return getCommonSubClass(A, &X86::GR64_NOSPRegClass); } else if (B == &X86::GR32_ABCDRegClass) { if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass || A == &X86::GR64_NOREXRegClass || @@ -336,10 +316,16 @@ X86RegisterInfo::getPointerRegClass(unsigned Kind) const { if (TM.getSubtarget<X86Subtarget>().is64Bit()) return &X86::GR64RegClass; return &X86::GR32RegClass; - case 1: // Normal GRPs except the stack pointer (for encoding reasons). + case 1: // Normal GPRs except the stack pointer (for encoding reasons). if (TM.getSubtarget<X86Subtarget>().is64Bit()) return &X86::GR64_NOSPRegClass; return &X86::GR32_NOSPRegClass; + case 2: // Available for tailcall (not callee-saved GPRs). + if (TM.getSubtarget<X86Subtarget>().isTargetWin64()) + return &X86::GR64_TCW64RegClass; + if (TM.getSubtarget<X86Subtarget>().is64Bit()) + return &X86::GR64_TCRegClass; + return &X86::GR32_TCRegClass; } } @@ -408,6 +394,8 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + // Set the stack-pointer register and its aliases as reserved. Reserved.set(X86::RSP); Reserved.set(X86::ESP); @@ -420,7 +408,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(X86::IP); // Set the frame-pointer register and its aliases as reserved if needed. - if (hasFP(MF)) { + if (TFI->hasFP(MF)) { Reserved.set(X86::RBP); Reserved.set(X86::EBP); Reserved.set(X86::BP); @@ -445,21 +433,6 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { // Stack Frame Processing methods //===----------------------------------------------------------------------===// -/// hasFP - Return true if the specified function should have a dedicated frame -/// pointer register. This is true if the function has variable sized allocas -/// or if frame pointer elimination is disabled. 
-bool X86RegisterInfo::hasFP(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const MachineModuleInfo &MMI = MF.getMMI(); - - return (DisableFramePointerElim(MF) || - needsStackRealignment(MF) || - MFI->hasVarSizedObjects() || - MFI->isFrameAddressTaken() || - MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() || - MMI.callsUnwindInit()); -} - bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); return (RealignStack && @@ -478,62 +451,25 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { if (0 && requiresRealignment && MFI->hasVarSizedObjects()) report_fatal_error( "Stack realignment in presense of dynamic allocas is not supported"); - + // If we've requested that we force align the stack do so now. if (ForceStackAlign) return canRealignStack(MF); - - return requiresRealignment && canRealignStack(MF); -} -bool X86RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const { - return !MF.getFrameInfo()->hasVarSizedObjects(); + return requiresRealignment && canRealignStack(MF); } bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, int &FrameIdx) const { - if (Reg == FramePtr && hasFP(MF)) { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + if (Reg == FramePtr && TFI->hasFP(MF)) { FrameIdx = MF.getFrameInfo()->getObjectIndexBegin(); return true; } return false; } -int -X86RegisterInfo::getFrameIndexOffset(const MachineFunction &MF, int FI) const { - const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo(); - const MachineFrameInfo *MFI = MF.getFrameInfo(); - int Offset = MFI->getObjectOffset(FI) - TFI.getOffsetOfLocalArea(); - uint64_t StackSize = MFI->getStackSize(); - - if (needsStackRealignment(MF)) { - if (FI < 0) { - // Skip the saved EBP. - Offset += SlotSize; - } else { - unsigned Align = MFI->getObjectAlignment(FI); - assert((-(Offset + StackSize)) % Align == 0); - Align = 0; - return Offset + StackSize; - } - // FIXME: Support tail calls - } else { - if (!hasFP(MF)) - return Offset + StackSize; - - // Skip the saved EBP. - Offset += SlotSize; - - // Skip the RETADDR move area - const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); - int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); - if (TailCallReturnAddrDelta < 0) - Offset -= TailCallReturnAddrDelta; - } - - return Offset; -} - static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) { if (is64Bit) { if (isInt<8>(Imm)) @@ -561,69 +497,70 @@ static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) { void X86RegisterInfo:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - if (!hasReservedCallFrame(MF)) { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + bool reseveCallFrame = TFI->hasReservedCallFrame(MF); + int Opcode = I->getOpcode(); + bool isDestroy = Opcode == getCallFrameDestroyOpcode(); + DebugLoc DL = I->getDebugLoc(); + uint64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0; + uint64_t CalleeAmt = isDestroy ? 
I->getOperand(1).getImm() : 0; + I = MBB.erase(I); + + if (!reseveCallFrame) { // If the stack pointer can be changed after prologue, turn the // adjcallstackup instruction into a 'sub ESP, <amt>' and the // adjcallstackdown instruction into 'add ESP, <amt>' // TODO: consider using push / pop instead of sub + store / add - MachineInstr *Old = I; - uint64_t Amount = Old->getOperand(0).getImm(); - if (Amount != 0) { - // We need to keep the stack aligned properly. To do this, we round the - // amount of space needed for the outgoing arguments up to the next - // alignment boundary. - Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign; - - MachineInstr *New = 0; - if (Old->getOpcode() == getCallFrameSetupOpcode()) { - New = BuildMI(MF, Old->getDebugLoc(), - TII.get(getSUBriOpcode(Is64Bit, Amount)), - StackPtr) - .addReg(StackPtr) - .addImm(Amount); - } else { - assert(Old->getOpcode() == getCallFrameDestroyOpcode()); - - // Factor out the amount the callee already popped. - uint64_t CalleeAmt = Old->getOperand(1).getImm(); - Amount -= CalleeAmt; - - if (Amount) { - unsigned Opc = getADDriOpcode(Is64Bit, Amount); - New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), StackPtr) - .addReg(StackPtr) - .addImm(Amount); - } - } + if (Amount == 0) + return; + + // We need to keep the stack aligned properly. To do this, we round the + // amount of space needed for the outgoing arguments up to the next + // alignment boundary. + Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign; + + MachineInstr *New = 0; + if (Opcode == getCallFrameSetupOpcode()) { + New = BuildMI(MF, DL, TII.get(getSUBriOpcode(Is64Bit, Amount)), + StackPtr) + .addReg(StackPtr) + .addImm(Amount); + } else { + assert(Opcode == getCallFrameDestroyOpcode()); - if (New) { - // The EFLAGS implicit def is dead. - New->getOperand(3).setIsDead(); + // Factor out the amount the callee already popped. + Amount -= CalleeAmt; - // Replace the pseudo instruction with a new instruction. - MBB.insert(I, New); + if (Amount) { + unsigned Opc = getADDriOpcode(Is64Bit, Amount); + New = BuildMI(MF, DL, TII.get(Opc), StackPtr) + .addReg(StackPtr).addImm(Amount); } } - } else if (I->getOpcode() == getCallFrameDestroyOpcode()) { - // If we are performing frame pointer elimination and if the callee pops - // something off the stack pointer, add it back. We do this until we have - // more advanced stack pointer tracking ability. - if (uint64_t CalleeAmt = I->getOperand(1).getImm()) { - unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt); - MachineInstr *Old = I; - MachineInstr *New = - BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), - StackPtr) - .addReg(StackPtr) - .addImm(CalleeAmt); + if (New) { // The EFLAGS implicit def is dead. New->getOperand(3).setIsDead(); + + // Replace the pseudo instruction with a new instruction. MBB.insert(I, New); } + + return; } - MBB.erase(I); + if (Opcode == getCallFrameDestroyOpcode() && CalleeAmt) { + // If we are performing frame pointer elimination and if the callee pops + // something off the stack pointer, add it back. We do this until we have + // more advanced stack pointer tracking ability. + unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt); + MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr) + .addReg(StackPtr).addImm(CalleeAmt); + + // The EFLAGS implicit def is dead. 
+ New->getOperand(3).setIsDead(); + MBB.insert(I, New); + } } void @@ -634,6 +571,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned i = 0; MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); while (!MI.getOperand(i).isFI()) { ++i; @@ -650,7 +588,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, else if (AfterFPPop) BasePtr = StackPtr; else - BasePtr = (hasFP(MF) ? FramePtr : StackPtr); + BasePtr = (TFI->hasFP(MF) ? FramePtr : StackPtr); // This must be part of a four operand memory reference. Replace the // FrameIndex with base register with EBP. Add an offset to the offset. @@ -660,11 +598,10 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int FIOffset; if (AfterFPPop) { // Tail call jmp happens after FP is popped. - const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo(); const MachineFrameInfo *MFI = MF.getFrameInfo(); - FIOffset = MFI->getObjectOffset(FrameIndex) - TFI.getOffsetOfLocalArea(); + FIOffset = MFI->getObjectOffset(FrameIndex) - TFI->getOffsetOfLocalArea(); } else - FIOffset = getFrameIndexOffset(MF, FrameIndex); + FIOffset = TFI->getFrameIndexOffset(MF, FrameIndex); if (MI.getOperand(i+3).isImm()) { // Offset is a 32-bit integer. @@ -677,710 +614,14 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } } -void -X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { - MachineFrameInfo *MFI = MF.getFrameInfo(); - - X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); - int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); - - if (TailCallReturnAddrDelta < 0) { - // create RETURNADDR area - // arg - // arg - // RETADDR - // { ... - // RETADDR area - // ... - // } - // [EBP] - MFI->CreateFixedObject(-TailCallReturnAddrDelta, - (-1U*SlotSize)+TailCallReturnAddrDelta, true); - } - - if (hasFP(MF)) { - assert((TailCallReturnAddrDelta <= 0) && - "The Delta should always be zero or negative"); - const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo(); - - // Create a frame entry for the EBP register that must be saved. - int FrameIdx = MFI->CreateFixedObject(SlotSize, - -(int)SlotSize + - TFI.getOffsetOfLocalArea() + - TailCallReturnAddrDelta, - true); - assert(FrameIdx == MFI->getObjectIndexBegin() && - "Slot for EBP register must be last in order to be found!"); - FrameIdx = 0; - } -} - -/// emitSPUpdate - Emit a series of instructions to increment / decrement the -/// stack pointer by a constant value. -static -void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - unsigned StackPtr, int64_t NumBytes, bool Is64Bit, - const TargetInstrInfo &TII) { - bool isSub = NumBytes < 0; - uint64_t Offset = isSub ? -NumBytes : NumBytes; - unsigned Opc = isSub ? - getSUBriOpcode(Is64Bit, Offset) : - getADDriOpcode(Is64Bit, Offset); - uint64_t Chunk = (1LL << 31) - 1; - DebugLoc DL = MBB.findDebugLoc(MBBI); - - while (Offset) { - uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset; - MachineInstr *MI = - BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) - .addReg(StackPtr) - .addImm(ThisVal); - MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. - Offset -= ThisVal; - } -} - -/// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator. 
-static -void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - unsigned StackPtr, uint64_t *NumBytes = NULL) { - if (MBBI == MBB.begin()) return; - - MachineBasicBlock::iterator PI = prior(MBBI); - unsigned Opc = PI->getOpcode(); - if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || - Opc == X86::ADD32ri || Opc == X86::ADD32ri8) && - PI->getOperand(0).getReg() == StackPtr) { - if (NumBytes) - *NumBytes += PI->getOperand(2).getImm(); - MBB.erase(PI); - } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || - Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && - PI->getOperand(0).getReg() == StackPtr) { - if (NumBytes) - *NumBytes -= PI->getOperand(2).getImm(); - MBB.erase(PI); - } -} - -/// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower iterator. -static -void mergeSPUpdatesDown(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - unsigned StackPtr, uint64_t *NumBytes = NULL) { - // FIXME: THIS ISN'T RUN!!! - return; - - if (MBBI == MBB.end()) return; - - MachineBasicBlock::iterator NI = llvm::next(MBBI); - if (NI == MBB.end()) return; - - unsigned Opc = NI->getOpcode(); - if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || - Opc == X86::ADD32ri || Opc == X86::ADD32ri8) && - NI->getOperand(0).getReg() == StackPtr) { - if (NumBytes) - *NumBytes -= NI->getOperand(2).getImm(); - MBB.erase(NI); - MBBI = NI; - } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || - Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && - NI->getOperand(0).getReg() == StackPtr) { - if (NumBytes) - *NumBytes += NI->getOperand(2).getImm(); - MBB.erase(NI); - MBBI = NI; - } -} - -/// mergeSPUpdates - Checks the instruction before/after the passed -/// instruction. If it is an ADD/SUB instruction it is deleted argument and the -/// stack adjustment is returned as a positive value for ADD and a negative for -/// SUB. -static int mergeSPUpdates(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - unsigned StackPtr, - bool doMergeWithPrevious) { - if ((doMergeWithPrevious && MBBI == MBB.begin()) || - (!doMergeWithPrevious && MBBI == MBB.end())) - return 0; - - MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI; - MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : llvm::next(MBBI); - unsigned Opc = PI->getOpcode(); - int Offset = 0; - - if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || - Opc == X86::ADD32ri || Opc == X86::ADD32ri8) && - PI->getOperand(0).getReg() == StackPtr){ - Offset += PI->getOperand(2).getImm(); - MBB.erase(PI); - if (!doMergeWithPrevious) MBBI = NI; - } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || - Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && - PI->getOperand(0).getReg() == StackPtr) { - Offset -= PI->getOperand(2).getImm(); - MBB.erase(PI); - if (!doMergeWithPrevious) MBBI = NI; - } - - return Offset; -} - -void X86RegisterInfo::emitCalleeSavedFrameMoves(MachineFunction &MF, - MCSymbol *Label, - unsigned FramePtr) const { - MachineFrameInfo *MFI = MF.getFrameInfo(); - MachineModuleInfo &MMI = MF.getMMI(); - - // Add callee saved registers to move list. - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - if (CSI.empty()) return; - - std::vector<MachineMove> &Moves = MMI.getFrameMoves(); - const TargetData *TD = MF.getTarget().getTargetData(); - bool HasFP = hasFP(MF); - - // Calculate amount of bytes used for return address storing. - int stackGrowth = - (MF.getTarget().getFrameInfo()->getStackGrowthDirection() == - TargetFrameInfo::StackGrowsUp ? 
- TD->getPointerSize() : -TD->getPointerSize()); - - // FIXME: This is dirty hack. The code itself is pretty mess right now. - // It should be rewritten from scratch and generalized sometimes. - - // Determine maximum offset (minumum due to stack growth). - int64_t MaxOffset = 0; - for (std::vector<CalleeSavedInfo>::const_iterator - I = CSI.begin(), E = CSI.end(); I != E; ++I) - MaxOffset = std::min(MaxOffset, - MFI->getObjectOffset(I->getFrameIdx())); - - // Calculate offsets. - int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth; - for (std::vector<CalleeSavedInfo>::const_iterator - I = CSI.begin(), E = CSI.end(); I != E; ++I) { - int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); - unsigned Reg = I->getReg(); - Offset = MaxOffset - Offset + saveAreaOffset; - - // Don't output a new machine move if we're re-saving the frame - // pointer. This happens when the PrologEpilogInserter has inserted an extra - // "PUSH" of the frame pointer -- the "emitPrologue" method automatically - // generates one when frame pointers are used. If we generate a "machine - // move" for this extra "PUSH", the linker will lose track of the fact that - // the frame pointer should have the value of the first "PUSH" when it's - // trying to unwind. - // - // FIXME: This looks inelegant. It's possibly correct, but it's covering up - // another bug. I.e., one where we generate a prolog like this: - // - // pushl %ebp - // movl %esp, %ebp - // pushl %ebp - // pushl %esi - // ... - // - // The immediate re-push of EBP is unnecessary. At the least, it's an - // optimization bug. EBP can be used as a scratch register in certain - // cases, but probably not when we have a frame pointer. - if (HasFP && FramePtr == Reg) - continue; - - MachineLocation CSDst(MachineLocation::VirtualFP, Offset); - MachineLocation CSSrc(Reg); - Moves.push_back(MachineMove(Label, CSDst, CSSrc)); - } -} - -/// emitPrologue - Push callee-saved registers onto the stack, which -/// automatically adjust the stack pointer. Adjust the stack pointer to allocate -/// space for local variables. Also emit labels used by the exception handler to -/// generate the exception handling frames. -void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB. - MachineBasicBlock::iterator MBBI = MBB.begin(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - const Function *Fn = MF.getFunction(); - const X86Subtarget *Subtarget = &MF.getTarget().getSubtarget<X86Subtarget>(); - MachineModuleInfo &MMI = MF.getMMI(); - X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); - bool needsFrameMoves = MMI.hasDebugInfo() || - !Fn->doesNotThrow() || UnwindTablesMandatory; - uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment. - uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate. - bool HasFP = hasFP(MF); - DebugLoc DL; - - // If we're forcing a stack realignment we can't rely on just the frame - // info, we need to know the ABI stack alignment as well in case we - // have a call out. Otherwise just make sure we have some alignment - we'll - // go with the minimum SlotSize. - if (ForceStackAlign) { - if (MFI->hasCalls()) - MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; - else if (MaxAlign < SlotSize) - MaxAlign = SlotSize; - } - - // Add RETADDR move area to callee saved frame size. 
- int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); - if (TailCallReturnAddrDelta < 0) - X86FI->setCalleeSavedFrameSize( - X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta); - - // If this is x86-64 and the Red Zone is not disabled, if we are a leaf - // function, and use up to 128 bytes of stack space, don't have a frame - // pointer, calls, or dynamic alloca then we do not need to adjust the - // stack pointer (we fit in the Red Zone). - if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) && - !needsStackRealignment(MF) && - !MFI->hasVarSizedObjects() && // No dynamic alloca. - !MFI->adjustsStack() && // No calls. - !Subtarget->isTargetWin64()) { // Win64 has no Red Zone - uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); - if (HasFP) MinSize += SlotSize; - StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); - MFI->setStackSize(StackSize); - } else if (Subtarget->isTargetWin64()) { - // We need to always allocate 32 bytes as register spill area. - // FIXME: We might reuse these 32 bytes for leaf functions. - StackSize += 32; - MFI->setStackSize(StackSize); - } - - // Insert stack pointer adjustment for later moving of return addr. Only - // applies to tail call optimized functions where the callee argument stack - // size is bigger than the callers. - if (TailCallReturnAddrDelta < 0) { - MachineInstr *MI = - BuildMI(MBB, MBBI, DL, - TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)), - StackPtr) - .addReg(StackPtr) - .addImm(-TailCallReturnAddrDelta); - MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. - } - - // Mapping for machine moves: - // - // DST: VirtualFP AND - // SRC: VirtualFP => DW_CFA_def_cfa_offset - // ELSE => DW_CFA_def_cfa - // - // SRC: VirtualFP AND - // DST: Register => DW_CFA_def_cfa_register - // - // ELSE - // OFFSET < 0 => DW_CFA_offset_extended_sf - // REG < 64 => DW_CFA_offset + Reg - // ELSE => DW_CFA_offset_extended - - std::vector<MachineMove> &Moves = MMI.getFrameMoves(); - const TargetData *TD = MF.getTarget().getTargetData(); - uint64_t NumBytes = 0; - int stackGrowth = -TD->getPointerSize(); - - if (HasFP) { - // Calculate required stack adjustment. - uint64_t FrameSize = StackSize - SlotSize; - if (needsStackRealignment(MF)) - FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign; - - NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize(); - - // Get the offset of the stack slot for the EBP register, which is - // guaranteed to be the last slot by processFunctionBeforeFrameFinalized. - // Update the frame offset adjustment. - MFI->setOffsetAdjustment(-NumBytes); - - // Save EBP/RBP into the appropriate stack slot. - BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) - .addReg(FramePtr, RegState::Kill); - - if (needsFrameMoves) { - // Mark the place where EBP/RBP was saved. - MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel); - - // Define the current CFA rule to use the provided offset. - if (StackSize) { - MachineLocation SPDst(MachineLocation::VirtualFP); - MachineLocation SPSrc(MachineLocation::VirtualFP, 2 * stackGrowth); - Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc)); - } else { - // FIXME: Verify & implement for FP - MachineLocation SPDst(StackPtr); - MachineLocation SPSrc(StackPtr, stackGrowth); - Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc)); - } - - // Change the rule for the FramePtr to be an "offset" rule. 
- MachineLocation FPDst(MachineLocation::VirtualFP, 2 * stackGrowth); - MachineLocation FPSrc(FramePtr); - Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc)); - } - - // Update EBP with the new base value... - BuildMI(MBB, MBBI, DL, - TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr) - .addReg(StackPtr); - - if (needsFrameMoves) { - // Mark effective beginning of when frame pointer becomes valid. - MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel); - - // Define the current CFA to use the EBP/RBP register. - MachineLocation FPDst(FramePtr); - MachineLocation FPSrc(MachineLocation::VirtualFP); - Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc)); - } - - // Mark the FramePtr as live-in in every block except the entry. - for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); - I != E; ++I) - I->addLiveIn(FramePtr); - - // Realign stack - if (needsStackRealignment(MF)) { - MachineInstr *MI = - BuildMI(MBB, MBBI, DL, - TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), - StackPtr).addReg(StackPtr).addImm(-MaxAlign); - - // The EFLAGS implicit def is dead. - MI->getOperand(3).setIsDead(); - } - } else { - NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); - } - - // Skip the callee-saved push instructions. - bool PushedRegs = false; - int StackOffset = 2 * stackGrowth; - - while (MBBI != MBB.end() && - (MBBI->getOpcode() == X86::PUSH32r || - MBBI->getOpcode() == X86::PUSH64r)) { - PushedRegs = true; - ++MBBI; - - if (!HasFP && needsFrameMoves) { - // Mark callee-saved push instruction. - MCSymbol *Label = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label); - - // Define the current CFA rule to use the provided offset. - unsigned Ptr = StackSize ? - MachineLocation::VirtualFP : StackPtr; - MachineLocation SPDst(Ptr); - MachineLocation SPSrc(Ptr, StackOffset); - Moves.push_back(MachineMove(Label, SPDst, SPSrc)); - StackOffset += stackGrowth; - } - } - - DL = MBB.findDebugLoc(MBBI); - - // Adjust stack pointer: ESP -= numbytes. - - // Windows and cygwin/mingw require a prologue helper routine when allocating - // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw - // uses __alloca. __alloca and the 32-bit version of __chkstk will probe - // the stack and adjust the stack pointer in one go. The 64-bit version - // of __chkstk is only responsible for probing the stack. The 64-bit - // prologue is responsible for adjusting the stack pointer. Touching the - // stack at 4K increments is necessary to ensure that the guard pages used - // by the OS virtual memory manager are allocated in correct sequence. - if (NumBytes >= 4096 && - (Subtarget->isTargetCygMing() || Subtarget->isTargetWin32())) { - // Check, whether EAX is livein for this function. - bool isEAXAlive = false; - for (MachineRegisterInfo::livein_iterator - II = MF.getRegInfo().livein_begin(), - EE = MF.getRegInfo().livein_end(); (II != EE) && !isEAXAlive; ++II) { - unsigned Reg = II->first; - isEAXAlive = (Reg == X86::EAX || Reg == X86::AX || - Reg == X86::AH || Reg == X86::AL); - } - - - const char *StackProbeSymbol = - Subtarget->isTargetWindows() ? 
"_chkstk" : "_alloca"; - if (!isEAXAlive) { - BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) - .addImm(NumBytes); - BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) - .addExternalSymbol(StackProbeSymbol) - .addReg(StackPtr, RegState::Define | RegState::Implicit) - .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); - } else { - // Save EAX - BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) - .addReg(X86::EAX, RegState::Kill); - - // Allocate NumBytes-4 bytes on stack. We'll also use 4 already - // allocated bytes for EAX. - BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) - .addImm(NumBytes - 4); - BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) - .addExternalSymbol(StackProbeSymbol) - .addReg(StackPtr, RegState::Define | RegState::Implicit) - .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); - - // Restore EAX - MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), - X86::EAX), - StackPtr, false, NumBytes - 4); - MBB.insert(MBBI, MI); - } - } else if (NumBytes) { - // If there is an SUB32ri of ESP immediately before this instruction, merge - // the two. This can be the case when tail call elimination is enabled and - // the callee has more arguments then the caller. - NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true); - - // If there is an ADD32ri or SUB32ri of ESP immediately after this - // instruction, merge the two instructions. - mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes); - - if (NumBytes) - emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII); - } - - if ((NumBytes || PushedRegs) && needsFrameMoves) { - // Mark end of stack pointer adjustment. - MCSymbol *Label = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label); - - if (!HasFP && NumBytes) { - // Define the current CFA rule to use the provided offset. - if (StackSize) { - MachineLocation SPDst(MachineLocation::VirtualFP); - MachineLocation SPSrc(MachineLocation::VirtualFP, - -StackSize + stackGrowth); - Moves.push_back(MachineMove(Label, SPDst, SPSrc)); - } else { - // FIXME: Verify & implement for FP - MachineLocation SPDst(StackPtr); - MachineLocation SPSrc(StackPtr, stackGrowth); - Moves.push_back(MachineMove(Label, SPDst, SPSrc)); - } - } - - // Emit DWARF info specifying the offsets of the callee-saved registers. - if (PushedRegs) - emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr); - } -} - -void X86RegisterInfo::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); - MachineBasicBlock::iterator MBBI = prior(MBB.end()); - unsigned RetOpcode = MBBI->getOpcode(); - DebugLoc DL = MBBI->getDebugLoc(); - - switch (RetOpcode) { - default: - llvm_unreachable("Can only insert epilog into returning blocks"); - case X86::RET: - case X86::RETI: - case X86::TCRETURNdi: - case X86::TCRETURNri: - case X86::TCRETURNmi: - case X86::TCRETURNdi64: - case X86::TCRETURNri64: - case X86::TCRETURNmi64: - case X86::EH_RETURN: - case X86::EH_RETURN64: - break; // These are ok - } - - // Get the number of bytes to allocate from the FrameInfo. 
- uint64_t StackSize = MFI->getStackSize(); - uint64_t MaxAlign = MFI->getMaxAlignment(); - unsigned CSSize = X86FI->getCalleeSavedFrameSize(); - uint64_t NumBytes = 0; - - // If we're forcing a stack realignment we can't rely on just the frame - // info, we need to know the ABI stack alignment as well in case we - // have a call out. Otherwise just make sure we have some alignment - we'll - // go with the minimum. - if (ForceStackAlign) { - if (MFI->hasCalls()) - MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; - else - MaxAlign = MaxAlign ? MaxAlign : 4; - } - - if (hasFP(MF)) { - // Calculate required stack adjustment. - uint64_t FrameSize = StackSize - SlotSize; - if (needsStackRealignment(MF)) - FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign; - - NumBytes = FrameSize - CSSize; - - // Pop EBP. - BuildMI(MBB, MBBI, DL, - TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr); - } else { - NumBytes = StackSize - CSSize; - } - - // Skip the callee-saved pop instructions. - MachineBasicBlock::iterator LastCSPop = MBBI; - while (MBBI != MBB.begin()) { - MachineBasicBlock::iterator PI = prior(MBBI); - unsigned Opc = PI->getOpcode(); - - if (Opc != X86::POP32r && Opc != X86::POP64r && - !PI->getDesc().isTerminator()) - break; - - --MBBI; - } - - DL = MBBI->getDebugLoc(); - - // If there is an ADD32ri or SUB32ri of ESP immediately before this - // instruction, merge the two instructions. - if (NumBytes || MFI->hasVarSizedObjects()) - mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes); - - // If dynamic alloca is used, then reset esp to point to the last callee-saved - // slot before popping them off! Same applies for the case, when stack was - // realigned. - if (needsStackRealignment(MF)) { - // We cannot use LEA here, because stack pointer was realigned. We need to - // deallocate local frame back. - if (CSSize) { - emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII); - MBBI = prior(LastCSPop); - } - - BuildMI(MBB, MBBI, DL, - TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), - StackPtr).addReg(FramePtr); - } else if (MFI->hasVarSizedObjects()) { - if (CSSize) { - unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r; - MachineInstr *MI = - addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr), - FramePtr, false, -CSSize); - MBB.insert(MBBI, MI); - } else { - BuildMI(MBB, MBBI, DL, - TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr) - .addReg(FramePtr); - } - } else if (NumBytes) { - // Adjust stack pointer back: ESP += numbytes. - emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII); - } - - // We're returning from function via eh_return. - if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) { - MBBI = prior(MBB.end()); - MachineOperand &DestAddr = MBBI->getOperand(0); - assert(DestAddr.isReg() && "Offset should be in register!"); - BuildMI(MBB, MBBI, DL, - TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), - StackPtr).addReg(DestAddr.getReg()); - } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi || - RetOpcode == X86::TCRETURNmi || - RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 || - RetOpcode == X86::TCRETURNmi64) { - bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64; - // Tail call return: adjust the stack pointer and jump to callee. - MBBI = prior(MBB.end()); - MachineOperand &JumpTarget = MBBI->getOperand(0); - MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1); - assert(StackAdjust.isImm() && "Expecting immediate value."); - - // Adjust stack pointer. 
- int StackAdj = StackAdjust.getImm(); - int MaxTCDelta = X86FI->getTCReturnAddrDelta(); - int Offset = 0; - assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive"); - - // Incoporate the retaddr area. - Offset = StackAdj-MaxTCDelta; - assert(Offset >= 0 && "Offset should never be negative"); - - if (Offset) { - // Check for possible merge with preceeding ADD instruction. - Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true); - emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII); - } - - // Jump to label or value in register. - if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) { - BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi) - ? X86::TAILJMPd : X86::TAILJMPd64)). - addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), - JumpTarget.getTargetFlags()); - } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) { - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi) - ? X86::TAILJMPm : X86::TAILJMPm64)); - for (unsigned i = 0; i != 5; ++i) - MIB.addOperand(MBBI->getOperand(i)); - } else if (RetOpcode == X86::TCRETURNri64) { - BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)). - addReg(JumpTarget.getReg(), RegState::Kill); - } else { - BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)). - addReg(JumpTarget.getReg(), RegState::Kill); - } - - MachineInstr *NewMI = prior(MBBI); - for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i) - NewMI->addOperand(MBBI->getOperand(i)); - - // Delete the pseudo instruction TCRETURN. - MBB.erase(MBBI); - } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) && - (X86FI->getTCReturnAddrDelta() < 0)) { - // Add the return addr area delta back since we are not tail calling. - int delta = -1*X86FI->getTCReturnAddrDelta(); - MBBI = prior(MBB.end()); - - // Check for possible merge with preceeding ADD instruction. - delta += mergeSPUpdates(MBB, MBBI, StackPtr, true); - emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII); - } -} - unsigned X86RegisterInfo::getRARegister() const { return Is64Bit ? X86::RIP // Should have dwarf #16. : X86::EIP; // Should have dwarf #8. } unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const { - return hasFP(MF) ? FramePtr : StackPtr; -} - -void -X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const { - // Calculate amount of bytes used for return address storing - int stackGrowth = (Is64Bit ? -8 : -4); - - // Initial state of the frame pointer is esp+stackGrowth. - MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(StackPtr, stackGrowth); - Moves.push_back(MachineMove(0, Dst, Src)); - - // Add return address to move list - MachineLocation CSDst(StackPtr, stackGrowth); - MachineLocation CSSrc(getRARegister()); - Moves.push_back(MachineMove(0, CSDst, CSSrc)); + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + return TFI->hasFP(MF) ? FramePtr : StackPtr; } unsigned X86RegisterInfo::getEHExceptionRegister() const { @@ -1579,13 +820,13 @@ namespace { // Be over-conservative: scan over all vreg defs and find whether vector // registers are used. If yes, there is a possibility that vector register // will be spilled and thus require dynamic stack realignment. 
- for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister; - RegNum < RI.getLastVirtReg(); ++RegNum) - if (RI.getRegClass(RegNum)->getAlignment() > StackAlignment) { + for (unsigned i = 0, e = RI.getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (RI.getRegClass(Reg)->getAlignment() > StackAlignment) { FuncInfo->setReserveFP(true); return true; } - + } // Nothing to do return false; } diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index 527df05..064be64 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -111,14 +111,10 @@ public: /// register scavenger to determine what registers are free. BitVector getReservedRegs(const MachineFunction &MF) const; - bool hasFP(const MachineFunction &MF) const; - bool canRealignStack(const MachineFunction &MF) const; bool needsStackRealignment(const MachineFunction &MF) const; - bool hasReservedCallFrame(const MachineFunction &MF) const; - bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, int &FrameIdx) const; @@ -129,19 +125,12 @@ public: void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, RegScavenger *RS = NULL) const; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS = NULL) const; - - void emitCalleeSavedFrameMoves(MachineFunction &MF, MCSymbol *Label, - unsigned FramePtr) const; - void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - // Debug information queries. unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; - int getFrameIndexOffset(const MachineFunction &MF, int FI) const; - void getInitialFrameState(std::vector<MachineMove> &Moves) const; + unsigned getStackRegister() const { return StackPtr; } + // FIXME: Move to FrameInfok + unsigned getSlotSize() const { return SlotSize; } // Exception handling queries. unsigned getEHExceptionRegister() const; diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 95269b1..612fac2 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -1,10 +1,10 @@ //===- X86RegisterInfo.td - Describe the X86 Register File --*- tablegen -*-==// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file describes the X86 Register file, defining the registers themselves, @@ -34,8 +34,8 @@ let Namespace = "X86" in { // because the register file generator is smart enough to figure out that // AL aliases AX if we tell it that AX aliased AL (for example). - // Dwarf numbering is different for 32-bit and 64-bit, and there are - // variations by target as well. Currently the first entry is for X86-64, + // Dwarf numbering is different for 32-bit and 64-bit, and there are + // variations by target as well. 
Currently the first entry is for X86-64, // second - for EH on X86-32/Darwin and third is 'generic' one (X86-32/Linux // and debug information on X86-32/Darwin) @@ -81,7 +81,7 @@ let Namespace = "X86" in { def SP : RegisterWithSubRegs<"sp", [SPL]>, DwarfRegNum<[7, 5, 4]>; } def IP : Register<"ip">, DwarfRegNum<[16]>; - + // X86-64 only let SubRegIndices = [sub_8bit] in { def R8W : RegisterWithSubRegs<"r8w", [R8B]>, DwarfRegNum<[8, -2, -2]>; @@ -103,8 +103,8 @@ let Namespace = "X86" in { def EDI : RegisterWithSubRegs<"edi", [DI]>, DwarfRegNum<[5, 7, 7]>; def EBP : RegisterWithSubRegs<"ebp", [BP]>, DwarfRegNum<[6, 4, 5]>; def ESP : RegisterWithSubRegs<"esp", [SP]>, DwarfRegNum<[7, 5, 4]>; - def EIP : RegisterWithSubRegs<"eip", [IP]>, DwarfRegNum<[16, 8, 8]>; - + def EIP : RegisterWithSubRegs<"eip", [IP]>, DwarfRegNum<[16, 8, 8]>; + // X86-64 only def R8D : RegisterWithSubRegs<"r8d", [R8W]>, DwarfRegNum<[8, -2, -2]>; def R9D : RegisterWithSubRegs<"r9d", [R9W]>, DwarfRegNum<[9, -2, -2]>; @@ -208,7 +208,7 @@ let Namespace = "X86" in { def ST4 : Register<"st(4)">, DwarfRegNum<[37, 16, 15]>; def ST5 : Register<"st(5)">, DwarfRegNum<[38, 17, 16]>; def ST6 : Register<"st(6)">, DwarfRegNum<[39, 18, 17]>; - def ST7 : Register<"st(7)">, DwarfRegNum<[40, 19, 18]>; + def ST7 : Register<"st(7)">, DwarfRegNum<[40, 19, 18]>; // Status flags register def EFLAGS : Register<"flags">; @@ -220,7 +220,7 @@ let Namespace = "X86" in { def ES : Register<"es">; def FS : Register<"fs">; def GS : Register<"gs">; - + // Debug registers def DR0 : Register<"dr0">; def DR1 : Register<"dr1">; @@ -230,8 +230,8 @@ let Namespace = "X86" in { def DR5 : Register<"dr5">; def DR6 : Register<"dr6">; def DR7 : Register<"dr7">; - - // Condition registers + + // Control registers def CR0 : Register<"cr0">; def CR1 : Register<"cr1">; def CR2 : Register<"cr2">; @@ -241,6 +241,13 @@ let Namespace = "X86" in { def CR6 : Register<"cr6">; def CR7 : Register<"cr7">; def CR8 : Register<"cr8">; + def CR9 : Register<"cr9">; + def CR10 : Register<"cr10">; + def CR11 : Register<"cr11">; + def CR12 : Register<"cr12">; + def CR13 : Register<"cr13">; + def CR14 : Register<"cr14">; + def CR15 : Register<"cr15">; // Pseudo index registers def EIZ : Register<"eiz">; @@ -254,10 +261,10 @@ let Namespace = "X86" in { // implicitly defined to be the register allocation order. // -// List call-clobbered registers before callee-save registers. RBX, RBP, (and +// List call-clobbered registers before callee-save registers. RBX, RBP, (and // R12, R13, R14, and R15 for X86-64) are callee-save registers. // In 64-mode, there are 12 additional i8 registers, SIL, DIL, BPL, SPL, and -// R8B, ... R15B. +// R8B, ... R15B. // Allocate R12 and R13 last, as these require an extra byte when // encoded in x86_64 instructions. // FIXME: Allow AH, CH, DH, BH to be used as general-purpose registers in @@ -292,14 +299,14 @@ def GR8 : RegisterClass<"X86", [i8], 8, GR8Class::iterator GR8Class::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); // Does the function dedicate RBP / EBP to being a frame ptr? if (!Subtarget.is64Bit()) // In 32-mode, none of the 8-bit registers aliases EBP or ESP. 
return begin() + 8; - else if (RI->hasFP(MF) || MFI->getReserveFP()) + else if (TFI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate SPL or BPL. return array_endof(X86_GR8_AO_64) - 1; else @@ -337,12 +344,12 @@ def GR16 : RegisterClass<"X86", [i16], 16, GR16Class::iterator GR16Class::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); if (Subtarget.is64Bit()) { // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF) || MFI->getReserveFP()) + if (TFI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate SP or BP. return array_endof(X86_GR16_AO_64) - 1; else @@ -350,7 +357,7 @@ def GR16 : RegisterClass<"X86", [i16], 16, return array_endof(X86_GR16_AO_64); } else { // Does the function dedicate EBP to being a frame ptr? - if (RI->hasFP(MF) || MFI->getReserveFP()) + if (TFI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate SP or BP. return begin() + 6; else @@ -389,12 +396,12 @@ def GR32 : RegisterClass<"X86", [i32], 32, GR32Class::iterator GR32Class::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); if (Subtarget.is64Bit()) { // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF) || MFI->getReserveFP()) + if (TFI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate ESP or EBP. return array_endof(X86_GR32_AO_64) - 1; else @@ -402,7 +409,7 @@ def GR32 : RegisterClass<"X86", [i32], 32, return array_endof(X86_GR32_AO_64); } else { // Does the function dedicate EBP to being a frame ptr? - if (RI->hasFP(MF) || MFI->getReserveFP()) + if (TFI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate ESP or EBP. return begin() + 6; else @@ -429,13 +436,13 @@ def GR64 : RegisterClass<"X86", [i64], 64, GR64Class::iterator GR64Class::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); if (!Subtarget.is64Bit()) return begin(); // None of these are allocatable in 32-bit. // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF) || MFI->getReserveFP()) + if (TFI->hasFP(MF) || MFI->getReserveFP()) return end()-3; // If so, don't allocate RIP, RSP or RBP else return end()-2; // If not, just don't allocate RIP or RSP @@ -446,18 +453,16 @@ def GR64 : RegisterClass<"X86", [i64], 64, // Segment registers for use by MOV instructions (and others) that have a // segment register as one operand. Always contain a 16-bit segment // descriptor. -def SEGMENT_REG : RegisterClass<"X86", [i16], 16, [CS, DS, SS, ES, FS, GS]> { -} +def SEGMENT_REG : RegisterClass<"X86", [i16], 16, [CS, DS, SS, ES, FS, GS]>; // Debug registers. 
def DEBUG_REG : RegisterClass<"X86", [i32], 32, - [DR0, DR1, DR2, DR3, DR4, DR5, DR6, DR7]> { -} + [DR0, DR1, DR2, DR3, DR4, DR5, DR6, DR7]>; // Control registers. def CONTROL_REG : RegisterClass<"X86", [i64], 64, - [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7, CR8]> { -} + [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7, CR8, + CR9, CR10, CR11, CR12, CR13, CR14, CR15]>; // GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of // GR8, GR16, GR32, and GR64 which contain just the "a" "b", "c", and "d" @@ -465,10 +470,8 @@ def CONTROL_REG : RegisterClass<"X86", [i64], 64, // that support 8-bit subreg operations. On x86-64, GR16_ABCD, GR32_ABCD, // and GR64_ABCD are classes for registers that support 8-bit h-register // operations. -def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]> { -} -def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, [AH, CH, DH, BH]> { -} +def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]>; +def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, [AH, CH, DH, BH]>; def GR16_ABCD : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]> { let SubRegClasses = [(GR8_ABCD_L sub_8bit), (GR8_ABCD_H sub_8bit_hi)]; } @@ -493,6 +496,9 @@ def GR64_TC : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI, (GR32_TC sub_32bit)]; } +def GR64_TCW64 : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, + R8, R9, R11]>; + // GR8_NOREX - GR8 registers which do not require a REX prefix. def GR8_NOREX : RegisterClass<"X86", [i8], 8, [AL, CL, DL, AH, CH, DH, BL, BH]> { @@ -538,10 +544,10 @@ def GR16_NOREX : RegisterClass<"X86", [i16], 16, GR16_NOREXClass::iterator GR16_NOREXClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); // Does the function dedicate RBP / EBP to being a frame ptr? - if (RI->hasFP(MF) || MFI->getReserveFP()) + if (TFI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate SP or BP. return end() - 2; else @@ -562,10 +568,10 @@ def GR32_NOREX : RegisterClass<"X86", [i32], 32, GR32_NOREXClass::iterator GR32_NOREXClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); // Does the function dedicate RBP / EBP to being a frame ptr? - if (RI->hasFP(MF) || MFI->getReserveFP()) + if (TFI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate ESP or EBP. return end() - 2; else @@ -587,10 +593,10 @@ def GR64_NOREX : RegisterClass<"X86", [i64], 64, GR64_NOREXClass::iterator GR64_NOREXClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF) || MFI->getReserveFP()) + if (TFI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate RIP, RSP or RBP. 
return end() - 3; else @@ -629,12 +635,12 @@ def GR32_NOSP : RegisterClass<"X86", [i32], 32, GR32_NOSPClass::iterator GR32_NOSPClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); if (Subtarget.is64Bit()) { // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF) || MFI->getReserveFP()) + if (TFI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate EBP. return array_endof(X86_GR32_NOSP_AO_64) - 1; else @@ -642,7 +648,7 @@ def GR32_NOSP : RegisterClass<"X86", [i32], 32, return array_endof(X86_GR32_NOSP_AO_64); } else { // Does the function dedicate EBP to being a frame ptr? - if (RI->hasFP(MF) || MFI->getReserveFP()) + if (TFI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate EBP. return begin() + 6; else @@ -667,13 +673,13 @@ def GR64_NOSP : RegisterClass<"X86", [i64], 64, GR64_NOSPClass::iterator GR64_NOSPClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); if (!Subtarget.is64Bit()) return begin(); // None of these are allocatable in 32-bit. // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF) || MFI->getReserveFP()) + if (TFI->hasFP(MF) || MFI->getReserveFP()) return end()-1; // If so, don't allocate RBP else return end(); // If not, any reg in this class is ok. @@ -695,10 +701,10 @@ def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64, GR64_NOREX_NOSPClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF) || MFI->getReserveFP()) + if (TFI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate RBP. return end() - 1; else @@ -784,7 +790,7 @@ def RST : RegisterClass<"X86", [f80, f64, f32], 32, } // Generic vector registers: VR64 and VR128. -def VR64 : RegisterClass<"X86", [v8i8, v4i16, v2i32, v1i64], 64, +def VR64: RegisterClass<"X86", [x86mmx], 64, [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7]>; def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index 6297a27..42e8193 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -32,10 +32,13 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, - const Value *DstSV, - uint64_t DstSVOff) const { + MachinePointerInfo DstPtrInfo) const { ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + // If to a segment-relative address space, use the default lowering. 
+ if (DstPtrInfo.getAddrSpace() >= 256) + return SDValue(); + // If not DWORD aligned or size is more than the threshold, call the library. // The libc version is likely to be faster for these cases. It can use the // address value and run time information about the CPU. @@ -133,7 +136,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, Dst, InFlag); InFlag = Chain.getValue(1); - SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); + SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops)); @@ -147,7 +150,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, X86::ECX, Left, InFlag); InFlag = Chain.getValue(1); - Tys = DAG.getVTList(MVT::Other, MVT::Flag); + Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag }; Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops)); } else if (BytesLeft) { @@ -161,7 +164,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, DAG.getConstant(Offset, AddrVT)), Src, DAG.getConstant(BytesLeft, SizeVT), - Align, isVolatile, DstSV, DstSVOff + Offset); + Align, isVolatile, DstPtrInfo.getWithOffset(Offset)); } // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain. @@ -173,10 +176,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, - const Value *DstSV, - uint64_t DstSVOff, - const Value *SrcSV, - uint64_t SrcSVOff) const { + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { // This requires the copy size to be a constant, preferrably // within a subtarget-specific limit. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); @@ -186,14 +187,29 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold()) return SDValue(); - /// If not DWORD aligned, call the library. - if ((Align & 3) != 0) + /// If not DWORD aligned, it is more efficient to call the library. However + /// if calling the library is not allowed (AlwaysInline), then soldier on as + /// the code generated here is better than the long load-store sequence we + /// would otherwise get. + if (!AlwaysInline && (Align & 3) != 0) + return SDValue(); + + // If to a segment-relative address space, use the default lowering. + if (DstPtrInfo.getAddrSpace() >= 256 || + SrcPtrInfo.getAddrSpace() >= 256) return SDValue(); - // DWORD aligned - EVT AVT = MVT::i32; - if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned - AVT = MVT::i64; + MVT AVT; + if (Align & 1) + AVT = MVT::i8; + else if (Align & 2) + AVT = MVT::i16; + else if (Align & 4) + // DWORD aligned + AVT = MVT::i32; + else + // QWORD aligned + AVT = Subtarget->is64Bit() ? 
MVT::i64 : MVT::i32; unsigned UBytes = AVT.getSizeInBits() / 8; unsigned CountVal = SizeVal / UBytes; @@ -214,7 +230,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, Src, InFlag); InFlag = Chain.getValue(1); - SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); + SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops, array_lengthof(Ops)); @@ -234,8 +250,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, DAG.getConstant(Offset, SrcVT)), DAG.getConstant(BytesLeft, SizeVT), Align, isVolatile, AlwaysInline, - DstSV, DstSVOff + Offset, - SrcSV, SrcSVOff + Offset)); + DstPtrInfo.getWithOffset(Offset), + SrcPtrInfo.getWithOffset(Offset))); } return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, diff --git a/lib/Target/X86/X86SelectionDAGInfo.h b/lib/Target/X86/X86SelectionDAGInfo.h index 4f30f31..d1d66fe 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.h +++ b/lib/Target/X86/X86SelectionDAGInfo.h @@ -39,8 +39,7 @@ public: SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, - const Value *DstSV, - uint64_t DstSVOff) const; + MachinePointerInfo DstPtrInfo) const; virtual SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, @@ -48,10 +47,8 @@ public: SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, - const Value *DstSV, - uint64_t DstSVOff, - const Value *SrcSV, - uint64_t SrcSVOff) const; + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const; }; } diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 0d02e5e..de76856 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -1,4 +1,4 @@ -//===-- X86Subtarget.cpp - X86 Subtarget Information ------------*- C++ -*-===// +//===-- X86Subtarget.cpp - X86 Subtarget Information ----------------------===// // // The LLVM Compiler Infrastructure // @@ -18,7 +18,7 @@ #include "llvm/GlobalValue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/System/Host.h" +#include "llvm/Support/Host.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/SmallVector.h" @@ -256,13 +256,14 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { if ((ECX >> 9) & 1) X86SSELevel = SSSE3; if ((ECX >> 19) & 1) X86SSELevel = SSE41; if ((ECX >> 20) & 1) X86SSELevel = SSE42; + // FIXME: AVX codegen support is not ready. + //if ((ECX >> 28) & 1) { HasAVX = true; X86SSELevel = NoMMXSSE; } bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0; bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0; HasCLMUL = IsIntel && ((ECX >> 1) & 0x1); HasFMA3 = IsIntel && ((ECX >> 12) & 0x1); - HasAVX = ((ECX >> 28) & 0x1); HasAES = IsIntel && ((ECX >> 25) & 0x1); if (IsIntel || IsAMD) { @@ -289,6 +290,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, , X863DNowLevel(NoThreeDNow) , HasCMov(false) , HasX86_64(false) + , HasPOPCNT(false) , HasSSE4A(false) , HasAVX(false) , HasAES(false) @@ -315,11 +317,13 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, ParseSubtargetFeatures(FS, CPU); // All X86-64 CPUs also have SSE2, however user might request no SSE via // -mattr, so don't force SSELevel here. + if (HasAVX) + X86SSELevel = NoMMXSSE; } else { // Otherwise, use CPUID to auto-detect feature set. 
AutoDetectSubtargetFeatures(); // Make sure SSE2 is enabled; it is available on all X86-64 CPUs. - if (Is64Bit && X86SSELevel < SSE2) + if (Is64Bit && !HasAVX && X86SSELevel < SSE2) X86SSELevel = SSE2; } @@ -338,9 +342,9 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, assert((!Is64Bit || HasX86_64) && "64-bit code requested on a subtarget that doesn't support it!"); - // Stack alignment is 16 bytes on Darwin (both 32 and 64 bit) and for all 64 - // bit targets. - if (isTargetDarwin() || Is64Bit) + // Stack alignment is 16 bytes on Darwin and Linux (both 32 and 64 bit) and + // for all 64-bit targets. + if (isTargetDarwin() || isTargetLinux() || Is64Bit) stackAlignment = 16; if (StackAlignment) diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 0ee91ab..8a119b4 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -65,6 +65,9 @@ protected: /// bool HasX86_64; + /// HasPOPCNT - True if the processor supports POPCNT. + bool HasPOPCNT; + /// HasSSE4A - True if the processor supports SSE4A instructions. bool HasSSE4A; @@ -100,7 +103,7 @@ protected: /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops. /// unsigned MaxInlineSizeThreshold; - + /// TargetTriple - What processor and OS we're targeting. Triple TargetTriple; @@ -150,7 +153,10 @@ public: bool hasSSE4A() const { return HasSSE4A; } bool has3DNow() const { return X863DNowLevel >= ThreeDNow; } bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } + bool hasPOPCNT() const { return HasPOPCNT; } bool hasAVX() const { return HasAVX; } + bool hasXMM() const { return hasSSE1() || hasAVX(); } + bool hasXMMInt() const { return hasSSE2() || hasAVX(); } bool hasAES() const { return HasAES; } bool hasCLMUL() const { return HasCLMUL; } bool hasFMA3() const { return HasFMA3; } @@ -160,23 +166,21 @@ public: bool hasVectorUAMem() const { return HasVectorUAMem; } bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; } - + // ELF is a reasonably sane default and the only other X86 targets we // support are Darwin and Windows. Just use "not those". - bool isTargetELF() const { + bool isTargetELF() const { return !isTargetDarwin() && !isTargetWindows() && !isTargetCygMing(); } bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; } bool isTargetWindows() const { return TargetTriple.getOS() == Triple::Win32; } - bool isTargetMingw() const { - return TargetTriple.getOS() == Triple::MinGW32 || - TargetTriple.getOS() == Triple::MinGW64; } + bool isTargetMingw() const { return TargetTriple.getOS() == Triple::MinGW32; } bool isTargetCygwin() const { return TargetTriple.getOS() == Triple::Cygwin; } bool isTargetCygMing() const { return isTargetMingw() || isTargetCygwin(); } - + /// isTargetCOFF - Return true if this is any COFF/Windows target variant. 
bool isTargetCOFF() const { return isTargetMingw() || isTargetCygwin() || isTargetWindows(); @@ -186,22 +190,12 @@ public: return Is64Bit && (isTargetMingw() || isTargetWindows()); } - bool isTargetWin32() const { - return !Is64Bit && (isTargetMingw() || isTargetWindows()); + bool isTargetEnvMacho() const { + return isTargetDarwin() || (TargetTriple.getEnvironment() == Triple::MachO); } - std::string getDataLayout() const { - const char *p; - if (is64Bit()) - p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64"; - else if (isTargetDarwin()) - p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32"; - else if (isTargetMingw() || isTargetWindows()) - p = "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-n8:16:32"; - else - p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32"; - - return std::string(p); + bool isTargetWin32() const { + return !Is64Bit && (isTargetMingw() || isTargetWindows()); } bool isPICStyleSet() const { return PICStyle != PICStyles::None; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index ce8636eb..889c824 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -30,10 +30,12 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { case Triple::Darwin: return new X86MCAsmInfoDarwin(TheTriple); case Triple::MinGW32: - case Triple::MinGW64: case Triple::Cygwin: case Triple::Win32: - return new X86MCAsmInfoCOFF(TheTriple); + if (TheTriple.getEnvironment() == Triple::MachO) + return new X86MCAsmInfoDarwin(TheTriple); + else + return new X86MCAsmInfoCOFF(TheTriple); default: return new X86ELFMCAsmInfo(TheTriple); } @@ -43,22 +45,25 @@ static MCStreamer *createMCStreamer(const Target &T, const std::string &TT, MCContext &Ctx, TargetAsmBackend &TAB, raw_ostream &_OS, MCCodeEmitter *_Emitter, - bool RelaxAll) { + bool RelaxAll, + bool NoExecStack) { Triple TheTriple(TT); switch (TheTriple.getOS()) { case Triple::Darwin: return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll); case Triple::MinGW32: - case Triple::MinGW64: case Triple::Cygwin: case Triple::Win32: - return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll); + if (TheTriple.getEnvironment() == Triple::MachO) + return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll); + else + return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll); default: - return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll); + return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack); } } -extern "C" void LLVMInitializeX86Target() { +extern "C" void LLVMInitializeX86Target() { // Register the target. RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target); RegisterTargetMachine<X86_64TargetMachine> Y(TheX86_64Target); @@ -89,28 +94,38 @@ extern "C" void LLVMInitializeX86Target() { X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT, const std::string &FS) - : X86TargetMachine(T, TT, FS, false) { + : X86TargetMachine(T, TT, FS, false), + DataLayout(getSubtargetImpl()->isTargetDarwin() ? + "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32" : + (getSubtargetImpl()->isTargetCygMing() || + getSubtargetImpl()->isTargetWindows()) ? 
+ "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-n8:16:32" : + "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32"), + InstrInfo(*this), + TSInfo(*this), + TLInfo(*this), + JITInfo(*this) { } X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT, const std::string &FS) - : X86TargetMachine(T, TT, FS, true) { + : X86TargetMachine(T, TT, FS, true), + DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64"), + InstrInfo(*this), + TSInfo(*this), + TLInfo(*this), + JITInfo(*this) { } /// X86TargetMachine ctor - Create an X86 target. /// -X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT, +X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT, const std::string &FS, bool is64Bit) - : LLVMTargetMachine(T, TT), + : LLVMTargetMachine(T, TT), Subtarget(TT, FS, is64Bit), - DataLayout(Subtarget.getDataLayout()), - FrameInfo(TargetFrameInfo::StackGrowsDown, - Subtarget.getStackAlignment(), - (Subtarget.isTargetWin64() ? -40 : - (Subtarget.is64Bit() ? -8 : -4))), - InstrInfo(*this), JITInfo(*this), TLInfo(*this), TSInfo(*this), - ELFWriterInfo(*this) { + FrameLowering(*this, Subtarget), + ELFWriterInfo(is64Bit, true) { DefRelocModel = getRelocationModel(); // If no relocation model was picked, default as appropriate for the target. @@ -217,12 +232,12 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) { // FIXME: Move this to TargetJITInfo! // On Darwin, do not override 64-bit setting made in X86TargetMachine(). - if (DefRelocModel == Reloc::Default && + if (DefRelocModel == Reloc::Default && (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit())) { setRelocationModel(Reloc::Static); Subtarget.setPICStyle(PICStyles::None); } - + PM.add(createX86JITCodeEmitterPass(*this, JCE)); diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index f9fb424..5973922 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -14,16 +14,17 @@ #ifndef X86TARGETMACHINE_H #define X86TARGETMACHINE_H -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" #include "X86.h" #include "X86ELFWriterInfo.h" #include "X86InstrInfo.h" -#include "X86JITInfo.h" -#include "X86Subtarget.h" #include "X86ISelLowering.h" +#include "X86FrameLowering.h" +#include "X86JITInfo.h" #include "X86SelectionDAGInfo.h" +#include "X86Subtarget.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameLowering.h" namespace llvm { @@ -31,12 +32,7 @@ class formatted_raw_ostream; class X86TargetMachine : public LLVMTargetMachine { X86Subtarget Subtarget; - const TargetData DataLayout; // Calculates type size & alignment - TargetFrameInfo FrameInfo; - X86InstrInfo InstrInfo; - X86JITInfo JITInfo; - X86TargetLowering TLInfo; - X86SelectionDAGInfo TSInfo; + X86FrameLowering FrameLowering; X86ELFWriterInfo ELFWriterInfo; Reloc::Model DefRelocModel; // Reloc model before it's overridden. 
@@ -49,20 +45,25 @@ public: X86TargetMachine(const Target &T, const std::string &TT, const std::string &FS, bool is64Bit); - virtual const X86InstrInfo *getInstrInfo() const { return &InstrInfo; } - virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; } - virtual X86JITInfo *getJITInfo() { return &JITInfo; } + virtual const X86InstrInfo *getInstrInfo() const { + llvm_unreachable("getInstrInfo not implemented"); + } + virtual const TargetFrameLowering *getFrameLowering() const { + return &FrameLowering; + } + virtual X86JITInfo *getJITInfo() { + llvm_unreachable("getJITInfo not implemented"); + } virtual const X86Subtarget *getSubtargetImpl() const{ return &Subtarget; } - virtual const X86TargetLowering *getTargetLowering() const { - return &TLInfo; + virtual const X86TargetLowering *getTargetLowering() const { + llvm_unreachable("getTargetLowering not implemented"); } virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const { - return &TSInfo; + llvm_unreachable("getSelectionDAGInfo not implemented"); } virtual const X86RegisterInfo *getRegisterInfo() const { - return &InstrInfo.getRegisterInfo(); + return &getInstrInfo()->getRegisterInfo(); } - virtual const TargetData *getTargetData() const { return &DataLayout; } virtual const X86ELFWriterInfo *getELFWriterInfo() const { return Subtarget.isTargetELF() ? &ELFWriterInfo : 0; } @@ -79,17 +80,53 @@ public: /// X86_32TargetMachine - X86 32-bit target machine. /// class X86_32TargetMachine : public X86TargetMachine { + const TargetData DataLayout; // Calculates type size & alignment + X86InstrInfo InstrInfo; + X86SelectionDAGInfo TSInfo; + X86TargetLowering TLInfo; + X86JITInfo JITInfo; public: X86_32TargetMachine(const Target &T, const std::string &M, const std::string &FS); + virtual const TargetData *getTargetData() const { return &DataLayout; } + virtual const X86TargetLowering *getTargetLowering() const { + return &TLInfo; + } + virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const { + return &TSInfo; + } + virtual const X86InstrInfo *getInstrInfo() const { + return &InstrInfo; + } + virtual X86JITInfo *getJITInfo() { + return &JITInfo; + } }; /// X86_64TargetMachine - X86 64-bit target machine. /// class X86_64TargetMachine : public X86TargetMachine { + const TargetData DataLayout; // Calculates type size & alignment + X86InstrInfo InstrInfo; + X86SelectionDAGInfo TSInfo; + X86TargetLowering TLInfo; + X86JITInfo JITInfo; public: X86_64TargetMachine(const Target &T, const std::string &TT, const std::string &FS); + virtual const TargetData *getTargetData() const { return &DataLayout; } + virtual const X86TargetLowering *getTargetLowering() const { + return &TLInfo; + } + virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const { + return &TSInfo; + } + virtual const X86InstrInfo *getInstrInfo() const { + return &InstrInfo; + } + virtual X86JITInfo *getJITInfo() { + return &JITInfo; + } }; } // End llvm namespace
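
A minimal sketch of the refactoring pattern that runs through the hunks above: in this import, frame-pointer policy and prologue/epilogue emission move out of X86RegisterInfo into the new TargetFrameLowering interface (X86FrameLowering), so code that used to ask the register info (RI->hasFP(MF), getFrameIndexOffset, emitPrologue/emitEpilogue) now fetches the frame lowering object from the TargetMachine. The helper below is illustrative only; the wrapper name getBaseRegisterFor and its parameters are invented for this note and are not part of the tree. It mirrors the new body of X86RegisterInfo::getFrameRegister shown in the diff.

    // Sketch, not a verbatim excerpt: shows how frame queries are routed
    // through TargetFrameLowering after this import.
    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/Target/TargetMachine.h"
    #include "llvm/Target/TargetFrameLowering.h"
    using namespace llvm;

    static unsigned getBaseRegisterFor(const MachineFunction &MF,
                                       unsigned FramePtr, unsigned StackPtr) {
      // hasFP() now lives on the frame lowering object, not the register info.
      const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
      return TFI->hasFP(MF) ? FramePtr : StackPtr;
    }

The same pattern appears in eliminateFrameIndex (TFI->getFrameIndexOffset, TFI->getOffsetOfLocalArea) and in the allocation_order_end hooks of X86RegisterInfo.td, which now consult TM.getFrameLowering()->hasFP(MF) instead of TM.getRegisterInfo()->hasFP(MF).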