Diffstat (limited to 'lib/Target/X86')
 lib/Target/X86/AsmParser/CMakeLists.txt           |    6
 lib/Target/X86/AsmParser/Makefile                 |   15
 lib/Target/X86/AsmParser/X86AsmParser.cpp         |  479
 lib/Target/X86/AsmPrinter/CMakeLists.txt          |    6
 lib/Target/X86/AsmPrinter/Makefile                |    2
 lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp   |   84
 lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h     |   86
 lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp       |  947
 lib/Target/X86/AsmPrinter/X86AsmPrinter.h         |  150
 lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp |  131
 lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h   |   99
 lib/Target/X86/AsmPrinter/X86MCInstLower.cpp      |  485
 lib/Target/X86/AsmPrinter/X86MCInstLower.h        |   54
 lib/Target/X86/CMakeLists.txt                     |   14
 lib/Target/X86/Makefile                           |    4
 lib/Target/X86/README-X86-64.txt                  |   49
 lib/Target/X86/README.txt                         |   20
 lib/Target/X86/TargetInfo/CMakeLists.txt          |    7
 lib/Target/X86/TargetInfo/Makefile                |   15
 lib/Target/X86/TargetInfo/X86TargetInfo.cpp       |   23
 lib/Target/X86/X86.h                              |   17
 lib/Target/X86/X86.td                             |   32
 lib/Target/X86/X86COFFMachineModuleInfo.cpp       |  123
 lib/Target/X86/X86COFFMachineModuleInfo.h         |   67
 lib/Target/X86/X86CallingConv.td                  |   45
 lib/Target/X86/X86CodeEmitter.cpp                 |  672
 lib/Target/X86/X86CompilationCallback_Win64.asm   |   31
 lib/Target/X86/X86ELFWriterInfo.cpp               |   93
 lib/Target/X86/X86ELFWriterInfo.h                 |   23
 lib/Target/X86/X86FastISel.cpp                    |  415
 lib/Target/X86/X86FloatingPoint.cpp               |   45
 lib/Target/X86/X86FloatingPointRegKill.cpp        |    6
 lib/Target/X86/X86ISelDAGToDAG.cpp                | 1124
 lib/Target/X86/X86ISelLowering.cpp                | 2359
 lib/Target/X86/X86ISelLowering.h                  |  188
 lib/Target/X86/X86Instr64bit.td                   |  468
 lib/Target/X86/X86InstrBuilder.h                  |   52
 lib/Target/X86/X86InstrFPStack.td                 |   25
 lib/Target/X86/X86InstrFormats.td                 |   30
 lib/Target/X86/X86InstrInfo.cpp                   | 1336
 lib/Target/X86/X86InstrInfo.h                     |  122
 lib/Target/X86/X86InstrInfo.td                    |  802
 lib/Target/X86/X86InstrMMX.td                     |  121
 lib/Target/X86/X86InstrSSE.td                     |  560
 lib/Target/X86/X86JITInfo.cpp                     |   81
 lib/Target/X86/X86JITInfo.h                       |    7
 lib/Target/X86/X86MCAsmInfo.cpp                   |  123
 lib/Target/X86/X86MCAsmInfo.h                     |   42
 lib/Target/X86/X86RegisterInfo.cpp                |  616
 lib/Target/X86/X86RegisterInfo.h                  |   27
 lib/Target/X86/X86RegisterInfo.td                 |  344
 lib/Target/X86/X86Relocations.h                   |   30
 lib/Target/X86/X86Subtarget.cpp                   |  232
 lib/Target/X86/X86Subtarget.h                     |  107
 lib/Target/X86/X86TargetMachine.cpp               |  279
 lib/Target/X86/X86TargetMachine.h                 |   50
 lib/Target/X86/X86TargetObjectFile.cpp            |   65
 lib/Target/X86/X86TargetObjectFile.h              |   40
 58 files changed, 9798 insertions(+), 3677 deletions(-)
diff --git a/lib/Target/X86/AsmParser/CMakeLists.txt b/lib/Target/X86/AsmParser/CMakeLists.txt
new file mode 100644
index 0000000..034d5ab
--- /dev/null
+++ b/lib/Target/X86/AsmParser/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMX86AsmParser
+ X86AsmParser.cpp
+ )
+add_dependencies(LLVMX86AsmParser X86CodeGenTable_gen)
diff --git a/lib/Target/X86/AsmParser/Makefile b/lib/Target/X86/AsmParser/Makefile
new file mode 100644
index 0000000..25fb0a2
--- /dev/null
+++ b/lib/Target/X86/AsmParser/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/X86/AsmParser/Makefile -------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMX86AsmParser
+
+# Hack: we need to include the 'main' x86 target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
new file mode 100644
index 0000000..c357b4d
--- /dev/null
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -0,0 +1,479 @@
+//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmLexer.h"
+#include "llvm/MC/MCAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmParser.h"
+using namespace llvm;
+
+namespace {
+struct X86Operand;
+
+class X86ATTAsmParser : public TargetAsmParser {
+ MCAsmParser &Parser;
+
+private:
+ MCAsmParser &getParser() const { return Parser; }
+
+ MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+ void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
+
+ bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+
+ bool ParseRegister(X86Operand &Op);
+
+ bool ParseOperand(X86Operand &Op);
+
+ bool ParseMemOperand(X86Operand &Op);
+
+ bool ParseDirectiveWord(unsigned Size, SMLoc L);
+
+ /// @name Auto-generated Match Functions
+ /// {
+
+ bool MatchInstruction(SmallVectorImpl<X86Operand> &Operands,
+ MCInst &Inst);
+
+  /// MatchRegisterName - Match the given string to a register name and return
+  /// its register number, or 0 if there is no match.
+ unsigned MatchRegisterName(const StringRef &Name);
+
+ /// }
+
+public:
+ X86ATTAsmParser(const Target &T, MCAsmParser &_Parser)
+ : TargetAsmParser(T), Parser(_Parser) {}
+
+ virtual bool ParseInstruction(const StringRef &Name, MCInst &Inst);
+
+ virtual bool ParseDirective(AsmToken DirectiveID);
+};
+
+} // end anonymous namespace
+
+
+namespace {
+
+/// X86Operand - Instances of this class represent a parsed X86 machine
+/// instruction.
+struct X86Operand {
+ enum {
+ Token,
+ Register,
+ Immediate,
+ Memory
+ } Kind;
+
+ union {
+ struct {
+ const char *Data;
+ unsigned Length;
+ } Tok;
+
+ struct {
+ unsigned RegNo;
+ } Reg;
+
+ struct {
+ const MCExpr *Val;
+ } Imm;
+
+ struct {
+ unsigned SegReg;
+ const MCExpr *Disp;
+ unsigned BaseReg;
+ unsigned IndexReg;
+ unsigned Scale;
+ } Mem;
+ };
+
+ StringRef getToken() const {
+ assert(Kind == Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+
+ unsigned getReg() const {
+ assert(Kind == Register && "Invalid access!");
+ return Reg.RegNo;
+ }
+
+ const MCExpr *getImm() const {
+ assert(Kind == Immediate && "Invalid access!");
+ return Imm.Val;
+ }
+
+ const MCExpr *getMemDisp() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.Disp;
+ }
+ unsigned getMemSegReg() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.SegReg;
+ }
+ unsigned getMemBaseReg() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.BaseReg;
+ }
+ unsigned getMemIndexReg() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.IndexReg;
+ }
+ unsigned getMemScale() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.Scale;
+ }
+
+  bool isToken() const { return Kind == Token; }
+
+ bool isImm() const { return Kind == Immediate; }
+
+ bool isImmSExt8() const {
+ // Accept immediates which fit in 8 bits when sign extended, and
+ // non-absolute immediates.
+ if (!isImm())
+ return false;
+
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) {
+ int64_t Value = CE->getValue();
+ return Value == (int64_t) (int8_t) Value;
+ }
+
+ return true;
+ }
+
+ bool isMem() const { return Kind == Memory; }
+
+ bool isReg() const { return Kind == Register; }
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ }
+
+ void addImmSExt8Operands(MCInst &Inst, unsigned N) const {
+ // FIXME: Support user customization of the render method.
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ }
+
+ void addMemOperands(MCInst &Inst, unsigned N) const {
+ assert((N == 4 || N == 5) && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
+ Inst.addOperand(MCOperand::CreateImm(getMemScale()));
+ Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
+ Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
+
+ // FIXME: What a hack.
+ if (N == 5)
+ Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
+ }
+
+ static X86Operand CreateToken(StringRef Str) {
+ X86Operand Res;
+ Res.Kind = Token;
+ Res.Tok.Data = Str.data();
+ Res.Tok.Length = Str.size();
+ return Res;
+ }
+
+ static X86Operand CreateReg(unsigned RegNo) {
+ X86Operand Res;
+ Res.Kind = Register;
+ Res.Reg.RegNo = RegNo;
+ return Res;
+ }
+
+ static X86Operand CreateImm(const MCExpr *Val) {
+ X86Operand Res;
+ Res.Kind = Immediate;
+ Res.Imm.Val = Val;
+ return Res;
+ }
+
+ static X86Operand CreateMem(unsigned SegReg, const MCExpr *Disp,
+ unsigned BaseReg, unsigned IndexReg,
+ unsigned Scale) {
+    // We should never just have a displacement; that would be an immediate.
+ assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
+
+ // The scale should always be one of {1,2,4,8}.
+ assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
+ "Invalid scale!");
+ X86Operand Res;
+ Res.Kind = Memory;
+ Res.Mem.SegReg = SegReg;
+ Res.Mem.Disp = Disp;
+ Res.Mem.BaseReg = BaseReg;
+ Res.Mem.IndexReg = IndexReg;
+ Res.Mem.Scale = Scale;
+ return Res;
+ }
+};
+
+} // end anonymous namespace.
+
+
+bool X86ATTAsmParser::ParseRegister(X86Operand &Op) {
+ const AsmToken &TokPercent = getLexer().getTok();
+ (void)TokPercent; // Avoid warning when assertions are disabled.
+ assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!");
+ getLexer().Lex(); // Eat percent token.
+
+ const AsmToken &Tok = getLexer().getTok();
+ if (Tok.isNot(AsmToken::Identifier))
+ return Error(Tok.getLoc(), "invalid register name");
+
+ // FIXME: Validate register for the current architecture; we have to do
+ // validation later, so maybe there is no need for this here.
+  unsigned RegNo = MatchRegisterName(Tok.getString());
+ if (RegNo == 0)
+ return Error(Tok.getLoc(), "invalid register name");
+
+ Op = X86Operand::CreateReg(RegNo);
+ getLexer().Lex(); // Eat identifier token.
+
+ return false;
+}
+
+bool X86ATTAsmParser::ParseOperand(X86Operand &Op) {
+ switch (getLexer().getKind()) {
+ default:
+ return ParseMemOperand(Op);
+ case AsmToken::Percent:
+ // FIXME: if a segment register, this could either be just the seg reg, or
+ // the start of a memory operand.
+ return ParseRegister(Op);
+ case AsmToken::Dollar: {
+ // $42 -> immediate.
+ getLexer().Lex();
+ const MCExpr *Val;
+ if (getParser().ParseExpression(Val))
+ return true;
+ Op = X86Operand::CreateImm(Val);
+ return false;
+ }
+ }
+}
+
+/// ParseMemOperand: segment: disp(basereg, indexreg, scale)
+bool X86ATTAsmParser::ParseMemOperand(X86Operand &Op) {
+ // FIXME: If SegReg ':' (e.g. %gs:), eat and remember.
+ unsigned SegReg = 0;
+
+ // We have to disambiguate a parenthesized expression "(4+5)" from the start
+ // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
+ // only way to do this without lookahead is to eat the ( and see what is after
+ // it.
+ const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
+ if (getLexer().isNot(AsmToken::LParen)) {
+ if (getParser().ParseExpression(Disp)) return true;
+
+ // After parsing the base expression we could either have a parenthesized
+ // memory address or not. If not, return now. If so, eat the (.
+ if (getLexer().isNot(AsmToken::LParen)) {
+ // Unless we have a segment register, treat this as an immediate.
+ if (SegReg)
+ Op = X86Operand::CreateMem(SegReg, Disp, 0, 0, 1);
+ else
+ Op = X86Operand::CreateImm(Disp);
+ return false;
+ }
+
+ // Eat the '('.
+ getLexer().Lex();
+ } else {
+    // Okay, we have a '('. We don't know if this is an expression or not,
+    // so we have to eat the '(' to see beyond it.
+ getLexer().Lex(); // Eat the '('.
+
+ if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
+ // Nothing to do here, fall into the code below with the '(' part of the
+ // memory operand consumed.
+ } else {
+      // It must be a parenthesized expression; parse it now.
+ if (getParser().ParseParenExpression(Disp))
+ return true;
+
+ // After parsing the base expression we could either have a parenthesized
+ // memory address or not. If not, return now. If so, eat the (.
+ if (getLexer().isNot(AsmToken::LParen)) {
+ // Unless we have a segment register, treat this as an immediate.
+ if (SegReg)
+ Op = X86Operand::CreateMem(SegReg, Disp, 0, 0, 1);
+ else
+ Op = X86Operand::CreateImm(Disp);
+ return false;
+ }
+
+ // Eat the '('.
+ getLexer().Lex();
+ }
+ }
+
+ // If we reached here, then we just ate the ( of the memory operand. Process
+ // the rest of the memory operand.
+ unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
+
+ if (getLexer().is(AsmToken::Percent)) {
+ if (ParseRegister(Op))
+ return true;
+ BaseReg = Op.getReg();
+ }
+
+ if (getLexer().is(AsmToken::Comma)) {
+ getLexer().Lex(); // Eat the comma.
+
+ // Following the comma we should have either an index register, or a scale
+    // value. We don't support the latter form, but we want to parse it
+ // correctly.
+ //
+    // Note that even though it would be completely consistent to support syntax
+ // like "1(%eax,,1)", the assembler doesn't.
+ if (getLexer().is(AsmToken::Percent)) {
+ if (ParseRegister(Op))
+ return true;
+ IndexReg = Op.getReg();
+
+ if (getLexer().isNot(AsmToken::RParen)) {
+ // Parse the scale amount:
+ // ::= ',' [scale-expression]
+ if (getLexer().isNot(AsmToken::Comma))
+ return true;
+ getLexer().Lex(); // Eat the comma.
+
+ if (getLexer().isNot(AsmToken::RParen)) {
+ SMLoc Loc = getLexer().getTok().getLoc();
+
+ int64_t ScaleVal;
+ if (getParser().ParseAbsoluteExpression(ScaleVal))
+ return true;
+
+ // Validate the scale amount.
+ if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8)
+ return Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
+ Scale = (unsigned)ScaleVal;
+ }
+ }
+ } else if (getLexer().isNot(AsmToken::RParen)) {
+ // Otherwise we have the unsupported form of a scale amount without an
+ // index.
+ SMLoc Loc = getLexer().getTok().getLoc();
+
+ int64_t Value;
+ if (getParser().ParseAbsoluteExpression(Value))
+ return true;
+
+ return Error(Loc, "cannot have scale factor without index register");
+ }
+ }
+
+ // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
+ if (getLexer().isNot(AsmToken::RParen))
+ return Error(getLexer().getTok().getLoc(),
+ "unexpected token in memory operand");
+ getLexer().Lex(); // Eat the ')'.
+
+ Op = X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale);
+ return false;
+}
+
+bool X86ATTAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) {
+ SmallVector<X86Operand, 8> Operands;
+
+ Operands.push_back(X86Operand::CreateToken(Name));
+
+ SMLoc Loc = getLexer().getTok().getLoc();
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+
+ // Parse '*' modifier.
+ if (getLexer().is(AsmToken::Star)) {
+ getLexer().Lex(); // Eat the star.
+ Operands.push_back(X86Operand::CreateToken("*"));
+ }
+
+ // Read the first operand.
+ Operands.push_back(X86Operand());
+ if (ParseOperand(Operands.back()))
+ return true;
+
+ while (getLexer().is(AsmToken::Comma)) {
+ getLexer().Lex(); // Eat the comma.
+
+ // Parse and remember the operand.
+ Operands.push_back(X86Operand());
+ if (ParseOperand(Operands.back()))
+ return true;
+ }
+ }
+
+ if (!MatchInstruction(Operands, Inst))
+ return false;
+
+ // FIXME: We should give nicer diagnostics about the exact failure.
+
+ Error(Loc, "unrecognized instruction");
+ return true;
+}
+
+bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if (IDVal == ".word")
+ return ParseDirectiveWord(2, DirectiveID.getLoc());
+ return true;
+}
+
+/// ParseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
+bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().ParseExpression(Value))
+ return true;
+
+ getParser().getStreamer().EmitValue(Value, Size);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ getLexer().Lex();
+ }
+ }
+
+ getLexer().Lex();
+ return false;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeX86AsmParser() {
+ RegisterAsmParser<X86ATTAsmParser> X(TheX86_32Target);
+ RegisterAsmParser<X86ATTAsmParser> Y(TheX86_64Target);
+}
+
+#include "X86GenAsmMatcher.inc"
diff --git a/lib/Target/X86/AsmPrinter/CMakeLists.txt b/lib/Target/X86/AsmPrinter/CMakeLists.txt
index a28c826..b70a587 100644
--- a/lib/Target/X86/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/X86/AsmPrinter/CMakeLists.txt
@@ -1,9 +1,9 @@
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
add_llvm_library(LLVMX86AsmPrinter
- X86ATTAsmPrinter.cpp
X86ATTInstPrinter.cpp
X86AsmPrinter.cpp
- X86IntelAsmPrinter.cpp
+ X86IntelInstPrinter.cpp
+ X86MCInstLower.cpp
)
-add_dependencies(LLVMX86AsmPrinter X86CodeGenTable_gen)
\ No newline at end of file
+add_dependencies(LLVMX86AsmPrinter X86CodeGenTable_gen)
diff --git a/lib/Target/X86/AsmPrinter/Makefile b/lib/Target/X86/AsmPrinter/Makefile
index ba89ac6..2368761 100644
--- a/lib/Target/X86/AsmPrinter/Makefile
+++ b/lib/Target/X86/AsmPrinter/Makefile
@@ -1,4 +1,4 @@
-##===- lib/Target/X86/Makefile -----------------------------*- Makefile -*-===##
+##===- lib/Target/X86/AsmPrinter/Makefile ------------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
index fa0ee75..bc70ffe 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
@@ -13,10 +13,13 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "asm-printer"
+#include "X86ATTInstPrinter.h"
#include "llvm/MC/MCInst.h"
-#include "X86ATTAsmPrinter.h"
-#include "llvm/Target/TargetAsmInfo.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "X86GenInstrNames.inc"
using namespace llvm;
// Include the auto-generated portion of the assembly writer.
@@ -25,9 +28,11 @@ using namespace llvm;
#include "X86GenAsmWriter.inc"
#undef MachineInstr
-void X86ATTAsmPrinter::printSSECC(const MCInst *MI, unsigned Op) {
+void X86ATTInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); }
+
+void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op) {
switch (MI->getOperand(Op).getImm()) {
- default: assert(0 && "Invalid ssecc argument!");
+ default: llvm_unreachable("Invalid ssecc argument!");
case 0: O << "eq"; break;
case 1: O << "lt"; break;
case 2: O << "le"; break;
@@ -39,61 +44,36 @@ void X86ATTAsmPrinter::printSSECC(const MCInst *MI, unsigned Op) {
}
}
-
-void X86ATTAsmPrinter::printPICLabel(const MCInst *MI, unsigned Op) {
- assert(0 &&
- "This is only used for MOVPC32r, should lower before asm printing!");
-}
-
-
/// print_pcrel_imm - This is used to print an immediate value that ends up
/// being encoded as a pc-relative value. These print slightly differently, for
/// example, a $ is not emitted.
-void X86ATTAsmPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo) {
+void X86ATTInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo) {
const MCOperand &Op = MI->getOperand(OpNo);
-
if (Op.isImm())
O << Op.getImm();
- else if (Op.isMBBLabel())
- // FIXME: Keep in sync with printBasicBlockLabel. printBasicBlockLabel
- // should eventually call into this code, not the other way around.
- O << TAI->getPrivateGlobalPrefix() << "BB" << Op.getMBBLabelFunction()
- << '_' << Op.getMBBLabelBlock();
- else
- assert(0 && "Unknown pcrel immediate operand");
+ else {
+ assert(Op.isExpr() && "unknown pcrel immediate operand");
+ Op.getExpr()->print(O, &MAI);
+ }
}
-
-void X86ATTAsmPrinter::printOperand(const MCInst *MI, unsigned OpNo,
- const char *Modifier) {
+void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ const char *Modifier) {
assert(Modifier == 0 && "Modifiers should not be used");
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
- O << '%';
- unsigned Reg = Op.getReg();
-#if 0
- if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
- MVT VT = (strcmp(Modifier+6,"64") == 0) ?
- MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 :
- ((strcmp(Modifier+6,"16") == 0) ? MVT::i16 : MVT::i8));
- Reg = getX86SubSuperRegister(Reg, VT);
- }
-#endif
- O << TRI->getAsmName(Reg);
- return;
+ O << '%' << getRegisterName(Op.getReg());
} else if (Op.isImm()) {
- //if (!Modifier || (strcmp(Modifier, "debug") && strcmp(Modifier, "mem")))
+ O << '$' << Op.getImm();
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
O << '$';
- O << Op.getImm();
- return;
+ Op.getExpr()->print(O, &MAI);
}
-
- O << "<<UNKNOWN OPERAND KIND>>";
}
-void X86ATTAsmPrinter::printLeaMemReference(const MCInst *MI, unsigned Op) {
-
+void X86ATTInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op) {
const MCOperand &BaseReg = MI->getOperand(Op);
const MCOperand &IndexReg = MI->getOperand(Op+2);
const MCOperand &DispSpec = MI->getOperand(Op+3);
@@ -103,19 +83,11 @@ void X86ATTAsmPrinter::printLeaMemReference(const MCInst *MI, unsigned Op) {
if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg()))
O << DispVal;
} else {
- abort();
- //assert(DispSpec.isGlobal() || DispSpec.isCPI() ||
- // DispSpec.isJTI() || DispSpec.isSymbol());
- //printOperand(MI, Op+3, "mem");
+ assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
+ DispSpec.getExpr()->print(O, &MAI);
}
if (IndexReg.getReg() || BaseReg.getReg()) {
- // There are cases where we can end up with ESP/RSP in the indexreg slot.
- // If this happens, swap the base/index register to support assemblers that
- // don't work when the index is *SP.
- // FIXME: REMOVE THIS.
- assert(IndexReg.getReg() != X86::ESP && IndexReg.getReg() != X86::RSP);
-
O << '(';
if (BaseReg.getReg())
printOperand(MI, Op);
@@ -131,9 +103,9 @@ void X86ATTAsmPrinter::printLeaMemReference(const MCInst *MI, unsigned Op) {
}
}
-void X86ATTAsmPrinter::printMemReference(const MCInst *MI, unsigned Op) {
- const MCOperand &Segment = MI->getOperand(Op+4);
- if (Segment.getReg()) {
+void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op) {
+ // If this has a segment register, print it.
+ if (MI->getOperand(Op+4).getReg()) {
printOperand(MI, Op+4);
O << ':';
}
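
A sketch of the operand layout these printer methods expect: printMemReference reads five consecutive MCInst operands ordered base, scale, index, displacement, segment, matching X86Operand::addMemOperands in the parser above. The X86::EAX/EBX enumerators and the MCContext `Ctx` are assumed names; this is an illustration, not code from the patch.

// Illustration only: the 5-operand memory encoding consumed by
// printMemReference.
llvm::MCInst MI;
MI.addOperand(llvm::MCOperand::CreateReg(X86::EAX));  // base
MI.addOperand(llvm::MCOperand::CreateImm(2));         // scale
MI.addOperand(llvm::MCOperand::CreateReg(X86::EBX));  // index
MI.addOperand(llvm::MCOperand::CreateExpr(
    llvm::MCConstantExpr::Create(4, Ctx)));           // displacement
MI.addOperand(llvm::MCOperand::CreateReg(0));         // no segment
// With Op == 0, printMemReference would emit: 4(%eax,%ebx,2)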
diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h
new file mode 100644
index 0000000..5f28fa4
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h
@@ -0,0 +1,86 @@
+//===-- X86ATTInstPrinter.h - Convert X86 MCInst to assembly syntax -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an X86 MCInst to AT&T style .s file syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_ATT_INST_PRINTER_H
+#define X86_ATT_INST_PRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+ class MCOperand;
+
+class X86ATTInstPrinter : public MCInstPrinter {
+public:
+ X86ATTInstPrinter(raw_ostream &O, const MCAsmInfo &MAI)
+ : MCInstPrinter(O, MAI) {}
+
+
+ virtual void printInst(const MCInst *MI);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI);
+ static const char *getRegisterName(unsigned RegNo);
+
+
+ void printOperand(const MCInst *MI, unsigned OpNo,
+ const char *Modifier = 0);
+ void printMemReference(const MCInst *MI, unsigned Op);
+ void printLeaMemReference(const MCInst *MI, unsigned Op);
+ void printSSECC(const MCInst *MI, unsigned Op);
+ void print_pcrel_imm(const MCInst *MI, unsigned OpNo);
+
+ void printopaquemem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+
+ void printi8mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi16mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi32mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi64mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi128mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf32mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf64mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf80mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf128mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printlea32mem(const MCInst *MI, unsigned OpNo) {
+ printLeaMemReference(MI, OpNo);
+ }
+ void printlea64mem(const MCInst *MI, unsigned OpNo) {
+ printLeaMemReference(MI, OpNo);
+ }
+ void printlea64_32mem(const MCInst *MI, unsigned OpNo) {
+ printLeaMemReference(MI, OpNo);
+ }
+};
+
+}
+
+#endif
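
As a hedged usage sketch of this header (assuming X86::MOV32rr from X86GenInstrNames.inc and the X86::EAX/EBX register enumerators), the new printer consumes a fully lowered MCInst rather than a MachineInstr:

#include "X86ATTInstPrinter.h"
#include "llvm/MC/MCInst.h"

// Illustration only: printing a lowered MCInst through the AT&T printer.
void printExample(llvm::raw_ostream &OS, const llvm::MCAsmInfo &MAI) {
  llvm::MCInst MI;
  MI.setOpcode(X86::MOV32rr);                           // movl
  MI.addOperand(llvm::MCOperand::CreateReg(X86::EAX));  // destination
  MI.addOperand(llvm::MCOperand::CreateReg(X86::EBX));  // source
  llvm::X86ATTInstPrinter(OS, MAI).printInst(&MI);      // roughly: movl %ebx, %eax
}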
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
index e5d80a4..2a0290d 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- X86AsmPrinter.cpp - Convert X86 LLVM IR to X86 assembly -----------===//
+//===-- X86AsmPrinter.cpp - Convert X86 LLVM code to AT&T assembly --------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,42 +7,937 @@
//
//===----------------------------------------------------------------------===//
//
-// This file the shared super class printer that converts from our internal
-// representation of machine-dependent LLVM code to Intel and AT&T format
-// assembly language.
-// This printer is the output mechanism used by `llc'.
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to AT&T format assembly
+// language. This printer is the output mechanism used by `llc'.
//
//===----------------------------------------------------------------------===//
-#include "X86ATTAsmPrinter.h"
-#include "X86IntelAsmPrinter.h"
-#include "X86Subtarget.h"
+#define DEBUG_TYPE "asm-printer"
+#include "X86AsmPrinter.h"
+#include "X86ATTInstPrinter.h"
+#include "X86IntelInstPrinter.h"
+#include "X86MCInstLower.h"
+#include "X86.h"
+#include "X86COFF.h"
+#include "X86COFFMachineModuleInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86TargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
using namespace llvm;
-/// createX86CodePrinterPass - Returns a pass that prints the X86 assembly code
-/// for a MachineFunction to the given output stream, using the given target
-/// machine description.
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+//===----------------------------------------------------------------------===//
+// Primitive Helper Functions.
+//===----------------------------------------------------------------------===//
+
+void X86AsmPrinter::printMCInst(const MCInst *MI) {
+ if (MAI->getAssemblerDialect() == 0)
+ X86ATTInstPrinter(O, *MAI).printInstruction(MI);
+ else
+ X86IntelInstPrinter(O, *MAI).printInstruction(MI);
+}
+
+void X86AsmPrinter::PrintPICBaseSymbol() const {
+ // FIXME: Gross const cast hack.
+ X86AsmPrinter *AP = const_cast<X86AsmPrinter*>(this);
+ X86MCInstLower(OutContext, 0, *AP).GetPICBaseSymbol()->print(O, MAI);
+}
+
+void X86AsmPrinter::emitFunctionHeader(const MachineFunction &MF) {
+ unsigned FnAlign = MF.getAlignment();
+ const Function *F = MF.getFunction();
+
+ if (Subtarget->isTargetCygMing()) {
+ X86COFFMachineModuleInfo &COFFMMI =
+ MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
+ COFFMMI.DecorateCygMingName(CurrentFnName, F, *TM.getTargetData());
+ }
+
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM));
+ EmitAlignment(FnAlign, F);
+
+ switch (F->getLinkage()) {
+ default: llvm_unreachable("Unknown linkage type!");
+ case Function::InternalLinkage: // Symbols default to internal.
+ case Function::PrivateLinkage:
+ break;
+ case Function::DLLExportLinkage:
+ case Function::ExternalLinkage:
+ O << "\t.globl\t" << CurrentFnName << '\n';
+ break;
+ case Function::LinkerPrivateLinkage:
+ case Function::LinkOnceAnyLinkage:
+ case Function::LinkOnceODRLinkage:
+ case Function::WeakAnyLinkage:
+ case Function::WeakODRLinkage:
+ if (Subtarget->isTargetDarwin()) {
+ O << "\t.globl\t" << CurrentFnName << '\n';
+ O << MAI->getWeakDefDirective() << CurrentFnName << '\n';
+ } else if (Subtarget->isTargetCygMing()) {
+ O << "\t.globl\t" << CurrentFnName << "\n"
+ "\t.linkonce discard\n";
+ } else {
+ O << "\t.weak\t" << CurrentFnName << '\n';
+ }
+ break;
+ }
+
+ printVisibility(CurrentFnName, F->getVisibility());
+
+ if (Subtarget->isTargetELF())
+ O << "\t.type\t" << CurrentFnName << ",@function\n";
+ else if (Subtarget->isTargetCygMing()) {
+ O << "\t.def\t " << CurrentFnName
+ << ";\t.scl\t" <<
+ (F->hasInternalLinkage() ? COFF::C_STAT : COFF::C_EXT)
+ << ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT)
+ << ";\t.endef\n";
+ }
+
+ O << CurrentFnName << ':';
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << ' ';
+ WriteAsOperand(O, F, /*PrintType=*/false, F->getParent());
+ }
+ O << '\n';
+
+  // Add a workaround for linkonce linkage on Cygwin/MinGW.
+ if (Subtarget->isTargetCygMing() &&
+ (F->hasLinkOnceLinkage() || F->hasWeakLinkage()))
+ O << "Lllvm$workaround$fake$stub$" << CurrentFnName << ":\n";
+}
+
+/// runOnMachineFunction - This uses the printMachineInstruction()
+/// method to print assembly for each instruction.
+///
+bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ const Function *F = MF.getFunction();
+ this->MF = &MF;
+ CallingConv::ID CC = F->getCallingConv();
+
+ SetupMachineFunction(MF);
+ O << "\n\n";
+
+ if (Subtarget->isTargetCOFF()) {
+ X86COFFMachineModuleInfo &COFFMMI =
+ MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
+
+    // Populate the function information map. We don't want to populate
+    // non-stdcall or non-fastcall functions' information right now.
+ if (CC == CallingConv::X86_StdCall || CC == CallingConv::X86_FastCall)
+ COFFMMI.AddFunctionInfo(F, *MF.getInfo<X86MachineFunctionInfo>());
+ }
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+  // Print the 'header' of the function.
+ emitFunctionHeader(MF);
+
+ // Emit pre-function debug and/or EH information.
+ if (MAI->doesSupportDebugInformation() || MAI->doesSupportExceptionHandling())
+ DW->BeginFunction(&MF);
+
+ // Print out code for the function.
+ bool hasAnyRealCode = false;
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ EmitBasicBlockStart(I);
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ // Print the assembly for the instruction.
+ if (!II->isLabel())
+ hasAnyRealCode = true;
+ printMachineInstruction(II);
+ }
+ }
+
+ if (Subtarget->isTargetDarwin() && !hasAnyRealCode) {
+ // If the function is empty, then we need to emit *something*. Otherwise,
+ // the function's label might be associated with something that it wasn't
+ // meant to be associated with. We emit a noop in this situation.
+ // We are assuming inline asms are code.
+ O << "\tnop\n";
+ }
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ O << "\t.size\t" << CurrentFnName << ", .-" << CurrentFnName << '\n';
+
+ // Emit post-function debug information.
+ if (MAI->doesSupportDebugInformation() || MAI->doesSupportExceptionHandling())
+ DW->EndFunction(&MF);
+
+ // Print out jump tables referenced by the function.
+ EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
+ // We didn't modify anything.
+ return false;
+}
+
+/// printSymbolOperand - Print a raw symbol reference operand. This handles
+/// jump tables, constant pools, global addresses and external symbols, all of
+/// which print to a label with various suffixes for relocation types etc.
+void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) {
+ switch (MO.getType()) {
+ default: llvm_unreachable("unknown symbol type!");
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_'
+ << MO.getIndex();
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_'
+ << MO.getIndex();
+ printOffset(MO.getOffset());
+ break;
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MO.getGlobal();
+
+ const char *Suffix = "";
+ if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB)
+ Suffix = "$stub";
+ else if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE)
+ Suffix = "$non_lazy_ptr";
+
+ std::string Name = Mang->getMangledName(GV, Suffix, Suffix[0] != '\0');
+ if (Subtarget->isTargetCygMing()) {
+ X86COFFMachineModuleInfo &COFFMMI =
+ MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
+ COFFMMI.DecorateCygMingName(Name, GV, *TM.getTargetData());
+ }
+
+ // Handle dllimport linkage.
+ if (MO.getTargetFlags() == X86II::MO_DLLIMPORT)
+ Name = "__imp_" + Name;
+
+ if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) {
+ SmallString<128> NameStr;
+ Mang->getNameWithPrefix(NameStr, GV, true);
+ NameStr += "$non_lazy_ptr";
+ MCSymbol *Sym = OutContext.GetOrCreateSymbol(NameStr.str());
+
+ const MCSymbol *&StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(Sym);
+ if (StubSym == 0) {
+ NameStr.clear();
+ Mang->getNameWithPrefix(NameStr, GV, false);
+ StubSym = OutContext.GetOrCreateSymbol(NameStr.str());
+ }
+ } else if (MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE){
+ SmallString<128> NameStr;
+ Mang->getNameWithPrefix(NameStr, GV, true);
+ NameStr += "$non_lazy_ptr";
+ MCSymbol *Sym = OutContext.GetOrCreateSymbol(NameStr.str());
+ const MCSymbol *&StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getHiddenGVStubEntry(Sym);
+ if (StubSym == 0) {
+ NameStr.clear();
+ Mang->getNameWithPrefix(NameStr, GV, false);
+ StubSym = OutContext.GetOrCreateSymbol(NameStr.str());
+ }
+ } else if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) {
+ SmallString<128> NameStr;
+ Mang->getNameWithPrefix(NameStr, GV, true);
+ NameStr += "$stub";
+ MCSymbol *Sym = OutContext.GetOrCreateSymbol(NameStr.str());
+ const MCSymbol *&StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
+ if (StubSym == 0) {
+ NameStr.clear();
+ Mang->getNameWithPrefix(NameStr, GV, false);
+ StubSym = OutContext.GetOrCreateSymbol(NameStr.str());
+ }
+ }
+
+ // If the name begins with a dollar-sign, enclose it in parens. We do this
+ // to avoid having it look like an integer immediate to the assembler.
+ if (Name[0] == '$')
+ O << '(' << Name << ')';
+ else
+ O << Name;
+
+ printOffset(MO.getOffset());
+ break;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ std::string Name = Mang->makeNameProper(MO.getSymbolName());
+ if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) {
+ Name += "$stub";
+ MCSymbol *Sym = OutContext.GetOrCreateSymbol(Name);
+ const MCSymbol *&StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
+ if (StubSym == 0) {
+ Name.erase(Name.end()-5, Name.end());
+ StubSym = OutContext.GetOrCreateSymbol(Name);
+ }
+ }
+
+ // If the name begins with a dollar-sign, enclose it in parens. We do this
+ // to avoid having it look like an integer immediate to the assembler.
+ if (Name[0] == '$')
+ O << '(' << Name << ')';
+ else
+ O << Name;
+ break;
+ }
+ }
+
+ switch (MO.getTargetFlags()) {
+ default:
+ llvm_unreachable("Unknown target flag on GV operand");
+ case X86II::MO_NO_FLAG: // No flag.
+ break;
+ case X86II::MO_DARWIN_NONLAZY:
+ case X86II::MO_DLLIMPORT:
+ case X86II::MO_DARWIN_STUB:
+ // These affect the name of the symbol, not any suffix.
+ break;
+ case X86II::MO_GOT_ABSOLUTE_ADDRESS:
+ O << " + [.-";
+ PrintPICBaseSymbol();
+ O << ']';
+ break;
+ case X86II::MO_PIC_BASE_OFFSET:
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
+ O << '-';
+ PrintPICBaseSymbol();
+ break;
+ case X86II::MO_TLSGD: O << "@TLSGD"; break;
+ case X86II::MO_GOTTPOFF: O << "@GOTTPOFF"; break;
+ case X86II::MO_INDNTPOFF: O << "@INDNTPOFF"; break;
+ case X86II::MO_TPOFF: O << "@TPOFF"; break;
+ case X86II::MO_NTPOFF: O << "@NTPOFF"; break;
+ case X86II::MO_GOTPCREL: O << "@GOTPCREL"; break;
+ case X86II::MO_GOT: O << "@GOT"; break;
+ case X86II::MO_GOTOFF: O << "@GOTOFF"; break;
+ case X86II::MO_PLT: O << "@PLT"; break;
+ }
+}
+
+/// print_pcrel_imm - This is used to print an immediate value that ends up
+/// being encoded as a pc-relative value. These print slightly differently, for
+/// example, a $ is not emitted.
+void X86AsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ switch (MO.getType()) {
+ default: llvm_unreachable("Unknown pcrel immediate operand");
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ GetMBBSymbol(MO.getMBB()->getNumber())->print(O, MAI);
+ return;
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ printSymbolOperand(MO);
+ return;
+ }
+}
+
+
+void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *Modifier) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ switch (MO.getType()) {
+ default: llvm_unreachable("unknown operand type!");
+ case MachineOperand::MO_Register: {
+ O << '%';
+ unsigned Reg = MO.getReg();
+ if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
+ EVT VT = (strcmp(Modifier+6,"64") == 0) ?
+ MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 :
+ ((strcmp(Modifier+6,"16") == 0) ? MVT::i16 : MVT::i8));
+ Reg = getX86SubSuperRegister(Reg, VT);
+ }
+ O << X86ATTInstPrinter::getRegisterName(Reg);
+ return;
+ }
+
+ case MachineOperand::MO_Immediate:
+ O << '$' << MO.getImm();
+ return;
+
+ case MachineOperand::MO_JumpTableIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol: {
+ O << '$';
+ printSymbolOperand(MO);
+ break;
+ }
+ }
+}
+
+void X86AsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op) {
+ unsigned char value = MI->getOperand(Op).getImm();
+ assert(value <= 7 && "Invalid ssecc argument!");
+ switch (value) {
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ }
+}
+
+void X86AsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op,
+ const char *Modifier) {
+ const MachineOperand &BaseReg = MI->getOperand(Op);
+ const MachineOperand &IndexReg = MI->getOperand(Op+2);
+ const MachineOperand &DispSpec = MI->getOperand(Op+3);
+
+ // If we really don't want to print out (rip), don't.
+ bool HasBaseReg = BaseReg.getReg() != 0;
+ if (HasBaseReg && Modifier && !strcmp(Modifier, "no-rip") &&
+ BaseReg.getReg() == X86::RIP)
+ HasBaseReg = false;
+
+ // HasParenPart - True if we will print out the () part of the mem ref.
+ bool HasParenPart = IndexReg.getReg() || HasBaseReg;
+
+ if (DispSpec.isImm()) {
+ int DispVal = DispSpec.getImm();
+ if (DispVal || !HasParenPart)
+ O << DispVal;
+ } else {
+ assert(DispSpec.isGlobal() || DispSpec.isCPI() ||
+ DispSpec.isJTI() || DispSpec.isSymbol());
+ printSymbolOperand(MI->getOperand(Op+3));
+ }
+
+ if (HasParenPart) {
+ assert(IndexReg.getReg() != X86::ESP &&
+ "X86 doesn't allow scaling by ESP");
+
+ O << '(';
+ if (HasBaseReg)
+ printOperand(MI, Op, Modifier);
+
+ if (IndexReg.getReg()) {
+ O << ',';
+ printOperand(MI, Op+2, Modifier);
+ unsigned ScaleVal = MI->getOperand(Op+1).getImm();
+ if (ScaleVal != 1)
+ O << ',' << ScaleVal;
+ }
+ O << ')';
+ }
+}
+
+void X86AsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op,
+ const char *Modifier) {
+ assert(isMem(MI, Op) && "Invalid memory reference!");
+ const MachineOperand &Segment = MI->getOperand(Op+4);
+ if (Segment.getReg()) {
+ printOperand(MI, Op+4, Modifier);
+ O << ':';
+ }
+ printLeaMemReference(MI, Op, Modifier);
+}
+
+void X86AsmPrinter::printPICJumpTableSetLabel(unsigned uid,
+ const MachineBasicBlock *MBB) const {
+ if (!MAI->getSetDirective())
+ return;
+
+ // We don't need .set machinery if we have GOT-style relocations
+ if (Subtarget->isPICStyleGOT())
+ return;
+
+ O << MAI->getSetDirective() << ' ' << MAI->getPrivateGlobalPrefix()
+ << getFunctionNumber() << '_' << uid << "_set_" << MBB->getNumber() << ',';
+
+ GetMBBSymbol(MBB->getNumber())->print(O, MAI);
+
+ if (Subtarget->isPICStyleRIPRel())
+ O << '-' << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << uid << '\n';
+ else {
+ O << '-';
+ PrintPICBaseSymbol();
+ O << '\n';
+ }
+}
+
+
+void X86AsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op) {
+ PrintPICBaseSymbol();
+ O << '\n';
+ PrintPICBaseSymbol();
+ O << ':';
+}
+
+void X86AsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned uid) const {
+ const char *JTEntryDirective = MJTI->getEntrySize() == 4 ?
+ MAI->getData32bitsDirective() : MAI->getData64bitsDirective();
+
+ O << JTEntryDirective << ' ';
+
+ if (Subtarget->isPICStyleRIPRel() || Subtarget->isPICStyleStubPIC()) {
+ O << MAI->getPrivateGlobalPrefix() << getFunctionNumber()
+ << '_' << uid << "_set_" << MBB->getNumber();
+ } else if (Subtarget->isPICStyleGOT()) {
+ GetMBBSymbol(MBB->getNumber())->print(O, MAI);
+ O << "@GOTOFF";
+ } else
+ GetMBBSymbol(MBB->getNumber())->print(O, MAI);
+}
+
+bool X86AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode) {
+ unsigned Reg = MO.getReg();
+ switch (Mode) {
+ default: return true; // Unknown mode.
+ case 'b': // Print QImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i8);
+ break;
+ case 'h': // Print QImode high register
+ Reg = getX86SubSuperRegister(Reg, MVT::i8, true);
+ break;
+ case 'w': // Print HImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i16);
+ break;
+ case 'k': // Print SImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i32);
+ break;
+ case 'q': // Print DImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i64);
+ break;
+ }
+
+ O << '%' << X86ATTInstPrinter::getRegisterName(Reg);
+ return false;
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
///
-FunctionPass *llvm::createX86CodePrinterPass(raw_ostream &o,
- X86TargetMachine &tm,
- bool verbose) {
- const X86Subtarget *Subtarget = &tm.getSubtarget<X86Subtarget>();
+bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ const MachineOperand &MO = MI->getOperand(OpNo);
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'a': // This is an address. Currently only 'i' and 'r' are expected.
+ if (MO.isImm()) {
+ O << MO.getImm();
+ return false;
+ }
+ if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol()) {
+ printSymbolOperand(MO);
+ return false;
+ }
+ if (MO.isReg()) {
+ O << '(';
+ printOperand(MI, OpNo);
+ O << ')';
+ return false;
+ }
+ return true;
+
+ case 'c': // Don't print "$" before a global var name or constant.
+ if (MO.isImm())
+ O << MO.getImm();
+ else if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol())
+ printSymbolOperand(MO);
+ else
+ printOperand(MI, OpNo);
+ return false;
+
+ case 'A': // Print '*' before a register (it must be a register)
+ if (MO.isReg()) {
+ O << '*';
+ printOperand(MI, OpNo);
+ return false;
+ }
+ return true;
+
+ case 'b': // Print QImode register
+ case 'h': // Print QImode high register
+ case 'w': // Print HImode register
+ case 'k': // Print SImode register
+ case 'q': // Print DImode register
+ if (MO.isReg())
+ return printAsmMRegister(MO, ExtraCode[0]);
+ printOperand(MI, OpNo);
+ return false;
+
+ case 'P': // This is the operand of a call, treat specially.
+ print_pcrel_imm(MI, OpNo);
+ return false;
+
+ case 'n': // Negate the immediate or print a '-' before the operand.
+ // Note: this is a temporary solution. It should be handled target
+ // independently as part of the 'MC' work.
+ if (MO.isImm()) {
+ O << -MO.getImm();
+ return false;
+ }
+ O << '-';
+ }
+ }
- if (Subtarget->isFlavorIntel())
- return new X86IntelAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose);
- return new X86ATTAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose);
+ printOperand(MI, OpNo);
+ return false;
}
-namespace {
- static struct Register {
- Register() {
- X86TargetMachine::registerAsmPrinter(createX86CodePrinterPass);
+bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'b': // Print QImode register
+ case 'h': // Print QImode high register
+ case 'w': // Print HImode register
+ case 'k': // Print SImode register
+    case 'q': // Print DImode register
+ // These only apply to registers, ignore on mem.
+ break;
+ case 'P': // Don't print @PLT, but do print as memory.
+ printMemReference(MI, OpNo, "no-rip");
+ return false;
}
- } Registrator;
+ }
+ printMemReference(MI, OpNo);
+ return false;
+}
+
+
+
+/// printMachineInstruction -- Print out a single X86 LLVM instruction MI in
+/// AT&T syntax to the current output stream.
+///
+void X86AsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+ ++EmittedInsts;
+
+ processDebugLoc(MI, true);
+
+ printInstructionThroughMCStreamer(MI);
+
+ if (VerboseAsm && !MI->getDebugLoc().isUnknown())
+ EmitComments(*MI);
+ O << '\n';
+
+ processDebugLoc(MI, false);
}
-extern "C" int X86AsmPrinterForceLink;
-int X86AsmPrinterForceLink = 0;
+void X86AsmPrinter::PrintGlobalVariable(const GlobalVariable* GVar) {
+ if (!GVar->hasInitializer())
+    return; // External globals require no code.
+
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(GVar)) {
+ if (Subtarget->isTargetDarwin() &&
+ TM.getRelocationModel() == Reloc::Static) {
+ if (GVar->getName() == "llvm.global_ctors")
+ O << ".reference .constructors_used\n";
+ else if (GVar->getName() == "llvm.global_dtors")
+ O << ".reference .destructors_used\n";
+ }
+ return;
+ }
+
+ const TargetData *TD = TM.getTargetData();
+
+ std::string name = Mang->getMangledName(GVar);
+ Constant *C = GVar->getInitializer();
+ const Type *Type = C->getType();
+ unsigned Size = TD->getTypeAllocSize(Type);
+ unsigned Align = TD->getPreferredAlignmentLog(GVar);
+
+ printVisibility(name, GVar->getVisibility());
+
+ if (Subtarget->isTargetELF())
+ O << "\t.type\t" << name << ",@object\n";
+
+
+ SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GVar, TM);
+ const MCSection *TheSection =
+ getObjFileLowering().SectionForGlobal(GVar, GVKind, Mang, TM);
+ OutStreamer.SwitchSection(TheSection);
+
+ // FIXME: get this stuff from section kind flags.
+ if (C->isNullValue() && !GVar->hasSection() &&
+ // Don't put things that should go in the cstring section into "comm".
+ !TheSection->getKind().isMergeableCString()) {
+ if (GVar->hasExternalLinkage()) {
+ if (const char *Directive = MAI->getZeroFillDirective()) {
+ O << "\t.globl " << name << '\n';
+ O << Directive << "__DATA, __common, " << name << ", "
+ << Size << ", " << Align << '\n';
+ return;
+ }
+ }
+
+ if (!GVar->isThreadLocal() &&
+ (GVar->hasLocalLinkage() || GVar->isWeakForLinker())) {
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+
+ if (MAI->getLCOMMDirective() != NULL) {
+ if (GVar->hasLocalLinkage()) {
+ O << MAI->getLCOMMDirective() << name << ',' << Size;
+ if (Subtarget->isTargetDarwin())
+ O << ',' << Align;
+ } else if (Subtarget->isTargetDarwin() && !GVar->hasCommonLinkage()) {
+ O << "\t.globl " << name << '\n'
+ << MAI->getWeakDefDirective() << name << '\n';
+ EmitAlignment(Align, GVar);
+ O << name << ":";
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << ' ';
+ WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent());
+ }
+ O << '\n';
+ EmitGlobalConstant(C);
+ return;
+ } else {
+ O << MAI->getCOMMDirective() << name << ',' << Size;
+ if (MAI->getCOMMDirectiveTakesAlignment())
+ O << ',' << (MAI->getAlignmentIsInBytes() ? (1 << Align) : Align);
+ }
+ } else {
+ if (!Subtarget->isTargetCygMing()) {
+ if (GVar->hasLocalLinkage())
+ O << "\t.local\t" << name << '\n';
+ }
+ O << MAI->getCOMMDirective() << name << ',' << Size;
+ if (MAI->getCOMMDirectiveTakesAlignment())
+ O << ',' << (MAI->getAlignmentIsInBytes() ? (1 << Align) : Align);
+ }
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << ' ';
+ WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent());
+ }
+ O << '\n';
+ return;
+ }
+ }
+
+ switch (GVar->getLinkage()) {
+ case GlobalValue::CommonLinkage:
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::LinkerPrivateLinkage:
+ if (Subtarget->isTargetDarwin()) {
+ O << "\t.globl " << name << '\n'
+ << MAI->getWeakDefDirective() << name << '\n';
+ } else if (Subtarget->isTargetCygMing()) {
+ O << "\t.globl\t" << name << "\n"
+ "\t.linkonce same_size\n";
+ } else {
+ O << "\t.weak\t" << name << '\n';
+ }
+ break;
+ case GlobalValue::DLLExportLinkage:
+ case GlobalValue::AppendingLinkage:
+    // FIXME: appending linkage variables should go into a section of their
+    // own (or something). For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ // If external or appending, declare as a global symbol
+ O << "\t.globl " << name << '\n';
+ // FALL THROUGH
+ case GlobalValue::PrivateLinkage:
+ case GlobalValue::InternalLinkage:
+ break;
+ default:
+ llvm_unreachable("Unknown linkage type!");
+ }
+
+ EmitAlignment(Align, GVar);
+ O << name << ":";
+  if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << ' ';
+ WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent());
+ }
+ O << '\n';
+
+ EmitGlobalConstant(C);
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ O << "\t.size\t" << name << ", " << Size << '\n';
+}
+
+void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
+ if (Subtarget->isTargetDarwin()) {
+ // All darwin targets use mach-o.
+ TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering());
+
+ MachineModuleInfoMachO &MMIMacho =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ // Output stubs for dynamically-linked functions.
+ MachineModuleInfoMachO::SymbolListTy Stubs;
+
+ Stubs = MMIMacho.GetFnStubList();
+ if (!Stubs.empty()) {
+ const MCSection *TheSection =
+ TLOFMacho.getMachOSection("__IMPORT", "__jump_table",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_SELF_MODIFYING_CODE |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 5, SectionKind::getMetadata());
+ OutStreamer.SwitchSection(TheSection);
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ Stubs[i].first->print(O, MAI);
+ O << ":\n" << "\t.indirect_symbol ";
+ // Get the MCSymbol without the $stub suffix.
+ Stubs[i].second->print(O, MAI);
+ O << "\n\thlt ; hlt ; hlt ; hlt ; hlt\n";
+ }
+ O << '\n';
+
+ Stubs.clear();
+ }
+
+ // Output stubs for external and common global variables.
+ Stubs = MMIMacho.GetGVStubList();
+ if (!Stubs.empty()) {
+ const MCSection *TheSection =
+ TLOFMacho.getMachOSection("__IMPORT", "__pointers",
+ MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS,
+ SectionKind::getMetadata());
+ OutStreamer.SwitchSection(TheSection);
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ Stubs[i].first->print(O, MAI);
+ O << ":\n\t.indirect_symbol ";
+ Stubs[i].second->print(O, MAI);
+ O << "\n\t.long\t0\n";
+ }
+ Stubs.clear();
+ }
+
+ Stubs = MMIMacho.GetHiddenGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
+ EmitAlignment(2);
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ Stubs[i].first->print(O, MAI);
+ O << ":\n" << MAI->getData32bitsDirective();
+ Stubs[i].second->print(O, MAI);
+ O << '\n';
+ }
+ Stubs.clear();
+ }
+
+ // Funny Darwin hack: This flag tells the linker that no global symbols
+ // contain code that falls through to other global symbols (e.g. the obvious
+ // implementation of multiple entry points). If this doesn't occur, the
+ // linker can safely perform dead code stripping. Since LLVM never
+ // generates code that does this, it is always safe to set.
+ O << "\t.subsections_via_symbols\n";
+ }
+
+ if (Subtarget->isTargetCOFF()) {
+ // Necessary for dllexport support
+ std::vector<std::string> DLLExportedFns, DLLExportedGlobals;
+
+ X86COFFMachineModuleInfo &COFFMMI =
+ MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
+ TargetLoweringObjectFileCOFF &TLOFCOFF =
+ static_cast<TargetLoweringObjectFileCOFF&>(getObjFileLowering());
+
+ for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (I->hasDLLExportLinkage())
+ DLLExportedFns.push_back(Mang->getMangledName(I));
+
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (I->hasDLLExportLinkage())
+ DLLExportedGlobals.push_back(Mang->getMangledName(I));
+
+ if (Subtarget->isTargetCygMing()) {
+ // Emit type information for external functions
+ for (X86COFFMachineModuleInfo::stub_iterator I = COFFMMI.stub_begin(),
+ E = COFFMMI.stub_end(); I != E; ++I) {
+ O << "\t.def\t " << I->getKeyData()
+ << ";\t.scl\t" << COFF::C_EXT
+ << ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT)
+ << ";\t.endef\n";
+ }
+ }
+
+    // Output linker support code for dllexported globals on Windows.
+ if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) {
+ OutStreamer.SwitchSection(TLOFCOFF.getCOFFSection(".section .drectve",
+ true,
+ SectionKind::getMetadata()));
+
+ for (unsigned i = 0, e = DLLExportedGlobals.size(); i != e; ++i)
+ O << "\t.ascii \" -export:" << DLLExportedGlobals[i] << ",data\"\n";
+
+ for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i)
+ O << "\t.ascii \" -export:" << DLLExportedFns[i] << "\"\n";
+ }
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Target Registry Stuff
+//===----------------------------------------------------------------------===//
+
+static MCInstPrinter *createX86MCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ raw_ostream &O) {
+ if (SyntaxVariant == 0)
+ return new X86ATTInstPrinter(O, MAI);
+ if (SyntaxVariant == 1)
+ return new X86IntelInstPrinter(O, MAI);
+ return 0;
+}
// Force static initialization.
-extern "C" void LLVMInitializeX86AsmPrinter() { }
+extern "C" void LLVMInitializeX86AsmPrinter() {
+ RegisterAsmPrinter<X86AsmPrinter> X(TheX86_32Target);
+ RegisterAsmPrinter<X86AsmPrinter> Y(TheX86_64Target);
+
+ TargetRegistry::RegisterMCInstPrinter(TheX86_32Target,createX86MCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheX86_64Target,createX86MCInstPrinter);
+}
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
new file mode 100644
index 0000000..0351829
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
@@ -0,0 +1,150 @@
+//===-- X86AsmPrinter.h - Convert X86 LLVM code to assembly -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// AT&T assembly code printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86ASMPRINTER_H
+#define X86ASMPRINTER_H
+
+#include "../X86.h"
+#include "../X86MachineFunctionInfo.h"
+#include "../X86TargetMachine.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class MachineJumpTableInfo;
+class MCContext;
+class MCInst;
+class MCStreamer;
+class MCSymbol;
+
+class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter {
+ const X86Subtarget *Subtarget;
+ public:
+ explicit X86AsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
+ const MCAsmInfo *T, bool V)
+ : AsmPrinter(O, TM, T, V) {
+ Subtarget = &TM.getSubtarget<X86Subtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "X86 AT&T-Style Assembly Printer";
+ }
+
+ const X86Subtarget &getSubtarget() const { return *Subtarget; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<DwarfWriter>();
+ AsmPrinter::getAnalysisUsage(AU);
+ }
+
+
+ virtual void EmitEndOfAsmFile(Module &M);
+
+ void printInstructionThroughMCStreamer(const MachineInstr *MI);
+
+
+ void printMCInst(const MCInst *MI);
+
+ void printSymbolOperand(const MachineOperand &MO);
+
+
+
+ // These methods are used by the tablegen'erated instruction printer.
+ void printOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *Modifier = 0);
+ void print_pcrel_imm(const MachineInstr *MI, unsigned OpNo);
+
+ void printopaquemem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+
+ void printi8mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi16mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi32mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi64mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi128mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf32mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf64mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf80mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf128mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printlea32mem(const MachineInstr *MI, unsigned OpNo) {
+ printLeaMemReference(MI, OpNo);
+ }
+ void printlea64mem(const MachineInstr *MI, unsigned OpNo) {
+ printLeaMemReference(MI, OpNo);
+ }
+ void printlea64_32mem(const MachineInstr *MI, unsigned OpNo) {
+ printLeaMemReference(MI, OpNo, "subreg64");
+ }
+
+ bool printAsmMRegister(const MachineOperand &MO, char Mode);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+
+ void printMachineInstruction(const MachineInstr *MI);
+ void printSSECC(const MachineInstr *MI, unsigned Op);
+ void printMemReference(const MachineInstr *MI, unsigned Op,
+ const char *Modifier=NULL);
+ void printLeaMemReference(const MachineInstr *MI, unsigned Op,
+ const char *Modifier=NULL);
+ void printPICJumpTableSetLabel(unsigned uid,
+ const MachineBasicBlock *MBB) const;
+ void printPICJumpTableSetLabel(unsigned uid, unsigned uid2,
+ const MachineBasicBlock *MBB) const {
+ AsmPrinter::printPICJumpTableSetLabel(uid, uid2, MBB);
+ }
+ void printPICJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned uid) const;
+
+ void printPICLabel(const MachineInstr *MI, unsigned Op);
+ void PrintGlobalVariable(const GlobalVariable* GVar);
+
+ void PrintPICBaseSymbol() const;
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+ void emitFunctionHeader(const MachineFunction &MF);
+
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
new file mode 100644
index 0000000..fde5902
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
@@ -0,0 +1,131 @@
+//===-- X86IntelInstPrinter.cpp - Intel assembly instruction printing ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes code for rendering MCInst instances as Intel-style
+// assembly.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "X86IntelInstPrinter.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "X86GenInstrNames.inc"
+using namespace llvm;
+
+// Include the auto-generated portion of the assembly writer.
+#define MachineInstr MCInst
+#define NO_ASM_WRITER_BOILERPLATE
+#include "X86GenAsmWriter1.inc"
+#undef MachineInstr
+
+void X86IntelInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); }
+
+void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op) {
+ switch (MI->getOperand(Op).getImm()) {
+ default: llvm_unreachable("Invalid ssecc argument!");
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ }
+}
+
+/// print_pcrel_imm - This is used to print an immediate value that ends up
+/// being encoded as a pc-relative value.
+void X86IntelInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isImm())
+ O << Op.getImm();
+ else {
+ assert(Op.isExpr() && "unknown pcrel immediate operand");
+ Op.getExpr()->print(O, &MAI);
+ }
+}
+
+static void PrintRegName(raw_ostream &O, StringRef RegName) {
+ for (unsigned i = 0, e = RegName.size(); i != e; ++i)
+ O << (char)toupper(RegName[i]);
+}
+
+void X86IntelInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ const char *Modifier) {
+ assert(Modifier == 0 && "Modifiers should not be used");
+
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ PrintRegName(O, getRegisterName(Op.getReg()));
+ } else if (Op.isImm()) {
+ O << Op.getImm();
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ Op.getExpr()->print(O, &MAI);
+ }
+}
+
+void X86IntelInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op) {
+ const MCOperand &BaseReg = MI->getOperand(Op);
+ unsigned ScaleVal = MI->getOperand(Op+1).getImm();
+ const MCOperand &IndexReg = MI->getOperand(Op+2);
+ const MCOperand &DispSpec = MI->getOperand(Op+3);
+
+ O << '[';
+
+ bool NeedPlus = false;
+ if (BaseReg.getReg()) {
+ printOperand(MI, Op);
+ NeedPlus = true;
+ }
+
+ if (IndexReg.getReg()) {
+ if (NeedPlus) O << " + ";
+ if (ScaleVal != 1)
+ O << ScaleVal << '*';
+ printOperand(MI, Op+2);
+ NeedPlus = true;
+ }
+
+
+ if (!DispSpec.isImm()) {
+ if (NeedPlus) O << " + ";
+ assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
+ DispSpec.getExpr()->print(O, &MAI);
+ } else {
+ int64_t DispVal = DispSpec.getImm();
+ if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) {
+ if (NeedPlus) {
+ if (DispVal > 0)
+ O << " + ";
+ else {
+ O << " - ";
+ DispVal = -DispVal;
+ }
+ }
+ O << DispVal;
+ }
+ }
+
+ O << ']';
+}
+
+void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op) {
+ // If this has a segment register, print it.
+ if (MI->getOperand(Op+4).getReg()) {
+ printOperand(MI, Op+4);
+ O << ':';
+ }
+ printLeaMemReference(MI, Op);
+}
diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h
new file mode 100644
index 0000000..1976177
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h
@@ -0,0 +1,99 @@
+//===-- X86IntelInstPrinter.h - Convert X86 MCInst to assembly syntax -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an X86 MCInst to Intel-style .s file syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_INTEL_INST_PRINTER_H
+#define X86_INTEL_INST_PRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+ class MCOperand;
+
+class X86IntelInstPrinter : public MCInstPrinter {
+public:
+ X86IntelInstPrinter(raw_ostream &O, const MCAsmInfo &MAI)
+ : MCInstPrinter(O, MAI) {}
+
+ virtual void printInst(const MCInst *MI);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI);
+ static const char *getRegisterName(unsigned RegNo);
+
+
+ void printOperand(const MCInst *MI, unsigned OpNo,
+ const char *Modifier = 0);
+ void printMemReference(const MCInst *MI, unsigned Op);
+ void printLeaMemReference(const MCInst *MI, unsigned Op);
+ void printSSECC(const MCInst *MI, unsigned Op);
+ void print_pcrel_imm(const MCInst *MI, unsigned OpNo);
+
+ void printopaquemem(const MCInst *MI, unsigned OpNo) {
+ O << "OPAQUE PTR ";
+ printMemReference(MI, OpNo);
+ }
+
+ void printi8mem(const MCInst *MI, unsigned OpNo) {
+ O << "BYTE PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printi16mem(const MCInst *MI, unsigned OpNo) {
+ O << "WORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printi32mem(const MCInst *MI, unsigned OpNo) {
+ O << "DWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printi64mem(const MCInst *MI, unsigned OpNo) {
+ O << "QWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printi128mem(const MCInst *MI, unsigned OpNo) {
+ O << "XMMWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printf32mem(const MCInst *MI, unsigned OpNo) {
+ O << "DWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printf64mem(const MCInst *MI, unsigned OpNo) {
+ O << "QWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printf80mem(const MCInst *MI, unsigned OpNo) {
+ O << "XWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printf128mem(const MCInst *MI, unsigned OpNo) {
+ O << "XMMWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printlea32mem(const MCInst *MI, unsigned OpNo) {
+ O << "DWORD PTR ";
+ printLeaMemReference(MI, OpNo);
+ }
+ void printlea64mem(const MCInst *MI, unsigned OpNo) {
+ O << "QWORD PTR ";
+ printLeaMemReference(MI, OpNo);
+ }
+ void printlea64_32mem(const MCInst *MI, unsigned OpNo) {
+ O << "QWORD PTR ";
+ printLeaMemReference(MI, OpNo);
+ }
+};
+
+}
+
+#endif
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
new file mode 100644
index 0000000..5ccddf5
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
@@ -0,0 +1,485 @@
+//===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower X86 MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86MCInstLower.h"
+#include "X86AsmPrinter.h"
+#include "X86MCAsmInfo.h"
+#include "X86COFFMachineModuleInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+
+const X86Subtarget &X86MCInstLower::getSubtarget() const {
+ return AsmPrinter.getSubtarget();
+}
+
+MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
+ assert(getSubtarget().isTargetDarwin() &&"Can only get MachO info on darwin");
+ return AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoMachO>();
+}
+
+
+MCSymbol *X86MCInstLower::GetPICBaseSymbol() const {
+ SmallString<60> Name;
+ raw_svector_ostream(Name) << AsmPrinter.MAI->getPrivateGlobalPrefix()
+ << AsmPrinter.getFunctionNumber() << "$pb";
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+
+/// GetGlobalAddressSymbol - Lower an MO_GlobalAddress operand to an
+/// MCSymbol, applying any target flags to the symbol name.
+MCSymbol *X86MCInstLower::
+GetGlobalAddressSymbol(const MachineOperand &MO) const {
+ const GlobalValue *GV = MO.getGlobal();
+
+ bool isImplicitlyPrivate = false;
+ if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE)
+ isImplicitlyPrivate = true;
+
+ SmallString<128> Name;
+ Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate);
+
+ if (getSubtarget().isTargetCygMing()) {
+ X86COFFMachineModuleInfo &COFFMMI =
+ AsmPrinter.MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
+ COFFMMI.DecorateCygMingName(Name, GV, *AsmPrinter.TM.getTargetData());
+ }
+
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case X86II::MO_NO_FLAG: // No flag.
+ case X86II::MO_PIC_BASE_OFFSET: // Doesn't modify symbol name.
+ break;
+ case X86II::MO_DLLIMPORT: {
+ // Handle dllimport linkage.
+ const char *Prefix = "__imp_";
+ Name.insert(Name.begin(), Prefix, Prefix+strlen(Prefix));
+ break;
+ }
+ case X86II::MO_DARWIN_NONLAZY:
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
+ Name += "$non_lazy_ptr";
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+
+ const MCSymbol *&StubSym = getMachOMMI().getGVStubEntry(Sym);
+ if (StubSym == 0) {
+ Name.clear();
+ Mang->getNameWithPrefix(Name, GV, false);
+ StubSym = Ctx.GetOrCreateSymbol(Name.str());
+ }
+ return Sym;
+ }
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: {
+ Name += "$non_lazy_ptr";
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+ const MCSymbol *&StubSym = getMachOMMI().getHiddenGVStubEntry(Sym);
+ if (StubSym == 0) {
+ Name.clear();
+ Mang->getNameWithPrefix(Name, GV, false);
+ StubSym = Ctx.GetOrCreateSymbol(Name.str());
+ }
+ return Sym;
+ }
+ case X86II::MO_DARWIN_STUB: {
+ Name += "$stub";
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+ const MCSymbol *&StubSym = getMachOMMI().getFnStubEntry(Sym);
+ if (StubSym == 0) {
+ Name.clear();
+ Mang->getNameWithPrefix(Name, GV, false);
+ StubSym = Ctx.GetOrCreateSymbol(Name.str());
+ }
+ return Sym;
+ }
+ // FIXME: These probably should be a modifier on the symbol or something??
+ case X86II::MO_TLSGD: Name += "@TLSGD"; break;
+ case X86II::MO_GOTTPOFF: Name += "@GOTTPOFF"; break;
+ case X86II::MO_INDNTPOFF: Name += "@INDNTPOFF"; break;
+ case X86II::MO_TPOFF: Name += "@TPOFF"; break;
+ case X86II::MO_NTPOFF: Name += "@NTPOFF"; break;
+ case X86II::MO_GOTPCREL: Name += "@GOTPCREL"; break;
+ case X86II::MO_GOT: Name += "@GOT"; break;
+ case X86II::MO_GOTOFF: Name += "@GOTOFF"; break;
+ case X86II::MO_PLT: Name += "@PLT"; break;
+ }
+
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *X86MCInstLower::
+GetExternalSymbolSymbol(const MachineOperand &MO) const {
+ SmallString<128> Name;
+ Name += AsmPrinter.MAI->getGlobalPrefix();
+ Name += MO.getSymbolName();
+
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case X86II::MO_NO_FLAG: // No flag.
+ case X86II::MO_GOT_ABSOLUTE_ADDRESS: // Doesn't modify symbol name.
+ case X86II::MO_PIC_BASE_OFFSET: // Doesn't modify symbol name.
+ break;
+ case X86II::MO_DLLIMPORT: {
+ // Handle dllimport linkage.
+ const char *Prefix = "__imp_";
+ Name.insert(Name.begin(), Prefix, Prefix+strlen(Prefix));
+ break;
+ }
+ case X86II::MO_DARWIN_STUB: {
+ Name += "$stub";
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+ const MCSymbol *&StubSym = getMachOMMI().getFnStubEntry(Sym);
+
+ if (StubSym == 0) {
+ Name.erase(Name.end()-5, Name.end());
+ StubSym = Ctx.GetOrCreateSymbol(Name.str());
+ }
+ return Sym;
+ }
+ // FIXME: These probably should be a modifier on the symbol or something??
+ case X86II::MO_TLSGD: Name += "@TLSGD"; break;
+ case X86II::MO_GOTTPOFF: Name += "@GOTTPOFF"; break;
+ case X86II::MO_INDNTPOFF: Name += "@INDNTPOFF"; break;
+ case X86II::MO_TPOFF: Name += "@TPOFF"; break;
+ case X86II::MO_NTPOFF: Name += "@NTPOFF"; break;
+ case X86II::MO_GOTPCREL: Name += "@GOTPCREL"; break;
+ case X86II::MO_GOT: Name += "@GOT"; break;
+ case X86II::MO_GOTOFF: Name += "@GOTOFF"; break;
+ case X86II::MO_PLT: Name += "@PLT"; break;
+ }
+
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *X86MCInstLower::GetJumpTableSymbol(const MachineOperand &MO) const {
+ SmallString<256> Name;
+ raw_svector_ostream(Name) << AsmPrinter.MAI->getPrivateGlobalPrefix() << "JTI"
+ << AsmPrinter.getFunctionNumber() << '_' << MO.getIndex();
+
+ switch (MO.getTargetFlags()) {
+ default:
+ llvm_unreachable("Unknown target flag on GV operand");
+ case X86II::MO_NO_FLAG: // No flag.
+ case X86II::MO_PIC_BASE_OFFSET:
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
+ break;
+ // FIXME: These probably should be a modifier on the symbol or something??
+ case X86II::MO_TLSGD: Name += "@TLSGD"; break;
+ case X86II::MO_GOTTPOFF: Name += "@GOTTPOFF"; break;
+ case X86II::MO_INDNTPOFF: Name += "@INDNTPOFF"; break;
+ case X86II::MO_TPOFF: Name += "@TPOFF"; break;
+ case X86II::MO_NTPOFF: Name += "@NTPOFF"; break;
+ case X86II::MO_GOTPCREL: Name += "@GOTPCREL"; break;
+ case X86II::MO_GOT: Name += "@GOT"; break;
+ case X86II::MO_GOTOFF: Name += "@GOTOFF"; break;
+ case X86II::MO_PLT: Name += "@PLT"; break;
+ }
+
+ // Create a symbol for the name.
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+
+MCSymbol *X86MCInstLower::
+GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
+ SmallString<256> Name;
+ raw_svector_ostream(Name) << AsmPrinter.MAI->getPrivateGlobalPrefix() << "CPI"
+ << AsmPrinter.getFunctionNumber() << '_' << MO.getIndex();
+
+ switch (MO.getTargetFlags()) {
+ default:
+ llvm_unreachable("Unknown target flag on GV operand");
+ case X86II::MO_NO_FLAG: // No flag.
+ case X86II::MO_PIC_BASE_OFFSET:
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
+ break;
+ // FIXME: These probably should be a modifier on the symbol or something??
+ case X86II::MO_TLSGD: Name += "@TLSGD"; break;
+ case X86II::MO_GOTTPOFF: Name += "@GOTTPOFF"; break;
+ case X86II::MO_INDNTPOFF: Name += "@INDNTPOFF"; break;
+ case X86II::MO_TPOFF: Name += "@TPOFF"; break;
+ case X86II::MO_NTPOFF: Name += "@NTPOFF"; break;
+ case X86II::MO_GOTPCREL: Name += "@GOTPCREL"; break;
+ case X86II::MO_GOT: Name += "@GOT"; break;
+ case X86II::MO_GOTOFF: Name += "@GOTOFF"; break;
+ case X86II::MO_PLT: Name += "@PLT"; break;
+ }
+
+ // Create a symbol for the name.
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
+ MCSymbol *Sym) const {
+ // FIXME: We would like an efficient form for this, so we don't have to do a
+ // lot of extra uniquing.
+ const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case X86II::MO_NO_FLAG: // No flag.
+
+ // These affect the name of the symbol, not any suffix.
+ case X86II::MO_DARWIN_NONLAZY:
+ case X86II::MO_DLLIMPORT:
+ case X86II::MO_DARWIN_STUB:
+ case X86II::MO_TLSGD:
+ case X86II::MO_GOTTPOFF:
+ case X86II::MO_INDNTPOFF:
+ case X86II::MO_TPOFF:
+ case X86II::MO_NTPOFF:
+ case X86II::MO_GOTPCREL:
+ case X86II::MO_GOT:
+ case X86II::MO_GOTOFF:
+ case X86II::MO_PLT:
+ break;
+ case X86II::MO_PIC_BASE_OFFSET:
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
+ // Subtract the pic base.
+ Expr = MCBinaryExpr::CreateSub(Expr,
+ MCSymbolRefExpr::Create(GetPICBaseSymbol(), Ctx),
+ Ctx);
+ break;
+ }
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(), Ctx),
+ Ctx);
+ return MCOperand::CreateExpr(Expr);
+}
+
+
+
+static void lower_subreg32(MCInst *MI, unsigned OpNo) {
+  // Convert the register operand at OpNo to its 32-bit sub- or
+  // super-register (e.g. %ax or %rax becomes %eax).
+ unsigned Reg = MI->getOperand(OpNo).getReg();
+ if (Reg != 0)
+ MI->getOperand(OpNo).setReg(getX86SubSuperRegister(Reg, MVT::i32));
+}
+
+static void lower_lea64_32mem(MCInst *MI, unsigned OpNo) {
+ // Convert registers in the addr mode according to subreg64.
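+  // For example, a memory operand such as (%ebx,%ecx,4) is rewritten to use
+  // the 64-bit super-registers, i.e. (%rbx,%rcx,4).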
+ for (unsigned i = 0; i != 4; ++i) {
+ if (!MI->getOperand(OpNo+i).isReg()) continue;
+
+ unsigned Reg = MI->getOperand(OpNo+i).getReg();
+ if (Reg == 0) continue;
+
+ MI->getOperand(OpNo+i).setReg(getX86SubSuperRegister(Reg, MVT::i64));
+ }
+}
+
+
+
+void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default:
+ MI->dump();
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ AsmPrinter.GetMBBSymbol(MO.getMBB()->getNumber()), Ctx));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = LowerSymbolOperand(MO, GetJumpTableSymbol(MO));
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO));
+ break;
+ }
+
+ OutMI.addOperand(MCOp);
+ }
+
+ // Handle a few special cases to eliminate operand modifiers.
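+  // For example, MOVZX16rr8 (movzbw) is emitted as MOVZX32rr8 (movzbl) with
+  // the destination rewritten to its 32-bit super-register: the low 16 bits
+  // end up the same, and no subreg operand modifier is needed to print it.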
+ switch (OutMI.getOpcode()) {
+ case X86::LEA64_32r: // Handle 'subreg rewriting' for the lea64_32mem operand.
+ lower_lea64_32mem(&OutMI, 1);
+ break;
+ case X86::MOV16r0:
+ OutMI.setOpcode(X86::MOV32r0);
+ lower_subreg32(&OutMI, 0);
+ break;
+ case X86::MOVZX16rr8:
+ OutMI.setOpcode(X86::MOVZX32rr8);
+ lower_subreg32(&OutMI, 0);
+ break;
+ case X86::MOVZX16rm8:
+ OutMI.setOpcode(X86::MOVZX32rm8);
+ lower_subreg32(&OutMI, 0);
+ break;
+ case X86::MOVSX16rr8:
+ OutMI.setOpcode(X86::MOVSX32rr8);
+ lower_subreg32(&OutMI, 0);
+ break;
+ case X86::MOVSX16rm8:
+ OutMI.setOpcode(X86::MOVSX32rm8);
+ lower_subreg32(&OutMI, 0);
+ break;
+ case X86::MOVZX64rr32:
+ OutMI.setOpcode(X86::MOV32rr);
+ lower_subreg32(&OutMI, 0);
+ break;
+ case X86::MOVZX64rm32:
+ OutMI.setOpcode(X86::MOV32rm);
+ lower_subreg32(&OutMI, 0);
+ break;
+ case X86::MOV64ri64i32:
+ OutMI.setOpcode(X86::MOV32ri);
+ lower_subreg32(&OutMI, 0);
+ break;
+ case X86::MOVZX64rr8:
+ OutMI.setOpcode(X86::MOVZX32rr8);
+ lower_subreg32(&OutMI, 0);
+ break;
+ case X86::MOVZX64rm8:
+ OutMI.setOpcode(X86::MOVZX32rm8);
+ lower_subreg32(&OutMI, 0);
+ break;
+ case X86::MOVZX64rr16:
+ OutMI.setOpcode(X86::MOVZX32rr16);
+ lower_subreg32(&OutMI, 0);
+ break;
+ case X86::MOVZX64rm16:
+ OutMI.setOpcode(X86::MOVZX32rm16);
+ lower_subreg32(&OutMI, 0);
+ break;
+ }
+}
+
+
+
+void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
+ X86MCInstLower MCInstLowering(OutContext, Mang, *this);
+ switch (MI->getOpcode()) {
+ case TargetInstrInfo::DBG_LABEL:
+ case TargetInstrInfo::EH_LABEL:
+ case TargetInstrInfo::GC_LABEL:
+ printLabel(MI);
+ return;
+ case TargetInstrInfo::INLINEASM:
+ O << '\t';
+ printInlineAsm(MI);
+ return;
+ case TargetInstrInfo::IMPLICIT_DEF:
+ printImplicitDef(MI);
+ return;
+ case TargetInstrInfo::KILL:
+ return;
+ case X86::MOVPC32r: {
+ MCInst TmpInst;
+ // This is a pseudo op for a two instruction sequence with a label, which
+ // looks like:
+ // call "L1$pb"
+ // "L1$pb":
+ // popl %esi
+
+ // Emit the call.
+ MCSymbol *PICBase = MCInstLowering.GetPICBaseSymbol();
+ TmpInst.setOpcode(X86::CALLpcrel32);
+ // FIXME: We would like an efficient form for this, so we don't have to do a
+ // lot of extra uniquing.
+ TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::Create(PICBase,
+ OutContext)));
+ printMCInst(&TmpInst);
+ O << '\n';
+
+ // Emit the label.
+ OutStreamer.EmitLabel(PICBase);
+
+ // popl $reg
+ TmpInst.setOpcode(X86::POP32r);
+ TmpInst.getOperand(0) = MCOperand::CreateReg(MI->getOperand(0).getReg());
+ printMCInst(&TmpInst);
+ return;
+ }
+
+ case X86::ADD32ri: {
+ // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
+ if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
+ break;
+
+ // Okay, we have something like:
+ // EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)
+
+ // For this, we want to print something like:
+ // MYGLOBAL + (. - PICBASE)
+ // However, we can't generate a ".", so just emit a new label here and refer
+ // to it. We know that this operand flag occurs at most once per function.
+ SmallString<64> Name;
+ raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix()
+ << "picbaseref" << getFunctionNumber();
+ MCSymbol *DotSym = OutContext.GetOrCreateSymbol(Name.str());
+ OutStreamer.EmitLabel(DotSym);
+
+ // Now that we have emitted the label, lower the complex operand expression.
+ MCSymbol *OpSym = MCInstLowering.GetExternalSymbolSymbol(MI->getOperand(2));
+
+ const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext);
+ const MCExpr *PICBase =
+ MCSymbolRefExpr::Create(MCInstLowering.GetPICBaseSymbol(), OutContext);
+ DotExpr = MCBinaryExpr::CreateSub(DotExpr, PICBase, OutContext);
+
+ DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext),
+ DotExpr, OutContext);
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::ADD32ri);
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+ TmpInst.addOperand(MCOperand::CreateExpr(DotExpr));
+ printMCInst(&TmpInst);
+ return;
+ }
+ }
+
+ MCInst TmpInst;
+ MCInstLowering.Lower(MI, TmpInst);
+
+
+ printMCInst(&TmpInst);
+}
+
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.h b/lib/Target/X86/AsmPrinter/X86MCInstLower.h
new file mode 100644
index 0000000..fa25b90
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.h
@@ -0,0 +1,54 @@
+//===-- X86MCInstLower.h - Lower MachineInstr to MCInst -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_MCINSTLOWER_H
+#define X86_MCINSTLOWER_H
+
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+ class MCContext;
+ class MCInst;
+ class MCOperand;
+ class MCSymbol;
+ class MachineInstr;
+ class MachineModuleInfoMachO;
+ class MachineOperand;
+ class Mangler;
+ class X86AsmPrinter;
+ class X86Subtarget;
+
+/// X86MCInstLower - This class is used to lower a MachineInstr into an MCInst.
+class VISIBILITY_HIDDEN X86MCInstLower {
+ MCContext &Ctx;
+ Mangler *Mang;
+ X86AsmPrinter &AsmPrinter;
+
+ const X86Subtarget &getSubtarget() const;
+public:
+ X86MCInstLower(MCContext &ctx, Mangler *mang, X86AsmPrinter &asmprinter)
+ : Ctx(ctx), Mang(mang), AsmPrinter(asmprinter) {}
+
+ void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+ MCSymbol *GetPICBaseSymbol() const;
+
+ MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const;
+ MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+
+private:
+ MachineModuleInfoMachO &getMachOMMI() const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 7ea0e51..3ad65fb 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -7,13 +7,15 @@ tablegen(X86GenInstrNames.inc -gen-instr-enums)
tablegen(X86GenInstrInfo.inc -gen-instr-desc)
tablegen(X86GenAsmWriter.inc -gen-asm-writer)
tablegen(X86GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
+tablegen(X86GenAsmMatcher.inc -gen-asm-matcher)
tablegen(X86GenDAGISel.inc -gen-dag-isel)
tablegen(X86GenFastISel.inc -gen-fast-isel)
tablegen(X86GenCallingConv.inc -gen-callingconv)
tablegen(X86GenSubtarget.inc -gen-subtarget)
-add_llvm_target(X86CodeGen
+set(sources
X86CodeEmitter.cpp
+ X86COFFMachineModuleInfo.cpp
X86ELFWriterInfo.cpp
X86FloatingPoint.cpp
X86FloatingPointRegKill.cpp
@@ -21,11 +23,19 @@ add_llvm_target(X86CodeGen
X86ISelLowering.cpp
X86InstrInfo.cpp
X86JITInfo.cpp
+ X86MCAsmInfo.cpp
X86RegisterInfo.cpp
X86Subtarget.cpp
- X86TargetAsmInfo.cpp
X86TargetMachine.cpp
+ X86TargetObjectFile.cpp
X86FastISel.cpp
)
+if( CMAKE_CL_64 )
+ enable_language(ASM_MASM)
+ set(sources ${sources} X86CompilationCallback_Win64.asm)
+endif()
+
+add_llvm_target(X86CodeGen ${sources})
+
target_link_libraries (LLVMX86CodeGen LLVMSelectionDAG)
diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile
index 44f1c5d..220831d 100644
--- a/lib/Target/X86/Makefile
+++ b/lib/Target/X86/Makefile
@@ -13,11 +13,11 @@ TARGET = X86
# Make sure that tblgen is run, first thing.
BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \
X86GenRegisterInfo.inc X86GenInstrNames.inc \
- X86GenInstrInfo.inc X86GenAsmWriter.inc \
+ X86GenInstrInfo.inc X86GenAsmWriter.inc X86GenAsmMatcher.inc \
X86GenAsmWriter1.inc X86GenDAGISel.inc \
X86GenFastISel.inc \
X86GenCallingConv.inc X86GenSubtarget.inc
-DIRS = AsmPrinter
+DIRS = AsmPrinter AsmParser TargetInfo
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt
index ad12137..e8f7c5d 100644
--- a/lib/Target/X86/README-X86-64.txt
+++ b/lib/Target/X86/README-X86-64.txt
@@ -249,3 +249,52 @@ lowered return value, and it would free non-C frontends from a
complication only required by a C-based ABI.
//===---------------------------------------------------------------------===//
+
+We get a redundant zero extension for code like this:
+
+int mask[1000];
+int foo(unsigned x) {
+ if (x < 10)
+ x = x * 45;
+ else
+ x = x * 78;
+ return mask[x];
+}
+
+_foo:
+LBB1_0: ## entry
+ cmpl $9, %edi
+ jbe LBB1_3 ## bb
+LBB1_1: ## bb1
+ imull $78, %edi, %eax
+LBB1_2: ## bb2
+ movl %eax, %eax <----
+ movq _mask@GOTPCREL(%rip), %rcx
+ movl (%rcx,%rax,4), %eax
+ ret
+LBB1_3: ## bb
+ imull $45, %edi, %eax
+ jmp LBB1_2 ## bb2
+
+Before regalloc, we have:
+
+ %reg1025<def> = IMUL32rri8 %reg1024, 45, %EFLAGS<imp-def>
+ JMP mbb<bb2,0x203afb0>
+ Successors according to CFG: 0x203afb0 (#3)
+
+bb1: 0x203af60, LLVM BB @0x1e02310, ID#2:
+ Predecessors according to CFG: 0x203aec0 (#0)
+ %reg1026<def> = IMUL32rri8 %reg1024, 78, %EFLAGS<imp-def>
+ Successors according to CFG: 0x203afb0 (#3)
+
+bb2: 0x203afb0, LLVM BB @0x1e02340, ID#3:
+ Predecessors according to CFG: 0x203af10 (#1) 0x203af60 (#2)
+ %reg1027<def> = PHI %reg1025, mbb<bb,0x203af10>,
+ %reg1026, mbb<bb1,0x203af60>
+ %reg1029<def> = MOVZX64rr32 %reg1027
+
+so we'd have to know that IMUL32rri8 leaves the high 32 bits zero extended
+and be able to recognize the zero extend. This could presumably also be
+implemented if we had whole-function SelectionDAGs.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 4464878..046d35c 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1932,3 +1932,23 @@ Replacing an icmp+select with a shift should always be considered profitable in
instcombine.
//===---------------------------------------------------------------------===//
+
+Re-implement the atomic builtins __sync_add_and_fetch() and
+__sync_sub_and_fetch() properly.
+
+When the return value is not used (i.e. we only care about the value in
+memory), x86 does not have to use a value-returning sequence (such as
+lock xadd or a cmpxchg loop) to implement these. Instead, it can use add,
+sub, inc, or dec instructions with the "lock" prefix.
+
+This is currently implemented using a bit of an instruction selection trick.
+The issue is that the target-independent pattern produces one output and a
+chain, and we want to map it onto one that just outputs a chain. The current
+trick is to select it into a MERGE_VALUES with the first definition being an
+implicit_def. The proper solution is to add new ISD opcodes for the no-output
+variant; the DAG combiner can then transform the node before it gets to
+target node selection.
+
+Problem #2 is that we are adding a whole bunch of x86 atomic instructions
+when in fact these instructions are identical to the non-locked versions. We
+need a way to attach target-specific information to target nodes and have
+this information carried over to machine instructions. The asm printer (or
+JIT) can then use this information to add the "lock" prefix.
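+
+To illustrate the first point, a sketch ('counter' and 'inc' are made-up
+names):
+
+int counter;
+void inc() { __sync_add_and_fetch(&counter, 1); }   // result unused
+
+could then be compiled down to just:
+
+_inc:
+	lock addl	$1, _counter
+	ret
+
+rather than a 'lock xadd' (or a load/add/cmpxchg sequence) whose result
+register is dead.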
diff --git a/lib/Target/X86/TargetInfo/CMakeLists.txt b/lib/Target/X86/TargetInfo/CMakeLists.txt
new file mode 100644
index 0000000..90be9f5
--- /dev/null
+++ b/lib/Target/X86/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMX86Info
+ X86TargetInfo.cpp
+ )
+
+add_dependencies(LLVMX86Info X86CodeGenTable_gen)
diff --git a/lib/Target/X86/TargetInfo/Makefile b/lib/Target/X86/TargetInfo/Makefile
new file mode 100644
index 0000000..6677d4b
--- /dev/null
+++ b/lib/Target/X86/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/X86/TargetInfo/Makefile ------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMX86Info
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/TargetInfo/X86TargetInfo.cpp b/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
new file mode 100644
index 0000000..08d4d84
--- /dev/null
+++ b/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
@@ -0,0 +1,23 @@
+//===-- X86TargetInfo.cpp - X86 Target Implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheX86_32Target, llvm::TheX86_64Target;
+
+extern "C" void LLVMInitializeX86TargetInfo() {
+ RegisterTarget<Triple::x86, /*HasJIT=*/true>
+ X(TheX86_32Target, "x86", "32-bit X86: Pentium-Pro and above");
+
+ RegisterTarget<Triple::x86_64, /*HasJIT=*/true>
+ Y(TheX86_64Target, "x86-64", "64-bit X86: EM64T and AMD64");
+}
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 22de3f6..a167118 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -22,8 +22,10 @@ namespace llvm {
class X86TargetMachine;
class FunctionPass;
class MachineCodeEmitter;
+class MCCodeEmitter;
class JITCodeEmitter;
-class raw_ostream;
+class Target;
+class formatted_raw_ostream;
/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
@@ -42,13 +44,6 @@ FunctionPass *createX86FloatingPointStackifierPass();
///
FunctionPass *createX87FPRegKillInserterPass();
-/// createX86CodePrinterPass - Returns a pass that prints the X86
-/// assembly code for a MachineFunction to the given output stream,
-/// using the given target machine description.
-///
-FunctionPass *createX86CodePrinterPass(raw_ostream &o, X86TargetMachine &tm,
- bool Verbose);
-
/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
/// to the specified MCE object.
@@ -56,6 +51,10 @@ FunctionPass *createX86CodeEmitterPass(X86TargetMachine &TM,
MachineCodeEmitter &MCE);
FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
JITCodeEmitter &JCE);
+FunctionPass *createX86ObjectCodeEmitterPass(X86TargetMachine &TM,
+ ObjectCodeEmitter &OCE);
+
+MCCodeEmitter *createX86MCCodeEmitter(const Target &, TargetMachine &TM);
/// createX86EmitCodeToMemory - Returns a pass that converts a register
/// allocated function into raw machine code in a dynamically
@@ -68,6 +67,8 @@ FunctionPass *createEmitX86CodeToMemory();
///
FunctionPass *createX86MaxStackAlignmentCalculatorPass();
+extern Target TheX86_32Target, TheX86_64Target;
+
} // End llvm namespace
// Defines symbolic names for X86 registers. This defines a mapping from
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 47861d5..da467fe 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -19,12 +19,17 @@ include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
// X86 Subtarget features.
//===----------------------------------------------------------------------===//
-
+
+def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
+ "Enable conditional move instructions">;
+
def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",
"Enable MMX instructions">;
def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
"Enable SSE instructions",
- [FeatureMMX]>;
+ // SSE codegen depends on cmovs, and all
+ // SSE1+ processors support them.
+ [FeatureMMX, FeatureCMOV]>;
def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
"Enable SSE2 instructions",
[FeatureSSE1]>;
@@ -76,8 +81,8 @@ def : Proc<"i586", []>;
def : Proc<"pentium", []>;
def : Proc<"pentium-mmx", [FeatureMMX]>;
def : Proc<"i686", []>;
-def : Proc<"pentiumpro", []>;
-def : Proc<"pentium2", [FeatureMMX]>;
+def : Proc<"pentiumpro", [FeatureCMOV]>;
+def : Proc<"pentium2", [FeatureMMX, FeatureCMOV]>;
def : Proc<"pentium3", [FeatureSSE1]>;
def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>;
def : Proc<"pentium4", [FeatureSSE2]>;
@@ -178,21 +183,34 @@ include "X86CallingConv.td"
// Assembly Printers
//===----------------------------------------------------------------------===//
+// Currently the X86 assembly parser only supports AT&T syntax.
+def ATTAsmParser : AsmParser {
+ string AsmParserClassName = "ATTAsmParser";
+ int Variant = 0;
+
+ // Discard comments in assembly strings.
+ string CommentDelimiter = "#";
+
+ // Recognize hard coded registers.
+ string RegisterPrefix = "%";
+}
+
// The X86 target supports two different syntaxes for emitting machine code.
// This is controlled by the -x86-asm-syntax={att|intel}
def ATTAsmWriter : AsmWriter {
- string AsmWriterClassName = "ATTAsmPrinter";
+ string AsmWriterClassName = "ATTInstPrinter";
int Variant = 0;
}
def IntelAsmWriter : AsmWriter {
- string AsmWriterClassName = "IntelAsmPrinter";
+ string AsmWriterClassName = "IntelInstPrinter";
int Variant = 1;
}
-
def X86 : Target {
// Information about the instructions...
let InstructionSet = X86InstrInfo;
+ let AssemblyParsers = [ATTAsmParser];
+
let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
}
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.cpp b/lib/Target/X86/X86COFFMachineModuleInfo.cpp
new file mode 100644
index 0000000..01c4fcf
--- /dev/null
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.cpp
@@ -0,0 +1,123 @@
+//===-- X86COFFMachineModuleInfo.cpp - X86 COFF MMI Implementation --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an MMI implementation for X86 COFF (Windows) targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86COFFMachineModuleInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+X86COFFMachineModuleInfo::X86COFFMachineModuleInfo(const MachineModuleInfo &) {
+}
+X86COFFMachineModuleInfo::~X86COFFMachineModuleInfo() {
+}
+
+void X86COFFMachineModuleInfo::AddFunctionInfo(const Function *F,
+ const X86MachineFunctionInfo &Val) {
+ FunctionInfoMap[F] = Val;
+}
+
+
+
+static X86MachineFunctionInfo calculateFunctionInfo(const Function *F,
+ const TargetData &TD) {
+ X86MachineFunctionInfo Info;
+ uint64_t Size = 0;
+
+ switch (F->getCallingConv()) {
+ case CallingConv::X86_StdCall:
+ Info.setDecorationStyle(StdCall);
+ break;
+ case CallingConv::X86_FastCall:
+ Info.setDecorationStyle(FastCall);
+ break;
+ default:
+ return Info;
+ }
+
+ unsigned argNum = 1;
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI, ++argNum) {
+ const Type* Ty = AI->getType();
+
+ // 'Dereference' type in case of byval parameter attribute
+ if (F->paramHasAttr(argNum, Attribute::ByVal))
+ Ty = cast<PointerType>(Ty)->getElementType();
+
+ // Size should be aligned to DWORD boundary
+ Size += ((TD.getTypeAllocSize(Ty) + 3)/4)*4;
+ }
+
+  // Argument areas will never come anywhere near 4GiB, so truncating the
+  // size to 'unsigned' is safe.
+ Info.setBytesToPopOnReturn((unsigned int)Size);
+ return Info;
+}
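+
+// For example (illustration only): for a stdcall function taking (int,
+// double), the loop above computes 4 + 8 = 12, so BytesToPopOnReturn is 12
+// and DecorateCygMingName below appends an '@12' suffix.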
+
+
+/// DecorateCygMingName - Query FunctionInfoMap and use this information to
+/// apply the various name decorations required for Cygwin and MinGW.
+void X86COFFMachineModuleInfo::DecorateCygMingName(SmallVectorImpl<char> &Name,
+ const GlobalValue *GV,
+ const TargetData &TD) {
+ const Function *F = dyn_cast<Function>(GV);
+ if (!F) return;
+
+ // Save function name for later type emission.
+ if (F->isDeclaration())
+ CygMingStubs.insert(StringRef(Name.data(), Name.size()));
+
+ // We don't want to decorate non-stdcall or non-fastcall functions right now
+ CallingConv::ID CC = F->getCallingConv();
+ if (CC != CallingConv::X86_StdCall && CC != CallingConv::X86_FastCall)
+ return;
+
+ const X86MachineFunctionInfo *Info;
+
+ FMFInfoMap::const_iterator info_item = FunctionInfoMap.find(F);
+ if (info_item == FunctionInfoMap.end()) {
+    // Calculate appropriate function info and populate the map.
+ FunctionInfoMap[F] = calculateFunctionInfo(F, TD);
+ Info = &FunctionInfoMap[F];
+ } else {
+ Info = &info_item->second;
+ }
+
+ if (Info->getDecorationStyle() == None) return;
+ const FunctionType *FT = F->getFunctionType();
+
+  // "Pure" variadic functions do not receive an @0 suffix.
+ if (!FT->isVarArg() || FT->getNumParams() == 0 ||
+ (FT->getNumParams() == 1 && F->hasStructRetAttr()))
+ raw_svector_ostream(Name) << '@' << Info->getBytesToPopOnReturn();
+
+ if (Info->getDecorationStyle() == FastCall) {
+ if (Name[0] == '_')
+ Name[0] = '@';
+ else
+ Name.insert(Name.begin(), '@');
+ }
+}
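+
+// For example (illustration only): a stdcall function 'int foo(int, int)'
+// whose mangled name is '_foo' is decorated to '_foo@8'; with fastcall the
+// same signature becomes '@foo@8'.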
+
+/// DecorateCygMingName - Query FunctionInfoMap and use this information to
+/// apply the various name decorations required for Cygwin and MinGW.
+void X86COFFMachineModuleInfo::DecorateCygMingName(std::string &Name,
+ const GlobalValue *GV,
+ const TargetData &TD) {
+ SmallString<128> NameStr(Name.begin(), Name.end());
+ DecorateCygMingName(NameStr, GV, TD);
+ Name.assign(NameStr.begin(), NameStr.end());
+}
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h
new file mode 100644
index 0000000..afd5525
--- /dev/null
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.h
@@ -0,0 +1,67 @@
+//===-- X86COFFMachineModuleInfo.h - X86 COFF MMI Implementation -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an MMI implementation for X86 COFF (Windows) targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86COFF_MACHINEMODULEINFO_H
+#define X86COFF_MACHINEMODULEINFO_H
+
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/ADT/StringSet.h"
+
+namespace llvm {
+ class X86MachineFunctionInfo;
+ class TargetData;
+
+/// X86COFFMachineModuleInfo - This is a MachineModuleInfoImpl implementation
+/// for X86 COFF targets.
+class X86COFFMachineModuleInfo : public MachineModuleInfoImpl {
+ StringSet<> CygMingStubs;
+
+  // We have to propagate some information about a MachineFunction to the
+  // AsmPrinter. That is fine while we are printing the function itself, since
+  // we have access to the MachineFunction and can get at the appropriate
+  // MachineFunctionInfo. Unfortunately, this is not possible when we are only
+  // printing a reference to a Function (e.g. when calling it): there is no
+  // way to get the corresponding MachineFunction, which may not even have
+  // been created yet. That's why we collect all the necessary information in
+  // this additional structure.
+  //
+  // This structure is used, e.g., for name decoration of stdcall and fastcall
+  // functions, since the decoration depends on the arguments' size.
+ typedef std::map<const Function*, X86MachineFunctionInfo> FMFInfoMap;
+ FMFInfoMap FunctionInfoMap;
+
+public:
+ X86COFFMachineModuleInfo(const MachineModuleInfo &);
+ ~X86COFFMachineModuleInfo();
+
+
+ void DecorateCygMingName(std::string &Name, const GlobalValue *GV,
+ const TargetData &TD);
+ void DecorateCygMingName(SmallVectorImpl<char> &Name, const GlobalValue *GV,
+ const TargetData &TD);
+
+ void AddFunctionInfo(const Function *F, const X86MachineFunctionInfo &Val);
+
+
+ typedef StringSet<>::const_iterator stub_iterator;
+ stub_iterator stub_begin() const { return CygMingStubs.begin(); }
+ stub_iterator stub_end() const { return CygMingStubs.end(); }
+
+
+};
+
+
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index e9fcbd5..d77f039 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -89,7 +89,7 @@ def RetCC_X86_64_C : CallingConv<[
// X86-Win64 C return-value convention.
def RetCC_X86_Win64_C : CallingConv<[
// The X86-Win64 calling convention always returns __m64 values in RAX.
- CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToReg<[RAX]>>,
+ CCIfType<[v8i8, v4i16, v2i32, v1i64], CCBitConvertToType<i64>>,
// And FP in XMM0 only.
CCIfType<[f32], CCAssignToReg<[XMM0]>>,
@@ -137,26 +137,26 @@ def CC_X86_64_C : CallingConv<[
// The 'nest' parameter, if any, is passed in R10.
CCIfNest<CCAssignToReg<[R10]>>,
+ // The first 6 v1i64 vector arguments are passed in GPRs on Darwin.
+ CCIfType<[v1i64],
+ CCIfSubtarget<"isTargetDarwin()",
+ CCBitConvertToType<i64>>>,
+
// The first 6 integer arguments are passed in integer registers.
CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D]>>,
CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>,
-
- // The first 8 FP/Vector arguments are passed in XMM registers.
- CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCIfSubtarget<"hasSSE1()",
- CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
// The first 8 MMX (except for v1i64) vector arguments are passed in XMM
// registers on Darwin.
CCIfType<[v8i8, v4i16, v2i32, v2f32],
CCIfSubtarget<"isTargetDarwin()",
CCIfSubtarget<"hasSSE2()",
- CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>>,
+ CCPromoteToType<v2i64>>>>,
- // The first 8 v1i64 vector arguments are passed in GPRs on Darwin.
- CCIfType<[v1i64],
- CCIfSubtarget<"isTargetDarwin()",
- CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>>,
+ // The first 8 FP/Vector arguments are passed in XMM registers.
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfSubtarget<"hasSSE1()",
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
// Integer/FP values get stored in stack slots that are 8 bytes in size and
// 8-byte aligned if there are no more registers to hold them.
@@ -184,6 +184,13 @@ def CC_X86_Win64_C : CallingConv<[
// The 'nest' parameter, if any, is passed in R10.
CCIfNest<CCAssignToReg<[R10]>>,
+  // 128-bit vectors are passed by pointer.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>,
+
+ // The first 4 MMX vector arguments are passed in GPRs.
+ CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32],
+ CCBitConvertToType<i64>>,
+
// The first 4 integer arguments are passed in integer registers.
CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ],
[XMM0, XMM1, XMM2, XMM3]>>,
@@ -195,24 +202,16 @@ def CC_X86_Win64_C : CallingConv<[
CCAssignToRegWithShadow<[XMM0, XMM1, XMM2, XMM3],
[RCX , RDX , R8 , R9 ]>>,
- // The first 4 MMX vector arguments are passed in GPRs.
- CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32],
- CCAssignToRegWithShadow<[RCX , RDX , R8 , R9 ],
- [XMM0, XMM1, XMM2, XMM3]>>,
-
// Integer/FP values get stored in stack slots that are 8 bytes in size and
- // 16-byte aligned if there are no more registers to hold them.
- CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 16>>,
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
// Long doubles get stack slots whose size and alignment depends on the
// subtarget.
CCIfType<[f80], CCAssignToStack<0, 0>>,
- // Vectors get 16-byte stack slots that are 16-byte aligned.
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
-
- // __m64 vectors get 8-byte stack slots that are 16-byte aligned.
- CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 16>>
+ // __m64 vectors get 8-byte stack slots that are 8-byte aligned.
+ CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
]>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index d5846a0..f942f3f 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -22,21 +22,27 @@
#include "llvm/PassManager.h"
#include "llvm/CodeGen/MachineCodeEmitter.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/ObjectCodeEmitter.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Function.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
STATISTIC(NumEmitted, "Number of machine instructions emitted");
namespace {
-template<class CodeEmitter>
+ template<class CodeEmitter>
class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass {
const X86InstrInfo *II;
const TargetData *TD;
@@ -67,6 +73,7 @@ template<class CodeEmitter>
const TargetInstrDesc *Desc);
void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
AU.addRequired<MachineModuleInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -83,7 +90,7 @@ template<class CodeEmitter>
intptr_t PCAdj = 0);
void emitDisplacementField(const MachineOperand *RelocOp, int DispVal,
- intptr_t PCAdj = 0);
+ intptr_t Adj = 0, bool IsPCRel = true);
void emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeField);
void emitRegModRMByte(unsigned RegOpcodeField);
@@ -95,29 +102,27 @@ template<class CodeEmitter>
intptr_t PCAdj = 0);
unsigned getX86RegNum(unsigned RegNo) const;
-
- bool gvNeedsNonLazyPtr(const GlobalValue *GV);
};
template<class CodeEmitter>
char Emitter<CodeEmitter>::ID = 0;
-}
+} // end anonymous namespace.
/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
/// to the specified templated MachineCodeEmitter object.
-namespace llvm {
-
-FunctionPass *createX86CodeEmitterPass(X86TargetMachine &TM,
- MachineCodeEmitter &MCE) {
+FunctionPass *llvm::createX86CodeEmitterPass(X86TargetMachine &TM,
+ MachineCodeEmitter &MCE) {
return new Emitter<MachineCodeEmitter>(TM, MCE);
}
-FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
- JITCodeEmitter &JCE) {
+FunctionPass *llvm::createX86JITCodeEmitterPass(X86TargetMachine &TM,
+ JITCodeEmitter &JCE) {
return new Emitter<JITCodeEmitter>(TM, JCE);
}
-
-} // end namespace llvm
+FunctionPass *llvm::createX86ObjectCodeEmitterPass(X86TargetMachine &TM,
+ ObjectCodeEmitter &OCE) {
+ return new Emitter<ObjectCodeEmitter>(TM, OCE);
+}
template<class CodeEmitter>
bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
@@ -130,7 +135,8 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
IsPIC = TM.getRelocationModel() == Reloc::PIC_;
do {
- DOUT << "JITTing function '" << MF.getFunction()->getName() << "'\n";
+ DEBUG(errs() << "JITTing function '"
+ << MF.getFunction()->getName() << "'\n");
MCE.startFunction(MF);
for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
MBB != E; ++MBB) {
@@ -172,7 +178,7 @@ void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
intptr_t PCAdj /* = 0 */,
bool NeedStub /* = false */,
bool Indirect /* = false */) {
- intptr_t RelocCST = 0;
+ intptr_t RelocCST = Disp;
if (Reloc == X86::reloc_picrel_word)
RelocCST = PICBaseOffset;
else if (Reloc == X86::reloc_pcrel_word)
@@ -291,53 +297,61 @@ static bool isDisp8(int Value) {
return Value == (signed char)Value;
}
-template<class CodeEmitter>
-bool Emitter<CodeEmitter>::gvNeedsNonLazyPtr(const GlobalValue *GV) {
- // For Darwin, simulate the linktime GOT by using the same non-lazy-pointer
+static bool gvNeedsNonLazyPtr(const MachineOperand &GVOp,
+ const TargetMachine &TM) {
+ // For Darwin-64, simulate the linktime GOT by using the same non-lazy-pointer
// mechanism as 32-bit mode.
- return (!Is64BitMode || TM.getSubtarget<X86Subtarget>().isTargetDarwin()) &&
- TM.getSubtarget<X86Subtarget>().GVRequiresExtraLoad(GV, TM, false);
+ if (TM.getSubtarget<X86Subtarget>().is64Bit() &&
+ !TM.getSubtarget<X86Subtarget>().isTargetDarwin())
+ return false;
+
+ // Return true if this is a reference to a stub containing the address of the
+ // global, not the global itself.
+ return isGlobalStubReference(GVOp.getTargetFlags());
}
template<class CodeEmitter>
void Emitter<CodeEmitter>::emitDisplacementField(const MachineOperand *RelocOp,
- int DispVal, intptr_t PCAdj) {
+ int DispVal,
+ intptr_t Adj /* = 0 */,
+ bool IsPCRel /* = true */) {
// If this is a simple integer displacement that doesn't require a relocation,
// emit it now.
if (!RelocOp) {
emitConstant(DispVal, 4);
return;
}
-
+
// Otherwise, this is something that requires a relocation. Emit it as such
// now.
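+  // Pick the relocation flavor up front: 64-bit code uses a pcrel or
+  // sign-extended absolute word, 32-bit code a picrel or plain absolute word.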
+ unsigned RelocType = Is64BitMode ?
+ (IsPCRel ? X86::reloc_pcrel_word : X86::reloc_absolute_word_sext)
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
if (RelocOp->isGlobal()) {
// In 64-bit static small code model, we could potentially emit absolute.
- // But it's probably not beneficial.
+    // But it's probably not beneficial. If the MCE supports using RIP
+    // directly, do it; otherwise fall back to absolute (determined by
+    // IsPCRel).
// 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative
// 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute
- unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
- : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
bool NeedStub = isa<Function>(RelocOp->getGlobal());
- bool Indirect = gvNeedsNonLazyPtr(RelocOp->getGlobal());
- emitGlobalAddress(RelocOp->getGlobal(), rt, RelocOp->getOffset(),
- PCAdj, NeedStub, Indirect);
+ bool Indirect = gvNeedsNonLazyPtr(*RelocOp, TM);
+ emitGlobalAddress(RelocOp->getGlobal(), RelocType, RelocOp->getOffset(),
+ Adj, NeedStub, Indirect);
+ } else if (RelocOp->isSymbol()) {
+ emitExternalSymbolAddress(RelocOp->getSymbolName(), RelocType);
} else if (RelocOp->isCPI()) {
- unsigned rt = Is64BitMode ? X86::reloc_pcrel_word : X86::reloc_picrel_word;
- emitConstPoolAddress(RelocOp->getIndex(), rt,
- RelocOp->getOffset(), PCAdj);
- } else if (RelocOp->isJTI()) {
- unsigned rt = Is64BitMode ? X86::reloc_pcrel_word : X86::reloc_picrel_word;
- emitJumpTableAddress(RelocOp->getIndex(), rt, PCAdj);
+ emitConstPoolAddress(RelocOp->getIndex(), RelocType,
+ RelocOp->getOffset(), Adj);
} else {
- assert(0 && "Unknown value to relocate!");
+ assert(RelocOp->isJTI() && "Unexpected machine operand!");
+ emitJumpTableAddress(RelocOp->getIndex(), RelocType, Adj);
}
}
template<class CodeEmitter>
void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
- unsigned Op, unsigned RegOpcodeField,
- intptr_t PCAdj) {
+                                            unsigned Op, unsigned RegOpcodeField,
+ intptr_t PCAdj) {
const MachineOperand &Op3 = MI.getOperand(Op+3);
int DispVal = 0;
const MachineOperand *DispForReloc = 0;
@@ -345,15 +359,17 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
// Figure out what sort of displacement we have to handle here.
if (Op3.isGlobal()) {
DispForReloc = &Op3;
+ } else if (Op3.isSymbol()) {
+ DispForReloc = &Op3;
} else if (Op3.isCPI()) {
- if (Is64BitMode || IsPIC) {
+ if (!MCE.earlyResolveAddresses() || Is64BitMode || IsPIC) {
DispForReloc = &Op3;
} else {
DispVal += MCE.getConstantPoolEntryAddress(Op3.getIndex());
DispVal += Op3.getOffset();
}
} else if (Op3.isJTI()) {
- if (Is64BitMode || IsPIC) {
+ if (!MCE.earlyResolveAddresses() || Is64BitMode || IsPIC) {
DispForReloc = &Op3;
} else {
DispVal += MCE.getJumpTableEntryAddress(Op3.getIndex());
@@ -368,17 +384,23 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
unsigned BaseReg = Base.getReg();
+  // Indicate whether the displacement uses a pcrel or absolute reference by
+  // default. MCEs able to resolve addresses on the fly use pcrel; others,
+  // unless explicitly asked to use RIP, use absolute references.
+  bool IsPCRel = MCE.earlyResolveAddresses();
+
// Is a SIB byte needed?
+  // If there is no BaseReg, issue a RIP-relative instruction only if the MCE
+  // can resolve addresses on the fly; otherwise use a SIB byte (Intel Manual
+  // 2A, table 2-7) and absolute references.
if ((!Is64BitMode || DispForReloc || BaseReg != 0) &&
- IndexReg.getReg() == 0 &&
- (BaseReg == 0 || BaseReg == X86::RIP ||
- getX86RegNum(BaseReg) != N86::ESP)) {
- if (BaseReg == 0 ||
- BaseReg == X86::RIP) { // Just a displacement?
+ IndexReg.getReg() == 0 &&
+ ((BaseReg == 0 && MCE.earlyResolveAddresses()) || BaseReg == X86::RIP ||
+ (BaseReg != 0 && getX86RegNum(BaseReg) != N86::ESP))) {
+ if (BaseReg == 0 || BaseReg == X86::RIP) { // Just a displacement?
// Emit special case [disp32] encoding
MCE.emitByte(ModRMByte(0, RegOpcodeField, 5));
-
- emitDisplacementField(DispForReloc, DispVal, PCAdj);
+ emitDisplacementField(DispForReloc, DispVal, PCAdj, true);
} else {
unsigned BaseRegNo = getX86RegNum(BaseReg);
if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) {
@@ -391,7 +413,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
} else {
// Emit the most general non-SIB encoding: [REG+disp32]
MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo));
- emitDisplacementField(DispForReloc, DispVal, PCAdj);
+ emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel);
}
}
@@ -427,13 +449,13 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
unsigned SS = SSTable[Scale.getImm()];
if (BaseReg == 0) {
- // Handle the SIB byte for the case where there is no base. The
- // displacement has already been output.
+ // Handle the SIB byte for the case where there is no base, see Intel
+ // Manual 2A, table 2-7. The displacement has already been output.
unsigned IndexRegNo;
if (IndexReg.getReg())
IndexRegNo = getX86RegNum(IndexReg.getReg());
- else
- IndexRegNo = 4; // For example [ESP+1*<noreg>+4]
+ else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5)
+ IndexRegNo = 4;
emitSIBByte(SS, IndexRegNo, 5);
} else {
unsigned BaseRegNo = getX86RegNum(BaseReg);
@@ -449,21 +471,23 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
if (ForceDisp8) {
emitConstant(DispVal, 1);
} else if (DispVal != 0 || ForceDisp32) {
- emitDisplacementField(DispForReloc, DispVal, PCAdj);
+ emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel);
}
}
}
template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitInstruction(
- const MachineInstr &MI,
- const TargetInstrDesc *Desc) {
- DOUT << MI;
+void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
+ const TargetInstrDesc *Desc) {
+ DEBUG(errs() << MI);
+
+ MCE.processDebugLoc(MI.getDebugLoc(), true);
unsigned Opcode = Desc->Opcode;
// Emit the lock opcode prefix as needed.
- if (Desc->TSFlags & X86II::LOCK) MCE.emitByte(0xF0);
+ if (Desc->TSFlags & X86II::LOCK)
+ MCE.emitByte(0xF0);
// Emit segment override opcode prefix as needed.
switch (Desc->TSFlags & X86II::SegOvrMask) {
@@ -473,18 +497,21 @@ void Emitter<CodeEmitter>::emitInstruction(
case X86II::GS:
MCE.emitByte(0x65);
break;
- default: assert(0 && "Invalid segment!");
+ default: llvm_unreachable("Invalid segment!");
case 0: break; // No segment override!
}
// Emit the repeat opcode prefix as needed.
- if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP) MCE.emitByte(0xF3);
+ if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP)
+ MCE.emitByte(0xF3);
// Emit the operand size opcode prefix as needed.
- if (Desc->TSFlags & X86II::OpSize) MCE.emitByte(0x66);
+ if (Desc->TSFlags & X86II::OpSize)
+ MCE.emitByte(0x66);
// Emit the address size opcode prefix as needed.
- if (Desc->TSFlags & X86II::AdSize) MCE.emitByte(0x67);
+ if (Desc->TSFlags & X86II::AdSize)
+ MCE.emitByte(0x67);
bool Need0FPrefix = false;
switch (Desc->TSFlags & X86II::Op0Mask) {
@@ -493,6 +520,10 @@ void Emitter<CodeEmitter>::emitInstruction(
case X86II::TA: // 0F 3A
Need0FPrefix = true;
break;
+ case X86II::TF: // F2 0F 38
+ MCE.emitByte(0xF2);
+ Need0FPrefix = true;
+ break;
case X86II::REP: break; // already handled.
case X86II::XS: // F3 0F
MCE.emitByte(0xF3);
@@ -508,14 +539,13 @@ void Emitter<CodeEmitter>::emitInstruction(
(((Desc->TSFlags & X86II::Op0Mask)-X86II::D8)
>> X86II::Op0Shift));
break; // Two-byte opcode prefix
- default: assert(0 && "Invalid prefix!");
+ default: llvm_unreachable("Invalid prefix!");
case 0: break; // No prefix!
}
+ // Handle REX prefix.
if (Is64BitMode) {
- // REX prefix
- unsigned REX = X86InstrInfo::determineREX(MI);
- if (REX)
+ if (unsigned REX = X86InstrInfo::determineREX(MI))
MCE.emitByte(0x40 | REX);
}
@@ -524,7 +554,8 @@ void Emitter<CodeEmitter>::emitInstruction(
MCE.emitByte(0x0F);
switch (Desc->TSFlags & X86II::Op0Mask) {
- case X86II::T8: // 0F 38
+ case X86II::TF: // F2 0F 38
+ case X86II::T8: // 0F 38
MCE.emitByte(0x38);
break;
case X86II::TA: // 0F 3A
@@ -543,29 +574,29 @@ void Emitter<CodeEmitter>::emitInstruction(
unsigned char BaseOpcode = II->getBaseOpcodeFor(Desc);
switch (Desc->TSFlags & X86II::FormMask) {
- default: assert(0 && "Unknown FormMask value in X86 MachineCodeEmitter!");
+ default:
+ llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!");
case X86II::Pseudo:
// Remember the current PC offset, this is the PIC relocation
// base address.
switch (Opcode) {
default:
- assert(0 && "psuedo instructions should be removed before code emission");
+      llvm_unreachable("pseudo instructions should be removed before code"
+                       " emission");
break;
- case TargetInstrInfo::INLINEASM: {
+ case TargetInstrInfo::INLINEASM:
// We allow inline assembler nodes with empty bodies - they can
// implicitly define registers, which is ok for JIT.
- if (MI.getOperand(0).getSymbolName()[0]) {
- assert(0 && "JIT does not support inline asm!\n");
- abort();
- }
+ if (MI.getOperand(0).getSymbolName()[0])
+ llvm_report_error("JIT does not support inline asm!");
break;
- }
case TargetInstrInfo::DBG_LABEL:
case TargetInstrInfo::EH_LABEL:
+ case TargetInstrInfo::GC_LABEL:
MCE.emitLabel(MI.getOperand(0).getImm());
break;
case TargetInstrInfo::IMPLICIT_DEF:
- case TargetInstrInfo::DECLARE:
+ case TargetInstrInfo::KILL:
case X86::DWARF_LOC:
case X86::FP_REG_KILL:
break;
@@ -582,73 +613,86 @@ void Emitter<CodeEmitter>::emitInstruction(
}
CurOp = NumOps;
break;
- case X86II::RawFrm:
+ case X86II::RawFrm: {
MCE.emitByte(BaseOpcode);
- if (CurOp != NumOps) {
- const MachineOperand &MO = MI.getOperand(CurOp++);
-
- DOUT << "RawFrm CurOp " << CurOp << "\n";
- DOUT << "isMBB " << MO.isMBB() << "\n";
- DOUT << "isGlobal " << MO.isGlobal() << "\n";
- DOUT << "isSymbol " << MO.isSymbol() << "\n";
- DOUT << "isImm " << MO.isImm() << "\n";
-
- if (MO.isMBB()) {
- emitPCRelativeBlockAddress(MO.getMBB());
- } else if (MO.isGlobal()) {
- // Assume undefined functions may be outside the Small codespace.
- bool NeedStub =
- (Is64BitMode &&
- (TM.getCodeModel() == CodeModel::Large ||
- TM.getSubtarget<X86Subtarget>().isTargetDarwin())) ||
- Opcode == X86::TAILJMPd;
- emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word,
- MO.getOffset(), 0, NeedStub);
- } else if (MO.isSymbol()) {
- emitExternalSymbolAddress(MO.getSymbolName(), X86::reloc_pcrel_word);
- } else if (MO.isImm()) {
- if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) {
- // Fix up immediate operand for pc relative calls.
- intptr_t Imm = (intptr_t)MO.getImm();
- Imm = Imm - MCE.getCurrentPCValue() - 4;
- emitConstant(Imm, X86InstrInfo::sizeOfImm(Desc));
- } else
- emitConstant(MO.getImm(), X86InstrInfo::sizeOfImm(Desc));
- } else {
- assert(0 && "Unknown RawFrm operand!");
- }
+ if (CurOp == NumOps)
+ break;
+
+ const MachineOperand &MO = MI.getOperand(CurOp++);
+
+ DEBUG(errs() << "RawFrm CurOp " << CurOp << "\n");
+ DEBUG(errs() << "isMBB " << MO.isMBB() << "\n");
+ DEBUG(errs() << "isGlobal " << MO.isGlobal() << "\n");
+ DEBUG(errs() << "isSymbol " << MO.isSymbol() << "\n");
+ DEBUG(errs() << "isImm " << MO.isImm() << "\n");
+
+ if (MO.isMBB()) {
+ emitPCRelativeBlockAddress(MO.getMBB());
+ break;
}
+
+ if (MO.isGlobal()) {
+ // Assume undefined functions may be outside the Small codespace.
+ bool NeedStub =
+ (Is64BitMode &&
+ (TM.getCodeModel() == CodeModel::Large ||
+ TM.getSubtarget<X86Subtarget>().isTargetDarwin())) ||
+ Opcode == X86::TAILJMPd;
+ emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word,
+ MO.getOffset(), 0, NeedStub);
+ break;
+ }
+
+ if (MO.isSymbol()) {
+ emitExternalSymbolAddress(MO.getSymbolName(), X86::reloc_pcrel_word);
+ break;
+ }
+
+ assert(MO.isImm() && "Unknown RawFrm operand!");
+ if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) {
+ // Fix up immediate operand for pc relative calls.
+ intptr_t Imm = (intptr_t)MO.getImm();
+ Imm = Imm - MCE.getCurrentPCValue() - 4;
+ emitConstant(Imm, X86InstrInfo::sizeOfImm(Desc));
+ } else
+ emitConstant(MO.getImm(), X86InstrInfo::sizeOfImm(Desc));
break;
-
- case X86II::AddRegFrm:
+ }
+
+ case X86II::AddRegFrm: {
MCE.emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++).getReg()));
- if (CurOp != NumOps) {
- const MachineOperand &MO1 = MI.getOperand(CurOp++);
- unsigned Size = X86InstrInfo::sizeOfImm(Desc);
- if (MO1.isImm())
- emitConstant(MO1.getImm(), Size);
- else {
- unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
- : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
- // This should not occur on Darwin for relocatable objects.
- if (Opcode == X86::MOV64ri)
- rt = X86::reloc_absolute_dword; // FIXME: add X86II flag?
- if (MO1.isGlobal()) {
- bool NeedStub = isa<Function>(MO1.getGlobal());
- bool Indirect = gvNeedsNonLazyPtr(MO1.getGlobal());
- emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
- NeedStub, Indirect);
- } else if (MO1.isSymbol())
- emitExternalSymbolAddress(MO1.getSymbolName(), rt);
- else if (MO1.isCPI())
- emitConstPoolAddress(MO1.getIndex(), rt);
- else if (MO1.isJTI())
- emitJumpTableAddress(MO1.getIndex(), rt);
- }
+ if (CurOp == NumOps)
+ break;
+
+ const MachineOperand &MO1 = MI.getOperand(CurOp++);
+ unsigned Size = X86InstrInfo::sizeOfImm(Desc);
+ if (MO1.isImm()) {
+ emitConstant(MO1.getImm(), Size);
+ break;
}
+
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+ if (Opcode == X86::MOV64ri64i32)
+ rt = X86::reloc_absolute_word; // FIXME: add X86II flag?
+ // This should not occur on Darwin for relocatable objects.
+ if (Opcode == X86::MOV64ri)
+ rt = X86::reloc_absolute_dword; // FIXME: add X86II flag?
+ if (MO1.isGlobal()) {
+ bool NeedStub = isa<Function>(MO1.getGlobal());
+ bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
+ emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
+ NeedStub, Indirect);
+ } else if (MO1.isSymbol())
+ emitExternalSymbolAddress(MO1.getSymbolName(), rt);
+ else if (MO1.isCPI())
+ emitConstPoolAddress(MO1.getIndex(), rt);
+ else if (MO1.isJTI())
+ emitJumpTableAddress(MO1.getIndex(), rt);
break;
+ }
case X86II::MRMDestReg: {
MCE.emitByte(BaseOpcode);
@@ -656,7 +700,8 @@ void Emitter<CodeEmitter>::emitInstruction(
getX86RegNum(MI.getOperand(CurOp+1).getReg()));
CurOp += 2;
if (CurOp != NumOps)
- emitConstant(MI.getOperand(CurOp++).getImm(), X86InstrInfo::sizeOfImm(Desc));
+ emitConstant(MI.getOperand(CurOp++).getImm(),
+ X86InstrInfo::sizeOfImm(Desc));
break;
}
case X86II::MRMDestMem: {
@@ -666,7 +711,8 @@ void Emitter<CodeEmitter>::emitInstruction(
.getReg()));
CurOp += X86AddrNumOperands + 1;
if (CurOp != NumOps)
- emitConstant(MI.getOperand(CurOp++).getImm(), X86InstrInfo::sizeOfImm(Desc));
+ emitConstant(MI.getOperand(CurOp++).getImm(),
+ X86InstrInfo::sizeOfImm(Desc));
break;
}
@@ -729,29 +775,31 @@ void Emitter<CodeEmitter>::emitInstruction(
(Desc->TSFlags & X86II::FormMask)-X86II::MRM0r);
}
- if (CurOp != NumOps) {
- const MachineOperand &MO1 = MI.getOperand(CurOp++);
- unsigned Size = X86InstrInfo::sizeOfImm(Desc);
- if (MO1.isImm())
- emitConstant(MO1.getImm(), Size);
- else {
- unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
- : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
- if (Opcode == X86::MOV64ri32)
- rt = X86::reloc_absolute_word; // FIXME: add X86II flag?
- if (MO1.isGlobal()) {
- bool NeedStub = isa<Function>(MO1.getGlobal());
- bool Indirect = gvNeedsNonLazyPtr(MO1.getGlobal());
- emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
- NeedStub, Indirect);
- } else if (MO1.isSymbol())
- emitExternalSymbolAddress(MO1.getSymbolName(), rt);
- else if (MO1.isCPI())
- emitConstPoolAddress(MO1.getIndex(), rt);
- else if (MO1.isJTI())
- emitJumpTableAddress(MO1.getIndex(), rt);
- }
+ if (CurOp == NumOps)
+ break;
+
+ const MachineOperand &MO1 = MI.getOperand(CurOp++);
+ unsigned Size = X86InstrInfo::sizeOfImm(Desc);
+ if (MO1.isImm()) {
+ emitConstant(MO1.getImm(), Size);
+ break;
}
+
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+ if (Opcode == X86::MOV64ri32)
+ rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag?
+ if (MO1.isGlobal()) {
+ bool NeedStub = isa<Function>(MO1.getGlobal());
+ bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
+ emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
+ NeedStub, Indirect);
+ } else if (MO1.isSymbol())
+ emitExternalSymbolAddress(MO1.getSymbolName(), rt);
+ else if (MO1.isCPI())
+ emitConstPoolAddress(MO1.getIndex(), rt);
+ else if (MO1.isJTI())
+ emitJumpTableAddress(MO1.getIndex(), rt);
break;
}
@@ -768,29 +816,31 @@ void Emitter<CodeEmitter>::emitInstruction(
PCAdj);
CurOp += X86AddrNumOperands;
- if (CurOp != NumOps) {
- const MachineOperand &MO = MI.getOperand(CurOp++);
- unsigned Size = X86InstrInfo::sizeOfImm(Desc);
- if (MO.isImm())
- emitConstant(MO.getImm(), Size);
- else {
- unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
- : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
- if (Opcode == X86::MOV64mi32)
- rt = X86::reloc_absolute_word; // FIXME: add X86II flag?
- if (MO.isGlobal()) {
- bool NeedStub = isa<Function>(MO.getGlobal());
- bool Indirect = gvNeedsNonLazyPtr(MO.getGlobal());
- emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0,
- NeedStub, Indirect);
- } else if (MO.isSymbol())
- emitExternalSymbolAddress(MO.getSymbolName(), rt);
- else if (MO.isCPI())
- emitConstPoolAddress(MO.getIndex(), rt);
- else if (MO.isJTI())
- emitJumpTableAddress(MO.getIndex(), rt);
- }
+ if (CurOp == NumOps)
+ break;
+
+ const MachineOperand &MO = MI.getOperand(CurOp++);
+ unsigned Size = X86InstrInfo::sizeOfImm(Desc);
+ if (MO.isImm()) {
+ emitConstant(MO.getImm(), Size);
+ break;
}
+
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+ if (Opcode == X86::MOV64mi32)
+ rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag?
+ if (MO.isGlobal()) {
+ bool NeedStub = isa<Function>(MO.getGlobal());
+ bool Indirect = gvNeedsNonLazyPtr(MO, TM);
+ emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0,
+ NeedStub, Indirect);
+ } else if (MO.isSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), rt);
+ else if (MO.isCPI())
+ emitConstPoolAddress(MO.getIndex(), rt);
+ else if (MO.isJTI())
+ emitJumpTableAddress(MO.getIndex(), rt);
break;
}
@@ -804,10 +854,264 @@ void Emitter<CodeEmitter>::emitInstruction(
}
if (!Desc->isVariadic() && CurOp != NumOps) {
- cerr << "Cannot encode: ";
- MI.dump();
- cerr << '\n';
- abort();
+#ifndef NDEBUG
+ errs() << "Cannot encode all operands of: " << MI << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+
+ MCE.processDebugLoc(MI.getDebugLoc(), false);
+}
+
+// Adapt the Emitter / CodeEmitter interfaces to MCCodeEmitter.
+//
+// FIXME: This is a total hack designed to allow work on llvm-mc to proceed
+// without being blocked on various cleanups needed to support a clean interface
+// to instruction encoding.
+//
+// Look away!
+
+#include "llvm/DerivedTypes.h"
+
+namespace {
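+/// MCSingleInstructionCodeEmitter - A minimal MachineCodeEmitter that
+/// captures the bytes of a single instruction in a small local buffer so
+/// they can be read back via str().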
+class MCSingleInstructionCodeEmitter : public MachineCodeEmitter {
+ uint8_t Data[256];
+
+public:
+ MCSingleInstructionCodeEmitter() { reset(); }
+
+ void reset() {
+ BufferBegin = Data;
+ BufferEnd = array_endof(Data);
+ CurBufferPtr = Data;
+ }
+
+ StringRef str() {
+ return StringRef(reinterpret_cast<char*>(BufferBegin),
+ CurBufferPtr - BufferBegin);
+ }
+
+ virtual void startFunction(MachineFunction &F) {}
+ virtual bool finishFunction(MachineFunction &F) { return false; }
+ virtual void emitLabel(uint64_t LabelID) {}
+ virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {}
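+  // Returning false forces the emitter down the relocation path rather than
+  // resolving addresses on the fly.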
+ virtual bool earlyResolveAddresses() const { return false; }
+ virtual void addRelocation(const MachineRelocation &MR) { }
+ virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const {
+ return 0;
+ }
+ virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const {
+ return 0;
+ }
+ virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
+ return 0;
+ }
+ virtual uintptr_t getLabelAddress(uint64_t LabelID) const {
+ return 0;
+ }
+ virtual void setModuleInfo(MachineModuleInfo* Info) {}
+};
+
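+/// X86MCCodeEmitter - An MCCodeEmitter shim that converts each MCInst back
+/// into a temporary MachineInstr and reuses the Emitter above to encode it.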
+class X86MCCodeEmitter : public MCCodeEmitter {
+ X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT
+ void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT
+
+private:
+ X86TargetMachine &TM;
+ llvm::Function *DummyF;
+ TargetData *DummyTD;
+ mutable llvm::MachineFunction *DummyMF;
+ llvm::MachineBasicBlock *DummyMBB;
+
+ MCSingleInstructionCodeEmitter *InstrEmitter;
+ Emitter<MachineCodeEmitter> *Emit;
+
+public:
+ X86MCCodeEmitter(X86TargetMachine &_TM) : TM(_TM) {
+ // Verily, thou shouldst avert thine eyes.
+ const llvm::FunctionType *FTy =
+ FunctionType::get(llvm::Type::getVoidTy(getGlobalContext()), false);
+ DummyF = Function::Create(FTy, GlobalValue::InternalLinkage);
+ DummyTD = new TargetData("");
+ DummyMF = new MachineFunction(DummyF, TM);
+ DummyMBB = DummyMF->CreateMachineBasicBlock();
+
+ InstrEmitter = new MCSingleInstructionCodeEmitter();
+ Emit = new Emitter<MachineCodeEmitter>(TM, *InstrEmitter,
+ *TM.getInstrInfo(),
+ *DummyTD, false);
+ }
+ ~X86MCCodeEmitter() {
+ delete Emit;
+ delete InstrEmitter;
+ delete DummyMF;
+ delete DummyF;
+ }
+
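+  // Helpers that translate MCOperands back into MachineOperands; each
+  // returns false if the expected operand kind is not present.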
+ bool AddRegToInstr(const MCInst &MI, MachineInstr *Instr,
+ unsigned Start) const {
+ if (Start + 1 > MI.getNumOperands())
+ return false;
+
+ const MCOperand &Op = MI.getOperand(Start);
+ if (!Op.isReg()) return false;
+
+ Instr->addOperand(MachineOperand::CreateReg(Op.getReg(), false));
+ return true;
+ }
+
+ bool AddImmToInstr(const MCInst &MI, MachineInstr *Instr,
+ unsigned Start) const {
+ if (Start + 1 > MI.getNumOperands())
+ return false;
+
+ const MCOperand &Op = MI.getOperand(Start);
+ if (Op.isImm()) {
+ Instr->addOperand(MachineOperand::CreateImm(Op.getImm()));
+ return true;
+ }
+ if (!Op.isExpr())
+ return false;
+
+ const MCExpr *Expr = Op.getExpr();
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) {
+ Instr->addOperand(MachineOperand::CreateImm(CE->getValue()));
+ return true;
+ }
+
+ // FIXME: Relocation / fixup.
+ Instr->addOperand(MachineOperand::CreateImm(0));
+ return true;
+ }
+
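+  // LEA uses the four-operand memory form (base, scale, index, disp); other
+  // memory references also carry a segment register as a fifth operand.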
+ bool AddLMemToInstr(const MCInst &MI, MachineInstr *Instr,
+ unsigned Start) const {
+ return (AddRegToInstr(MI, Instr, Start + 0) &&
+ AddImmToInstr(MI, Instr, Start + 1) &&
+ AddRegToInstr(MI, Instr, Start + 2) &&
+ AddImmToInstr(MI, Instr, Start + 3));
+ }
+
+ bool AddMemToInstr(const MCInst &MI, MachineInstr *Instr,
+ unsigned Start) const {
+ return (AddRegToInstr(MI, Instr, Start + 0) &&
+ AddImmToInstr(MI, Instr, Start + 1) &&
+ AddRegToInstr(MI, Instr, Start + 2) &&
+ AddImmToInstr(MI, Instr, Start + 3) &&
+ AddRegToInstr(MI, Instr, Start + 4));
+ }
+
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS) const {
+ // Don't look yet!
+
+ // Convert the MCInst to a MachineInstr so we can (ab)use the regular
+ // emitter.
+ const X86InstrInfo &II = *TM.getInstrInfo();
+ const TargetInstrDesc &Desc = II.get(MI.getOpcode());
+ MachineInstr *Instr = DummyMF->CreateMachineInstr(Desc, DebugLoc());
+ DummyMBB->push_back(Instr);
+
+ unsigned Opcode = MI.getOpcode();
+ unsigned NumOps = MI.getNumOperands();
+ unsigned CurOp = 0;
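+    // Compensate for tied operands: the MCInst form does not repeat a
+    // source register that is tied to the destination.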
+ if (NumOps > 1 && Desc.getOperandConstraint(1, TOI::TIED_TO) != -1) {
+ Instr->addOperand(MachineOperand::CreateReg(0, false));
+ ++CurOp;
+ } else if (NumOps > 2 &&
+ Desc.getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0)
+ // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32
+ --NumOps;
+
+ bool OK = true;
+ switch (Desc.TSFlags & X86II::FormMask) {
+ case X86II::MRMDestReg:
+ case X86II::MRMSrcReg:
+ // Matching doesn't fill this in completely, we have to choose operand 0
+ // for a tied register.
+ OK &= AddRegToInstr(MI, Instr, 0); CurOp++;
+ OK &= AddRegToInstr(MI, Instr, CurOp++);
+ if (CurOp < NumOps)
+ OK &= AddImmToInstr(MI, Instr, CurOp);
+ break;
+
+ case X86II::RawFrm:
+ if (CurOp < NumOps) {
+ // Hack to make branches work.
+ if (!(Desc.TSFlags & X86II::ImmMask) &&
+ MI.getOperand(0).isExpr() &&
+ isa<MCSymbolRefExpr>(MI.getOperand(0).getExpr()))
+ Instr->addOperand(MachineOperand::CreateMBB(DummyMBB));
+ else
+ OK &= AddImmToInstr(MI, Instr, CurOp);
+ }
+ break;
+
+ case X86II::AddRegFrm:
+ OK &= AddRegToInstr(MI, Instr, CurOp++);
+ if (CurOp < NumOps)
+ OK &= AddImmToInstr(MI, Instr, CurOp);
+ break;
+
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r:
+ // Matching doesn't fill this in completely, we have to choose operand 0
+ // for a tied register.
+ OK &= AddRegToInstr(MI, Instr, 0); CurOp++;
+ if (CurOp < NumOps)
+ OK &= AddImmToInstr(MI, Instr, CurOp);
+ break;
+
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ OK &= AddMemToInstr(MI, Instr, CurOp); CurOp += 5;
+ if (CurOp < NumOps)
+ OK &= AddImmToInstr(MI, Instr, CurOp);
+ break;
+
+ case X86II::MRMSrcMem:
+ OK &= AddRegToInstr(MI, Instr, CurOp++);
+ if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
+ Opcode == X86::LEA16r || Opcode == X86::LEA32r)
+ OK &= AddLMemToInstr(MI, Instr, CurOp);
+ else
+ OK &= AddMemToInstr(MI, Instr, CurOp);
+ break;
+
+ case X86II::MRMDestMem:
+ OK &= AddMemToInstr(MI, Instr, CurOp); CurOp += 5;
+ OK &= AddRegToInstr(MI, Instr, CurOp);
+ break;
+
+ default:
+ case X86II::MRMInitReg:
+ case X86II::Pseudo:
+ OK = false;
+ break;
+ }
+
+ if (!OK) {
+ errs() << "couldn't convert inst '";
+ MI.dump();
+ errs() << "' to machine instr:\n";
+ Instr->dump();
+ }
+
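+    // Run the converted instruction through the regular emitter and stream
+    // out whatever bytes it produced.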
+ InstrEmitter->reset();
+ if (OK)
+ Emit->emitInstruction(*Instr, &Desc);
+ OS << InstrEmitter->str();
+
+ Instr->eraseFromParent();
}
+};
}
+// Ok, now you can look.
+MCCodeEmitter *llvm::createX86MCCodeEmitter(const Target &,
+ TargetMachine &TM) {
+ return new X86MCCodeEmitter(static_cast<X86TargetMachine&>(TM));
+}
diff --git a/lib/Target/X86/X86CompilationCallback_Win64.asm b/lib/Target/X86/X86CompilationCallback_Win64.asm
index 8002f98..f321778 100644
--- a/lib/Target/X86/X86CompilationCallback_Win64.asm
+++ b/lib/Target/X86/X86CompilationCallback_Win64.asm
@@ -17,10 +17,11 @@ extrn X86CompilationCallback2: PROC
X86CompilationCallback proc
push rbp
- ; Save RSP
+ ; Save RSP.
mov rbp, rsp
; Save all int arg registers
+    ; WARNING: We cannot use the register spill area - we're generating stubs by hand!
push rcx
push rdx
push r8
@@ -29,27 +30,27 @@ X86CompilationCallback proc
; Align stack on 16-byte boundary.
and rsp, -16
- ; Save all XMM arg registers
- sub rsp, 64
- movaps [rsp], xmm0
- movaps [rsp+16], xmm1
- movaps [rsp+32], xmm2
- movaps [rsp+48], xmm3
+ ; Save all XMM arg registers. Also allocate reg spill area.
+ sub rsp, 96
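+    ; The low 32 bytes form the Win64 register home (spill) area for the
+    ; callee; the XMM saves live above it, hence the +32 offsets.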
+ movaps [rsp +32], xmm0
+ movaps [rsp+16+32], xmm1
+ movaps [rsp+32+32], xmm2
+ movaps [rsp+48+32], xmm3
; JIT callee
- ; Pass prev frame and return address
+ ; Pass prev frame and return address.
mov rcx, rbp
mov rdx, qword ptr [rbp+8]
call X86CompilationCallback2
- ; Restore all XMM arg registers
- movaps xmm3, [rsp+48]
- movaps xmm2, [rsp+32]
- movaps xmm1, [rsp+16]
- movaps xmm0, [rsp]
+ ; Restore all XMM arg registers.
+ movaps xmm3, [rsp+48+32]
+ movaps xmm2, [rsp+32+32]
+ movaps xmm1, [rsp+16+32]
+ movaps xmm0, [rsp +32]
- ; Restore RSP
+ ; Restore RSP.
mov rsp, rbp
; Restore all int arg registers
@@ -59,7 +60,7 @@ X86CompilationCallback proc
pop rdx
pop rcx
- ; Restore RBP
+ ; Restore RBP.
pop rbp
ret
X86CompilationCallback endp
diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp
index 912ab0e..1597d2b3 100644
--- a/lib/Target/X86/X86ELFWriterInfo.cpp
+++ b/lib/Target/X86/X86ELFWriterInfo.cpp
@@ -14,6 +14,7 @@
#include "X86ELFWriterInfo.h"
#include "X86Relocations.h"
#include "llvm/Function.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
@@ -38,11 +39,13 @@ unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
return R_X86_64_PC32;
case X86::reloc_absolute_word:
return R_X86_64_32;
+ case X86::reloc_absolute_word_sext:
+ return R_X86_64_32S;
case X86::reloc_absolute_dword:
return R_X86_64_64;
case X86::reloc_picrel_word:
default:
- assert(0 && "unknown relocation type");
+ llvm_unreachable("unknown x86_64 machine relocation type");
}
} else {
switch(MachineRelTy) {
@@ -50,23 +53,101 @@ unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
return R_386_PC32;
case X86::reloc_absolute_word:
return R_386_32;
+ case X86::reloc_absolute_word_sext:
case X86::reloc_absolute_dword:
case X86::reloc_picrel_word:
default:
- assert(0 && "unknown relocation type");
+ llvm_unreachable("unknown x86 machine relocation type");
}
}
return 0;
}
-long int X86ELFWriterInfo::getAddendForRelTy(unsigned RelTy) const {
+long int X86ELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier) const {
if (is64Bit) {
switch(RelTy) {
- case R_X86_64_PC32: return -4;
- break;
+ case R_X86_64_PC32: return Modifier - 4;
+ case R_X86_64_32:
+ case R_X86_64_32S:
+ case R_X86_64_64:
+ return Modifier;
default:
- assert(0 && "unknown x86 relocation type");
+ llvm_unreachable("unknown x86_64 relocation type");
+ }
+ } else {
+ switch(RelTy) {
+ case R_386_PC32: return Modifier - 4;
+ case R_386_32: return Modifier;
+ default:
+ llvm_unreachable("unknown x86 relocation type");
+ }
+ }
+ return 0;
+}
+
+unsigned X86ELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
+ if (is64Bit) {
+ switch(RelTy) {
+ case R_X86_64_PC32:
+ case R_X86_64_32:
+ case R_X86_64_32S:
+ return 32;
+ case R_X86_64_64:
+ return 64;
+ default:
+ llvm_unreachable("unknown x86_64 relocation type");
+ }
+ } else {
+ switch(RelTy) {
+ case R_386_PC32:
+ case R_386_32:
+ return 32;
+ default:
+ llvm_unreachable("unknown x86 relocation type");
}
}
return 0;
}
+
+bool X86ELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
+ if (is64Bit) {
+ switch(RelTy) {
+ case R_X86_64_PC32:
+ return true;
+ case R_X86_64_32:
+ case R_X86_64_32S:
+ case R_X86_64_64:
+ return false;
+ default:
+ llvm_unreachable("unknown x86_64 relocation type");
+ }
+ } else {
+ switch(RelTy) {
+ case R_386_PC32:
+ return true;
+ case R_386_32:
+ return false;
+ default:
+ llvm_unreachable("unknown x86 relocation type");
+ }
+ }
+  return false;
+}
+
+unsigned X86ELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
+ return is64Bit ?
+ X86::reloc_absolute_dword : X86::reloc_absolute_word;
+}
+
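+// computeRelocation - Directly resolve a pc-relative field once both the
+// symbol and relocation offsets are known.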
+long int X86ELFWriterInfo::computeRelocation(unsigned SymOffset,
+ unsigned RelOffset,
+ unsigned RelTy) const {
+
+ if (RelTy == R_X86_64_PC32 || RelTy == R_386_PC32)
+ return SymOffset - (RelOffset + 4);
+ else
+    assert(0 && "computeRelocation unknown for this relocation type");
+
+ return 0;
+}
diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h
index 2ba1a0b..342e6e6 100644
--- a/lib/Target/X86/X86ELFWriterInfo.h
+++ b/lib/Target/X86/X86ELFWriterInfo.h
@@ -49,9 +49,26 @@ namespace llvm {
/// ELF relocation entry.
virtual bool hasRelocationAddend() const { return is64Bit ? true : false; }
- /// getAddendForRelTy - Gets the addend value for an ELF relocation entry
- /// based on the target relocation type
- virtual long int getAddendForRelTy(unsigned RelTy) const;
+ /// getDefaultAddendForRelTy - Gets the default addend value for a
+ /// relocation entry based on the target ELF relocation type.
+ virtual long int getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier = 0) const;
+
+  /// getRelocationTySize - Returns the size of the relocatable field in bits.
+  virtual unsigned getRelocationTySize(unsigned RelTy) const;
+
+  /// isPCRelativeRel - True if the relocation type is PC-relative.
+ virtual bool isPCRelativeRel(unsigned RelTy) const;
+
+  /// getAbsoluteLabelMachineRelTy - Returns the machine relocation type
+  /// used to reference an absolute label, e.g. a jumptable entry.
+ virtual unsigned getAbsoluteLabelMachineRelTy() const;
+
+  /// computeRelocation - Some relocatable fields can be relocated directly,
+  /// avoiding emission of a relocation symbol; compute the final relocation
+  /// value for such a symbol.
+ virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset,
+ unsigned RelTy) const;
};
} // end llvm namespace
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index b336d78..3401df0 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -29,6 +29,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -78,19 +79,20 @@ public:
#include "X86GenFastISel.inc"
private:
- bool X86FastEmitCompare(Value *LHS, Value *RHS, MVT VT);
+ bool X86FastEmitCompare(Value *LHS, Value *RHS, EVT VT);
- bool X86FastEmitLoad(MVT VT, const X86AddressMode &AM, unsigned &RR);
+ bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
- bool X86FastEmitStore(MVT VT, Value *Val,
+ bool X86FastEmitStore(EVT VT, Value *Val,
const X86AddressMode &AM);
- bool X86FastEmitStore(MVT VT, unsigned Val,
+ bool X86FastEmitStore(EVT VT, unsigned Val,
const X86AddressMode &AM);
- bool X86FastEmitExtend(ISD::NodeType Opc, MVT DstVT, unsigned Src, MVT SrcVT,
+ bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
unsigned &ResultReg);
- bool X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall);
+ bool X86SelectAddress(Value *V, X86AddressMode &AM);
+ bool X86SelectCallAddress(Value *V, X86AddressMode &AM);
bool X86SelectLoad(Instruction *I);
@@ -116,7 +118,7 @@ private:
bool X86VisitIntrinsicCall(IntrinsicInst &I);
bool X86SelectCall(Instruction *I);
- CCAssignFn *CCAssignFnForCall(unsigned CC, bool isTailCall = false);
+ CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isTailCall = false);
const X86InstrInfo *getInstrInfo() const {
return getTargetMachine()->getInstrInfo();
@@ -131,17 +133,17 @@ private:
/// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
/// computed in an SSE register, not on the X87 floating point stack.
- bool isScalarFPTypeInSSEReg(MVT VT) const {
+ bool isScalarFPTypeInSSEReg(EVT VT) const {
return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
(VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
}
- bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false);
+ bool isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1 = false);
};
} // end anonymous namespace.
-bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) {
+bool X86FastISel::isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1) {
VT = TLI.getValueType(Ty, /*HandleUnknown=*/true);
if (VT == MVT::Other || !VT.isSimple())
// Unhandled type. Halt "fast" selection and bail.
@@ -167,7 +169,8 @@ bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) {
/// CCAssignFnForCall - Selects the correct CCAssignFn for a given calling
/// convention.
-CCAssignFn *X86FastISel::CCAssignFnForCall(unsigned CC, bool isTaillCall) {
+CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC,
+                                           bool isTailCall) {
if (Subtarget->is64Bit()) {
if (Subtarget->isTargetWin64())
return CC_X86_Win64_C;
@@ -186,13 +189,14 @@ CCAssignFn *X86FastISel::CCAssignFnForCall(unsigned CC, bool isTaillCall) {
/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
-bool X86FastISel::X86FastEmitLoad(MVT VT, const X86AddressMode &AM,
+bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
unsigned &ResultReg) {
// Get opcode and regclass of the output for the given load instruction.
unsigned Opc = 0;
const TargetRegisterClass *RC = NULL;
- switch (VT.getSimpleVT()) {
+ switch (VT.getSimpleVT().SimpleTy) {
default: return false;
+ case MVT::i1:
case MVT::i8:
Opc = X86::MOV8rm;
RC = X86::GR8RegisterClass;
@@ -243,13 +247,21 @@ bool X86FastISel::X86FastEmitLoad(MVT VT, const X86AddressMode &AM,
/// and a displacement offset, or a GlobalAddress,
/// i.e. V. Return true if it is possible.
bool
-X86FastISel::X86FastEmitStore(MVT VT, unsigned Val,
+X86FastISel::X86FastEmitStore(EVT VT, unsigned Val,
const X86AddressMode &AM) {
// Get opcode and regclass of the output for the given store instruction.
unsigned Opc = 0;
- switch (VT.getSimpleVT()) {
+ switch (VT.getSimpleVT().SimpleTy) {
case MVT::f80: // No f80 support yet.
default: return false;
+ case MVT::i1: {
+ // Mask out all but lowest bit.
+ unsigned AndResult = createResultReg(X86::GR8RegisterClass);
+ BuildMI(MBB, DL,
+ TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1);
+ Val = AndResult;
+ }
+ // FALLTHROUGH, handling i1 as i8.
case MVT::i8: Opc = X86::MOV8mr; break;
case MVT::i16: Opc = X86::MOV16mr; break;
case MVT::i32: Opc = X86::MOV32mr; break;
@@ -266,17 +278,19 @@ X86FastISel::X86FastEmitStore(MVT VT, unsigned Val,
return true;
}
-bool X86FastISel::X86FastEmitStore(MVT VT, Value *Val,
+bool X86FastISel::X86FastEmitStore(EVT VT, Value *Val,
const X86AddressMode &AM) {
// Handle 'null' like i32/i64 0.
if (isa<ConstantPointerNull>(Val))
- Val = Constant::getNullValue(TD.getIntPtrType());
+ Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));
// If this is a store of a simple constant, fold the constant into the store.
if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
unsigned Opc = 0;
- switch (VT.getSimpleVT()) {
+ bool Signed = true;
+ switch (VT.getSimpleVT().SimpleTy) {
default: break;
+ case MVT::i1: Signed = false; // FALLTHROUGH to handle as i8.
case MVT::i8: Opc = X86::MOV8mi; break;
case MVT::i16: Opc = X86::MOV16mi; break;
case MVT::i32: Opc = X86::MOV32mi; break;
@@ -289,7 +303,8 @@ bool X86FastISel::X86FastEmitStore(MVT VT, Value *Val,
if (Opc) {
addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM)
- .addImm(CI->getSExtValue());
+ .addImm(Signed ? CI->getSExtValue() :
+ CI->getZExtValue());
return true;
}
}
@@ -304,8 +319,8 @@ bool X86FastISel::X86FastEmitStore(MVT VT, Value *Val,
/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
/// ISD::SIGN_EXTEND).
-bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, MVT DstVT,
- unsigned Src, MVT SrcVT,
+bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
+ unsigned Src, EVT SrcVT,
unsigned &ResultReg) {
unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src);
@@ -318,7 +333,7 @@ bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, MVT DstVT,
/// X86SelectAddress - Attempt to fill in an address from the given value.
///
-bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) {
+bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) {
User *U = NULL;
unsigned Opcode = Instruction::UserOp1;
if (Instruction *I = dyn_cast<Instruction>(V)) {
@@ -333,22 +348,21 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) {
default: break;
case Instruction::BitCast:
// Look past bitcasts.
- return X86SelectAddress(U->getOperand(0), AM, isCall);
+ return X86SelectAddress(U->getOperand(0), AM);
case Instruction::IntToPtr:
// Look past no-op inttoptrs.
if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
- return X86SelectAddress(U->getOperand(0), AM, isCall);
+ return X86SelectAddress(U->getOperand(0), AM);
break;
case Instruction::PtrToInt:
// Look past no-op ptrtoints.
if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
- return X86SelectAddress(U->getOperand(0), AM, isCall);
+ return X86SelectAddress(U->getOperand(0), AM);
break;
case Instruction::Alloca: {
- if (isCall) break;
// Do static allocas.
const AllocaInst *A = cast<AllocaInst>(V);
DenseMap<const AllocaInst*, int>::iterator SI = StaticAllocaMap.find(A);
@@ -361,21 +375,19 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) {
}
case Instruction::Add: {
- if (isCall) break;
// Adds of constants are common and easy enough.
if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
// They have to fit in the 32-bit signed displacement field though.
if (isInt32(Disp)) {
AM.Disp = (uint32_t)Disp;
- return X86SelectAddress(U->getOperand(0), AM, isCall);
+ return X86SelectAddress(U->getOperand(0), AM);
}
}
break;
}
case Instruction::GetElementPtr: {
- if (isCall) break;
// Pattern-match simple GEPs.
uint64_t Disp = (int32_t)AM.Disp;
unsigned IndexReg = AM.IndexReg;
@@ -416,7 +428,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) {
AM.IndexReg = IndexReg;
AM.Scale = Scale;
AM.Disp = (uint32_t)Disp;
- return X86SelectAddress(U->getOperand(0), AM, isCall);
+ return X86SelectAddress(U->getOperand(0), AM);
unsupported_gep:
// Ok, the GEP indices weren't all covered.
break;
@@ -426,8 +438,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) {
// Handle constant address.
if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
// Can't handle alternate code models yet.
- if (TM.getCodeModel() != CodeModel::Default &&
- TM.getCodeModel() != CodeModel::Small)
+ if (TM.getCodeModel() != CodeModel::Small)
return false;
// RIP-relative addresses can't have additional register operands.
@@ -440,63 +451,149 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) {
if (GVar->isThreadLocal())
return false;
- // Set up the basic address.
+ // Okay, we've committed to selecting this global. Set up the basic address.
AM.GV = GV;
- if (!isCall &&
- TM.getRelocationModel() == Reloc::PIC_ &&
- !Subtarget->is64Bit())
- AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(&MF);
+ // Allow the subtarget to classify the global.
+ unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
- // Emit an extra load if the ABI requires it.
- if (Subtarget->GVRequiresExtraLoad(GV, TM, isCall)) {
- // Check to see if we've already materialized this
- // value in a register in this block.
- DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V);
- if (I != LocalValueMap.end() && I->second != 0) {
- AM.Base.Reg = I->second;
- AM.GV = 0;
- return true;
+ // If this reference is relative to the pic base, set it now.
+ if (isGlobalRelativeToPICBase(GVFlags)) {
+ // FIXME: How do we know Base.Reg is free??
+ AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(&MF);
+ }
+
+ // Unless the ABI requires an extra load, return a direct reference to
+ // the global.
+ if (!isGlobalStubReference(GVFlags)) {
+ if (Subtarget->isPICStyleRIPRel()) {
+ // Use rip-relative addressing if we can. Above we verified that the
+ // base and index registers are unused.
+ assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
+ AM.Base.Reg = X86::RIP;
}
-
+ AM.GVOpFlags = GVFlags;
+ return true;
+ }
+
+ // Ok, we need to do a load from a stub. If we've already loaded from this
+ // stub, reuse the loaded pointer, otherwise emit the load now.
+ DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
+ unsigned LoadReg;
+ if (I != LocalValueMap.end() && I->second != 0) {
+ LoadReg = I->second;
+ } else {
// Issue load from stub.
unsigned Opc = 0;
const TargetRegisterClass *RC = NULL;
X86AddressMode StubAM;
StubAM.Base.Reg = AM.Base.Reg;
- StubAM.GV = AM.GV;
-
- if (TLI.getPointerTy() == MVT::i32) {
- Opc = X86::MOV32rm;
- RC = X86::GR32RegisterClass;
-
- if (Subtarget->isPICStyleGOT() &&
- TM.getRelocationModel() == Reloc::PIC_)
- StubAM.GVOpFlags = X86II::MO_GOT;
-
- } else {
+ StubAM.GV = GV;
+ StubAM.GVOpFlags = GVFlags;
+
+ if (TLI.getPointerTy() == MVT::i64) {
Opc = X86::MOV64rm;
RC = X86::GR64RegisterClass;
- if (TM.getRelocationModel() != Reloc::Static) {
- StubAM.GVOpFlags = X86II::MO_GOTPCREL;
+ if (Subtarget->isPICStyleRIPRel())
StubAM.Base.Reg = X86::RIP;
- }
+ } else {
+ Opc = X86::MOV32rm;
+ RC = X86::GR32RegisterClass;
}
+
+ LoadReg = createResultReg(RC);
+ addFullAddress(BuildMI(MBB, DL, TII.get(Opc), LoadReg), StubAM);
+
+ // Prevent loading GV stub multiple times in same MBB.
+ LocalValueMap[V] = LoadReg;
+ }
+
+ // Now construct the final address. Note that the Disp, Scale,
+ // and Index values may already be set here.
+ AM.Base.Reg = LoadReg;
+ AM.GV = 0;
+ return true;
+ }
- unsigned ResultReg = createResultReg(RC);
- addFullAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), StubAM);
+ // If all else fails, try to materialize the value in a register.
+ if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
+ if (AM.Base.Reg == 0) {
+ AM.Base.Reg = getRegForValue(V);
+ return AM.Base.Reg != 0;
+ }
+ if (AM.IndexReg == 0) {
+ assert(AM.Scale == 1 && "Scale with no index!");
+ AM.IndexReg = getRegForValue(V);
+ return AM.IndexReg != 0;
+ }
+ }
- // Now construct the final address. Note that the Disp, Scale,
- // and Index values may already be set here.
- AM.Base.Reg = ResultReg;
- AM.GV = 0;
+ return false;
+}
- // Prevent loading GV stub multiple times in same MBB.
- LocalValueMap[V] = AM.Base.Reg;
- } else if (Subtarget->isPICStyleRIPRel()) {
- // Use rip-relative addressing if we can.
+/// X86SelectCallAddress - Attempt to fill in an address from the given value.
+///
+bool X86FastISel::X86SelectCallAddress(Value *V, X86AddressMode &AM) {
+ User *U = NULL;
+ unsigned Opcode = Instruction::UserOp1;
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ Opcode = I->getOpcode();
+ U = I;
+ } else if (ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ switch (Opcode) {
+ default: break;
+ case Instruction::BitCast:
+ // Look past bitcasts.
+ return X86SelectCallAddress(U->getOperand(0), AM);
+
+ case Instruction::IntToPtr:
+ // Look past no-op inttoptrs.
+ if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return X86SelectCallAddress(U->getOperand(0), AM);
+ break;
+
+ case Instruction::PtrToInt:
+ // Look past no-op ptrtoints.
+ if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return X86SelectCallAddress(U->getOperand(0), AM);
+ break;
+ }
+
+ // Handle constant address.
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ // Can't handle alternate code models yet.
+ if (TM.getCodeModel() != CodeModel::Small)
+ return false;
+
+ // RIP-relative addresses can't have additional register operands.
+ if (Subtarget->isPICStyleRIPRel() &&
+ (AM.Base.Reg != 0 || AM.IndexReg != 0))
+ return false;
+
+ // Can't handle TLS or DLLImport.
+ if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->isThreadLocal() || GVar->hasDLLImportLinkage())
+ return false;
+
+ // Okay, we've committed to selecting this global. Set up the basic address.
+ AM.GV = GV;
+
+ // No ABI requires an extra load for anything other than DLLImport, which
+ // we rejected above. Return a direct reference to the global.
+ if (Subtarget->isPICStyleRIPRel()) {
+ // Use rip-relative addressing if we can. Above we verified that the
+ // base and index registers are unused.
+ assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
AM.Base.Reg = X86::RIP;
+ } else if (Subtarget->isPICStyleStubPIC()) {
+ AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET;
+ } else if (Subtarget->isPICStyleGOT()) {
+ AM.GVOpFlags = X86II::MO_GOTOFF;
}
return true;
@@ -518,14 +615,15 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) {
return false;
}
+
/// X86SelectStore - Select and emit code to implement store instructions.
bool X86FastISel::X86SelectStore(Instruction* I) {
- MVT VT;
- if (!isTypeLegal(I->getOperand(0)->getType(), VT))
+ EVT VT;
+ if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
return false;
X86AddressMode AM;
- if (!X86SelectAddress(I->getOperand(1), AM, false))
+ if (!X86SelectAddress(I->getOperand(1), AM))
return false;
return X86FastEmitStore(VT, I->getOperand(0), AM);
@@ -534,12 +632,12 @@ bool X86FastISel::X86SelectStore(Instruction* I) {
/// X86SelectLoad - Select and emit code to implement load instructions.
///
bool X86FastISel::X86SelectLoad(Instruction *I) {
- MVT VT;
- if (!isTypeLegal(I->getType(), VT))
+ EVT VT;
+ if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
return false;
X86AddressMode AM;
- if (!X86SelectAddress(I->getOperand(0), AM, false))
+ if (!X86SelectAddress(I->getOperand(0), AM))
return false;
unsigned ResultReg = 0;
@@ -550,8 +648,8 @@ bool X86FastISel::X86SelectLoad(Instruction *I) {
return false;
}
-static unsigned X86ChooseCmpOpcode(MVT VT) {
- switch (VT.getSimpleVT()) {
+static unsigned X86ChooseCmpOpcode(EVT VT) {
+ switch (VT.getSimpleVT().SimpleTy) {
default: return 0;
case MVT::i8: return X86::CMP8rr;
case MVT::i16: return X86::CMP16rr;
@@ -565,8 +663,8 @@ static unsigned X86ChooseCmpOpcode(MVT VT) {
/// X86ChooseCmpImmediateOpcode - If we have a comparison with RHS as the RHS
/// of the comparison, return an opcode that works for the compare (e.g.
/// CMP32ri) otherwise return 0.
-static unsigned X86ChooseCmpImmediateOpcode(MVT VT, ConstantInt *RHSC) {
- switch (VT.getSimpleVT()) {
+static unsigned X86ChooseCmpImmediateOpcode(EVT VT, ConstantInt *RHSC) {
+ switch (VT.getSimpleVT().SimpleTy) {
// Otherwise, we can't fold the immediate into this comparison.
default: return 0;
case MVT::i8: return X86::CMP8ri;
@@ -581,13 +679,13 @@ static unsigned X86ChooseCmpImmediateOpcode(MVT VT, ConstantInt *RHSC) {
}
}
-bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, MVT VT) {
+bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, EVT VT) {
unsigned Op0Reg = getRegForValue(Op0);
if (Op0Reg == 0) return false;
// Handle 'null' like i32/i64 0.
if (isa<ConstantPointerNull>(Op1))
- Op1 = Constant::getNullValue(TD.getIntPtrType());
+ Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getContext()));
// We have two options: compare with register or immediate. If the RHS of
// the compare is an immediate that we can fold into this compare, use
@@ -613,7 +711,7 @@ bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, MVT VT) {
bool X86FastISel::X86SelectCmp(Instruction *I) {
CmpInst *CI = cast<CmpInst>(I);
- MVT VT;
+ EVT VT;
if (!isTypeLegal(I->getOperand(0)->getType(), VT))
return false;
@@ -688,8 +786,8 @@ bool X86FastISel::X86SelectCmp(Instruction *I) {
bool X86FastISel::X86SelectZExt(Instruction *I) {
// Handle zero-extension from i1 to i8, which is common.
- if (I->getType() == Type::Int8Ty &&
- I->getOperand(0)->getType() == Type::Int1Ty) {
+ if (I->getType() == Type::getInt8Ty(I->getContext()) &&
+ I->getOperand(0)->getType() == Type::getInt1Ty(I->getContext())) {
unsigned ResultReg = getRegForValue(I->getOperand(0));
if (ResultReg == 0) return false;
// Set the high bits to zero.
@@ -713,7 +811,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) {
// Fold the common case of a conditional branch with a comparison.
if (CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
if (CI->hasOneUse()) {
- MVT VT = TLI.getValueType(CI->getOperand(0)->getType());
+ EVT VT = TLI.getValueType(CI->getOperand(0)->getType());
// Try to take advantage of fallthrough opportunities.
CmpInst::Predicate Predicate = CI->getPredicate();
@@ -850,7 +948,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) {
bool X86FastISel::X86SelectShift(Instruction *I) {
unsigned CReg = 0, OpReg = 0, OpImm = 0;
const TargetRegisterClass *RC = NULL;
- if (I->getType() == Type::Int8Ty) {
+ if (I->getType() == Type::getInt8Ty(I->getContext())) {
CReg = X86::CL;
RC = &X86::GR8RegClass;
switch (I->getOpcode()) {
@@ -859,7 +957,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) {
case Instruction::Shl: OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break;
default: return false;
}
- } else if (I->getType() == Type::Int16Ty) {
+ } else if (I->getType() == Type::getInt16Ty(I->getContext())) {
CReg = X86::CX;
RC = &X86::GR16RegClass;
switch (I->getOpcode()) {
@@ -868,7 +966,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) {
case Instruction::Shl: OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break;
default: return false;
}
- } else if (I->getType() == Type::Int32Ty) {
+ } else if (I->getType() == Type::getInt32Ty(I->getContext())) {
CReg = X86::ECX;
RC = &X86::GR32RegClass;
switch (I->getOpcode()) {
@@ -877,7 +975,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) {
case Instruction::Shl: OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break;
default: return false;
}
- } else if (I->getType() == Type::Int64Ty) {
+ } else if (I->getType() == Type::getInt64Ty(I->getContext())) {
CReg = X86::RCX;
RC = &X86::GR64RegClass;
switch (I->getOpcode()) {
@@ -890,7 +988,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) {
return false;
}
- MVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
+ EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
return false;
@@ -924,7 +1022,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) {
}
bool X86FastISel::X86SelectSelect(Instruction *I) {
- MVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
+ EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
return false;
@@ -959,9 +1057,10 @@ bool X86FastISel::X86SelectSelect(Instruction *I) {
bool X86FastISel::X86SelectFPExt(Instruction *I) {
// fpext from float to double.
- if (Subtarget->hasSSE2() && I->getType() == Type::DoubleTy) {
+ if (Subtarget->hasSSE2() &&
+ I->getType()->isDoubleTy()) {
Value *V = I->getOperand(0);
- if (V->getType() == Type::FloatTy) {
+ if (V->getType()->isFloatTy()) {
unsigned OpReg = getRegForValue(V);
if (OpReg == 0) return false;
unsigned ResultReg = createResultReg(X86::FR64RegisterClass);
@@ -976,9 +1075,9 @@ bool X86FastISel::X86SelectFPExt(Instruction *I) {
bool X86FastISel::X86SelectFPTrunc(Instruction *I) {
if (Subtarget->hasSSE2()) {
- if (I->getType() == Type::FloatTy) {
+ if (I->getType()->isFloatTy()) {
Value *V = I->getOperand(0);
- if (V->getType() == Type::DoubleTy) {
+ if (V->getType()->isDoubleTy()) {
unsigned OpReg = getRegForValue(V);
if (OpReg == 0) return false;
unsigned ResultReg = createResultReg(X86::FR32RegisterClass);
@@ -996,8 +1095,8 @@ bool X86FastISel::X86SelectTrunc(Instruction *I) {
if (Subtarget->is64Bit())
// All other cases should be handled by the tblgen generated code.
return false;
- MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
- MVT DstVT = TLI.getValueType(I->getType());
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
// This code only handles truncation to byte right now.
if (DstVT != MVT::i8 && DstVT != MVT::i1)
@@ -1065,7 +1164,7 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) {
const Type *RetTy =
cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));
- MVT VT;
+ EVT VT;
if (!isTypeLegal(RetTy, VT))
return false;
@@ -1125,7 +1224,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
// Handle only C and fastcc calling conventions for now.
CallSite CS(CI);
- unsigned CC = CS.getCallingConv();
+ CallingConv::ID CC = CS.getCallingConv();
if (CC != CallingConv::C &&
CC != CallingConv::Fast &&
CC != CallingConv::X86_FastCall)
@@ -1144,8 +1243,8 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
// Handle *simple* calls for now.
const Type *RetTy = CS.getType();
- MVT RetVT;
- if (RetTy == Type::VoidTy)
+ EVT RetVT;
+ if (RetTy->isVoidTy())
RetVT = MVT::isVoid;
else if (!isTypeLegal(RetTy, RetVT, true))
return false;
@@ -1153,7 +1252,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
// Materialize callee address in a register. FIXME: GV address can be
// handled with a CALLpcrel32 instead.
X86AddressMode CalleeAM;
- if (!X86SelectAddress(Callee, CalleeAM, true))
+ if (!X86SelectCallAddress(Callee, CalleeAM))
return false;
unsigned CalleeOp = 0;
GlobalValue *GV = 0;
@@ -1174,7 +1273,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
// Deal with call operands first.
SmallVector<Value*, 8> ArgVals;
SmallVector<unsigned, 8> Args;
- SmallVector<MVT, 8> ArgVTs;
+ SmallVector<EVT, 8> ArgVTs;
SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
Args.reserve(CS.arg_size());
ArgVals.reserve(CS.arg_size());
@@ -1200,7 +1299,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
return false;
const Type *ArgTy = (*i)->getType();
- MVT ArgVT;
+ EVT ArgVT;
if (!isTypeLegal(ArgTy, ArgVT))
return false;
unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
@@ -1214,7 +1313,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, false, TM, ArgLocs);
+ CCState CCInfo(CC, false, TM, ArgLocs, I->getParent()->getContext());
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC));
// Get a count of how many bytes are to be pushed on the stack.
@@ -1230,11 +1329,11 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
unsigned Arg = Args[VA.getValNo()];
- MVT ArgVT = ArgVTs[VA.getValNo()];
+ EVT ArgVT = ArgVTs[VA.getValNo()];
// Promote the value if needed.
switch (VA.getLocInfo()) {
- default: assert(0 && "Unknown loc info!");
+ default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::SExt: {
bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
@@ -1266,6 +1365,14 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
ArgVT = VA.getLocVT();
break;
}
+ case CCValAssign::BCvt: {
+ unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT().getSimpleVT(),
+ ISD::BIT_CONVERT, Arg);
+ assert(BC != 0 && "Failed to emit a bitcast!");
+ Arg = BC;
+ ArgVT = VA.getLocVT();
+ break;
+ }
}
if (VA.isRegLoc()) {
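
The promotion switch above classifies each outgoing argument by its CCValAssign
location info: pass it through unchanged (Full), sign- or zero-extend it,
any-extend it (high bits undefined), or, with the newly added BCvt case,
bit-convert it between same-sized types such as i64 and f64. A minimal
standalone sketch of that dispatch, with hypothetical stand-ins for the real
X86FastEmitExtend / FastEmit_r emitters:

    #include <cassert>

    enum LocInfo { Full, SExt, ZExt, AExt, BCvt };

    // Hypothetical emitters; in FastISel these return a fresh virtual
    // register holding the converted value, or 0 on failure.
    static unsigned emitExtend(bool /*isSigned*/, unsigned Reg) { return Reg; }
    static unsigned emitBitcast(unsigned Reg) { return Reg; }

    static unsigned promoteArg(LocInfo LI, unsigned Reg) {
      switch (LI) {
      case Full: return Reg;                    // already in the right form
      case SExt: return emitExtend(true, Reg);
      case ZExt: return emitExtend(false, Reg);
      case AExt: return emitExtend(false, Reg); // high bits don't matter
      case BCvt: {                              // same size, new register class
        unsigned BC = emitBitcast(Reg);
        assert(BC != 0 && "Failed to emit a bitcast!");
        return BC;
      }
      }
      return 0;
    }
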
@@ -1294,28 +1401,53 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
// ELF / PIC requires GOT in the EBX register before function calls via PLT
// GOT pointer.
- if (!Subtarget->is64Bit() &&
- TM.getRelocationModel() == Reloc::PIC_ &&
- Subtarget->isPICStyleGOT()) {
+ if (Subtarget->isPICStyleGOT()) {
TargetRegisterClass *RC = X86::GR32RegisterClass;
unsigned Base = getInstrInfo()->getGlobalBaseReg(&MF);
bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC);
assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
Emitted = true;
}
-
+
// Issue the call.
- unsigned CallOpc = CalleeOp
- ? (Subtarget->is64Bit() ? X86::CALL64r : X86::CALL32r)
- : (Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32);
- MachineInstrBuilder MIB = CalleeOp
- ? BuildMI(MBB, DL, TII.get(CallOpc)).addReg(CalleeOp)
- : BuildMI(MBB, DL, TII.get(CallOpc)).addGlobalAddress(GV);
+ MachineInstrBuilder MIB;
+ if (CalleeOp) {
+ // Register-indirect call.
+ unsigned CallOpc = Subtarget->is64Bit() ? X86::CALL64r : X86::CALL32r;
+ MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addReg(CalleeOp);
+
+ } else {
+ // Direct call.
+ assert(GV && "Not a direct call");
+ unsigned CallOpc =
+ Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32;
+
+ // See if we need any target-specific flags on the GV operand.
+ unsigned char OpFlags = 0;
+
+ // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
+ // external symbols must go through the PLT in PIC mode. If the symbol
+ // has hidden or protected visibility, or if it is static or local, then
+ // we don't need to use the PLT - we can directly call it.
+ if (Subtarget->isTargetELF() &&
+ TM.getRelocationModel() == Reloc::PIC_ &&
+ GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
+ OpFlags = X86II::MO_PLT;
+ } else if (Subtarget->isPICStyleStubAny() &&
+ (GV->isDeclaration() || GV->isWeakForLinker()) &&
+ Subtarget->getDarwinVers() < 9) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the Leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = X86II::MO_DARWIN_STUB;
+ }
+
+
+ MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addGlobalAddress(GV, 0, OpFlags);
+ }
// Add an implicit use GOT pointer in EBX.
- if (!Subtarget->is64Bit() &&
- TM.getRelocationModel() == Reloc::PIC_ &&
- Subtarget->isPICStyleGOT())
+ if (Subtarget->isPICStyleGOT())
MIB.addReg(X86::EBX);
// Add implicit physical register uses to the call.
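
The rewritten direct-call path above derives a target-specific operand flag
for the callee from the subtarget, relocation model, and symbol properties.
Restated as a standalone sketch (the predicate parameters are illustrative;
in the real code they come from X86Subtarget and GlobalValue):

    enum CallTargetFlag { MO_NO_FLAG, MO_PLT, MO_DARWIN_STUB };

    // isELFPIC:   ELF target under Reloc::PIC_.
    // isExported: default visibility, non-local linkage (PLT rules apply).
    // needsStub:  Darwin before the Leopard linker, calling an undefined or
    //             weak symbol, so the compiler must emit a $stub itself.
    static CallTargetFlag callTargetFlag(bool isELFPIC, bool isExported,
                                         bool needsStub) {
      if (isELFPIC && isExported)
        return MO_PLT;           // call foo@PLT
      if (needsStub)
        return MO_DARWIN_STUB;   // call L_foo$stub
      return MO_NO_FLAG;         // plain pc-relative call
    }
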
@@ -1327,14 +1459,14 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
BuildMI(MBB, DL, TII.get(AdjStackUp)).addImm(NumBytes).addImm(0);
// Now handle call return value (if any).
- if (RetVT.getSimpleVT() != MVT::isVoid) {
+ if (RetVT.getSimpleVT().SimpleTy != MVT::isVoid) {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CC, false, TM, RVLocs);
+ CCState CCInfo(CC, false, TM, RVLocs, I->getParent()->getContext());
CCInfo.AnalyzeCallResult(RetVT, RetCC_X86);
// Copy all of the result registers out of their specified physreg.
assert(RVLocs.size() == 1 && "Can't handle multi-value calls!");
- MVT CopyVT = RVLocs[0].getValVT();
+ EVT CopyVT = RVLocs[0].getValVT();
TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
TargetRegisterClass *SrcRC = DstRC;
@@ -1358,7 +1490,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
// Round the F80 the right size, which also moves to the appropriate xmm
// register. This is accomplished by storing the F80 value in memory and
// then loading it back. Ewww...
- MVT ResVT = RVLocs[0].getValVT();
+ EVT ResVT = RVLocs[0].getValVT();
unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
unsigned MemSize = ResVT.getSizeInBits()/8;
int FI = MFI.CreateStackObject(MemSize, MemSize);
@@ -1418,8 +1550,8 @@ X86FastISel::TargetSelectInstruction(Instruction *I) {
return X86SelectExtractValue(I);
case Instruction::IntToPtr: // Deliberate fall-through.
case Instruction::PtrToInt: {
- MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
- MVT DstVT = TLI.getValueType(I->getType());
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
if (DstVT.bitsGT(SrcVT))
return X86SelectZExt(I);
if (DstVT.bitsLT(SrcVT))
@@ -1435,14 +1567,14 @@ X86FastISel::TargetSelectInstruction(Instruction *I) {
}
unsigned X86FastISel::TargetMaterializeConstant(Constant *C) {
- MVT VT;
+ EVT VT;
if (!isTypeLegal(C->getType(), VT))
return false;
// Get opcode and regclass of the output for the given load instruction.
unsigned Opc = 0;
const TargetRegisterClass *RC = NULL;
- switch (VT.getSimpleVT()) {
+ switch (VT.getSimpleVT().SimpleTy) {
default: return false;
case MVT::i8:
Opc = X86::MOV8rm;
@@ -1487,7 +1619,7 @@ unsigned X86FastISel::TargetMaterializeConstant(Constant *C) {
// Materialize addresses with LEA instructions.
if (isa<GlobalValue>(C)) {
X86AddressMode AM;
- if (X86SelectAddress(C, AM, false)) {
+ if (X86SelectAddress(C, AM)) {
if (TLI.getPointerTy() == MVT::i32)
Opc = X86::LEA32r;
else
@@ -1509,16 +1641,15 @@ unsigned X86FastISel::TargetMaterializeConstant(Constant *C) {
// x86-32 PIC requires a PIC base register for constant pools.
unsigned PICBase = 0;
unsigned char OpFlag = 0;
- if (TM.getRelocationModel() == Reloc::PIC_) {
- if (Subtarget->isPICStyleStub()) {
- OpFlag = X86II::MO_PIC_BASE_OFFSET;
- PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
- } else if (Subtarget->isPICStyleGOT()) {
- OpFlag = X86II::MO_GOTOFF;
- PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
- } else if (Subtarget->isPICStyleRIPRel() &&
- TM.getCodeModel() == CodeModel::Small)
- PICBase = X86::RIP;
+ if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic
+ OpFlag = X86II::MO_PIC_BASE_OFFSET;
+ PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
+ } else if (Subtarget->isPICStyleGOT()) {
+ OpFlag = X86II::MO_GOTOFF;
+ PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
+ } else if (Subtarget->isPICStyleRIPRel() &&
+ TM.getCodeModel() == CodeModel::Small) {
+ PICBase = X86::RIP;
}
// Create the load from the constant pool.
@@ -1542,7 +1673,7 @@ unsigned X86FastISel::TargetMaterializeAlloca(AllocaInst *C) {
return 0;
X86AddressMode AM;
- if (!X86SelectAddress(C, AM, false))
+ if (!X86SelectAddress(C, AM))
return 0;
unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
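
TargetMaterializeConstant above now keys constant-pool addressing off the
subtarget's PIC style instead of re-testing the relocation model at each call
site. The cases reduce to choosing a fixup flavor and a base register; a
hedged sketch (the enum and returned strings are illustrative, the flag names
mirror X86II):

    enum PICStyle { StubPIC, GOT, RIPRel, None };

    // How each PIC style addresses a constant-pool entry.
    static const char *cpAddressing(PICStyle S, bool SmallCodeModel) {
      switch (S) {
      case StubPIC: return "MO_PIC_BASE_OFFSET"; // LCPI0_0-"L0$pb"(%picbase)
      case GOT:     return "MO_GOTOFF";          // LCPI0_0@GOTOFF(%ebx)
      case RIPRel:  return SmallCodeModel ? "LCPI0_0(%rip)" : "absolute";
      default:      return "absolute";           // static / dynamic-no-pic
      }
    }
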
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 37027ee..d9a05a8 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -31,19 +31,21 @@
#define DEBUG_TYPE "x86-codegen"
#include "X86.h"
#include "X86InstrInfo.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
#include <algorithm>
using namespace llvm;
@@ -56,6 +58,7 @@ namespace {
FPS() : MachineFunctionPass(&ID) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
AU.addPreservedID(MachineLoopInfoID);
AU.addPreservedID(MachineDominatorsID);
MachineFunctionPass::getAnalysisUsage(AU);
@@ -73,12 +76,12 @@ namespace {
unsigned StackTop; // The current top of the FP stack.
void dumpStack() const {
- cerr << "Stack contents:";
+ errs() << "Stack contents:";
for (unsigned i = 0; i != StackTop; ++i) {
- cerr << " FP" << Stack[i];
+ errs() << " FP" << Stack[i];
assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
}
- cerr << "\n";
+ errs() << "\n";
}
private:
/// isStackEmpty - Return true if the FP stack is empty.
@@ -210,6 +213,14 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
I != E; ++I)
Changed |= processBasicBlock(MF, **I);
+ // Process any unreachable blocks in arbitrary order now.
+ if (MF.size() == Processed.size())
+ return Changed;
+
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
+ if (Processed.insert(BB))
+ Changed |= processBasicBlock(MF, *BB);
+
return Changed;
}
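
The pass walks blocks in depth-first order from the entry so FP stack state is
processed along the CFG, then, new in this change, sweeps any blocks the walk
never reached: unreachable blocks still contain FP pseudo instructions that
must be lowered. The traversal shape, as a self-contained sketch:

    #include <set>
    #include <vector>

    struct Block { std::vector<Block*> Succs; };

    static void process(Block &) { /* lower FP pseudos in this block */ }

    static void dfs(Block *B, std::set<Block*> &Processed) {
      if (!Processed.insert(B).second)
        return;
      process(*B);
      for (size_t i = 0, e = B->Succs.size(); i != e; ++i)
        dfs(B->Succs[i], Processed);
    }

    static void runOnFunction(std::vector<Block> &Blocks) {
      std::set<Block*> Processed;
      if (!Blocks.empty())
        dfs(&Blocks[0], Processed);          // reachable blocks, DFS order
      if (Processed.size() == Blocks.size())
        return;                              // common case: all reached
      for (size_t i = 0, e = Blocks.size(); i != e; ++i)
        if (Processed.insert(&Blocks[i]).second)
          process(Blocks[i]);                // unreachable leftovers
    }
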
@@ -236,7 +247,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
PrevMI = prior(I);
++NumFP; // Keep track of # of pseudo instrs
- DOUT << "\nFPInst:\t" << *MI;
+ DEBUG(errs() << "\nFPInst:\t" << *MI);
// Get dead variables list now because the MI pointer may be deleted as part
// of processing!
@@ -255,7 +266,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
case X86II::CompareFP: handleCompareFP(I); break;
case X86II::CondMovFP: handleCondMovFP(I); break;
case X86II::SpecialFP: handleSpecialFP(I); break;
- default: assert(0 && "Unknown FP Type!");
+ default: llvm_unreachable("Unknown FP Type!");
}
// Check to see if any of the values defined by this instruction are dead
@@ -263,7 +274,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) {
unsigned Reg = DeadRegs[i];
if (Reg >= X86::FP0 && Reg <= X86::FP6) {
- DOUT << "Register FP#" << Reg-X86::FP0 << " is dead!\n";
+ DEBUG(errs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n");
freeStackSlotAfter(I, Reg-X86::FP0);
}
}
@@ -272,13 +283,13 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
DEBUG(
MachineBasicBlock::iterator PrevI(PrevMI);
if (I == PrevI) {
- cerr << "Just deleted pseudo instruction\n";
+ errs() << "Just deleted pseudo instruction\n";
} else {
MachineBasicBlock::iterator Start = I;
// Rewind to first instruction newly inserted.
while (Start != BB.begin() && prior(Start) != PrevI) --Start;
- cerr << "Inserted instructions:\n\t";
- Start->print(*cerr.stream(), &MF.getTarget());
+ errs() << "Inserted instructions:\n\t";
+ Start->print(errs(), &MF.getTarget());
while (++Start != next(I)) {}
}
dumpStack();
@@ -945,7 +956,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
MachineInstr *MI = I;
DebugLoc dl = MI->getDebugLoc();
switch (MI->getOpcode()) {
- default: assert(0 && "Unknown SpecialFP instruction!");
+ default: llvm_unreachable("Unknown SpecialFP instruction!");
case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type!
case X86::FpGET_ST0_64:// Appears immediately after a call returning FP type!
case X86::FpGET_ST0_80:// Appears immediately after a call returning FP type!
diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp
index 009846e..3e0385c 100644
--- a/lib/Target/X86/X86FloatingPointRegKill.cpp
+++ b/lib/Target/X86/X86FloatingPointRegKill.cpp
@@ -35,6 +35,7 @@ namespace {
FPRegKiller() : MachineFunctionPass(&ID) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
AU.addPreservedID(MachineLoopInfoID);
AU.addPreservedID(MachineDominatorsID);
MachineFunctionPass::getAnalysisUsage(AU);
@@ -117,9 +118,10 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {
!ContainsFPCode && SI != E; ++SI) {
for (BasicBlock::const_iterator II = SI->begin();
(PN = dyn_cast<PHINode>(II)); ++II) {
- if (PN->getType()==Type::X86_FP80Ty ||
+ if (PN->getType()==Type::getX86_FP80Ty(LLVMBB->getContext()) ||
(!Subtarget.hasSSE1() && PN->getType()->isFloatingPoint()) ||
- (!Subtarget.hasSSE2() && PN->getType()==Type::DoubleTy)) {
+ (!Subtarget.hasSSE2() &&
+ PN->getType()==Type::getDoubleTy(LLVMBB->getContext()))) {
ContainsFPCode = true;
break;
}
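
The Type::X86_FP80Ty / Type::DoubleTy singletons compared against here were
removed when types became owned by an LLVMContext, which is why the
replacement lines either thread a context through (Type::getX86_FP80Ty(...))
or, as in X86FastISel.cpp above, use the new predicate helpers. A sketch in
the LLVM 2.6-era API:

    #include "llvm/Type.h"   // 2.6-era layout; later moved to llvm/IR/Type.h
    #include "llvm/Value.h"
    using namespace llvm;

    // Equivalent checks; the predicate needs no LLVMContext in hand.
    static bool isDoubleTyped(const Value *V) {
      return V->getType()->isDoubleTy();
      // ... same as: V->getType() == Type::getDoubleTy(V->getContext());
    }
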
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 1336177..5b678fb 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -35,8 +35,9 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -78,7 +79,8 @@ namespace {
X86ISelAddressMode()
: BaseType(RegBase), Scale(1), IndexReg(), Disp(0),
- Segment(), GV(0), CP(0), ES(0), JT(-1), Align(0), SymbolFlags(0) {
+ Segment(), GV(0), CP(0), ES(0), JT(-1), Align(0),
+ SymbolFlags(X86II::MO_NO_FLAG) {
}
bool hasSymbolicDisplacement() const {
@@ -105,23 +107,37 @@ namespace {
}
void dump() {
- cerr << "X86ISelAddressMode " << this << "\n";
- cerr << "Base.Reg ";
- if (Base.Reg.getNode() != 0) Base.Reg.getNode()->dump();
- else cerr << "nul";
- cerr << " Base.FrameIndex " << Base.FrameIndex << "\n";
- cerr << " Scale" << Scale << "\n";
- cerr << "IndexReg ";
- if (IndexReg.getNode() != 0) IndexReg.getNode()->dump();
- else cerr << "nul";
- cerr << " Disp " << Disp << "\n";
- cerr << "GV "; if (GV) GV->dump();
- else cerr << "nul";
- cerr << " CP "; if (CP) CP->dump();
- else cerr << "nul";
- cerr << "\n";
- cerr << "ES "; if (ES) cerr << ES; else cerr << "nul";
- cerr << " JT" << JT << " Align" << Align << "\n";
+ errs() << "X86ISelAddressMode " << this << '\n';
+ errs() << "Base.Reg ";
+ if (Base.Reg.getNode() != 0)
+ Base.Reg.getNode()->dump();
+ else
+ errs() << "nul";
+ errs() << " Base.FrameIndex " << Base.FrameIndex << '\n'
+ << " Scale" << Scale << '\n'
+ << "IndexReg ";
+ if (IndexReg.getNode() != 0)
+ IndexReg.getNode()->dump();
+ else
+ errs() << "nul";
+ errs() << " Disp " << Disp << '\n'
+ << "GV ";
+ if (GV)
+ GV->dump();
+ else
+ errs() << "nul";
+ errs() << " CP ";
+ if (CP)
+ CP->dump();
+ else
+ errs() << "nul";
+ errs() << '\n'
+ << "ES ";
+ if (ES)
+ errs() << ES;
+ else
+ errs() << "nul";
+ errs() << " JT" << JT << " Align" << Align << '\n';
}
};
}
@@ -140,10 +156,6 @@ namespace {
/// make the right decision when generating code for different targets.
const X86Subtarget *Subtarget;
- /// CurBB - Current BB being isel'd.
- ///
- MachineBasicBlock *CurBB;
-
/// OptForSize - If true, selector should try to optimize for code size
/// instead of performance.
bool OptForSize;
@@ -174,12 +186,14 @@ namespace {
private:
SDNode *Select(SDValue N);
SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
+ SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);
bool MatchSegmentBaseAddress(SDValue N, X86ISelAddressMode &AM);
bool MatchLoad(SDValue N, X86ISelAddressMode &AM);
bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
- bool MatchAddress(SDValue N, X86ISelAddressMode &AM,
- unsigned Depth = 0);
+ bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
+ bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
+ unsigned Depth);
bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
bool SelectAddr(SDValue Op, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
@@ -342,13 +356,17 @@ static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load,
Ops.push_back(Load.getOperand(0));
else
Ops.push_back(TF.getOperand(i));
- CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size());
- CurDAG->UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2));
- CurDAG->UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1),
+ SDValue NewTF = CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size());
+ SDValue NewLoad = CurDAG->UpdateNodeOperands(Load, NewTF,
+ Load.getOperand(1),
+ Load.getOperand(2));
+ CurDAG->UpdateNodeOperands(Store, NewLoad.getValue(1), Store.getOperand(1),
Store.getOperand(2), Store.getOperand(3));
}
-/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG.
+/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG. The
+/// chain produced by the load must only be used by the store's chain operand,
+/// otherwise this may produce a cycle in the DAG.
///
static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address,
SDValue &Load) {
@@ -366,8 +384,9 @@ static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address,
return false;
if (N.hasOneUse() &&
+ LD->hasNUsesOfValue(1, 1) &&
N.getOperand(1) == Address &&
- N.getNode()->isOperandOf(Chain.getNode())) {
+ LD->isOperandOf(Chain.getNode())) {
Load = N;
return true;
}
@@ -431,7 +450,8 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain) {
if (Chain.getOperand(0).getNode() == Callee.getNode())
return true;
if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
- Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()))
+ Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
+ Callee.getValue(1).hasOneUse())
return true;
return false;
}
@@ -583,8 +603,8 @@ void X86DAGToDAGISel::PreprocessForFPConvert() {
// If the source and destination are SSE registers, then this is a legal
// conversion that should not be lowered.
- MVT SrcVT = N->getOperand(0).getValueType();
- MVT DstVT = N->getValueType(0);
+ EVT SrcVT = N->getOperand(0).getValueType();
+ EVT DstVT = N->getValueType(0);
bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
if (SrcIsSSE && DstIsSSE)
@@ -602,7 +622,7 @@ void X86DAGToDAGISel::PreprocessForFPConvert() {
// Here we could have an FP stack truncation or an FPStack <-> SSE convert.
// FPStack has extload and truncstore. SSE can fold direct loads into other
// operations. Based on this, decide what we want to do.
- MVT MemVT;
+ EVT MemVT;
if (N->getOpcode() == ISD::FP_ROUND)
MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
else
@@ -635,8 +655,7 @@ void X86DAGToDAGISel::PreprocessForFPConvert() {
/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
/// when it has created a SelectionDAG for us to codegen.
void X86DAGToDAGISel::InstructionSelect() {
- CurBB = BB; // BB can change as result of isel.
- const Function *F = CurDAG->getMachineFunction().getFunction();
+ const Function *F = MF->getFunction();
OptForSize = F->hasFnAttr(Attribute::OptimizeForSize);
DEBUG(BB->dump());
@@ -648,12 +667,12 @@ void X86DAGToDAGISel::InstructionSelect() {
// Codegen the basic block.
#ifndef NDEBUG
- DOUT << "===== Instruction selection begins:\n";
+ DEBUG(errs() << "===== Instruction selection begins:\n");
Indent = 0;
#endif
SelectRoot(*CurDAG);
#ifndef NDEBUG
- DOUT << "===== Instruction selection ends:\n";
+ DEBUG(errs() << "===== Instruction selection ends:\n");
#endif
CurDAG->RemoveDeadNodes();
@@ -706,7 +725,7 @@ bool X86DAGToDAGISel::MatchLoad(SDValue N, X86ISelAddressMode &AM) {
/// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes
/// into an addressing mode. These wrap things that will resolve down into a
/// symbol reference. If no match is possible, this returns true, otherwise it
-/// returns false.
+/// returns false.
bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
// If the addressing mode already has a symbol as the displacement, we can
// never match another symbol.
@@ -714,28 +733,27 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
return true;
SDValue N0 = N.getOperand(0);
-
+ CodeModel::Model M = TM.getCodeModel();
+
// Handle X86-64 rip-relative addresses. We check this before checking direct
// folding because RIP is preferable to non-RIP accesses.
if (Subtarget->is64Bit() &&
// Under X86-64 non-small code model, GV (and friends) are 64-bits, so
// they cannot be folded into immediate fields.
// FIXME: This can be improved for kernel and other models?
- TM.getCodeModel() == CodeModel::Small &&
-
+ (M == CodeModel::Small || M == CodeModel::Kernel) &&
// Base and index reg must be 0 in order to use %rip as base and lowering
// must allow RIP.
!AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) {
-
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
int64_t Offset = AM.Disp + G->getOffset();
- if (!isInt32(Offset)) return true;
+ if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true;
AM.GV = G->getGlobal();
AM.Disp = Offset;
AM.SymbolFlags = G->getTargetFlags();
} else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
int64_t Offset = AM.Disp + CP->getOffset();
- if (!isInt32(Offset)) return true;
+ if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true;
AM.CP = CP->getConstVal();
AM.Align = CP->getAlignment();
AM.Disp = Offset;
@@ -748,7 +766,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
AM.JT = J->getIndex();
AM.SymbolFlags = J->getTargetFlags();
}
-
+
if (N.getOpcode() == X86ISD::WrapperRIP)
AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
return false;
@@ -758,7 +776,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
// X86-32 always and X86-64 when in -static -mcmodel=small mode. In 64-bit
// mode, this results in a non-RIP-relative computation.
if (!Subtarget->is64Bit() ||
- (TM.getCodeModel() == CodeModel::Small &&
+ ((M == CodeModel::Small || M == CodeModel::Kernel) &&
TM.getRelocationModel() == Reloc::Static)) {
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
AM.GV = G->getGlobal();
@@ -786,15 +804,49 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
/// MatchAddress - Add the specified node to the specified addressing mode,
/// returning true if it cannot be done. This just pattern matches for the
/// addressing mode.
-bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
- unsigned Depth) {
+bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
+ if (MatchAddressRecursively(N, AM, 0))
+ return true;
+
+ // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
+ // a smaller encoding and avoids a scaled-index.
+ if (AM.Scale == 2 &&
+ AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base.Reg.getNode() == 0) {
+ AM.Base.Reg = AM.IndexReg;
+ AM.Scale = 1;
+ }
+
+ // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
+ // because it has a smaller encoding.
+ // TODO: Which other code models can use this?
+ if (TM.getCodeModel() == CodeModel::Small &&
+ Subtarget->is64Bit() &&
+ AM.Scale == 1 &&
+ AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base.Reg.getNode() == 0 &&
+ AM.IndexReg.getNode() == 0 &&
+ AM.SymbolFlags == X86II::MO_NO_FLAG &&
+ AM.hasSymbolicDisplacement())
+ AM.Base.Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
+
+ return false;
+}
+
+bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
+ unsigned Depth) {
bool is64Bit = Subtarget->is64Bit();
DebugLoc dl = N.getDebugLoc();
- DOUT << "MatchAddress: "; DEBUG(AM.dump());
+ DEBUG({
+ errs() << "MatchAddress: ";
+ AM.dump();
+ });
// Limit recursion.
if (Depth > 5)
return MatchAddressBase(N, AM);
-
+
+ CodeModel::Model M = TM.getCodeModel();
+
// If this is already a %rip relative address, we can only merge immediates
// into it. Instead of handling this in every case, we handle it here.
// RIP relative addressing: %rip + 32-bit displacement!
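
The new MatchAddress wrapper runs two encoding-size post-processing steps
after the recursive match: a scale-2 index with no base becomes base+index,
since a SIB byte with no base register forces a 4-byte displacement even when
the displacement is zero, and in the 64-bit small code model a bare symbolic
displacement gets %rip as its base. The first rewrite, over an illustrative
address-mode struct:

    struct AddrMode {                 // illustrative, not X86ISelAddressMode
      unsigned BaseReg, IndexReg, Scale;
      long long Disp;
    };

    // (,%reg,2) -> (%reg,%reg): computes the same address but avoids the
    // base-less SIB encoding, e.g. lea (%rax,%rax,1), %rcx encodes shorter
    // than lea 0x0(,%rax,2), %rcx.
    static void foldScaleTwo(AddrMode &AM) {
      if (AM.Scale == 2 && AM.BaseReg == 0) {
        AM.BaseReg = AM.IndexReg;
        AM.Scale = 1;
      }
    }
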
@@ -803,10 +855,11 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
// displacements. It isn't very important, but this should be fixed for
// consistency.
if (!AM.ES && AM.JT != -1) return true;
-
+
if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N)) {
int64_t Val = AM.Disp + Cst->getSExtValue();
- if (isInt32(Val)) {
+ if (X86::isOffsetSuitableForCodeModel(Val, M,
+ AM.hasSymbolicDisplacement())) {
AM.Disp = Val;
return false;
}
@@ -818,7 +871,9 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
default: break;
case ISD::Constant: {
uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
- if (!is64Bit || isInt32(AM.Disp + Val)) {
+ if (!is64Bit ||
+ X86::isOffsetSuitableForCodeModel(AM.Disp + Val, M,
+ AM.hasSymbolicDisplacement())) {
AM.Disp += Val;
return false;
}
@@ -857,6 +912,10 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
if (ConstantSDNode
*CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
unsigned Val = CN->getZExtValue();
+ // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
+ // that the base operand remains free for further matching. If
+ // the base doesn't end up getting used, a post-processing step
+ // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
if (Val == 1 || Val == 2 || Val == 3) {
AM.Scale = 1 << Val;
SDValue ShVal = N.getNode()->getOperand(0);
@@ -870,7 +929,9 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
ConstantSDNode *AddVal =
cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
uint64_t Disp = AM.Disp + (AddVal->getSExtValue() << Val);
- if (!is64Bit || isInt32(Disp))
+ if (!is64Bit ||
+ X86::isOffsetSuitableForCodeModel(Disp, M,
+ AM.hasSymbolicDisplacement()))
AM.Disp = Disp;
else
AM.IndexReg = ShVal;
@@ -912,7 +973,9 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
uint64_t Disp = AM.Disp + AddVal->getSExtValue() *
CN->getZExtValue();
- if (!is64Bit || isInt32(Disp))
+ if (!is64Bit ||
+ X86::isOffsetSuitableForCodeModel(Disp, M,
+ AM.hasSymbolicDisplacement()))
AM.Disp = Disp;
else
Reg = N.getNode()->getOperand(0);
@@ -936,7 +999,7 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
// Test if the LHS of the sub can be folded.
X86ISelAddressMode Backup = AM;
- if (MatchAddress(N.getNode()->getOperand(0), AM, Depth+1)) {
+ if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
AM = Backup;
break;
}
@@ -998,12 +1061,12 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
case ISD::ADD: {
X86ISelAddressMode Backup = AM;
- if (!MatchAddress(N.getNode()->getOperand(0), AM, Depth+1) &&
- !MatchAddress(N.getNode()->getOperand(1), AM, Depth+1))
+ if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1) &&
+ !MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1))
return false;
AM = Backup;
- if (!MatchAddress(N.getNode()->getOperand(1), AM, Depth+1) &&
- !MatchAddress(N.getNode()->getOperand(0), AM, Depth+1))
+ if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1) &&
+ !MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1))
return false;
AM = Backup;
@@ -1027,11 +1090,13 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
X86ISelAddressMode Backup = AM;
uint64_t Offset = CN->getSExtValue();
// Start with the LHS as an addr mode.
- if (!MatchAddress(N.getOperand(0), AM, Depth+1) &&
+ if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
// Address could not have picked a GV address for the displacement.
AM.GV == NULL &&
// On x86-64, the resultant disp must fit in 32-bits.
- (!is64Bit || isInt32(AM.Disp + Offset)) &&
+ (!is64Bit ||
+ X86::isOffsetSuitableForCodeModel(AM.Disp + Offset, M,
+ AM.hasSymbolicDisplacement())) &&
// Check to see if the LHS & C is zero.
CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
AM.Disp += Offset;
@@ -1219,7 +1284,7 @@ bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base,
if (!Done && MatchAddress(N, AM))
return false;
- MVT VT = N.getValueType();
+ EVT VT = N.getValueType();
if (AM.BaseType == X86ISelAddressMode::RegBase) {
if (!AM.Base.Reg.getNode())
AM.Base.Reg = CurDAG->getRegister(0, VT);
@@ -1292,7 +1357,7 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N,
assert (T == AM.Segment);
AM.Segment = Copy;
- MVT VT = N.getValueType();
+ EVT VT = N.getValueType();
unsigned Complexity = 0;
if (AM.BaseType == X86ISelAddressMode::RegBase)
if (AM.Base.Reg.getNode())
@@ -1329,12 +1394,13 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N,
if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode()))
Complexity++;
- if (Complexity > 2) {
- SDValue Segment;
- getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
- return true;
- }
- return false;
+ // If it isn't worth using an LEA, reject it.
+ if (Complexity <= 2)
+ return false;
+
+ SDValue Segment;
+ getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
+ return true;
}
/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
@@ -1380,7 +1446,6 @@ bool X86DAGToDAGISel::TryFoldLoad(SDValue P, SDValue N,
/// initialize the global base register, if necessary.
///
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
- MachineFunction *MF = CurBB->getParent();
unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
}
@@ -1400,367 +1465,686 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
if (!SelectAddr(In1, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
return NULL;
- SDValue LSI = Node->getOperand(4); // MemOperand
- const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, LSI, Chain};
- return CurDAG->getTargetNode(Opc, Node->getDebugLoc(),
- MVT::i32, MVT::i32, MVT::Other, Ops,
- array_lengthof(Ops));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
+ const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain};
+ SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
+ MVT::i32, MVT::i32, MVT::Other, Ops,
+ array_lengthof(Ops));
+ cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
+ return ResNode;
+}
+
+SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
+ if (Node->hasAnyUseOfValue(0))
+ return 0;
+
+ // Optimize common patterns for __sync_add_and_fetch and
+ // __sync_sub_and_fetch where the result is not used. This allows us
+ // to use "lock" version of add, sub, inc, dec instructions.
+ // FIXME: Do not use special instructions but instead add the "lock"
+ // prefix to the target node somehow. The extra information will then be
+ // transferred to machine instruction and it denotes the prefix.
+ SDValue Chain = Node->getOperand(0);
+ SDValue Ptr = Node->getOperand(1);
+ SDValue Val = Node->getOperand(2);
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ if (!SelectAddr(Ptr, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+ return 0;
+
+ bool isInc = false, isDec = false, isSub = false, isCN = false;
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
+ if (CN) {
+ isCN = true;
+ int64_t CNVal = CN->getSExtValue();
+ if (CNVal == 1)
+ isInc = true;
+ else if (CNVal == -1)
+ isDec = true;
+ else if (CNVal >= 0)
+ Val = CurDAG->getTargetConstant(CNVal, NVT);
+ else {
+ isSub = true;
+ Val = CurDAG->getTargetConstant(-CNVal, NVT);
+ }
+ } else if (Val.hasOneUse() &&
+ Val.getOpcode() == ISD::SUB &&
+ X86::isZeroNode(Val.getOperand(0))) {
+ isSub = true;
+ Val = Val.getOperand(1);
+ }
+
+ unsigned Opc = 0;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: return 0;
+ case MVT::i8:
+ if (isInc)
+ Opc = X86::LOCK_INC8m;
+ else if (isDec)
+ Opc = X86::LOCK_DEC8m;
+ else if (isSub) {
+ if (isCN)
+ Opc = X86::LOCK_SUB8mi;
+ else
+ Opc = X86::LOCK_SUB8mr;
+ } else {
+ if (isCN)
+ Opc = X86::LOCK_ADD8mi;
+ else
+ Opc = X86::LOCK_ADD8mr;
+ }
+ break;
+ case MVT::i16:
+ if (isInc)
+ Opc = X86::LOCK_INC16m;
+ else if (isDec)
+ Opc = X86::LOCK_DEC16m;
+ else if (isSub) {
+ if (isCN) {
+ if (Predicate_i16immSExt8(Val.getNode()))
+ Opc = X86::LOCK_SUB16mi8;
+ else
+ Opc = X86::LOCK_SUB16mi;
+ } else
+ Opc = X86::LOCK_SUB16mr;
+ } else {
+ if (isCN) {
+ if (Predicate_i16immSExt8(Val.getNode()))
+ Opc = X86::LOCK_ADD16mi8;
+ else
+ Opc = X86::LOCK_ADD16mi;
+ } else
+ Opc = X86::LOCK_ADD16mr;
+ }
+ break;
+ case MVT::i32:
+ if (isInc)
+ Opc = X86::LOCK_INC32m;
+ else if (isDec)
+ Opc = X86::LOCK_DEC32m;
+ else if (isSub) {
+ if (isCN) {
+ if (Predicate_i32immSExt8(Val.getNode()))
+ Opc = X86::LOCK_SUB32mi8;
+ else
+ Opc = X86::LOCK_SUB32mi;
+ } else
+ Opc = X86::LOCK_SUB32mr;
+ } else {
+ if (isCN) {
+ if (Predicate_i32immSExt8(Val.getNode()))
+ Opc = X86::LOCK_ADD32mi8;
+ else
+ Opc = X86::LOCK_ADD32mi;
+ } else
+ Opc = X86::LOCK_ADD32mr;
+ }
+ break;
+ case MVT::i64:
+ if (isInc)
+ Opc = X86::LOCK_INC64m;
+ else if (isDec)
+ Opc = X86::LOCK_DEC64m;
+ else if (isSub) {
+ Opc = X86::LOCK_SUB64mr;
+ if (isCN) {
+ if (Predicate_i64immSExt8(Val.getNode()))
+ Opc = X86::LOCK_SUB64mi8;
+ else if (Predicate_i64immSExt32(Val.getNode()))
+ Opc = X86::LOCK_SUB64mi32;
+ }
+ } else {
+ Opc = X86::LOCK_ADD64mr;
+ if (isCN) {
+ if (Predicate_i64immSExt8(Val.getNode()))
+ Opc = X86::LOCK_ADD64mi8;
+ else if (Predicate_i64immSExt32(Val.getNode()))
+ Opc = X86::LOCK_ADD64mi32;
+ }
+ }
+ break;
+ }
+
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue Undef = SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF,
+ dl, NVT), 0);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
+ if (isInc || isDec) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain };
+ SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6), 0);
+ cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
+ SDValue RetVals[] = { Undef, Ret };
+ return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
+ } else {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
+ SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0);
+ cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
+ SDValue RetVals[] = { Undef, Ret };
+ return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
+ }
+}
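
SelectAtomicLoadAdd only fires when result value 0 of the atomic node is dead,
i.e. the __sync_add_and_fetch / __sync_sub_and_fetch result is discarded, so
the whole operation can be a single locked read-modify-write instead of LOCK
XADD plus arithmetic. Roughly, for the i32 case (the asm comments show the
expected selections, not guaranteed output):

    // Result unused: one locked RMW instruction suffices.
    void bump(int *p)        { __sync_add_and_fetch(p, 1); } // lock incl (%rdi)
    void drop(int *p)        { __sync_sub_and_fetch(p, 1); } // lock decl (%rdi)
    void addn(int *p, int n) { __sync_add_and_fetch(p, n); } // lock addl %esi, (%rdi)

    // Result used: the old value must be recovered, so this still takes
    // the lock xaddl path (plus an add to form the post-increment value).
    int bumpAndRead(int *p)  { return __sync_add_and_fetch(p, 1); }
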
+
+/// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has
+/// any uses which require the SF or OF bits to be accurate.
+static bool HasNoSignedComparisonUses(SDNode *N) {
+ // Examine each user of the node.
+ for (SDNode::use_iterator UI = N->use_begin(),
+ UE = N->use_end(); UI != UE; ++UI) {
+ // Only examine CopyToReg uses.
+ if (UI->getOpcode() != ISD::CopyToReg)
+ return false;
+ // Only examine CopyToReg uses that copy to EFLAGS.
+ if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() !=
+ X86::EFLAGS)
+ return false;
+ // Examine each user of the CopyToReg use.
+ for (SDNode::use_iterator FlagUI = UI->use_begin(),
+ FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
+ // Only examine the Flag result.
+ if (FlagUI.getUse().getResNo() != 1) continue;
+ // Anything unusual: assume conservatively.
+ if (!FlagUI->isMachineOpcode()) return false;
+ // Examine the opcode of the user.
+ switch (FlagUI->getMachineOpcode()) {
+ // These comparisons don't treat the most significant bit specially.
+ case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr:
+ case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
+ case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
+ case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
+ case X86::JA: case X86::JAE: case X86::JB: case X86::JBE:
+ case X86::JE: case X86::JNE: case X86::JP: case X86::JNP:
+ case X86::CMOVA16rr: case X86::CMOVA16rm:
+ case X86::CMOVA32rr: case X86::CMOVA32rm:
+ case X86::CMOVA64rr: case X86::CMOVA64rm:
+ case X86::CMOVAE16rr: case X86::CMOVAE16rm:
+ case X86::CMOVAE32rr: case X86::CMOVAE32rm:
+ case X86::CMOVAE64rr: case X86::CMOVAE64rm:
+ case X86::CMOVB16rr: case X86::CMOVB16rm:
+ case X86::CMOVB32rr: case X86::CMOVB32rm:
+ case X86::CMOVB64rr: case X86::CMOVB64rm:
+ case X86::CMOVBE16rr: case X86::CMOVBE16rm:
+ case X86::CMOVBE32rr: case X86::CMOVBE32rm:
+ case X86::CMOVBE64rr: case X86::CMOVBE64rm:
+ case X86::CMOVE16rr: case X86::CMOVE16rm:
+ case X86::CMOVE32rr: case X86::CMOVE32rm:
+ case X86::CMOVE64rr: case X86::CMOVE64rm:
+ case X86::CMOVNE16rr: case X86::CMOVNE16rm:
+ case X86::CMOVNE32rr: case X86::CMOVNE32rm:
+ case X86::CMOVNE64rr: case X86::CMOVNE64rm:
+ case X86::CMOVNP16rr: case X86::CMOVNP16rm:
+ case X86::CMOVNP32rr: case X86::CMOVNP32rm:
+ case X86::CMOVNP64rr: case X86::CMOVNP64rm:
+ case X86::CMOVP16rr: case X86::CMOVP16rm:
+ case X86::CMOVP32rr: case X86::CMOVP32rm:
+ case X86::CMOVP64rr: case X86::CMOVP64rm:
+ continue;
+ // Anything else: assume conservatively.
+ default: return false;
+ }
+ }
+ }
+ return true;
}
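
HasNoSignedComparisonUses exists because narrowing a TEST changes which bit
feeds SF: testb derives it from bit 7, testl from bit 31. The X86ISD::CMP
lowering below therefore narrows a test-against-immediate only when the
narrow mask's sign bit is clear or every EFLAGS consumer is one of the
unsigned/equality conditions enumerated above. For example:

    // (x & 8): the mask fits in the low byte and bit 7 is clear, so
    //   testl $8, %eax  ->  testb $8, %al   (shorter, flags agree)
    int has_flag(unsigned x) { return (x & 8) != 0; }

    // (x & 0x80): bit 7 of the narrow operand is set by the mask, so SF
    // differs between testl and testb; narrowing is safe here only
    // because "!= 0" reads ZF alone, never SF or OF.
    int has_high_bit(unsigned x) { return (x & 0x80) != 0; }
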
SDNode *X86DAGToDAGISel::Select(SDValue N) {
SDNode *Node = N.getNode();
- MVT NVT = Node->getValueType(0);
+ EVT NVT = Node->getValueType(0);
unsigned Opc, MOpc;
unsigned Opcode = Node->getOpcode();
DebugLoc dl = Node->getDebugLoc();
#ifndef NDEBUG
- DOUT << std::string(Indent, ' ') << "Selecting: ";
- DEBUG(Node->dump(CurDAG));
- DOUT << "\n";
+ DEBUG({
+ errs() << std::string(Indent, ' ') << "Selecting: ";
+ Node->dump(CurDAG);
+ errs() << '\n';
+ });
Indent += 2;
#endif
if (Node->isMachineOpcode()) {
#ifndef NDEBUG
- DOUT << std::string(Indent-2, ' ') << "== ";
- DEBUG(Node->dump(CurDAG));
- DOUT << "\n";
+ DEBUG({
+ errs() << std::string(Indent-2, ' ') << "== ";
+ Node->dump(CurDAG);
+ errs() << '\n';
+ });
Indent -= 2;
#endif
return NULL; // Already selected.
}
switch (Opcode) {
- default: break;
- case X86ISD::GlobalBaseReg:
- return getGlobalBaseReg();
-
- case X86ISD::ATOMOR64_DAG:
- return SelectAtomic64(Node, X86::ATOMOR6432);
- case X86ISD::ATOMXOR64_DAG:
- return SelectAtomic64(Node, X86::ATOMXOR6432);
- case X86ISD::ATOMADD64_DAG:
- return SelectAtomic64(Node, X86::ATOMADD6432);
- case X86ISD::ATOMSUB64_DAG:
- return SelectAtomic64(Node, X86::ATOMSUB6432);
- case X86ISD::ATOMNAND64_DAG:
- return SelectAtomic64(Node, X86::ATOMNAND6432);
- case X86ISD::ATOMAND64_DAG:
- return SelectAtomic64(Node, X86::ATOMAND6432);
- case X86ISD::ATOMSWAP64_DAG:
- return SelectAtomic64(Node, X86::ATOMSWAP6432);
-
- case ISD::SMUL_LOHI:
- case ISD::UMUL_LOHI: {
- SDValue N0 = Node->getOperand(0);
- SDValue N1 = Node->getOperand(1);
-
- bool isSigned = Opcode == ISD::SMUL_LOHI;
- if (!isSigned)
- switch (NVT.getSimpleVT()) {
- default: assert(0 && "Unsupported VT!");
- case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
- case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
- case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
- case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
- }
- else
- switch (NVT.getSimpleVT()) {
- default: assert(0 && "Unsupported VT!");
- case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break;
- case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
- case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
- case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
- }
+ default: break;
+ case X86ISD::GlobalBaseReg:
+ return getGlobalBaseReg();
+
+ case X86ISD::ATOMOR64_DAG:
+ return SelectAtomic64(Node, X86::ATOMOR6432);
+ case X86ISD::ATOMXOR64_DAG:
+ return SelectAtomic64(Node, X86::ATOMXOR6432);
+ case X86ISD::ATOMADD64_DAG:
+ return SelectAtomic64(Node, X86::ATOMADD6432);
+ case X86ISD::ATOMSUB64_DAG:
+ return SelectAtomic64(Node, X86::ATOMSUB6432);
+ case X86ISD::ATOMNAND64_DAG:
+ return SelectAtomic64(Node, X86::ATOMNAND6432);
+ case X86ISD::ATOMAND64_DAG:
+ return SelectAtomic64(Node, X86::ATOMAND6432);
+ case X86ISD::ATOMSWAP64_DAG:
+ return SelectAtomic64(Node, X86::ATOMSWAP6432);
+
+ case ISD::ATOMIC_LOAD_ADD: {
+ SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT);
+ if (RetVal)
+ return RetVal;
+ break;
+ }
- unsigned LoReg, HiReg;
- switch (NVT.getSimpleVT()) {
- default: assert(0 && "Unsupported VT!");
- case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break;
- case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break;
- case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
- case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ bool isSigned = Opcode == ISD::SMUL_LOHI;
+ if (!isSigned) {
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
+ case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
+ case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
+ case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
}
-
- SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
- // multiplty is commmutative
- if (!foldedLoad) {
- foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
- if (foldedLoad)
- std::swap(N0, N1);
+ } else {
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break;
+ case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
+ case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
+ case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
}
+ }
- SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
- N0, SDValue()).getValue(1);
-
- if (foldedLoad) {
- SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
- InFlag };
- SDNode *CNode =
- CurDAG->getTargetNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
- array_lengthof(Ops));
- InFlag = SDValue(CNode, 1);
- // Update the chain.
- ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
- } else {
- InFlag =
- SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
- }
+ unsigned LoReg, HiReg;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break;
+ case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break;
+ case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
+ case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
+ }
- // Copy the low half of the result, if it is needed.
- if (!N.getValue(0).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- LoReg, NVT, InFlag);
- InFlag = Result.getValue(2);
- ReplaceUses(N.getValue(0), Result);
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ // Multiply is commutative.
+ if (!foldedLoad) {
+ foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ if (foldedLoad)
+ std::swap(N0, N1);
+ }
+
+ SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
+ N0, SDValue()).getValue(1);
+
+ if (foldedLoad) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
+ InFlag };
+ SDNode *CNode =
+ CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
+ array_lengthof(Ops));
+ InFlag = SDValue(CNode, 1);
+ // Update the chain.
+ ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
+ } else {
+ InFlag =
+ SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
+ }
+
+ // Copy the low half of the result, if it is needed.
+ if (!N.getValue(0).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ LoReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(N.getValue(0), Result);
#ifndef NDEBUG
- DOUT << std::string(Indent-2, ' ') << "=> ";
- DEBUG(Result.getNode()->dump(CurDAG));
- DOUT << "\n";
+ DEBUG({
+ errs() << std::string(Indent-2, ' ') << "=> ";
+ Result.getNode()->dump(CurDAG);
+ errs() << '\n';
+ });
#endif
+ }
+ // Copy the high half of the result, if it is needed.
+ if (!N.getValue(1).use_empty()) {
+ SDValue Result;
+ if (HiReg == X86::AH && Subtarget->is64Bit()) {
+ // Prevent use of AH in a REX instruction by referencing AX instead.
+ // Shift it down 8 bits.
+ Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::AX, MVT::i16, InFlag);
+ InFlag = Result.getValue(2);
+ Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
+ Result,
+ CurDAG->getTargetConstant(8, MVT::i8)), 0);
+ // Then truncate it down to i8.
+ Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl,
+ MVT::i8, Result);
+ } else {
+ Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ HiReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
}
- // Copy the high half of the result, if it is needed.
- if (!N.getValue(1).use_empty()) {
- SDValue Result;
- if (HiReg == X86::AH && Subtarget->is64Bit()) {
- // Prevent use of AH in a REX instruction by referencing AX instead.
- // Shift it down 8 bits.
- Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- X86::AX, MVT::i16, InFlag);
- InFlag = Result.getValue(2);
- Result = SDValue(CurDAG->getTargetNode(X86::SHR16ri, dl, MVT::i16,
- Result,
- CurDAG->getTargetConstant(8, MVT::i8)), 0);
- // Then truncate it down to i8.
- SDValue SRIdx = CurDAG->getTargetConstant(X86::SUBREG_8BIT, MVT::i32);
- Result = SDValue(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, dl,
- MVT::i8, Result, SRIdx), 0);
- } else {
- Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- HiReg, NVT, InFlag);
- InFlag = Result.getValue(2);
- }
- ReplaceUses(N.getValue(1), Result);
+ ReplaceUses(N.getValue(1), Result);
#ifndef NDEBUG
- DOUT << std::string(Indent-2, ' ') << "=> ";
- DEBUG(Result.getNode()->dump(CurDAG));
- DOUT << "\n";
+ DEBUG({
+ errs() << std::string(Indent-2, ' ') << "=> ";
+ Result.getNode()->dump(CurDAG);
+ errs() << '\n';
+ });
#endif
- }
+ }
#ifndef NDEBUG
- Indent -= 2;
+ Indent -= 2;
#endif
- return NULL;
- }
-
- case ISD::SDIVREM:
- case ISD::UDIVREM: {
- SDValue N0 = Node->getOperand(0);
- SDValue N1 = Node->getOperand(1);
-
- bool isSigned = Opcode == ISD::SDIVREM;
- if (!isSigned)
- switch (NVT.getSimpleVT()) {
- default: assert(0 && "Unsupported VT!");
- case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
- case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
- case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
- case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
- }
- else
- switch (NVT.getSimpleVT()) {
- default: assert(0 && "Unsupported VT!");
- case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
- case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
- case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
- case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
- }
+ return NULL;
+ }
- unsigned LoReg, HiReg;
- unsigned ClrOpcode, SExtOpcode;
- switch (NVT.getSimpleVT()) {
- default: assert(0 && "Unsupported VT!");
- case MVT::i8:
- LoReg = X86::AL; HiReg = X86::AH;
- ClrOpcode = 0;
- SExtOpcode = X86::CBW;
- break;
- case MVT::i16:
- LoReg = X86::AX; HiReg = X86::DX;
- ClrOpcode = X86::MOV16r0;
- SExtOpcode = X86::CWD;
- break;
- case MVT::i32:
- LoReg = X86::EAX; HiReg = X86::EDX;
- ClrOpcode = X86::MOV32r0;
- SExtOpcode = X86::CDQ;
- break;
- case MVT::i64:
- LoReg = X86::RAX; HiReg = X86::RDX;
- ClrOpcode = X86::MOV64r0;
- SExtOpcode = X86::CQO;
- break;
+ case ISD::SDIVREM:
+ case ISD::UDIVREM: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ bool isSigned = Opcode == ISD::SDIVREM;
+ if (!isSigned) {
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
+ case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
+ case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
+ case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
}
+ } else {
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
+ case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
+ case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
+ case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
+ }
+ }
- SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
- bool signBitIsZero = CurDAG->SignBitIsZero(N0);
-
- SDValue InFlag;
- if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
- // Special case for div8, just use a move with zero extension to AX to
- // clear the upper 8 bits (AH).
- SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
- if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
- SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
- Move =
- SDValue(CurDAG->getTargetNode(X86::MOVZX16rm8, dl, MVT::i16,
- MVT::Other, Ops,
- array_lengthof(Ops)), 0);
- Chain = Move.getValue(1);
- ReplaceUses(N0.getValue(1), Chain);
- } else {
- Move =
- SDValue(CurDAG->getTargetNode(X86::MOVZX16rr8, dl, MVT::i16, N0),0);
- Chain = CurDAG->getEntryNode();
- }
- Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, Move, SDValue());
- InFlag = Chain.getValue(1);
+ unsigned LoReg, HiReg;
+ unsigned ClrOpcode, SExtOpcode;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8:
+ LoReg = X86::AL; HiReg = X86::AH;
+ ClrOpcode = 0;
+ SExtOpcode = X86::CBW;
+ break;
+ case MVT::i16:
+ LoReg = X86::AX; HiReg = X86::DX;
+ ClrOpcode = X86::MOV16r0;
+ SExtOpcode = X86::CWD;
+ break;
+ case MVT::i32:
+ LoReg = X86::EAX; HiReg = X86::EDX;
+ ClrOpcode = X86::MOV32r0;
+ SExtOpcode = X86::CDQ;
+ break;
+ case MVT::i64:
+ LoReg = X86::RAX; HiReg = X86::RDX;
+ ClrOpcode = ~0U; // NOT USED.
+ SExtOpcode = X86::CQO;
+ break;
+ }
+
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ bool signBitIsZero = CurDAG->SignBitIsZero(N0);
+
+ SDValue InFlag;
+ if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
+ // Special case for div8, just use a move with zero extension to AX to
+ // clear the upper 8 bits (AH).
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
+ if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
+ Move =
+ SDValue(CurDAG->getMachineNode(X86::MOVZX16rm8, dl, MVT::i16,
+ MVT::Other, Ops,
+ array_lengthof(Ops)), 0);
+ Chain = Move.getValue(1);
+ ReplaceUses(N0.getValue(1), Chain);
} else {
+ Move =
+ SDValue(CurDAG->getMachineNode(X86::MOVZX16rr8, dl, MVT::i16, N0),0);
+ Chain = CurDAG->getEntryNode();
+ }
+ Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, Move, SDValue());
+ InFlag = Chain.getValue(1);
+ } else {
+ InFlag =
+ CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
+ LoReg, N0, SDValue()).getValue(1);
+ if (isSigned && !signBitIsZero) {
+ // Sign extend the low part into the high part.
InFlag =
- CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
- LoReg, N0, SDValue()).getValue(1);
- if (isSigned && !signBitIsZero) {
- // Sign extend the low part into the high part.
- InFlag =
- SDValue(CurDAG->getTargetNode(SExtOpcode, dl, MVT::Flag, InFlag),0);
+ SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Flag, InFlag),0);
+ } else {
+ // Zero out the high part, effectively zero extending the input.
+ SDValue ClrNode;
+
+ if (NVT.getSimpleVT() == MVT::i64) {
+ ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, MVT::i32),
+ 0);
+ // We just did a 32-bit clear, insert it into a 64-bit register to
+ // clear the whole 64-bit reg.
+ SDValue Undef =
+ SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF,
+ dl, MVT::i64), 0);
+ SDValue SubRegNo =
+ CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32);
+ ClrNode =
+ SDValue(CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl,
+ MVT::i64, Undef, ClrNode, SubRegNo),
+ 0);
} else {
- // Zero out the high part, effectively zero extending the input.
- SDValue ClrNode = SDValue(CurDAG->getTargetNode(ClrOpcode, dl, NVT),
- 0);
- InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, HiReg,
- ClrNode, InFlag).getValue(1);
+ ClrNode = SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0);
}
- }
- if (foldedLoad) {
- SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
- InFlag };
- SDNode *CNode =
- CurDAG->getTargetNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
- array_lengthof(Ops));
- InFlag = SDValue(CNode, 1);
- // Update the chain.
- ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
- } else {
- InFlag =
- SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
+ InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, HiReg,
+ ClrNode, InFlag).getValue(1);
}
+ }
- // Copy the division (low) result, if it is needed.
- if (!N.getValue(0).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- LoReg, NVT, InFlag);
- InFlag = Result.getValue(2);
- ReplaceUses(N.getValue(0), Result);
+ if (foldedLoad) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
+ InFlag };
+ SDNode *CNode =
+ CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
+ array_lengthof(Ops));
+ InFlag = SDValue(CNode, 1);
+ // Update the chain.
+ ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
+ } else {
+ InFlag =
+ SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
+ }
+
+ // Copy the division (low) result, if it is needed.
+ if (!N.getValue(0).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ LoReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(N.getValue(0), Result);
#ifndef NDEBUG
- DOUT << std::string(Indent-2, ' ') << "=> ";
- DEBUG(Result.getNode()->dump(CurDAG));
- DOUT << "\n";
+ DEBUG({
+ errs() << std::string(Indent-2, ' ') << "=> ";
+ Result.getNode()->dump(CurDAG);
+ errs() << '\n';
+ });
#endif
+ }
+ // Copy the remainder (high) result, if it is needed.
+ if (!N.getValue(1).use_empty()) {
+ SDValue Result;
+ if (HiReg == X86::AH && Subtarget->is64Bit()) {
+ // Prevent use of AH in a REX instruction by referencing AX instead.
+ // Shift it down 8 bits.
+ Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::AX, MVT::i16, InFlag);
+ InFlag = Result.getValue(2);
+ Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
+ Result,
+ CurDAG->getTargetConstant(8, MVT::i8)),
+ 0);
+ // Then truncate it down to i8.
+ Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl,
+ MVT::i8, Result);
+ } else {
+ Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ HiReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
}
- // Copy the remainder (high) result, if it is needed.
- if (!N.getValue(1).use_empty()) {
- SDValue Result;
- if (HiReg == X86::AH && Subtarget->is64Bit()) {
- // Prevent use of AH in a REX instruction by referencing AX instead.
- // Shift it down 8 bits.
- Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- X86::AX, MVT::i16, InFlag);
- InFlag = Result.getValue(2);
- Result = SDValue(CurDAG->getTargetNode(X86::SHR16ri, dl, MVT::i16,
- Result,
- CurDAG->getTargetConstant(8, MVT::i8)),
- 0);
- // Then truncate it down to i8.
- SDValue SRIdx = CurDAG->getTargetConstant(X86::SUBREG_8BIT, MVT::i32);
- Result = SDValue(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, dl,
- MVT::i8, Result, SRIdx), 0);
- } else {
- Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- HiReg, NVT, InFlag);
- InFlag = Result.getValue(2);
- }
- ReplaceUses(N.getValue(1), Result);
+ ReplaceUses(N.getValue(1), Result);
#ifndef NDEBUG
- DOUT << std::string(Indent-2, ' ') << "=> ";
- DEBUG(Result.getNode()->dump(CurDAG));
- DOUT << "\n";
+ DEBUG({
+ errs() << std::string(Indent-2, ' ') << "=> ";
+ Result.getNode()->dump(CurDAG);
+ errs() << '\n';
+ });
#endif
- }
+ }
#ifndef NDEBUG
- Indent -= 2;
+ Indent -= 2;
#endif
- return NULL;
- }
+ return NULL;
+ }
- case ISD::DECLARE: {
- // Handle DECLARE nodes here because the second operand may have been
- // wrapped in X86ISD::Wrapper.
- SDValue Chain = Node->getOperand(0);
- SDValue N1 = Node->getOperand(1);
- SDValue N2 = Node->getOperand(2);
- FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N1);
-
- // FIXME: We need to handle this for VLAs.
- if (!FINode) {
- ReplaceUses(N.getValue(0), Chain);
- return NULL;
+ case X86ISD::CMP: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
+ // use a smaller encoding.
+ if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
+ N0.getValueType() != MVT::i8 &&
+ X86::isZeroNode(N1)) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getNode()->getOperand(1));
+ if (!C) break;
+
+ // For example, convert "testl %eax, $8" to "testb %al, $8"
+ if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 &&
+ (!(C->getZExtValue() & 0x80) ||
+ HasNoSignedComparisonUses(Node))) {
+ SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i8);
+ SDValue Reg = N0.getNode()->getOperand(0);
+
+ // On x86-32, only the ABCD registers have 8-bit subregisters.
+ if (!Subtarget->is64Bit()) {
+ TargetRegisterClass *TRC = 0;
+ switch (N0.getValueType().getSimpleVT().SimpleTy) {
+ case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
+ case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
+ default: llvm_unreachable("Unsupported TEST operand type!");
+ }
+ SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
+ Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
+ Reg.getValueType(), Reg, RC), 0);
+ }
+
+ // Extract the l-register.
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl,
+ MVT::i8, Reg);
+
+ // Emit a testb.
+ return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, Subreg, Imm);
}
-
- if (N2.getOpcode() == ISD::ADD &&
- N2.getOperand(0).getOpcode() == X86ISD::GlobalBaseReg)
- N2 = N2.getOperand(1);
-
- // If N2 is not Wrapper(decriptor) then the llvm.declare is mangled
- // somehow, just ignore it.
- if (N2.getOpcode() != X86ISD::Wrapper &&
- N2.getOpcode() != X86ISD::WrapperRIP) {
- ReplaceUses(N.getValue(0), Chain);
- return NULL;
+
+ // For example, "testl %eax, $2048" to "testb %ah, $8".
+ if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 &&
+ (!(C->getZExtValue() & 0x8000) ||
+ HasNoSignedComparisonUses(Node))) {
+ // Shift the immediate right by 8 bits.
+ SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8,
+ MVT::i8);
+ SDValue Reg = N0.getNode()->getOperand(0);
+
+ // Put the value in an ABCD register.
+ TargetRegisterClass *TRC = 0;
+ switch (N0.getValueType().getSimpleVT().SimpleTy) {
+ case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break;
+ case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
+ case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
+ default: llvm_unreachable("Unsupported TEST operand type!");
+ }
+ SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
+ Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
+ Reg.getValueType(), Reg, RC), 0);
+
+ // Extract the h-register.
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT_HI, dl,
+ MVT::i8, Reg);
+
+ // Emit a testb. No special NOREX tricks are needed since there's
+ // only one GPR operand!
+ return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
+ Subreg, ShiftedImm);
}
- GlobalAddressSDNode *GVNode =
- dyn_cast<GlobalAddressSDNode>(N2.getOperand(0));
- if (GVNode == 0) {
- ReplaceUses(N.getValue(0), Chain);
- return NULL;
+
+ // For example, "testl %eax, $32776" to "testw %ax, $32776".
+ if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 &&
+ N0.getValueType() != MVT::i16 &&
+ (!(C->getZExtValue() & 0x8000) ||
+ HasNoSignedComparisonUses(Node))) {
+ SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i16);
+ SDValue Reg = N0.getNode()->getOperand(0);
+
+ // Extract the 16-bit subregister.
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_16BIT, dl,
+ MVT::i16, Reg);
+
+ // Emit a testw.
+ return CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, Subreg, Imm);
+ }
+
+ // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
+ if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 &&
+ N0.getValueType() == MVT::i64 &&
+ (!(C->getZExtValue() & 0x80000000) ||
+ HasNoSignedComparisonUses(Node))) {
+ SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
+ SDValue Reg = N0.getNode()->getOperand(0);
+
+ // Extract the 32-bit subregister.
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_32BIT, dl,
+ MVT::i32, Reg);
+
+ // Emit a testl.
+ return CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, Subreg, Imm);
}
- SDValue Tmp1 = CurDAG->getTargetFrameIndex(FINode->getIndex(),
- TLI.getPointerTy());
- SDValue Tmp2 = CurDAG->getTargetGlobalAddress(GVNode->getGlobal(),
- TLI.getPointerTy());
- SDValue Ops[] = { Tmp1, Tmp2, Chain };
- return CurDAG->getTargetNode(TargetInstrInfo::DECLARE, dl,
- MVT::Other, Ops,
- array_lengthof(Ops));
}
+ break;
+ }
}
SDNode *ResNode = SelectCode(N);
#ifndef NDEBUG
- DOUT << std::string(Indent-2, ' ') << "=> ";
- if (ResNode == NULL || ResNode == N.getNode())
- DEBUG(N.getNode()->dump(CurDAG));
- else
- DEBUG(ResNode->dump(CurDAG));
- DOUT << "\n";
+ DEBUG({
+ errs() << std::string(Indent-2, ' ') << "=> ";
+ if (ResNode == NULL || ResNode == N.getNode())
+ N.getNode()->dump(CurDAG);
+ else
+ ResNode->dump(CurDAG);
+ errs() << '\n';
+ });
Indent -= 2;
#endif
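The X86ISD::CMP case keys each narrowing on two conditions: the AND mask must fit in the smaller immediate, and if the narrow form's sign bit is set, no user may depend on signed condition codes (the HasNoSignedComparisonUses check). A standalone sketch of the i8 variant of that predicate, assuming the same semantics (names hypothetical):

    #include <cstdint>

    // True if (X86cmp (and X, Imm), 0) can be emitted as a "testb" on
    // the low byte: the mask fits in 8 bits, and if bit 7 is set the
    // users may only read unsigned/equality flags.
    bool canUseTestb(uint64_t Imm, bool HasNoSignedUses) {
      if (Imm & ~UINT64_C(0xff))
        return false;            // mask wider than one byte
      if (Imm & 0x80)
        return HasNoSignedUses;  // the i8 sign bit would differ
      return true;
    }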
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 5a6294a..fadc818 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -16,13 +16,16 @@
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86TargetMachine.h"
+#include "X86TargetObjectFile.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalAlias.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
+#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -33,21 +36,48 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
static cl::opt<bool>
DisableMMX("disable-mmx", cl::Hidden, cl::desc("Disable use of MMX"));
+// Disable16Bit - 16-bit operations typically have a larger encoding than
+// their 32-bit counterparts, and 16-bit code is slow on some
+// processors. This is an experimental flag to disable 16-bit operations
+// (which forces them to be Legalized to 32-bit operations).
+static cl::opt<bool>
+Disable16Bit("disable-16bit", cl::Hidden,
+ cl::desc("Disable use of 16-bit instructions"));
+
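Because Disable16Bit is a cl::opt, the knob is reachable from any tool that links in the X86 backend; for example (invocation assumed, flag name taken from the patch), "llc -disable-16bit input.ll" forces i16 operations to be legalized to their 32-bit forms.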
// Forward declarations.
-static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1,
+static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
SDValue V2);
+static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
+ switch (TM.getSubtarget<X86Subtarget>().TargetType) {
+ default: llvm_unreachable("unknown subtarget type");
+ case X86Subtarget::isDarwin:
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ return new X8664_MachoTargetObjectFile();
+ return new X8632_MachoTargetObjectFile();
+ case X86Subtarget::isELF:
+ return new TargetLoweringObjectFileELF();
+ case X86Subtarget::isMingw:
+ case X86Subtarget::isCygwin:
+ case X86Subtarget::isWindows:
+ return new TargetLoweringObjectFileCOFF();
+ }
+}
+
X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
- : TargetLowering(TM) {
+ : TargetLowering(TM, createTLOF(TM)) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
X86ScalarSSEf64 = Subtarget->hasSSE2();
X86ScalarSSEf32 = Subtarget->hasSSE1();
@@ -62,7 +92,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setShiftAmountType(MVT::i8);
setBooleanContents(ZeroOrOneBooleanContent);
setSchedulingPreference(SchedulingForRegPressure);
- setShiftAmountFlavor(Mask); // shl X, 32 == shl X, 0
setStackPointerRegisterToSaveRestore(X86StackPtr);
if (Subtarget->isTargetDarwin()) {
@@ -80,7 +109,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// Set up the register classes.
addRegisterClass(MVT::i8, X86::GR8RegisterClass);
- addRegisterClass(MVT::i16, X86::GR16RegisterClass);
+ if (!Disable16Bit)
+ addRegisterClass(MVT::i16, X86::GR16RegisterClass);
addRegisterClass(MVT::i32, X86::GR32RegisterClass);
if (Subtarget->is64Bit())
addRegisterClass(MVT::i64, X86::GR64RegisterClass);
@@ -89,9 +119,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// We don't accept any truncstore of integer registers.
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
- setTruncStoreAction(MVT::i64, MVT::i16, Expand);
+ if (!Disable16Bit)
+ setTruncStoreAction(MVT::i64, MVT::i16, Expand);
setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
- setTruncStoreAction(MVT::i32, MVT::i16, Expand);
+ if (!Disable16Bit)
+ setTruncStoreAction(MVT::i32, MVT::i16, Expand);
setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
setTruncStoreAction(MVT::i16, MVT::i8, Expand);
@@ -242,8 +274,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::CTTZ , MVT::i8 , Custom);
setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
- setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
- setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
+ if (Disable16Bit) {
+ setOperationAction(ISD::CTTZ , MVT::i16 , Expand);
+ setOperationAction(ISD::CTLZ , MVT::i16 , Expand);
+ } else {
+ setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
+ }
setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
@@ -257,16 +294,22 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
// These should be promoted to a larger select which is supported.
- setOperationAction(ISD::SELECT , MVT::i1 , Promote);
- setOperationAction(ISD::SELECT , MVT::i8 , Promote);
+ setOperationAction(ISD::SELECT , MVT::i1 , Promote);
// X86 wants to expand cmov itself.
- setOperationAction(ISD::SELECT , MVT::i16 , Custom);
+ setOperationAction(ISD::SELECT , MVT::i8 , Custom);
+ if (Disable16Bit)
+ setOperationAction(ISD::SELECT , MVT::i16 , Expand);
+ else
+ setOperationAction(ISD::SELECT , MVT::i16 , Custom);
setOperationAction(ISD::SELECT , MVT::i32 , Custom);
setOperationAction(ISD::SELECT , MVT::f32 , Custom);
setOperationAction(ISD::SELECT , MVT::f64 , Custom);
setOperationAction(ISD::SELECT , MVT::f80 , Custom);
setOperationAction(ISD::SETCC , MVT::i8 , Custom);
- setOperationAction(ISD::SETCC , MVT::i16 , Custom);
+ if (Disable16Bit)
+ setOperationAction(ISD::SETCC , MVT::i16 , Expand);
+ else
+ setOperationAction(ISD::SETCC , MVT::i16 , Custom);
setOperationAction(ISD::SETCC , MVT::i32 , Custom);
setOperationAction(ISD::SETCC , MVT::f32 , Custom);
setOperationAction(ISD::SETCC , MVT::f64 , Custom);
@@ -275,8 +318,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SELECT , MVT::i64 , Custom);
setOperationAction(ISD::SETCC , MVT::i64 , Custom);
}
- // X86 ret instruction may pop stack.
- setOperationAction(ISD::RET , MVT::Other, Custom);
setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
// Darwin ABI issue.
@@ -330,7 +371,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
}
- // Use the default ISD::DBG_STOPPOINT, ISD::DECLARE expansion.
+ // Use the default ISD::DBG_STOPPOINT.
setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
// FIXME - use subtarget debug flags
if (!Subtarget->isTargetDarwin() &&
@@ -637,6 +678,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SELECT, MVT::v4i16, Promote);
setOperationAction(ISD::SELECT, MVT::v2i32, Promote);
setOperationAction(ISD::SELECT, MVT::v1i64, Custom);
+ setOperationAction(ISD::VSETCC, MVT::v8i8, Custom);
+ setOperationAction(ISD::VSETCC, MVT::v4i16, Custom);
+ setOperationAction(ISD::VSETCC, MVT::v2i32, Custom);
}
if (!UseSoftFloat && Subtarget->hasSSE1()) {
@@ -696,16 +740,19 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; ++i) {
- MVT VT = (MVT::SimpleValueType)i;
+ EVT VT = (MVT::SimpleValueType)i;
// Do not attempt to custom lower non-power-of-2 vectors
if (!isPowerOf2_32(VT.getVectorNumElements()))
continue;
// Do not attempt to custom lower non-128-bit vectors
if (!VT.is128BitVector())
continue;
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::BUILD_VECTOR,
+ VT.getSimpleVT().SimpleTy, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE,
+ VT.getSimpleVT().SimpleTy, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT,
+ VT.getSimpleVT().SimpleTy, Custom);
}
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
@@ -722,22 +769,23 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; i++) {
- MVT VT = (MVT::SimpleValueType)i;
+ MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
+ EVT VT = SVT;
// Do not attempt to promote non-128-bit vectors
if (!VT.is128BitVector()) {
continue;
}
- setOperationAction(ISD::AND, VT, Promote);
- AddPromotedToType (ISD::AND, VT, MVT::v2i64);
- setOperationAction(ISD::OR, VT, Promote);
- AddPromotedToType (ISD::OR, VT, MVT::v2i64);
- setOperationAction(ISD::XOR, VT, Promote);
- AddPromotedToType (ISD::XOR, VT, MVT::v2i64);
- setOperationAction(ISD::LOAD, VT, Promote);
- AddPromotedToType (ISD::LOAD, VT, MVT::v2i64);
- setOperationAction(ISD::SELECT, VT, Promote);
- AddPromotedToType (ISD::SELECT, VT, MVT::v2i64);
+ setOperationAction(ISD::AND, SVT, Promote);
+ AddPromotedToType (ISD::AND, SVT, MVT::v2i64);
+ setOperationAction(ISD::OR, SVT, Promote);
+ AddPromotedToType (ISD::OR, SVT, MVT::v2i64);
+ setOperationAction(ISD::XOR, SVT, Promote);
+ AddPromotedToType (ISD::XOR, SVT, MVT::v2i64);
+ setOperationAction(ISD::LOAD, SVT, Promote);
+ AddPromotedToType (ISD::LOAD, SVT, MVT::v2i64);
+ setOperationAction(ISD::SELECT, SVT, Promote);
+ AddPromotedToType (ISD::SELECT, SVT, MVT::v2i64);
}
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
@@ -847,7 +895,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
// This includes 256-bit vectors
for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; ++i) {
- MVT VT = (MVT::SimpleValueType)i;
+ EVT VT = (MVT::SimpleValueType)i;
// Do not attempt to custom lower non-power-of-2 vectors
if (!isPowerOf2_32(VT.getVectorNumElements()))
@@ -861,7 +909,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (Subtarget->is64Bit()) {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i64, Custom);
- }
+ }
#endif
#if 0
@@ -871,7 +919,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// Promote v32i8, v16i16, v8i32 load, select, and, or, xor to v4i64.
// Including 256-bit vectors
for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; i++) {
- MVT VT = (MVT::SimpleValueType)i;
+ EVT VT = (MVT::SimpleValueType)i;
if (!VT.is256BitVector()) {
continue;
@@ -933,13 +981,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
maxStoresPerMemcpy = 16; // For @llvm.memcpy -> sequence of stores
maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores
- allowUnalignedMemoryAccesses = true; // x86 supports it!
setPrefLoopAlignment(16);
benefitFromCodePlacementOpt = true;
}
-MVT X86TargetLowering::getSetCCResultType(MVT VT) const {
+MVT::SimpleValueType X86TargetLowering::getSetCCResultType(EVT VT) const {
return MVT::i8;
}
@@ -993,7 +1040,7 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. It returns MVT::iAny if SelectionDAG should be responsible for
/// determining it.
-MVT
+EVT
X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
bool isSrcConst, bool isSrcStr,
SelectionDAG &DAG) const {
@@ -1019,7 +1066,7 @@ SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
SelectionDAG &DAG) const {
if (usesGlobalOffsetTable())
return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy());
- if (!Subtarget->isPICStyleRIPRel())
+ if (!Subtarget->is64Bit())
// This doesn't have DebugLoc associated with it, but is not really the
// same as a Register.
return DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc::getUnknownLoc(),
@@ -1029,7 +1076,7 @@ SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const {
- return F->hasFnAttr(Attribute::OptimizeForSize) ? 1 : 4;
+ return F->hasFnAttr(Attribute::OptimizeForSize) ? 0 : 4;
}
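The hook returns a Log2 value, so this change matters more than the 1 -> 0 diff suggests. A quick worked check of the byte alignments involved (helper hypothetical):

    // Byte alignment implied by the Log2 value returned above.
    unsigned alignmentInBytes(unsigned log2Align) { return 1u << log2Align; }
    // alignmentInBytes(0) == 1   OptimizeForSize: no padding at all
    // alignmentInBytes(1) == 2   the old value still padded functions
    // alignmentInBytes(4) == 16  the default function alignment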
//===----------------------------------------------------------------------===//
@@ -1038,16 +1085,16 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const {
#include "X86GenCallingConv.inc"
-/// LowerRET - Lower an ISD::RET node.
-SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
- DebugLoc dl = Op.getDebugLoc();
- assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");
+SDValue
+X86TargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG) {
SmallVector<CCValAssign, 16> RVLocs;
- unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
- bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
- CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
- CCInfo.AnalyzeReturn(Op.getNode(), RetCC_X86);
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeReturn(Outs, RetCC_X86);
// If this is the first return lowered for this function, add the regs to the
// liveout set for the function.
@@ -1056,49 +1103,19 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
if (RVLocs[i].isRegLoc())
DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
}
- SDValue Chain = Op.getOperand(0);
-
- // Handle tail call return.
- Chain = GetPossiblePreceedingTailCall(Chain, X86ISD::TAILCALL);
- if (Chain.getOpcode() == X86ISD::TAILCALL) {
- SDValue TailCall = Chain;
- SDValue TargetAddress = TailCall.getOperand(1);
- SDValue StackAdjustment = TailCall.getOperand(2);
- assert(((TargetAddress.getOpcode() == ISD::Register &&
- (cast<RegisterSDNode>(TargetAddress)->getReg() == X86::EAX ||
- cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R11)) ||
- TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
- TargetAddress.getOpcode() == ISD::TargetGlobalAddress) &&
- "Expecting an global address, external symbol, or register");
- assert(StackAdjustment.getOpcode() == ISD::Constant &&
- "Expecting a const value");
-
- SmallVector<SDValue,8> Operands;
- Operands.push_back(Chain.getOperand(0));
- Operands.push_back(TargetAddress);
- Operands.push_back(StackAdjustment);
- // Copy registers used by the call. Last operand is a flag so it is not
- // copied.
- for (unsigned i=3; i < TailCall.getNumOperands()-1; i++) {
- Operands.push_back(Chain.getOperand(i));
- }
- return DAG.getNode(X86ISD::TC_RETURN, dl, MVT::Other, &Operands[0],
- Operands.size());
- }
- // Regular return.
SDValue Flag;
SmallVector<SDValue, 6> RetOps;
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
// Operand #1 = Bytes To Pop
- RetOps.push_back(DAG.getConstant(getBytesToPopOnReturn(), MVT::i16));
+ RetOps.push_back(DAG.getTargetConstant(getBytesToPopOnReturn(), MVT::i16));
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- SDValue ValToCopy = Op.getOperand(i*2+1);
+ SDValue ValToCopy = Outs[i].Val;
// Returns in ST0/ST1 are handled specially: these are pushed as operands to
// the RET instruction and handled by the FP Stackifier.
@@ -1116,7 +1133,7 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
// 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
// which is returned in RAX / RDX.
if (Subtarget->is64Bit()) {
- MVT ValVT = ValToCopy.getValueType();
+ EVT ValVT = ValToCopy.getValueType();
if (ValVT.isVector() && ValVT.getSizeInBits() == 64) {
ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy);
if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1)
@@ -1145,6 +1162,9 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag);
Flag = Chain.getValue(1);
+
+ // RAX now acts like a return value.
+ MF.getRegInfo().addLiveOut(X86::RAX);
}
RetOps[0] = Chain; // Update chain.
@@ -1157,36 +1177,32 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
MVT::Other, &RetOps[0], RetOps.size());
}
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+///
+SDValue
+X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
-/// LowerCallResult - Lower the result values of an ISD::CALL into the
-/// appropriate copies out of appropriate physical registers. This assumes that
-/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
-/// being lowered. The returns a SDNode with the same number of values as the
-/// ISD::CALL.
-SDNode *X86TargetLowering::
-LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
- unsigned CallingConv, SelectionDAG &DAG) {
-
- DebugLoc dl = TheCall->getDebugLoc();
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- bool isVarArg = TheCall->isVarArg();
bool Is64Bit = Subtarget->is64Bit();
- CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
- CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);
-
- SmallVector<SDValue, 8> ResultVals;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
- MVT CopyVT = VA.getValVT();
+ EVT CopyVT = VA.getValVT();
// If this is x86-64, and we disabled SSE, we can't return FP values
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
- ((Is64Bit || TheCall->isInreg()) && !Subtarget->hasSSE1())) {
- cerr << "SSE register return with SSE disabled\n";
- exit(1);
+ ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
+ llvm_report_error("SSE register return with SSE disabled");
}
// If this is a call to a function that returns an fp value on the floating
@@ -1206,7 +1222,7 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
MVT::v2i64, InFlag).getValue(1);
Val = Chain.getValue(0);
Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
- Val, DAG.getConstant(0, MVT::i64));
+ Val, DAG.getConstant(0, MVT::i64));
} else {
Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
MVT::i64, InFlag).getValue(1);
@@ -1228,13 +1244,10 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
DAG.getIntPtrConstant(1));
}
- ResultVals.push_back(Val);
+ InVals.push_back(Val);
}
- // Merge everything together with a MERGE_VALUES node.
- ResultVals.push_back(Chain);
- return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
- &ResultVals[0], ResultVals.size()).getNode();
+ return Chain;
}
@@ -1248,30 +1261,28 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
// For info on fast calling convention see Fast Calling Convention (tail call)
// implementation LowerX86_32FastCCCallTo.
-/// CallIsStructReturn - Determines whether a CALL node uses struct return
+/// CallIsStructReturn - Determines whether a call uses struct return
/// semantics.
-static bool CallIsStructReturn(CallSDNode *TheCall) {
- unsigned NumOps = TheCall->getNumArgs();
- if (!NumOps)
+static bool CallIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
+ if (Outs.empty())
return false;
- return TheCall->getArgFlags(0).isSRet();
+ return Outs[0].Flags.isSRet();
}
-/// ArgsAreStructReturn - Determines whether a FORMAL_ARGUMENTS node uses struct
+/// ArgsAreStructReturn - Determines whether a function uses struct
/// return semantics.
-static bool ArgsAreStructReturn(SDValue Op) {
- unsigned NumArgs = Op.getNode()->getNumValues() - 1;
- if (!NumArgs)
+static bool
+ArgsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
+ if (Ins.empty())
return false;
- return cast<ARG_FLAGSSDNode>(Op.getOperand(3))->getArgFlags().isSRet();
+ return Ins[0].Flags.isSRet();
}
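Both predicates reduce struct-return detection to the SRet flag on the first argument, which is where a C-level aggregate return surfaces after lowering. As a hedged illustration, on x86-32 a return type this large is lowered to a hidden pointer parameter carrying that flag:

    // The callee writes its result through a hidden first parameter;
    // Outs[0]/Ins[0] above see that parameter with Flags.isSRet() set.
    struct Big { int v[8]; };
    Big makeBig() { return Big{}; }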
-/// IsCalleePop - Determines whether a CALL or FORMAL_ARGUMENTS node requires
-/// the callee to pop its own arguments. Callee pop is necessary to support tail
-/// calls.
-bool X86TargetLowering::IsCalleePop(bool IsVarArg, unsigned CallingConv) {
+/// IsCalleePop - Determines whether the callee is required to pop its
+/// own arguments. Callee pop is necessary to support tail calls.
+bool X86TargetLowering::IsCalleePop(bool IsVarArg, CallingConv::ID CallingConv){
if (IsVarArg)
return false;
@@ -1289,7 +1300,7 @@ bool X86TargetLowering::IsCalleePop(bool IsVarArg, unsigned CallingConv) {
/// CCAssignFnForNode - Selects the correct CCAssignFn for the
/// given CallingConvention value.
-CCAssignFn *X86TargetLowering::CCAssignFnForNode(unsigned CC) const {
+CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
if (Subtarget->is64Bit()) {
if (Subtarget->isTargetWin64())
return CC_X86_Win64_C;
@@ -1305,36 +1316,18 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(unsigned CC) const {
return CC_X86_32_C;
}
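IsCalleePop feeds the bytes-to-pop bookkeeping used later in this file (the callee pops everything, or nothing plus the 4-byte hidden sret pointer). A rough standalone mirror of that computation, under the same rules the code states (function hypothetical):

    // Varargs functions can never be callee-pop; otherwise the callee
    // cleans up its whole incoming stack area, e.g. x86-32 stdcall's
    // "ret 8" for two i32 arguments.
    unsigned bytesToPopOnReturn(bool isVarArg, bool calleePop,
                                unsigned stackBytes) {
      if (isVarArg || !calleePop)
        return 0;          // caller cleans up
      return stackBytes;   // callee pops everything
    }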
-/// NameDecorationForFORMAL_ARGUMENTS - Selects the appropriate decoration to
-/// apply to a MachineFunction containing a given FORMAL_ARGUMENTS node.
+/// NameDecorationForCallConv - Selects the appropriate decoration to
+/// apply to a MachineFunction containing a given calling convention.
NameDecorationStyle
-X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDValue Op) {
- unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- if (CC == CallingConv::X86_FastCall)
+X86TargetLowering::NameDecorationForCallConv(CallingConv::ID CallConv) {
+ if (CallConv == CallingConv::X86_FastCall)
return FastCall;
- else if (CC == CallingConv::X86_StdCall)
+ else if (CallConv == CallingConv::X86_StdCall)
return StdCall;
return None;
}
-/// CallRequiresGOTInRegister - Check whether the call requires the GOT pointer
-/// in a register before calling.
-bool X86TargetLowering::CallRequiresGOTPtrInReg(bool Is64Bit, bool IsTailCall) {
- return !IsTailCall && !Is64Bit &&
- getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
- Subtarget->isPICStyleGOT();
-}
-
-/// CallRequiresFnAddressInReg - Check whether the call requires the function
-/// address to be loaded in a register.
-bool
-X86TargetLowering::CallRequiresFnAddressInReg(bool Is64Bit, bool IsTailCall) {
- return !Is64Bit && IsTailCall &&
- getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
- Subtarget->isPICStyleGOT();
-}
-
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" with size and alignment information specified by
/// the specific parameter attribute. The copy will be passed as a byval
@@ -1348,35 +1341,52 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
/*AlwaysInline=*/true, NULL, 0, NULL, 0);
}
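For context, a byval argument is an aggregate passed by value on the stack; the memcpy above gives the callee a private copy it may freely modify. A minimal C++ shape of the source pattern that produces one (types hypothetical):

    struct Args { int v[16]; };
    void callee(Args a);                  // receives its own copy
    void caller(Args *p) { callee(*p); }  // the copy is emitted here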
-SDValue X86TargetLowering::LowerMemArgument(SDValue Op, SelectionDAG &DAG,
- const CCValAssign &VA,
- MachineFrameInfo *MFI,
- unsigned CC,
- SDValue Root, unsigned i) {
+SDValue
+X86TargetLowering::LowerMemArgument(SDValue Chain,
+ CallingConv::ID CallConv,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ MachineFrameInfo *MFI,
+ unsigned i) {
+
// Create the nodes corresponding to a load from this parameter slot.
- ISD::ArgFlagsTy Flags =
- cast<ARG_FLAGSSDNode>(Op.getOperand(3 + i))->getArgFlags();
- bool AlwaysUseMutable = (CC==CallingConv::Fast) && PerformTailCallOpt;
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ bool AlwaysUseMutable = (CallConv==CallingConv::Fast) && PerformTailCallOpt;
bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
+ EVT ValVT;
+
+ // If value is passed by pointer we have address passed instead of the value
+ // itself.
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ ValVT = VA.getLocVT();
+ else
+ ValVT = VA.getValVT();
// FIXME: For now, all byval parameter objects are marked mutable. This can be
// changed with more analysis.
// In case of tail call optimization mark all arguments mutable. Since they
// could be overwritten by lowering of arguments in case of a tail call.
- int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+ int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
VA.getLocMemOffset(), isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
if (Flags.isByVal())
return FIN;
- return DAG.getLoad(VA.getValVT(), Op.getDebugLoc(), Root, FIN,
+ return DAG.getLoad(ValVT, dl, Chain, FIN,
PseudoSourceValue::getFixedStack(FI), 0);
}
SDValue
-X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
+X86TargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
- DebugLoc dl = Op.getDebugLoc();
const Function* Fn = MF.getFunction();
if (Fn->hasExternalLinkage() &&
@@ -1385,25 +1395,23 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
FuncInfo->setForceFramePointer(true);
// Decorate the function name.
- FuncInfo->setDecorationStyle(NameDecorationForFORMAL_ARGUMENTS(Op));
+ FuncInfo->setDecorationStyle(NameDecorationForCallConv(CallConv));
MachineFrameInfo *MFI = MF.getFrameInfo();
- SDValue Root = Op.getOperand(0);
- bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
- unsigned CC = MF.getFunction()->getCallingConv();
bool Is64Bit = Subtarget->is64Bit();
bool IsWin64 = Subtarget->isTargetWin64();
- assert(!(isVarArg && CC == CallingConv::Fast) &&
+ assert(!(isVarArg && CallConv == CallingConv::Fast) &&
"Var args not supported with calling convention fastcc");
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
- CCInfo.AnalyzeFormalArguments(Op.getNode(), CCAssignFnForNode(CC));
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv));
- SmallVector<SDValue, 8> ArgValues;
unsigned LastVal = ~0U;
+ SDValue ArgValue;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
// TODO: If an arg is passed in two places (e.g. reg and stack), skip later
@@ -1413,7 +1421,7 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
LastVal = VA.getValNo();
if (VA.isRegLoc()) {
- MVT RegVT = VA.getLocVT();
+ EVT RegVT = VA.getLocVT();
TargetRegisterClass *RC = NULL;
if (RegVT == MVT::i32)
RC = X86::GR32RegisterClass;
@@ -1425,27 +1433,13 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
RC = X86::FR64RegisterClass;
else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
RC = X86::VR128RegisterClass;
- else if (RegVT.isVector()) {
- assert(RegVT.getSizeInBits() == 64);
- if (!Is64Bit)
- RC = X86::VR64RegisterClass; // MMX values are passed in MMXs.
- else {
- // Darwin calling convention passes MMX values in either GPRs or
- // XMMs in x86-64. Other targets pass them in memory.
- if (RegVT != MVT::v1i64 && Subtarget->hasSSE2()) {
- RC = X86::VR128RegisterClass; // MMX values are passed in XMMs.
- RegVT = MVT::v2i64;
- } else {
- RC = X86::GR64RegisterClass; // v1i64 values are passed in GPRs.
- RegVT = MVT::i64;
- }
- }
- } else {
- assert(0 && "Unknown argument type!");
- }
+ else if (RegVT.isVector() && RegVT.getSizeInBits() == 64)
+ RC = X86::VR64RegisterClass;
+ else
+ llvm_unreachable("Unknown argument type!");
- unsigned Reg = DAG.getMachineFunction().addLiveIn(VA.getLocReg(), RC);
- SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT);
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
// If this is an 8 or 16-bit value, it is really passed promoted to 32
// bits. Insert an assert[sz]ext to capture this, then truncate to the
@@ -1456,52 +1450,53 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
else if (VA.getLocInfo() == CCValAssign::ZExt)
ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::BCvt)
+ ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
- if (VA.getLocInfo() != CCValAssign::Full)
- ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
-
- // Handle MMX values passed in GPRs.
- if (Is64Bit && RegVT != VA.getLocVT()) {
- if (RegVT.getSizeInBits() == 64 && RC == X86::GR64RegisterClass)
- ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), ArgValue);
- else if (RC == X86::VR128RegisterClass) {
+ if (VA.isExtInLoc()) {
+ // Handle MMX values passed in XMM regs.
+ if (RegVT.isVector()) {
ArgValue = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
ArgValue, DAG.getConstant(0, MVT::i64));
- ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), ArgValue);
- }
+ ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
+ } else
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
}
-
- ArgValues.push_back(ArgValue);
} else {
assert(VA.isMemLoc());
- ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, CC, Root, i));
+ ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i);
}
+
+ // If value is passed via pointer - do a load.
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, NULL, 0);
+
+ InVals.push_back(ArgValue);
}
// The x86-64 ABI for returning structs by value requires that we copy
// the sret argument into %rax for the return. Save the argument into
// a virtual register so that we can access it from the return points.
- if (Is64Bit && DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
- MachineFunction &MF = DAG.getMachineFunction();
+ if (Is64Bit && MF.getFunction()->hasStructRetAttr()) {
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
unsigned Reg = FuncInfo->getSRetReturnReg();
if (!Reg) {
Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
FuncInfo->setSRetReturnReg(Reg);
}
- SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, ArgValues[0]);
- Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Root);
+ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
}
unsigned StackSize = CCInfo.getNextStackOffset();
// align stack specially for tail calls
- if (PerformTailCallOpt && CC == CallingConv::Fast)
+ if (PerformTailCallOpt && CallConv == CallingConv::Fast)
StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
- if (Is64Bit || CC != CallingConv::X86_FastCall) {
+ if (Is64Bit || CallConv != CallingConv::X86_FastCall) {
VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
}
if (Is64Bit) {
@@ -1558,75 +1553,81 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
// Store the integer parameter registers.
SmallVector<SDValue, 8> MemOps;
SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
- SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
- DAG.getIntPtrConstant(VarArgsGPOffset));
+ unsigned Offset = VarArgsGPOffset;
for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) {
+ SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
+ DAG.getIntPtrConstant(Offset));
unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
X86::GR64RegisterClass);
- SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::i64);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN,
- PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0);
+ PseudoSourceValue::getFixedStack(RegSaveFrameIndex),
+ Offset);
MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
- DAG.getIntPtrConstant(8));
+ Offset += 8;
}
- // Now store the XMM (fp + vector) parameter registers.
- FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
- DAG.getIntPtrConstant(VarArgsFPOffset));
- for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
- unsigned VReg = MF.addLiveIn(XMMArgRegs[NumXMMRegs],
- X86::VR128RegisterClass);
- SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::v4f32);
- SDValue Store =
- DAG.getStore(Val.getValue(1), dl, Val, FIN,
- PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0);
- MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
- DAG.getIntPtrConstant(16));
+ if (TotalNumXMMRegs != 0 && NumXMMRegs != TotalNumXMMRegs) {
+ // Now store the XMM (fp + vector) parameter registers.
+ SmallVector<SDValue, 11> SaveXMMOps;
+ SaveXMMOps.push_back(Chain);
+
+ unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass);
+ SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8);
+ SaveXMMOps.push_back(ALVal);
+
+ SaveXMMOps.push_back(DAG.getIntPtrConstant(RegSaveFrameIndex));
+ SaveXMMOps.push_back(DAG.getIntPtrConstant(VarArgsFPOffset));
+
+ for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
+ unsigned VReg = MF.addLiveIn(XMMArgRegs[NumXMMRegs],
+ X86::VR128RegisterClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32);
+ SaveXMMOps.push_back(Val);
+ }
+ MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
+ MVT::Other,
+ &SaveXMMOps[0], SaveXMMOps.size()));
}
+
if (!MemOps.empty())
- Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOps[0], MemOps.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
}
}
- ArgValues.push_back(Root);
-
// Some CCs need callee pop.
- if (IsCalleePop(isVarArg, CC)) {
+ if (IsCalleePop(isVarArg, CallConv)) {
BytesToPopOnReturn = StackSize; // Callee pops everything.
BytesCallerReserves = 0;
} else {
BytesToPopOnReturn = 0; // Callee pops nothing.
// If this is an sret function, the return should pop the hidden pointer.
- if (!Is64Bit && CC != CallingConv::Fast && ArgsAreStructReturn(Op))
+ if (!Is64Bit && CallConv != CallingConv::Fast && ArgsAreStructReturn(Ins))
BytesToPopOnReturn = 4;
BytesCallerReserves = StackSize;
}
if (!Is64Bit) {
RegSaveFrameIndex = 0xAAAAAAA; // RegSaveFrameIndex is X86-64 only.
- if (CC == CallingConv::X86_FastCall)
+ if (CallConv == CallingConv::X86_FastCall)
VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
}
FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
- // Return the new list of results.
- return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
- &ArgValues[0], ArgValues.size()).getValue(Op.getResNo());
+ return Chain;
}
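The VASTART_SAVE_XMM_REGS block above encodes an x86-64 SysV detail: for a varargs call, AL carries the number of vector registers actually used, so the expansion of the pseudo can skip the XMM spills at runtime when none were passed. A hedged sketch of the guard's shape (names and expansion details assumed):

    // Spill the remaining XMM argument registers into the register
    // save area only if the caller reported (via AL) that any vector
    // registers were used.
    void saveVarArgXMMRegs(unsigned alCount, unsigned firstUnnamedXMM,
                           unsigned totalXMM) {
      if (totalXMM == 0 || alCount == 0)
        return;  // nothing to spill
      for (unsigned i = firstUnnamedXMM; i != totalXMM; ++i) {
        // store xmm[i] at RegSaveFrameIndex + VarArgsFPOffset + 16*i
      }
    }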
SDValue
-X86TargetLowering::LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
- const SDValue &StackPtr,
+X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
+ SDValue StackPtr, SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
const CCValAssign &VA,
- SDValue Chain,
- SDValue Arg, ISD::ArgFlagsTy Flags) {
- DebugLoc dl = TheCall->getDebugLoc();
- unsigned LocMemOffset = VA.getLocMemOffset();
+ ISD::ArgFlagsTy Flags) {
+ const unsigned FirstStackArgOffset = (Subtarget->isTargetWin64() ? 32 : 0);
+ unsigned LocMemOffset = FirstStackArgOffset + VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
if (Flags.isByVal()) {
@@ -1649,7 +1650,7 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
if (!IsTailCall || FPDiff==0) return Chain;
// Adjust the Return address stack slot.
- MVT VT = getPointerTy();
+ EVT VT = getPointerTy();
OutRetAddr = getReturnAddressFrameIndex(DAG);
// Load the "old" Return address.
@@ -1669,41 +1670,45 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
int SlotSize = Is64Bit ? 8 : 4;
int NewReturnAddrFI =
MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
- MVT VT = Is64Bit ? MVT::i64 : MVT::i32;
+ EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0);
return Chain;
}
-SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
+SDValue
+X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+
MachineFunction &MF = DAG.getMachineFunction();
- CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
- SDValue Chain = TheCall->getChain();
- unsigned CC = TheCall->getCallingConv();
- bool isVarArg = TheCall->isVarArg();
- bool IsTailCall = TheCall->isTailCall() &&
- CC == CallingConv::Fast && PerformTailCallOpt;
- SDValue Callee = TheCall->getCallee();
bool Is64Bit = Subtarget->is64Bit();
- bool IsStructRet = CallIsStructReturn(TheCall);
- DebugLoc dl = TheCall->getDebugLoc();
+ bool IsStructRet = CallIsStructReturn(Outs);
- assert(!(isVarArg && CC == CallingConv::Fast) &&
+ assert((!isTailCall ||
+ (CallConv == CallingConv::Fast && PerformTailCallOpt)) &&
+ "IsEligibleForTailCallOptimization missed a case!");
+ assert(!(isVarArg && CallConv == CallingConv::Fast) &&
"Var args not supported with calling convention fastcc");
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
- CCInfo.AnalyzeCallOperands(TheCall, CCAssignFnForNode(CC));
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
- if (PerformTailCallOpt && CC == CallingConv::Fast)
+ if (PerformTailCallOpt && CallConv == CallingConv::Fast)
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
int FPDiff = 0;
- if (IsTailCall) {
+ if (isTailCall) {
// Lower arguments at fp - stackoffset + fpdiff.
unsigned NumBytesCallerPushed =
MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
@@ -1719,7 +1724,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
SDValue RetAddrFrIdx;
// Load return address for tail calls.
- Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, IsTailCall, Is64Bit,
+ Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall, Is64Bit,
FPDiff, dl);
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
@@ -1730,57 +1735,54 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
// of tail call optimization arguments are handle later.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- SDValue Arg = TheCall->getArg(i);
- ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
+ EVT RegVT = VA.getLocVT();
+ SDValue Arg = Outs[i].Val;
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
bool isByVal = Flags.isByVal();
// Promote the value if needed.
switch (VA.getLocInfo()) {
- default: assert(0 && "Unknown loc info!");
+ default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::SExt:
- Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
break;
case CCValAssign::ZExt:
- Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
break;
case CCValAssign::AExt:
- Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ if (RegVT.isVector() && RegVT.getSizeInBits() == 128) {
+ // Special case: passing MMX values in XMM registers.
+ Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
+ Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
+ Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
+ } else
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BIT_CONVERT, dl, RegVT, Arg);
+ break;
+ case CCValAssign::Indirect: {
+ // Store the argument.
+ SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
+ int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+ Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
+ PseudoSourceValue::getFixedStack(FI), 0);
+ Arg = SpillSlot;
+ break;
+ }
}
if (VA.isRegLoc()) {
- if (Is64Bit) {
- MVT RegVT = VA.getLocVT();
- if (RegVT.isVector() && RegVT.getSizeInBits() == 64)
- switch (VA.getLocReg()) {
- default:
- break;
- case X86::RDI: case X86::RSI: case X86::RDX: case X86::RCX:
- case X86::R8: {
- // Special case: passing MMX values in GPR registers.
- Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
- break;
- }
- case X86::XMM0: case X86::XMM1: case X86::XMM2: case X86::XMM3:
- case X86::XMM4: case X86::XMM5: case X86::XMM6: case X86::XMM7: {
- // Special case: passing MMX values in XMM registers.
- Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
- Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
- Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
- break;
- }
- }
- }
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else {
- if (!IsTailCall || (IsTailCall && isByVal)) {
+ if (!isTailCall || (isTailCall && isByVal)) {
assert(VA.isMemLoc());
if (StackPtr.getNode() == 0)
StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy());
- MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA,
- Chain, Arg, Flags));
+ MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
+ dl, DAG, VA, Flags));
}
}
}
@@ -1794,37 +1796,41 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
SDValue InFlag;
// Tail call byval lowering might overwrite argument registers so in case of
// tail call optimization the copies to registers are lowered later.
- if (!IsTailCall)
+ if (!isTailCall)
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
- // ELF / PIC requires GOT in the EBX register before function calls via PLT
- // GOT pointer.
- if (CallRequiresGOTPtrInReg(Is64Bit, IsTailCall)) {
- Chain = DAG.getCopyToReg(Chain, dl, X86::EBX,
- DAG.getNode(X86ISD::GlobalBaseReg,
- DebugLoc::getUnknownLoc(),
- getPointerTy()),
- InFlag);
- InFlag = Chain.getValue(1);
- }
- // If we are tail calling and generating PIC/GOT style code load the address
- // of the callee into ecx. The value in ecx is used as target of the tail
- // jump. This is done to circumvent the ebx/callee-saved problem for tail
- // calls on PIC/GOT architectures. Normally we would just put the address of
- // GOT into ebx and then call target@PLT. But for tail callss ebx would be
- // restored (since ebx is callee saved) before jumping to the target@PLT.
- if (CallRequiresFnAddressInReg(Is64Bit, IsTailCall)) {
- // Note: The actual moving to ecx is done further down.
- GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
- if (G && !G->getGlobal()->hasHiddenVisibility() &&
- !G->getGlobal()->hasProtectedVisibility())
- Callee = LowerGlobalAddress(Callee, DAG);
- else if (isa<ExternalSymbolSDNode>(Callee))
- Callee = LowerExternalSymbol(Callee,DAG);
+
+ if (Subtarget->isPICStyleGOT()) {
+ // ELF / PIC requires GOT in the EBX register before function calls via PLT
+ // GOT pointer.
+ if (!isTailCall) {
+ Chain = DAG.getCopyToReg(Chain, dl, X86::EBX,
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc::getUnknownLoc(),
+ getPointerTy()),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ } else {
+ // If we are tail calling and generating PIC/GOT style code, load the
+ // address of the callee into ECX. The value in ECX is used as target of
+ // the tail jump. This is done to circumvent the ebx/callee-saved problem
+ // for tail calls on PIC/GOT architectures. Normally we would just put the
+ // address of GOT into ebx and then call target@PLT. But for tail calls
+ // ebx would be restored (since ebx is callee saved) before jumping to the
+ // target@PLT.
+
+ // Note: The actual moving to ECX is done further down.
+ GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+ if (G && !G->getGlobal()->hasHiddenVisibility() &&
+ !G->getGlobal()->hasProtectedVisibility())
+ Callee = LowerGlobalAddress(Callee, DAG);
+ else if (isa<ExternalSymbolSDNode>(Callee))
+ Callee = LowerExternalSymbol(Callee, DAG);
+ }
}
if (Is64Bit && isVarArg) {
@@ -1853,7 +1859,15 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
// For tail calls lower the arguments to the 'real' stack slot.
- if (IsTailCall) {
+ if (isTailCall) {
+ // Force all the incoming stack arguments to be loaded from the stack
+ // before any new outgoing arguments are stored to the stack, because the
+ // outgoing stack slots may alias the incoming argument stack slots, and
+ // the alias isn't otherwise explicit. This is slightly more conservative
+ // than necessary, because it means that each store effectively depends
+ // on every argument instead of just those arguments it would clobber.
+ SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
+
SmallVector<SDValue, 8> MemOpChains2;
SDValue FIN;
int FI = 0;
@@ -1863,8 +1877,8 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
CCValAssign &VA = ArgLocs[i];
if (!VA.isRegLoc()) {
assert(VA.isMemLoc());
- SDValue Arg = TheCall->getArg(i);
- ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
+ SDValue Arg = Outs[i].Val;
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
// Create frame index.
int32_t Offset = VA.getLocMemOffset()+FPDiff;
uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
@@ -1879,12 +1893,13 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
getPointerTy());
Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
- MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, Chain,
+ MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
+ ArgChain,
Flags, DAG, dl));
} else {
// Store relative to framepointer.
MemOpChains2.push_back(
- DAG.getStore(Chain, dl, Arg, FIN,
+ DAG.getStore(ArgChain, dl, Arg, FIN,
PseudoSourceValue::getFixedStack(FI), 0));
}
}
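The getStackArgumentTokenFactor comment above describes a real hazard: for a tail call, the outgoing argument area is the caller's own incoming argument area. A small example of the pattern that would break without that ordering (assuming both arguments are passed on the stack):

    int g(int a, int b);
    // Lowered as a tail call, f stores b into the slot where a arrived
    // and vice versa, so every store must wait until both incoming
    // values have been loaded.
    int f(int a, int b) { return g(b, a); }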
@@ -1912,13 +1927,49 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
// We should use extra load for direct calls to dllimported functions in
// non-JIT mode.
- if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
- getTargetMachine(), true))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy(),
- G->getOffset());
+ GlobalValue *GV = G->getGlobal();
+ if (!GV->hasDLLImportLinkage()) {
+ unsigned char OpFlags = 0;
+
+ // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
+ // external symbols must go through the PLT in PIC mode. If the symbol
+ // has hidden or protected visibility, or if it is static or local, then
+ // we don't need to use the PLT; we can call it directly.
+ if (Subtarget->isTargetELF() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
+ OpFlags = X86II::MO_PLT;
+ } else if (Subtarget->isPICStyleStubAny() &&
+ (GV->isDeclaration() || GV->isWeakForLinker()) &&
+ Subtarget->getDarwinVers() < 9) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = X86II::MO_DARWIN_STUB;
+ }
+
+ Callee = DAG.getTargetGlobalAddress(GV, getPointerTy(),
+ G->getOffset(), OpFlags);
+ }
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
- Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
- } else if (IsTailCall) {
+ unsigned char OpFlags = 0;
+
+ // On ELF targets, in either X86-64 or X86-32 mode, direct calls to external
+ // symbols should go through the PLT.
+ if (Subtarget->isTargetELF() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ OpFlags = X86II::MO_PLT;
+ } else if (Subtarget->isPICStyleStubAny() &&
+ Subtarget->getDarwinVers() < 9) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = X86II::MO_DARWIN_STUB;
+ }
+
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
+ OpFlags);
+ } else if (isTailCall) {
unsigned Opc = Is64Bit ? X86::R11 : X86::EAX;
Chain = DAG.getCopyToReg(Chain, dl,
@@ -1926,27 +1977,23 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
Callee,InFlag);
Callee = DAG.getRegister(Opc, getPointerTy());
// Add register as live out.
- DAG.getMachineFunction().getRegInfo().addLiveOut(Opc);
+ MF.getRegInfo().addLiveOut(Opc);
}
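The OpFlags chosen above only change how the callee symbol is emitted and relocated. A hypothetical standalone mirror of the selection, following the rules stated in the comments (function and parameters assumed):

    #include <string>

    // ELF PIC external calls go through the PLT; pre-Leopard Darwin
    // references go through a lazily bound $stub; otherwise call direct.
    std::string callTargetFor(const std::string &Sym,
                              bool isELFPIC, bool isOldDarwinStub) {
      if (isELFPIC)
        return Sym + "@PLT";          // X86II::MO_PLT
      if (isOldDarwinStub)
        return "L_" + Sym + "$stub";  // X86II::MO_DARWIN_STUB
      return Sym;                     // no extra flag
    }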
// Returns a chain & a flag for retval copy to use.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
SmallVector<SDValue, 8> Ops;
- if (IsTailCall) {
+ if (isTailCall) {
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
DAG.getIntPtrConstant(0, true), InFlag);
InFlag = Chain.getValue(1);
-
- // Returns a chain & a flag for retval copy to use.
- NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
- Ops.clear();
}
Ops.push_back(Chain);
Ops.push_back(Callee);
- if (IsTailCall)
+ if (isTailCall)
Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
// Add argument registers to the end of the list so that they are known live
@@ -1956,9 +2003,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
RegsToPass[i].second.getValueType()));
// Add an implicit use GOT pointer in EBX.
- if (!IsTailCall && !Is64Bit &&
- getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
- Subtarget->isPICStyleGOT())
+ if (!isTailCall && Subtarget->isPICStyleGOT())
Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
// Add an implicit use of AL for x86 vararg functions.
@@ -1968,13 +2013,28 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
if (InFlag.getNode())
Ops.push_back(InFlag);
- if (IsTailCall) {
- assert(InFlag.getNode() &&
- "Flag must be set. Depend on flag being set in LowerRET");
- Chain = DAG.getNode(X86ISD::TAILCALL, dl,
- TheCall->getVTList(), &Ops[0], Ops.size());
+ if (isTailCall) {
+ // If this is the first return lowered for this function, add the regs
+ // to the liveout set for the function.
+ if (MF.getRegInfo().liveout_empty()) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
+ *DAG.getContext());
+ CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ MF.getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ assert(((Callee.getOpcode() == ISD::Register &&
+ (cast<RegisterSDNode>(Callee)->getReg() == X86::EAX ||
+ (cast<RegisterSDNode>(Callee)->getReg() == X86::R11)) ||
+ Callee.getOpcode() == ISD::TargetExternalSymbol ||
+ Callee.getOpcode() == ISD::TargetGlobalAddress) &&
+           "Expecting a global address, external symbol, or register");
- return SDValue(Chain.getNode(), Op.getResNo());
+ return DAG.getNode(X86ISD::TC_RETURN, dl,
+ NodeTys, &Ops[0], Ops.size());
}
Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
@@ -1982,9 +2042,9 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
// Create the CALLSEQ_END node.
unsigned NumBytesForCalleeToPush;
- if (IsCalleePop(isVarArg, CC))
+ if (IsCalleePop(isVarArg, CallConv))
NumBytesForCalleeToPush = NumBytes; // Callee pops everything
- else if (!Is64Bit && CC != CallingConv::Fast && IsStructRet)
+ else if (!Is64Bit && CallConv != CallingConv::Fast && IsStructRet)
    // If this is a call to a struct-return function, the callee
// pops the hidden struct pointer, so we have to push it back.
// This is common for Darwin/X86, Linux & Mingw32 targets.
@@ -2002,8 +2062,8 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
// Handle result values, copying them out of physregs into vregs that we
// return.
- return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG),
- Op.getResNo());
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
}
@@ -2060,36 +2120,18 @@ unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
return Offset;
}
-/// IsEligibleForTailCallElimination - Check to see whether the next instruction
-/// following the call is a return. A function is eligible if caller/callee
-/// calling conventions match, currently only fastcc supports tail calls, and
-/// the function CALL is immediatly followed by a RET.
-bool X86TargetLowering::IsEligibleForTailCallOptimization(CallSDNode *TheCall,
- SDValue Ret,
- SelectionDAG& DAG) const {
- if (!PerformTailCallOpt)
- return false;
-
- if (CheckTailCallReturnConstraints(TheCall, Ret)) {
- MachineFunction &MF = DAG.getMachineFunction();
- unsigned CallerCC = MF.getFunction()->getCallingConv();
- unsigned CalleeCC= TheCall->getCallingConv();
- if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
- SDValue Callee = TheCall->getCallee();
- // On x86/32Bit PIC/GOT tail calls are supported.
- if (getTargetMachine().getRelocationModel() != Reloc::PIC_ ||
- !Subtarget->isPICStyleGOT()|| !Subtarget->is64Bit())
- return true;
-
- // Can only do local tail calls (in same module, hidden or protected) on
- // x86_64 PIC/GOT at the moment.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- return G->getGlobal()->hasHiddenVisibility()
- || G->getGlobal()->hasProtectedVisibility();
- }
- }
-
- return false;
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool
+X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
+ return CalleeCC == CallingConv::Fast && CallerCC == CalleeCC;
}
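After this rewrite, eligibility is a pure calling-convention check: the old return-instruction and PIC/GOT constraints are gone, and a call qualifies exactly when both caller and callee use fastcc. As a compile-time sketch (the enumerators are placeholders, not the real CallingConv IDs):

enum class CC { C, Fast, Cold };

// Mirrors the predicate above: eligible only when caller and callee
// both use the fast calling convention.
constexpr bool eligibleForTailCall(CC callerCC, CC calleeCC) {
  return calleeCC == CC::Fast && callerCC == calleeCC;
}

static_assert(eligibleForTailCall(CC::Fast, CC::Fast), "fastcc pairs qualify");
static_assert(!eligibleForTailCall(CC::C, CC::Fast), "mixed conventions do not");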
FastISel *
@@ -2133,6 +2175,36 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
}
+bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
+ bool hasSymbolicDisplacement) {
+  // The offset should fit into a 32-bit immediate field.
+ if (!isInt32(Offset))
+ return false;
+
+  // If we don't have a symbolic displacement, we don't have any extra
+  // restrictions.
+ if (!hasSymbolicDisplacement)
+ return true;
+
+ // FIXME: Some tweaks might be needed for medium code model.
+ if (M != CodeModel::Small && M != CodeModel::Kernel)
+ return false;
+
+  // For the small code model we assume that the last object lies 16MB before
+  // the end of the 31-bit address boundary. We may also accept fairly large
+  // negative constants, knowing that all objects sit in the positive half of
+  // the address space.
+ if (M == CodeModel::Small && Offset < 16*1024*1024)
+ return true;
+
+  // For the kernel code model we know that all objects reside in the negative
+  // half of the 32-bit address space, so we reject negative offsets, which
+  // could wrap past the boundary, but accept fairly large positive ones.
+ if (M == CodeModel::Kernel && Offset > 0)
+ return true;
+
+ return false;
+}
+
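A few boundary values make these rules concrete. The following standalone restatement of the predicate (function and enum names are illustrative; isInt32 is expanded by hand) can be compiled and probed directly:

#include <cassert>
#include <cstdint>

enum class Model { Small, Kernel, Large };

// Standalone restatement of the offset check above.
bool offsetOK(int64_t Off, Model M, bool hasSymbolicDisplacement) {
  if (Off < INT32_MIN || Off > INT32_MAX)
    return false;                    // must fit a 32-bit immediate
  if (!hasSymbolicDisplacement)
    return true;                     // no extra restrictions
  if (M == Model::Small)
    return Off < 16 * 1024 * 1024;   // stay 16MB below the 2^31 boundary
  if (M == Model::Kernel)
    return Off > 0;                  // kernel space: positive offsets only
  return false;                      // other code models: reject
}

int main() {
  assert(offsetOK(1 << 20, Model::Small, true));           // 1MB: fine
  assert(!offsetOK(32 * 1024 * 1024, Model::Small, true)); // past the guard
  assert(offsetOK(-4096, Model::Small, true));             // negatives allowed
  assert(!offsetOK(-4096, Model::Kernel, true));           // but not in kernel
}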
/// TranslateX86CC - do a one to one translation of a ISD::CondCode to the X86
/// specific condition code, returning the condition code and the LHS/RHS of the
/// comparison to make.
@@ -2155,7 +2227,7 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
}
switch (SetCCOpcode) {
- default: assert(0 && "Invalid integer condition!");
+ default: llvm_unreachable("Invalid integer condition!");
case ISD::SETEQ: return X86::COND_E;
case ISD::SETGT: return X86::COND_G;
case ISD::SETGE: return X86::COND_GE;
@@ -2195,7 +2267,7 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
// 1 | 0 | 0 | X == Y
// 1 | 1 | 1 | unordered
switch (SetCCOpcode) {
- default: assert(0 && "Condcode should be pre-legalized away");
+ default: llvm_unreachable("Condcode should be pre-legalized away");
case ISD::SETUEQ:
case ISD::SETEQ: return X86::COND_E;
case ISD::SETOLT: // flipped
@@ -2253,7 +2325,7 @@ static bool isUndefOrEqual(int Val, int CmpVal) {
/// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference
/// the second operand.
-static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, EVT VT) {
if (VT == MVT::v4f32 || VT == MVT::v4i32 || VT == MVT::v4i16)
return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
if (VT == MVT::v2f64 || VT == MVT::v2i64)
@@ -2262,68 +2334,68 @@ static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, MVT VT) {
}
bool X86::isPSHUFDMask(ShuffleVectorSDNode *N) {
- SmallVector<int, 8> M;
+ SmallVector<int, 8> M;
N->getMask(M);
return ::isPSHUFDMask(M, N->getValueType(0));
}
/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFHW.
-static bool isPSHUFHWMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+static bool isPSHUFHWMask(const SmallVectorImpl<int> &Mask, EVT VT) {
if (VT != MVT::v8i16)
return false;
-
+
// Lower quadword copied in order or undef.
for (int i = 0; i != 4; ++i)
if (Mask[i] >= 0 && Mask[i] != i)
return false;
-
+
// Upper quadword shuffled.
for (int i = 4; i != 8; ++i)
if (Mask[i] >= 0 && (Mask[i] < 4 || Mask[i] > 7))
return false;
-
+
return true;
}
bool X86::isPSHUFHWMask(ShuffleVectorSDNode *N) {
- SmallVector<int, 8> M;
+ SmallVector<int, 8> M;
N->getMask(M);
return ::isPSHUFHWMask(M, N->getValueType(0));
}
/// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFLW.
-static bool isPSHUFLWMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+static bool isPSHUFLWMask(const SmallVectorImpl<int> &Mask, EVT VT) {
if (VT != MVT::v8i16)
return false;
-
+
// Upper quadword copied in order.
for (int i = 4; i != 8; ++i)
if (Mask[i] >= 0 && Mask[i] != i)
return false;
-
+
// Lower quadword shuffled.
for (int i = 0; i != 4; ++i)
if (Mask[i] >= 4)
return false;
-
+
return true;
}
bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) {
- SmallVector<int, 8> M;
+ SmallVector<int, 8> M;
N->getMask(M);
return ::isPSHUFLWMask(M, N->getValueType(0));
}
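For concreteness, here are freestanding copies of the two v8i16 predicates together with a few sample masks, using -1 for undef lanes as the in-tree mask representation does. This is a sketch for experimentation, not the in-tree code:

#include <cassert>
#include <vector>

// PSHUFHW: low quadword in order (or undef), high quadword permuted in place.
bool isPSHUFHW(const std::vector<int> &M) {
  for (int i = 0; i != 4; ++i)
    if (M[i] >= 0 && M[i] != i) return false;
  for (int i = 4; i != 8; ++i)
    if (M[i] >= 0 && (M[i] < 4 || M[i] > 7)) return false;
  return true;
}

// PSHUFLW: high quadword in order (or undef), low quadword permuted in place.
bool isPSHUFLW(const std::vector<int> &M) {
  for (int i = 4; i != 8; ++i)
    if (M[i] >= 0 && M[i] != i) return false;
  for (int i = 0; i != 4; ++i)
    if (M[i] >= 4) return false;
  return true;
}

int main() {
  assert(isPSHUFHW({0, 1, 2, 3, 7, 6, 5, 4}));  // reverses the high half
  assert(!isPSHUFHW({1, 0, 2, 3, 4, 5, 6, 7})); // touches the low half
  assert(isPSHUFLW({3, 2, 1, 0, 4, 5, 6, 7}));  // reverses the low half
  assert(!isPSHUFLW({4, 5, 2, 3, 4, 5, 6, 7})); // pulls high words downward
}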
/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to SHUFP*.
-static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
int NumElems = VT.getVectorNumElements();
if (NumElems != 2 && NumElems != 4)
return false;
-
+
int Half = NumElems / 2;
for (int i = 0; i < Half; ++i)
if (!isUndefOrInRange(Mask[i], 0, NumElems))
@@ -2331,7 +2403,7 @@ static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, MVT VT) {
for (int i = Half; i < NumElems; ++i)
if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
return false;
-
+
return true;
}
@@ -2345,12 +2417,12 @@ bool X86::isSHUFPMask(ShuffleVectorSDNode *N) {
/// the reverse of what x86 shuffles want. x86 shuffles require the lower
/// half elements to come from vector 1 (which would equal the dest.) and
/// the upper half to come from vector 2.
-static bool isCommutedSHUFPMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+static bool isCommutedSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
int NumElems = VT.getVectorNumElements();
-
- if (NumElems != 2 && NumElems != 4)
+
+ if (NumElems != 2 && NumElems != 4)
return false;
-
+
int Half = NumElems / 2;
for (int i = 0; i < Half; ++i)
if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
@@ -2424,24 +2496,24 @@ bool X86::isMOVHPMask(ShuffleVectorSDNode *N) {
/// <2, 3, 2, 3>
bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
unsigned NumElems = N->getValueType(0).getVectorNumElements();
-
+
if (NumElems != 4)
return false;
-
- return isUndefOrEqual(N->getMaskElt(0), 2) &&
+
+ return isUndefOrEqual(N->getMaskElt(0), 2) &&
isUndefOrEqual(N->getMaskElt(1), 3) &&
- isUndefOrEqual(N->getMaskElt(2), 2) &&
+ isUndefOrEqual(N->getMaskElt(2), 2) &&
isUndefOrEqual(N->getMaskElt(3), 3);
}
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
-static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, MVT VT,
+static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
bool V2IsSplat = false) {
int NumElts = VT.getVectorNumElements();
if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
return false;
-
+
for (int i = 0, j = 0; i != NumElts; i += 2, ++j) {
int BitI = Mask[i];
int BitI1 = Mask[i+1];
@@ -2466,12 +2538,12 @@ bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) {
/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
-static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, MVT VT,
+static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT,
bool V2IsSplat = false) {
int NumElts = VT.getVectorNumElements();
if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
return false;
-
+
for (int i = 0, j = 0; i != NumElts; i += 2, ++j) {
int BitI = Mask[i];
int BitI1 = Mask[i+1];
@@ -2497,11 +2569,11 @@ bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat) {
/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
/// <0, 0, 1, 1>
-static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, MVT VT) {
+static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
int NumElems = VT.getVectorNumElements();
if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
return false;
-
+
for (int i = 0, j = 0; i != NumElems; i += 2, ++j) {
int BitI = Mask[i];
int BitI1 = Mask[i+1];
@@ -2522,11 +2594,11 @@ bool X86::isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N) {
/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
/// <2, 2, 3, 3>
-static bool isUNPCKH_v_undef_Mask(const SmallVectorImpl<int> &Mask, MVT VT) {
+static bool isUNPCKH_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
int NumElems = VT.getVectorNumElements();
if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
return false;
-
+
for (int i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
int BitI = Mask[i];
int BitI1 = Mask[i+1];
@@ -2547,19 +2619,19 @@ bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) {
/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element.
-static bool isMOVLMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+static bool isMOVLMask(const SmallVectorImpl<int> &Mask, EVT VT) {
if (VT.getVectorElementType().getSizeInBits() < 32)
return false;
int NumElts = VT.getVectorNumElements();
-
+
if (!isUndefOrEqual(Mask[0], NumElts))
return false;
-
+
for (int i = 1; i < NumElts; ++i)
if (!isUndefOrEqual(Mask[i], i))
return false;
-
+
return true;
}
@@ -2572,21 +2644,21 @@ bool X86::isMOVLMask(ShuffleVectorSDNode *N) {
/// isCommutedMOVL - Returns true if the shuffle mask is the reverse of what
/// x86 movss wants: the lowest element must come from the lowest element of
/// vector 2, and the other elements must come from vector 1 in order.
-static bool isCommutedMOVLMask(const SmallVectorImpl<int> &Mask, MVT VT,
+static bool isCommutedMOVLMask(const SmallVectorImpl<int> &Mask, EVT VT,
bool V2IsSplat = false, bool V2IsUndef = false) {
int NumOps = VT.getVectorNumElements();
if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
return false;
-
+
if (!isUndefOrEqual(Mask[0], 0))
return false;
-
+
for (int i = 1; i < NumOps; ++i)
if (!(isUndefOrEqual(Mask[i], i+NumOps) ||
(V2IsUndef && isUndefOrInRange(Mask[i], NumOps, NumOps*2)) ||
(V2IsSplat && isUndefOrEqual(Mask[i], NumOps))))
return false;
-
+
return true;
}
@@ -2650,7 +2722,7 @@ bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N) {
/// specifies a shuffle of elements that is suitable for input to MOVDDUP.
bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) {
int e = N->getValueType(0).getVectorNumElements() / 2;
-
+
for (int i = 0; i < e; ++i)
if (!isUndefOrEqual(N->getMaskElt(i), i))
return false;
@@ -2714,14 +2786,23 @@ unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
return Mask;
}
+/// isZeroNode - Returns true if Elt is a constant zero or a floating point
+/// constant +0.0.
+bool X86::isZeroNode(SDValue Elt) {
+ return ((isa<ConstantSDNode>(Elt) &&
+ cast<ConstantSDNode>(Elt)->getZExtValue() == 0) ||
+ (isa<ConstantFPSDNode>(Elt) &&
+ cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
+}
+
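One subtlety worth noting: the floating-point arm of isZeroNode accepts only +0.0, because -0.0 has its sign bit set and is therefore not an all-zero bit pattern, even though the two values compare equal:

#include <cassert>
#include <cmath>

int main() {
  assert(std::signbit(-0.0) && !std::signbit(+0.0)); // distinct bit patterns
  assert(-0.0 == +0.0);                              // yet they compare equal
}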
/// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in
/// their permute mask.
static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
SelectionDAG &DAG) {
- MVT VT = SVOp->getValueType(0);
+ EVT VT = SVOp->getValueType(0);
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 8> MaskVec;
-
+
for (unsigned i = 0; i != NumElems; ++i) {
int idx = SVOp->getMaskElt(i);
if (idx < 0)
@@ -2737,7 +2818,7 @@ static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
/// the two vector operands have swapped position.
-static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, MVT VT) {
+static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) {
unsigned NumElems = VT.getVectorNumElements();
for (unsigned i = 0; i != NumElems; ++i) {
int idx = Mask[i];
@@ -2795,7 +2876,7 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
return false;
unsigned NumElems = Op->getValueType(0).getVectorNumElements();
-
+
if (NumElems != 2 && NumElems != 4)
return false;
for (unsigned i = 0, e = NumElems/2; i != e; ++i)
@@ -2820,17 +2901,8 @@ static bool isSplatVector(SDNode *N) {
return true;
}
-/// isZeroNode - Returns true if Elt is a constant zero or a floating point
-/// constant +0.0.
-static inline bool isZeroNode(SDValue Elt) {
- return ((isa<ConstantSDNode>(Elt) &&
- cast<ConstantSDNode>(Elt)->getZExtValue() == 0) ||
- (isa<ConstantFPSDNode>(Elt) &&
- cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
-}
-
/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
-/// to an zero vector.
+/// to a zero vector.
/// FIXME: move to dag combiner / method on ShuffleVectorSDNode
static bool isZeroShuffle(ShuffleVectorSDNode *N) {
SDValue V1 = N->getOperand(0);
@@ -2842,13 +2914,15 @@ static bool isZeroShuffle(ShuffleVectorSDNode *N) {
unsigned Opc = V2.getOpcode();
if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode()))
continue;
- if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V2.getOperand(Idx-NumElems)))
+ if (Opc != ISD::BUILD_VECTOR ||
+ !X86::isZeroNode(V2.getOperand(Idx-NumElems)))
return false;
} else if (Idx >= 0) {
unsigned Opc = V1.getOpcode();
if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode()))
continue;
- if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V1.getOperand(Idx)))
+ if (Opc != ISD::BUILD_VECTOR ||
+ !X86::isZeroNode(V1.getOperand(Idx)))
return false;
}
}
@@ -2857,7 +2931,7 @@ static bool isZeroShuffle(ShuffleVectorSDNode *N) {
/// getZeroVector - Returns a vector of specified type with all zero elements.
///
-static SDValue getZeroVector(MVT VT, bool HasSSE2, SelectionDAG &DAG,
+static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
@@ -2879,7 +2953,7 @@ static SDValue getZeroVector(MVT VT, bool HasSSE2, SelectionDAG &DAG,
/// getOnesVector - Returns a vector of specified type with all bits set.
///
-static SDValue getOnesVector(MVT VT, SelectionDAG &DAG, DebugLoc dl) {
+static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
// Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest
@@ -2897,13 +2971,13 @@ static SDValue getOnesVector(MVT VT, SelectionDAG &DAG, DebugLoc dl) {
/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
/// that point to V2 points to its first element.
static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
- MVT VT = SVOp->getValueType(0);
+ EVT VT = SVOp->getValueType(0);
unsigned NumElems = VT.getVectorNumElements();
-
+
bool Changed = false;
SmallVector<int, 8> MaskVec;
SVOp->getMask(MaskVec);
-
+
for (unsigned i = 0; i != NumElems; ++i) {
if (MaskVec[i] > (int)NumElems) {
MaskVec[i] = NumElems;
@@ -2918,7 +2992,7 @@ static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd
/// operation of specified width.
-static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1,
+static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
SDValue V2) {
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 8> Mask;
@@ -2929,7 +3003,7 @@ static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1,
}
/// getUnpackl - Returns a vector_shuffle node for an unpackl operation.
-static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1,
+static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
SDValue V2) {
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 8> Mask;
@@ -2941,7 +3015,7 @@ static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1,
}
/// getUnpackhMask - Returns a vector_shuffle node for an unpackh operation.
-static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1,
+static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
SDValue V2) {
unsigned NumElems = VT.getVectorNumElements();
unsigned Half = NumElems/2;
@@ -2954,13 +3028,13 @@ static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1,
}
/// PromoteSplat - Promote a splat of v4f32, v8i16 or v16i8 to v4i32.
-static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG,
+static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG,
bool HasSSE2) {
if (SV->getValueType(0).getVectorNumElements() <= 4)
return SDValue(SV, 0);
-
- MVT PVT = MVT::v4f32;
- MVT VT = SV->getValueType(0);
+
+ EVT PVT = MVT::v4f32;
+ EVT VT = SV->getValueType(0);
DebugLoc dl = SV->getDebugLoc();
SDValue V1 = SV->getOperand(0);
int NumElems = VT.getVectorNumElements();
@@ -2976,7 +3050,7 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG,
}
NumElems >>= 1;
}
-
+
// Perform the splat.
int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1);
@@ -2991,7 +3065,7 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG,
static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
bool isZero, bool HasSSE2,
SelectionDAG &DAG) {
- MVT VT = V2.getValueType();
+ EVT VT = V2.getValueType();
SDValue V1 = isZero
? getZeroVector(VT, HasSSE2, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT);
unsigned NumElems = VT.getVectorNumElements();
@@ -3016,7 +3090,7 @@ unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, int NumElems,
continue;
}
SDValue Elt = DAG.getShuffleScalarElt(SVOp, Index);
- if (Elt.getNode() && isZeroNode(Elt))
+ if (Elt.getNode() && X86::isZeroNode(Elt))
++NumZeros;
else
break;
@@ -3142,11 +3216,11 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
/// getVShift - Return a vector logical shift node.
///
-static SDValue getVShift(bool isLeft, MVT VT, SDValue SrcOp,
+static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
unsigned NumBits, SelectionDAG &DAG,
const TargetLowering &TLI, DebugLoc dl) {
bool isMMX = VT.getSizeInBits() == 64;
- MVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64;
+ EVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64;
unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL;
SrcOp = DAG.getNode(ISD::BIT_CONVERT, dl, ShVT, SrcOp);
return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
@@ -3171,9 +3245,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG, dl);
}
- MVT VT = Op.getValueType();
- MVT EVT = VT.getVectorElementType();
- unsigned EVTBits = EVT.getSizeInBits();
+ EVT VT = Op.getValueType();
+ EVT ExtVT = VT.getVectorElementType();
+ unsigned EVTBits = ExtVT.getSizeInBits();
unsigned NumElems = Op.getNumOperands();
unsigned NumZero = 0;
@@ -3189,7 +3263,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
if (Elt.getOpcode() != ISD::Constant &&
Elt.getOpcode() != ISD::ConstantFP)
IsAllConstants = false;
- if (isZeroNode(Elt))
+ if (X86::isZeroNode(Elt))
NumZero++;
else {
NonZeros |= (1 << i);
@@ -3212,11 +3286,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
// insertion that way. Only do this if the value is non-constant or if the
// value is a constant being inserted into element 0. It is cheaper to do
// a constant pool load than it is to do a movd + shuffle.
- if (EVT == MVT::i64 && !Subtarget->is64Bit() &&
+ if (ExtVT == MVT::i64 && !Subtarget->is64Bit() &&
(!IsAllConstants || Idx == 0)) {
if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
// Handle MMX and SSE both.
- MVT VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32;
+ EVT VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32;
unsigned VecElts = VT == MVT::v2i64 ? 4 : 2;
// Truncate the value (which may itself be a constant) to i32, and
@@ -3234,7 +3308,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
for (unsigned i = 1; i != VecElts; ++i)
Mask.push_back(i);
Item = DAG.getVectorShuffle(VecVT, dl, Item,
- DAG.getUNDEF(Item.getValueType()),
+ DAG.getUNDEF(Item.getValueType()),
&Mask[0]);
}
return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Item);
@@ -3248,15 +3322,15 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
if (Idx == 0) {
if (NumZero == 0) {
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
- } else if (EVT == MVT::i32 || EVT == MVT::f32 || EVT == MVT::f64 ||
- (EVT == MVT::i64 && Subtarget->is64Bit())) {
+ } else if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
+ (ExtVT == MVT::i64 && Subtarget->is64Bit())) {
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget->hasSSE2(),
DAG);
- } else if (EVT == MVT::i16 || EVT == MVT::i8) {
+ } else if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
- MVT MiddleVT = VT.getSizeInBits() == 64 ? MVT::v2i32 : MVT::v4i32;
+ EVT MiddleVT = VT.getSizeInBits() == 64 ? MVT::v2i32 : MVT::v4i32;
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item);
Item = getShuffleVectorZeroOrUndef(Item, 0, true,
Subtarget->hasSSE2(), DAG);
@@ -3266,7 +3340,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
// Is it a vector logical left shift?
if (NumElems == 2 && Idx == 1 &&
- isZeroNode(Op.getOperand(0)) && !isZeroNode(Op.getOperand(1))) {
+ X86::isZeroNode(Op.getOperand(0)) &&
+ !X86::isZeroNode(Op.getOperand(1))) {
unsigned NumBits = VT.getSizeInBits();
return getVShift(true, VT,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
@@ -3374,9 +3449,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  // If we have SSE 4.1, expand into a number of inserts unless the number of
// values to be inserted is equal to the number of elements, in which case
// use the unpack code below in the hopes of matching the consecutive elts
- // load merge pattern for shuffles.
+ // load merge pattern for shuffles.
// FIXME: We could probably just check that here directly.
- if (Values.size() < NumElems && VT.getSizeInBits() == 128 &&
+ if (Values.size() < NumElems && VT.getSizeInBits() == 128 &&
getSubtarget()->hasSSE41()) {
V[0] = DAG.getUNDEF(VT);
for (unsigned i = 0; i < NumElems; ++i)
@@ -3457,7 +3532,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
}
// For SSSE3, If all 8 words of the result come from only 1 quadword of each
- // of the two input vectors, shuffle them into one input vector so only a
+ // of the two input vectors, shuffle them into one input vector so only a
  // single pshufb instruction is necessary. If there are more than 2 input
// quads, disable the next transformation since it does not help SSSE3.
bool V1Used = InputQuads[0] || InputQuads[1];
@@ -3481,7 +3556,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
SmallVector<int, 8> MaskV;
MaskV.push_back(BestLoQuad < 0 ? 0 : BestLoQuad);
MaskV.push_back(BestHiQuad < 0 ? 1 : BestHiQuad);
- NewV = DAG.getVectorShuffle(MVT::v2i64, dl,
+ NewV = DAG.getVectorShuffle(MVT::v2i64, dl,
DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V1),
DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V2), &MaskV[0]);
NewV = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, NewV);
@@ -3506,7 +3581,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
int idx = MaskVals[i];
if (idx < 0)
continue;
- idx = MaskVals[i] = (idx / 4) == BestLoQuad ? (idx & 3) : (idx & 3) + 4;
+ idx = MaskVals[i] = (idx / 4) == BestLoQuad ? (idx & 3) : (idx & 3) + 4;
if ((idx != i) && idx < 4)
pshufhw = false;
if ((idx != i) && idx > 3)
@@ -3521,19 +3596,19 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
// If we've eliminated the use of V2, and the new mask is a pshuflw or
// pshufhw, that's as cheap as it gets. Return the new shuffle.
if ((pshufhw && InOrder[0]) || (pshuflw && InOrder[1])) {
- return DAG.getVectorShuffle(MVT::v8i16, dl, NewV,
+ return DAG.getVectorShuffle(MVT::v8i16, dl, NewV,
DAG.getUNDEF(MVT::v8i16), &MaskVals[0]);
}
}
-
+
// If we have SSSE3, and all words of the result are from 1 input vector,
// case 2 is generated, otherwise case 3 is generated. If no SSSE3
// is present, fall back to case 4.
if (TLI.getSubtarget()->hasSSSE3()) {
SmallVector<SDValue,16> pshufbMask;
-
+
// If we have elements from both input vectors, set the high bit of the
- // shuffle mask element to zero out elements that come from V2 in the V1
+ // shuffle mask element to zero out elements that come from V2 in the V1
// mask, and elements that come from V1 in the V2 mask, so that the two
// results can be OR'd together.
bool TwoInputs = V1Used && V2Used;
@@ -3548,12 +3623,12 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
pshufbMask.push_back(DAG.getConstant(EltIdx+1, MVT::i8));
}
V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V1);
- V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
+ V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
DAG.getNode(ISD::BUILD_VECTOR, dl,
MVT::v16i8, &pshufbMask[0], 16));
if (!TwoInputs)
return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1);
-
+
// Calculate the shuffle mask for the second input, shuffle it, and
// OR it with the first shuffled input.
pshufbMask.clear();
@@ -3568,7 +3643,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
pshufbMask.push_back(DAG.getConstant(EltIdx - 15, MVT::i8));
}
V2 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V2);
- V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
+ V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
DAG.getNode(ISD::BUILD_VECTOR, dl,
MVT::v16i8, &pshufbMask[0], 16));
V1 = DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
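The two-input combination works because of PSHUFB's zeroing rule: a mask byte with its high bit set writes zero instead of selecting a source byte, so lanes owned by the other input can be blanked and the two partial results OR'd together. A scalar model of the instruction, for reference:

#include <array>
#include <cstdint>

// Scalar model of PSHUFB on one 16-byte register: a mask byte with bit 7
// set produces 0; otherwise its low 4 bits index into the source.
std::array<uint8_t, 16> pshufb(const std::array<uint8_t, 16> &src,
                               const std::array<uint8_t, 16> &mask) {
  std::array<uint8_t, 16> dst{};
  for (int i = 0; i != 16; ++i)
    dst[i] = (mask[i] & 0x80) ? 0 : src[mask[i] & 0x0F];
  return dst;
}

The lowering above is then pshufb(V1, M1) | pshufb(V2, M2), with each mask holding 0x80 in the lanes that belong to the other vector.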
@@ -3597,7 +3672,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
&MaskV[0]);
}
-
+
// If BestHi >= 0, generate a pshufhw to put the high elements in order,
// and update MaskVals with the new element order.
if (BestHiQuad >= 0) {
@@ -3619,7 +3694,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
&MaskV[0]);
}
-
+
// In case BestHi & BestLo were both -1, which means each quadword has a word
// from each of the four input quadwords, calculate the InOrder bitvector now
// before falling through to the insert/extract cleanup.
@@ -3629,7 +3704,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
if (MaskVals[i] < 0 || MaskVals[i] == i)
InOrder.set(i);
}
-
+
// The other elements are put in the right place using pextrw and pinsrw.
for (unsigned i = 0; i != 8; ++i) {
if (InOrder[i])
@@ -3660,9 +3735,9 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
DebugLoc dl = SVOp->getDebugLoc();
SmallVector<int, 16> MaskVals;
SVOp->getMask(MaskVals);
-
+
// If we have SSSE3, case 1 is generated when all result bytes come from
- // one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is
+ // one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is
// present, fall back to case 3.
  // FIXME: kill V2Only once shuffles are canonicalized by getNode.
bool V1Only = true;
@@ -3676,13 +3751,13 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
else
V1Only = false;
}
-
+
// If SSSE3, use 1 pshufb instruction per vector with elements in the result.
if (TLI.getSubtarget()->hasSSSE3()) {
SmallVector<SDValue,16> pshufbMask;
-
+
// If all result elements are from one input vector, then only translate
- // undef mask values to 0x80 (zero out result) in the pshufb mask.
+ // undef mask values to 0x80 (zero out result) in the pshufb mask.
//
// Otherwise, we have elements from both input vectors, and must zero out
// elements that come from V2 in the first mask, and V1 in the second mask
@@ -3705,7 +3780,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
MVT::v16i8, &pshufbMask[0], 16));
if (!TwoInputs)
return V1;
-
+
// Calculate the shuffle mask for the second input, shuffle it, and
// OR it with the first shuffled input.
pshufbMask.clear();
@@ -3722,7 +3797,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
MVT::v16i8, &pshufbMask[0], 16));
return DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
}
-
+
// No SSSE3 - Calculate in place words and then fix all out of place words
// With 0-16 extracts & inserts. Worst case is 16 bytes out of order from
// the 16 different words that comprise the two doublequadword input vectors.
@@ -3732,17 +3807,17 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
for (int i = 0; i != 8; ++i) {
int Elt0 = MaskVals[i*2];
int Elt1 = MaskVals[i*2+1];
-
+
// This word of the result is all undef, skip it.
if (Elt0 < 0 && Elt1 < 0)
continue;
-
+
// This word of the result is already in the correct place, skip it.
if (V1Only && (Elt0 == i*2) && (Elt1 == i*2+1))
continue;
if (V2Only && (Elt0 == i*2+16) && (Elt1 == i*2+17))
continue;
-
+
SDValue Elt0Src = Elt0 < 16 ? V1 : V2;
SDValue Elt1Src = Elt1 < 16 ? V1 : V2;
SDValue InsElt;
@@ -3801,15 +3876,15 @@ static
SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
SelectionDAG &DAG,
TargetLowering &TLI, DebugLoc dl) {
- MVT VT = SVOp->getValueType(0);
+ EVT VT = SVOp->getValueType(0);
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
unsigned NumElems = VT.getVectorNumElements();
unsigned NewWidth = (NumElems == 4) ? 2 : 4;
- MVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
- MVT MaskEltVT = MaskVT.getVectorElementType();
- MVT NewVT = MaskVT;
- switch (VT.getSimpleVT()) {
+ EVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
+ EVT MaskEltVT = MaskVT.getVectorElementType();
+ EVT NewVT = MaskVT;
+ switch (VT.getSimpleVT().SimpleTy) {
default: assert(false && "Unexpected!");
case MVT::v4f32: NewVT = MVT::v2f64; break;
case MVT::v4i32: NewVT = MVT::v2i64; break;
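The switch above only picks the wider type; the rewrite is valid when every pair of adjacent lanes moves together, e.g. the v4i32 mask <0, 1, 6, 7> is exactly the v2i64 mask <0, 3>. A sketch of that mask conversion (undef lanes omitted for brevity; the in-tree code also handles those):

#include <cassert>
#include <vector>

// Convert a mask over N lanes into one over N/2 lanes, assuming each pair
// of adjacent source lanes moves together; returns false otherwise.
// Assumes an even number of lanes and no undef entries.
bool narrowMask(const std::vector<int> &M, std::vector<int> &Out) {
  Out.clear();
  for (size_t i = 0; i < M.size(); i += 2) {
    if (M[i] % 2 != 0 || M[i + 1] != M[i] + 1)
      return false;                  // the pair is split; can't narrow
    Out.push_back(M[i] / 2);
  }
  return true;
}

int main() {
  std::vector<int> Out;
  assert(narrowMask({0, 1, 6, 7}, Out) && Out == std::vector<int>({0, 3}));
  assert(!narrowMask({0, 2, 4, 6}, Out)); // pairs are split apart
}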
@@ -3849,7 +3924,7 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
/// getVZextMovL - Return a zero-extending vector move low node.
///
-static SDValue getVZextMovL(MVT VT, MVT OpVT,
+static SDValue getVZextMovL(EVT VT, EVT OpVT,
SDValue SrcOp, SelectionDAG &DAG,
const X86Subtarget *Subtarget, DebugLoc dl) {
if (VT == MVT::v2f64 || VT == MVT::v4f32) {
@@ -3859,11 +3934,11 @@ static SDValue getVZextMovL(MVT VT, MVT OpVT,
if (!LD) {
// movssrr and movsdrr do not clear top bits. Try to use movd, movq
// instead.
- MVT EVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32;
- if ((EVT != MVT::i64 || Subtarget->is64Bit()) &&
+ MVT ExtVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32;
+ if ((ExtVT.SimpleTy != MVT::i64 || Subtarget->is64Bit()) &&
SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
SrcOp.getOperand(0).getOpcode() == ISD::BIT_CONVERT &&
- SrcOp.getOperand(0).getOperand(0).getValueType() == EVT) {
+ SrcOp.getOperand(0).getOperand(0).getValueType() == ExtVT) {
// PR2108
OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32;
return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
@@ -3889,8 +3964,8 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
DebugLoc dl = SVOp->getDebugLoc();
- MVT VT = SVOp->getValueType(0);
-
+ EVT VT = SVOp->getValueType(0);
+
SmallVector<std::pair<int, int>, 8> Locs;
Locs.resize(4);
SmallVector<int, 8> Mask1(4U, -1);
@@ -3926,7 +4001,7 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
V1 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
SmallVector<int, 8> Mask2(4U, -1);
-
+
for (unsigned i = 0; i != 4; ++i) {
if (Locs[i].first == -1)
continue;
@@ -4036,7 +4111,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
unsigned NumElems = VT.getVectorNumElements();
bool isMMX = VT.getSizeInBits() == 64;
@@ -4050,7 +4125,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// Promote splats to v4f32.
if (SVOp->isSplat()) {
- if (isMMX || NumElems < 4)
+ if (isMMX || NumElems < 4)
return Op;
return PromoteSplat(SVOp, DAG, Subtarget->hasSSE2());
}
@@ -4079,10 +4154,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
DAG, Subtarget, dl);
}
}
-
+
if (X86::isPSHUFDMask(SVOp))
return Op;
-
+
// Check if this can be converted into a logical shift.
bool isLeft = false;
unsigned ShAmt = 0;
@@ -4092,11 +4167,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if (isShift && ShVal.hasOneUse()) {
// If the shifted value has multiple uses, it may be cheaper to use
// v_set0 + movlhps or movhlps, etc.
- MVT EVT = VT.getVectorElementType();
- ShAmt *= EVT.getSizeInBits();
+ EVT EltVT = VT.getVectorElementType();
+ ShAmt *= EltVT.getSizeInBits();
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
}
-
+
if (X86::isMOVLMask(SVOp)) {
if (V1IsUndef)
return V2;
@@ -4105,7 +4180,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if (!isMMX)
return Op;
}
-
+
// FIXME: fold these into legal mask.
if (!isMMX && (X86::isMOVSHDUPMask(SVOp) ||
X86::isMOVSLDUPMask(SVOp) ||
@@ -4120,11 +4195,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if (isShift) {
// No better options. Use a vshl / vsrl.
- MVT EVT = VT.getVectorElementType();
- ShAmt *= EVT.getSizeInBits();
+ EVT EltVT = VT.getVectorElementType();
+ ShAmt *= EltVT.getSizeInBits();
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
}
-
+
bool Commuted = false;
// FIXME: This should also accept a bitcast of a splat? Be careful, not
// 1,1,1,1 -> v8i16 though.
@@ -4144,7 +4219,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if (isCommutedMOVL(SVOp, V2IsSplat, V2IsUndef)) {
// Shuffling low element of v1 into undef, just return v1.
- if (V2IsUndef)
+ if (V2IsUndef)
return V1;
// If V2 is a splat, the mask may be malformed such as <4,3,3,3>, which
// the instruction selector will not match, so get a canonical MOVL with
@@ -4196,7 +4271,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
SVOp->getMask(PermMask);
if (isShuffleMaskLegal(PermMask, VT))
return Op;
-
+
// Handle v8i16 specifically since SSE can do byte extraction and insertion.
if (VT == MVT::v8i16) {
SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(SVOp, DAG, *this);
@@ -4209,7 +4284,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if (NewOp.getNode())
return NewOp;
}
-
+
// Handle all 4 wide cases with a number of shuffles except for MMX.
if (NumElems == 4 && !isMMX)
return LowerVECTOR_SHUFFLE_4wide(SVOp, DAG);
@@ -4220,7 +4295,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
SDValue
X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
SelectionDAG &DAG) {
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
if (VT.getSizeInBits() == 8) {
SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32,
@@ -4283,7 +4358,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
return Res;
}
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
// TODO: handle v16i8.
if (VT.getSizeInBits() == 16) {
@@ -4296,21 +4371,21 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
MVT::v4i32, Vec),
Op.getOperand(1)));
// Transform it so it match pextrw which produces a 32-bit result.
- MVT EVT = (MVT::SimpleValueType)(VT.getSimpleVT()+1);
- SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, EVT,
+ EVT EltVT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy+1);
+ SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, EltVT,
Op.getOperand(0), Op.getOperand(1));
- SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EVT, Extract,
+ SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EltVT, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
} else if (VT.getSizeInBits() == 32) {
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
if (Idx == 0)
return Op;
-
+
// SHUFPS the element to the lowest double word, then movss.
int Mask[4] = { Idx, -1, -1, -1 };
- MVT VVT = Op.getOperand(0).getValueType();
- SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
+ EVT VVT = Op.getOperand(0).getValueType();
+ SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
DAG.getUNDEF(VVT), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
DAG.getIntPtrConstant(0));
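The { Idx, -1, -1, -1 } mask moves the requested lane into position 0 and marks the other lanes undef; extracting element 0 of the shuffled vector is then free, since the value already sits in the low lane. In scalar terms (a toy 4-lane shuffle, not the DAG API):

#include <array>
#include <cassert>

// Apply a 4-lane shuffle mask; -1 lanes are undef (left zeroed here).
std::array<float, 4> shuffle4(const std::array<float, 4> &v,
                              const std::array<int, 4> &m) {
  std::array<float, 4> r{};
  for (int i = 0; i != 4; ++i)
    if (m[i] >= 0)
      r[i] = v[m[i]];
  return r;
}

int main() {
  std::array<float, 4> v = {10, 20, 30, 40};
  // extract(v, 2) == shuffle(v, {2, -1, -1, -1})[0]
  assert(shuffle4(v, {2, -1, -1, -1})[0] == 30);
}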
@@ -4326,8 +4401,8 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
// Note if the lower 64 bits of the result of the UNPCKHPD is then stored
// to a f64mem, the whole operation is folded into a single MOVHPDmr.
int Mask[2] = { 1, -1 };
- MVT VVT = Op.getOperand(0).getValueType();
- SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
+ EVT VVT = Op.getOperand(0).getValueType();
+ SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
DAG.getUNDEF(VVT), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
DAG.getIntPtrConstant(0));
@@ -4338,18 +4413,18 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
SDValue
X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){
- MVT VT = Op.getValueType();
- MVT EVT = VT.getVectorElementType();
+ EVT VT = Op.getValueType();
+ EVT EltVT = VT.getVectorElementType();
DebugLoc dl = Op.getDebugLoc();
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
SDValue N2 = Op.getOperand(2);
- if ((EVT.getSizeInBits() == 8 || EVT.getSizeInBits() == 16) &&
+ if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) &&
isa<ConstantSDNode>(N2)) {
- unsigned Opc = (EVT.getSizeInBits() == 8) ? X86ISD::PINSRB
- : X86ISD::PINSRW;
+ unsigned Opc = (EltVT.getSizeInBits() == 8) ? X86ISD::PINSRB
+ : X86ISD::PINSRW;
// Transform it so it match pinsr{b,w} which expects a GR32 as its second
// argument.
if (N1.getValueType() != MVT::i32)
@@ -4357,7 +4432,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){
if (N2.getValueType() != MVT::i32)
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
return DAG.getNode(Opc, dl, VT, N0, N1, N2);
- } else if (EVT == MVT::f32 && isa<ConstantSDNode>(N2)) {
+ } else if (EltVT == MVT::f32 && isa<ConstantSDNode>(N2)) {
// Bits [7:6] of the constant are the source select. This will always be
// zero here. The DAG Combiner may combine an extract_elt index into these
// bits. For example (insert (extract, 3), 2) could be matched by putting
@@ -4367,24 +4442,25 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){
// Bits [3:0] of the constant are the zero mask. The DAG Combiner may
// combine either bitwise AND or insert of float 0.0 to set these bits.
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4);
+    // Create this as a scalar-to-vector node.
+ N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
- } else if (EVT == MVT::i32) {
- // InsertPS works with constant index.
- if (isa<ConstantSDNode>(N2))
- return Op;
+ } else if (EltVT == MVT::i32 && isa<ConstantSDNode>(N2)) {
+ // PINSR* works with constant index.
+ return Op;
}
return SDValue();
}
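For reference, the INSERTPS immediate packs three fields: bits [7:6] select the lane read from the source (always 0 here, since N1 is first turned into a scalar-to-vector), bits [5:4] select the destination lane (which is what the << 4 above places), and bits [3:0] are the zero mask. A small helper that builds the byte:

#include <cassert>
#include <cstdint>

// Build an INSERTPS immediate from its three fields.
uint8_t insertpsImm(unsigned countS, unsigned countD, unsigned zmask) {
  return static_cast<uint8_t>((countS << 6) | (countD << 4) | (zmask & 0xF));
}

int main() {
  // Insert into destination lane 2 from source lane 0, no zeroing; the
  // "<< 4" in the lowering above is exactly this countD placement.
  assert(insertpsImm(0, 2, 0) == 0x20);
}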
SDValue
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getValueType();
- MVT EVT = VT.getVectorElementType();
+ EVT VT = Op.getValueType();
+ EVT EltVT = VT.getVectorElementType();
if (Subtarget->hasSSE41())
return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
- if (EVT == MVT::i8)
+ if (EltVT == MVT::i8)
return SDValue();
DebugLoc dl = Op.getDebugLoc();
@@ -4392,7 +4468,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
SDValue N1 = Op.getOperand(1);
SDValue N2 = Op.getOperand(2);
- if (EVT.getSizeInBits() == 16 && isa<ConstantSDNode>(N2)) {
+ if (EltVT.getSizeInBits() == 16 && isa<ConstantSDNode>(N2)) {
// Transform it so it match pinsrw which expects a 16-bit value in a GR32
// as its second argument.
if (N1.getValueType() != MVT::i32)
@@ -4413,9 +4489,12 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32,
Op.getOperand(0))));
+ if (Op.getValueType() == MVT::v1i64 && Op.getOperand(0).getValueType() == MVT::i64)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0));
+
SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
- MVT VT = MVT::v2i32;
- switch (Op.getValueType().getSimpleVT()) {
+ EVT VT = MVT::v2i32;
+ switch (Op.getValueType().getSimpleVT().SimpleTy) {
default: break;
case MVT::v16i8:
case MVT::v8i16:
@@ -4435,21 +4514,21 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
SDValue
X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
-
+
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
// global base reg.
unsigned char OpFlag = 0;
unsigned WrapperKind = X86ISD::Wrapper;
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
- if (Subtarget->isPICStyleStub())
- OpFlag = X86II::MO_PIC_BASE_OFFSET;
- else if (Subtarget->isPICStyleGOT())
- OpFlag = X86II::MO_GOTOFF;
- else if (Subtarget->isPICStyleRIPRel() &&
- getTargetMachine().getCodeModel() == CodeModel::Small)
- WrapperKind = X86ISD::WrapperRIP;
- }
-
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+
+ if (Subtarget->isPICStyleRIPRel() &&
+ (M == CodeModel::Small || M == CodeModel::Kernel))
+ WrapperKind = X86ISD::WrapperRIP;
+ else if (Subtarget->isPICStyleGOT())
+ OpFlag = X86II::MO_GOTOFF;
+ else if (Subtarget->isPICStyleStubPIC())
+ OpFlag = X86II::MO_PIC_BASE_OFFSET;
+
SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(),
CP->getAlignment(),
CP->getOffset(), OpFlag);
@@ -4468,25 +4547,26 @@ X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
-
+
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
// global base reg.
unsigned char OpFlag = 0;
unsigned WrapperKind = X86ISD::Wrapper;
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
- if (Subtarget->isPICStyleStub())
- OpFlag = X86II::MO_PIC_BASE_OFFSET;
- else if (Subtarget->isPICStyleGOT())
- OpFlag = X86II::MO_GOTOFF;
- else if (Subtarget->isPICStyleRIPRel())
- WrapperKind = X86ISD::WrapperRIP;
- }
-
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+
+ if (Subtarget->isPICStyleRIPRel() &&
+ (M == CodeModel::Small || M == CodeModel::Kernel))
+ WrapperKind = X86ISD::WrapperRIP;
+ else if (Subtarget->isPICStyleGOT())
+ OpFlag = X86II::MO_GOTOFF;
+ else if (Subtarget->isPICStyleStubPIC())
+ OpFlag = X86II::MO_PIC_BASE_OFFSET;
+
SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(),
OpFlag);
DebugLoc DL = JT->getDebugLoc();
Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
-
+
// With PIC, the address is actually $g + Offset.
if (OpFlag) {
Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
@@ -4494,43 +4574,44 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
DebugLoc::getUnknownLoc(), getPointerTy()),
Result);
}
-
+
return Result;
}
SDValue
X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) {
const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
-
+
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
// global base reg.
unsigned char OpFlag = 0;
unsigned WrapperKind = X86ISD::Wrapper;
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
- if (Subtarget->isPICStyleStub())
- OpFlag = X86II::MO_PIC_BASE_OFFSET;
- else if (Subtarget->isPICStyleGOT())
- OpFlag = X86II::MO_GOTOFF;
- else if (Subtarget->isPICStyleRIPRel())
- WrapperKind = X86ISD::WrapperRIP;
- }
-
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+
+ if (Subtarget->isPICStyleRIPRel() &&
+ (M == CodeModel::Small || M == CodeModel::Kernel))
+ WrapperKind = X86ISD::WrapperRIP;
+ else if (Subtarget->isPICStyleGOT())
+ OpFlag = X86II::MO_GOTOFF;
+ else if (Subtarget->isPICStyleStubPIC())
+ OpFlag = X86II::MO_PIC_BASE_OFFSET;
+
SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlag);
-
+
DebugLoc DL = Op.getDebugLoc();
Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
-
-
+
+
// With PIC, the address is actually $g + Offset.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
- !Subtarget->isPICStyleRIPRel()) {
+ !Subtarget->is64Bit()) {
Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg,
DebugLoc::getUnknownLoc(),
getPointerTy()),
Result);
}
-
+
return Result;
}
@@ -4538,53 +4619,37 @@ SDValue
X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
int64_t Offset,
SelectionDAG &DAG) const {
- bool IsPic = getTargetMachine().getRelocationModel() == Reloc::PIC_;
- bool ExtraLoadRequired =
- Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false);
-
// Create the TargetGlobalAddress node, folding in the constant
// offset if it is legal.
+ unsigned char OpFlags =
+ Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
+ CodeModel::Model M = getTargetMachine().getCodeModel();
SDValue Result;
- if (!IsPic && !ExtraLoadRequired && isInt32(Offset)) {
+ if (OpFlags == X86II::MO_NO_FLAG &&
+ X86::isOffsetSuitableForCodeModel(Offset, M)) {
+ // A direct static reference to a global.
Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset);
Offset = 0;
} else {
- unsigned char OpFlags = 0;
-
- if (Subtarget->isPICStyleRIPRel() &&
- getTargetMachine().getRelocationModel() != Reloc::Static) {
- if (ExtraLoadRequired)
- OpFlags = X86II::MO_GOTPCREL;
- } else if (Subtarget->isPICStyleGOT() &&
- getTargetMachine().getRelocationModel() == Reloc::PIC_) {
- if (ExtraLoadRequired)
- OpFlags = X86II::MO_GOT;
- else
- OpFlags = X86II::MO_GOTOFF;
- }
-
Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0, OpFlags);
}
-
+
if (Subtarget->isPICStyleRIPRel() &&
- getTargetMachine().getCodeModel() == CodeModel::Small)
+ (M == CodeModel::Small || M == CodeModel::Kernel))
Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result);
else
Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
- if (IsPic && !Subtarget->isPICStyleRIPRel()) {
+ if (isGlobalRelativeToPICBase(OpFlags)) {
Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()),
Result);
}
- // For Darwin & Mingw32, external and weak symbols are indirect, so we want to
- // load the value at address GV, not the value of GV itself. This means that
- // the GlobalAddress must be in the base or index register of the address, not
- // the GV offset field. Platform check is inside GVRequiresExtraLoad() call
- // The same applies for external symbols during PIC codegen
- if (ExtraLoadRequired)
+ // For globals that require a load from a stub to get the address, emit the
+ // load.
+ if (isGlobalStubReference(OpFlags))
Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
PseudoSourceValue::getGOT(), 0);
@@ -4606,7 +4671,7 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) {
static SDValue
GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
- SDValue *InFlag, const MVT PtrVT, unsigned ReturnReg,
+ SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg,
unsigned char OperandFlags) {
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
DebugLoc dl = GA->getDebugLoc();
@@ -4628,7 +4693,7 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit
static SDValue
LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
- const MVT PtrVT) {
+ const EVT PtrVT) {
SDValue InFlag;
DebugLoc dl = GA->getDebugLoc(); // ? function entry point might be better
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
@@ -4643,7 +4708,7 @@ LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit
static SDValue
LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
- const MVT PtrVT) {
+ const EVT PtrVT) {
return GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT,
X86::RAX, X86II::MO_TLSGD);
}
@@ -4651,7 +4716,7 @@ LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
// "local exec" model.
static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
- const MVT PtrVT, TLSModel::Model model,
+ const EVT PtrVT, TLSModel::Model model,
bool is64Bit) {
DebugLoc dl = GA->getDebugLoc();
// Get the Thread Pointer
@@ -4677,7 +4742,7 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
assert(model == TLSModel::InitialExec);
OperandFlags = X86II::MO_INDNTPOFF;
}
-
+
// emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
// exec)
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
@@ -4701,29 +4766,29 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
"TLS not implemented for non-ELF targets");
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GA->getGlobal();
-
+
// If GV is an alias then use the aliasee for determining
// thread-localness.
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
GV = GA->resolveAliasedGlobal(false);
-
+
TLSModel::Model model = getTLSModel(GV,
getTargetMachine().getRelocationModel());
-
+
switch (model) {
case TLSModel::GeneralDynamic:
case TLSModel::LocalDynamic: // not implemented
if (Subtarget->is64Bit())
return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
-
+
case TLSModel::InitialExec:
case TLSModel::LocalExec:
return LowerToTLSExecModel(GA, DAG, getPointerTy(), model,
Subtarget->is64Bit());
}
-
- assert(0 && "Unreachable");
+
+ llvm_unreachable("Unreachable");
return SDValue();
}
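The dispatch above collapses the four TLS models into three lowerings: local dynamic is treated as general dynamic (noted as unimplemented), and the two exec models share a single lowering parameterized by the model. Schematically (names are illustrative, not LLVM's):

enum class Model { GeneralDynamic, LocalDynamic, InitialExec, LocalExec };
enum class Lowering { GeneralDynamic32, GeneralDynamic64, ExecModel };

// Mirrors the switch above.
Lowering pickTLSLowering(Model m, bool is64Bit) {
  switch (m) {
  case Model::GeneralDynamic:
  case Model::LocalDynamic:    // lowered as general dynamic for now
    return is64Bit ? Lowering::GeneralDynamic64 : Lowering::GeneralDynamic32;
  case Model::InitialExec:
  case Model::LocalExec:
    return Lowering::ExecModel; // the addl x@ntpoff / x@indntpoff pattern
  }
  return Lowering::ExecModel;   // unreachable; keeps compilers quiet
}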
@@ -4732,17 +4797,16 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
/// take a 2 x i32 value to shift plus a shift amount.
SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
DebugLoc dl = Op.getDebugLoc();
bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
- SDValue Tmp1 = isSRA ?
- DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
- DAG.getConstant(VTBits - 1, MVT::i8)) :
- DAG.getConstant(0, VT);
+ SDValue Tmp1 = isSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
+ DAG.getConstant(VTBits - 1, MVT::i8))
+ : DAG.getConstant(0, VT);
SDValue Tmp2, Tmp3;
if (Op.getOpcode() == ISD::SHL_PARTS) {
@@ -4754,9 +4818,9 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) {
}
SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
- DAG.getConstant(VTBits, MVT::i8));
+ DAG.getConstant(VTBits, MVT::i8));
SDValue Cond = DAG.getNode(X86ISD::CMP, dl, VT,
- AndNode, DAG.getConstant(0, MVT::i8));
+ AndNode, DAG.getConstant(0, MVT::i8));
SDValue Hi, Lo;
SDValue CC = DAG.getConstant(X86::COND_NE, MVT::i8);
@@ -4776,7 +4840,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) {
}
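What the PARTS lowering computes, in portable terms: for a value split into two word-sized halves, shift amounts below the word size combine the halves with a funnel shift (SHLD/SHRD), while the CMP/CMOV pair on AndNode handles larger amounts by routing one half across and filling the other with zeros, or with sign bits for SRA, which is what Tmp1 supplies. A scalar model of the left-shift case, assuming 32-bit words:

#include <cassert>
#include <cstdint>

// 64-bit left shift built from 32-bit halves, mirroring SHL_PARTS.
void shl64(uint32_t lo, uint32_t hi, unsigned amt,
           uint32_t &outLo, uint32_t &outHi) {
  amt &= 63;
  if (amt == 0) {
    outLo = lo; outHi = hi;
  } else if (amt < 32) {
    outHi = (hi << amt) | (lo >> (32 - amt)); // SHLD hi, lo, amt
    outLo = lo << amt;
  } else {                                    // the CMOV-selected case
    outHi = lo << (amt - 32);
    outLo = 0;
  }
}

int main() {
  uint32_t lo, hi;
  shl64(0x80000001u, 0, 40, lo, hi);
  assert(lo == 0 && hi == 0x00000100u); // bit 0 lands at bit 40; bit 31 is lost
}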
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
- MVT SrcVT = Op.getOperand(0).getValueType();
+ EVT SrcVT = Op.getOperand(0).getValueType();
if (SrcVT.isVector()) {
if (SrcVT == MVT::v2i32 && Op.getValueType() == MVT::v2f64) {
@@ -4808,7 +4872,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
}
-SDValue X86TargetLowering::BuildFILD(SDValue Op, MVT SrcVT, SDValue Chain,
+SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
SDValue StackSlot,
SelectionDAG &DAG) {
// Build the FILD
@@ -4888,19 +4952,22 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) {
*/
DebugLoc dl = Op.getDebugLoc();
+ LLVMContext *Context = DAG.getContext();
// Build some magic constants.
std::vector<Constant*> CV0;
- CV0.push_back(ConstantInt::get(APInt(32, 0x45300000)));
- CV0.push_back(ConstantInt::get(APInt(32, 0x43300000)));
- CV0.push_back(ConstantInt::get(APInt(32, 0)));
- CV0.push_back(ConstantInt::get(APInt(32, 0)));
+ CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x45300000)));
+ CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x43300000)));
+ CV0.push_back(ConstantInt::get(*Context, APInt(32, 0)));
+ CV0.push_back(ConstantInt::get(*Context, APInt(32, 0)));
Constant *C0 = ConstantVector::get(CV0);
SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16);
std::vector<Constant*> CV1;
- CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4530000000000000ULL))));
- CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4330000000000000ULL))));
+ CV1.push_back(
+ ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL))));
+ CV1.push_back(
+ ConstantFP::get(*Context, APFloat(APInt(64, 0x4330000000000000ULL))));
Constant *C1 = ConstantVector::get(CV1);
SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16);
@@ -4965,7 +5032,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) {
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);
// Handle final rounding.
- MVT DestVT = Op.getValueType();
+ EVT DestVT = Op.getValueType();
if (DestVT.bitsLT(MVT::f64)) {
return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
@@ -4988,7 +5055,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0);
- MVT SrcVT = N0.getValueType();
+ EVT SrcVT = N0.getValueType();
if (SrcVT == MVT::i64) {
// We only handle SSE2 f64 target here; caller can expand the rest.
if (Op.getValueType() != MVT::f64 || !X86ScalarSSEf64)
@@ -5017,7 +5084,7 @@ std::pair<SDValue,SDValue> X86TargetLowering::
FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) {
DebugLoc dl = Op.getDebugLoc();
- MVT DstTy = Op.getValueType();
+ EVT DstTy = Op.getValueType();
if (!IsSigned) {
assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
@@ -5043,10 +5110,10 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) {
unsigned MemSize = DstTy.getSizeInBits()/8;
int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
-
+
unsigned Opc;
- switch (DstTy.getSimpleVT()) {
- default: assert(0 && "Invalid FP_TO_SINT to lower!");
+ switch (DstTy.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
@@ -5105,18 +5172,19 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) {
}
SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) {
+ LLVMContext *Context = DAG.getContext();
DebugLoc dl = Op.getDebugLoc();
- MVT VT = Op.getValueType();
- MVT EltVT = VT;
+ EVT VT = Op.getValueType();
+ EVT EltVT = VT;
if (VT.isVector())
EltVT = VT.getVectorElementType();
std::vector<Constant*> CV;
if (EltVT == MVT::f64) {
- Constant *C = ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63))));
+ Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63))));
CV.push_back(C);
CV.push_back(C);
} else {
- Constant *C = ConstantFP::get(APFloat(APInt(32, ~(1U << 31))));
+ Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31))));
CV.push_back(C);
CV.push_back(C);
CV.push_back(C);
@@ -5131,21 +5199,19 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) {
}
SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) {
+ LLVMContext *Context = DAG.getContext();
DebugLoc dl = Op.getDebugLoc();
- MVT VT = Op.getValueType();
- MVT EltVT = VT;
- unsigned EltNum = 1;
- if (VT.isVector()) {
+ EVT VT = Op.getValueType();
+ EVT EltVT = VT;
+ if (VT.isVector())
EltVT = VT.getVectorElementType();
- EltNum = VT.getVectorNumElements();
- }
std::vector<Constant*> CV;
if (EltVT == MVT::f64) {
- Constant *C = ConstantFP::get(APFloat(APInt(64, 1ULL << 63)));
+ Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63)));
CV.push_back(C);
CV.push_back(C);
} else {
- Constant *C = ConstantFP::get(APFloat(APInt(32, 1U << 31)));
+ Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31)));
CV.push_back(C);
CV.push_back(C);
CV.push_back(C);
@@ -5168,11 +5234,12 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) {
}
SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
+ LLVMContext *Context = DAG.getContext();
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
DebugLoc dl = Op.getDebugLoc();
- MVT VT = Op.getValueType();
- MVT SrcVT = Op1.getValueType();
+ EVT VT = Op.getValueType();
+ EVT SrcVT = Op1.getValueType();
// If second operand is smaller, extend it first.
if (SrcVT.bitsLT(VT)) {
@@ -5191,13 +5258,13 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
// First get the sign bit of second operand.
std::vector<Constant*> CV;
if (SrcVT == MVT::f64) {
- CV.push_back(ConstantFP::get(APFloat(APInt(64, 1ULL << 63))));
- CV.push_back(ConstantFP::get(APFloat(APInt(64, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0))));
} else {
- CV.push_back(ConstantFP::get(APFloat(APInt(32, 1U << 31))));
- CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
- CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
- CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
}
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
@@ -5220,13 +5287,13 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
// Clear first operand sign bit.
CV.clear();
if (VT == MVT::f64) {
- CV.push_back(ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63)))));
- CV.push_back(ConstantFP::get(APFloat(APInt(64, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63)))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0))));
} else {
- CV.push_back(ConstantFP::get(APFloat(APInt(32, ~(1U << 31)))));
- CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
- CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
- CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31)))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
}
C = ConstantVector::get(CV);
CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
@@ -5299,21 +5366,48 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
Opcode = X86ISD::ADD;
NumOperands = 2;
break;
+ case ISD::AND: {
+    // If the primary result of the 'and' isn't used, don't bother using
+    // X86ISD::AND, because a TEST instruction will be better.
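+    // For example, (brcond (setcc ne, (and X, 1), 0)) selects to a single
+    // TEST instruction instead of an AND whose only use is the compare.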
+ bool NonFlagUse = false;
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = Op.getNode()->use_end(); UI != UE; ++UI)
+ if (UI->getOpcode() != ISD::BRCOND &&
+ UI->getOpcode() != ISD::SELECT &&
+ UI->getOpcode() != ISD::SETCC) {
+ NonFlagUse = true;
+ break;
+ }
+ if (!NonFlagUse)
+ break;
+ }
+ // FALL THROUGH
case ISD::SUB:
- // Due to the ISEL shortcoming noted above, be conservative if this sub is
+ case ISD::OR:
+ case ISD::XOR:
+ // Due to the ISEL shortcoming noted above, be conservative if this op is
// likely to be selected as part of a load-modify-store instruction.
for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
UE = Op.getNode()->use_end(); UI != UE; ++UI)
if (UI->getOpcode() == ISD::STORE)
goto default_case;
- // Otherwise use a regular EFLAGS-setting sub.
- Opcode = X86ISD::SUB;
+ // Otherwise use a regular EFLAGS-setting instruction.
+ switch (Op.getNode()->getOpcode()) {
+ case ISD::SUB: Opcode = X86ISD::SUB; break;
+ case ISD::OR: Opcode = X86ISD::OR; break;
+ case ISD::XOR: Opcode = X86ISD::XOR; break;
+ case ISD::AND: Opcode = X86ISD::AND; break;
+ default: llvm_unreachable("unexpected operator!");
+ }
NumOperands = 2;
break;
case X86ISD::ADD:
case X86ISD::SUB:
case X86ISD::INC:
case X86ISD::DEC:
+ case X86ISD::OR:
+ case X86ISD::XOR:
+ case X86ISD::AND:
return SDValue(Op.getNode(), 1);
default:
default_case:
@@ -5419,14 +5513,14 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
DebugLoc dl = Op.getDebugLoc();
if (isFP) {
unsigned SSECC = 8;
- MVT VT0 = Op0.getValueType();
+ EVT VT0 = Op0.getValueType();
assert(VT0 == MVT::v4f32 || VT0 == MVT::v2f64);
unsigned Opc = VT0 == MVT::v4f32 ? X86ISD::CMPPS : X86ISD::CMPPD;
bool Swap = false;
@@ -5469,7 +5563,7 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
NEQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(4, MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, ORD, NEQ);
}
- assert(0 && "Illegal FP comparison");
+ llvm_unreachable("Illegal FP comparison");
}
// Handle all other FP comparisons here.
return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8));
@@ -5481,10 +5575,13 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
unsigned Opc = 0, EQOpc = 0, GTOpc = 0;
bool Swap = false, Invert = false, FlipSigns = false;
- switch (VT.getSimpleVT()) {
+ switch (VT.getSimpleVT().SimpleTy) {
default: break;
+ case MVT::v8i8:
case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break;
+ case MVT::v4i16:
case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break;
+ case MVT::v2i32:
case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break;
case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break;
}
@@ -5508,7 +5605,7 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
// Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations.
if (FlipSigns) {
- MVT EltVT = VT.getVectorElementType();
+ EVT EltVT = VT.getVectorElementType();
SDValue SignBit = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()),
EltVT);
std::vector<SDValue> SignBits(VT.getVectorNumElements(), SignBit);
@@ -5538,7 +5635,10 @@ static bool isX86LogicalCmp(SDValue Op) {
Opc == X86ISD::SMUL ||
Opc == X86ISD::UMUL ||
Opc == X86ISD::INC ||
- Opc == X86ISD::DEC))
+ Opc == X86ISD::DEC ||
+ Opc == X86ISD::OR ||
+ Opc == X86ISD::XOR ||
+ Opc == X86ISD::AND))
return true;
return false;
@@ -5560,7 +5660,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) {
SDValue Cmp = Cond.getOperand(1);
unsigned Opc = Cmp.getOpcode();
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
bool IllegalFPCMov = false;
if (VT.isFloatingPoint() && !VT.isVector() &&
@@ -5751,8 +5851,8 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDValue Flag;
- MVT IntPtr = getPointerTy();
- MVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
+ EVT IntPtr = getPointerTy();
+ EVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
@@ -5802,8 +5902,8 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
if (const char *bzeroEntry = V &&
V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
- MVT IntPtr = getPointerTy();
- const Type *IntPtrTy = TD->getIntPtrType();
+ EVT IntPtr = getPointerTy();
+ const Type *IntPtrTy = TD->getIntPtrType(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Dst;
@@ -5812,8 +5912,9 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
Entry.Node = Size;
Args.push_back(Entry);
std::pair<SDValue,SDValue> CallResult =
- LowerCallTo(Chain, Type::VoidTy, false, false, false, false,
- 0, CallingConv::C, false,
+ LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false,
+ 0, CallingConv::C, false, /*isReturnValueUsed=*/false,
DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl);
return CallResult.second;
}
@@ -5824,7 +5925,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
uint64_t SizeVal = ConstantSize->getZExtValue();
SDValue InFlag(0, 0);
- MVT AVT;
+ EVT AVT;
SDValue Count;
ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
unsigned BytesLeft = 0;
@@ -5893,7 +5994,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
if (TwoRepStos) {
InFlag = Chain.getValue(1);
Count = Size;
- MVT CVT = Count.getValueType();
+ EVT CVT = Count.getValueType();
SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count,
DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX :
@@ -5909,8 +6010,8 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
} else if (BytesLeft) {
// Handle the last 1 - 7 bytes.
unsigned Offset = SizeVal - BytesLeft;
- MVT AddrVT = Dst.getValueType();
- MVT SizeVT = Size.getValueType();
+ EVT AddrVT = Dst.getValueType();
+ EVT SizeVT = Size.getValueType();
Chain = DAG.getMemset(Chain, dl,
DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
@@ -5945,7 +6046,7 @@ X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
return SDValue();
// DWORD aligned
- MVT AVT = MVT::i32;
+ EVT AVT = MVT::i32;
if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned
AVT = MVT::i64;
@@ -5980,9 +6081,9 @@ X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
if (BytesLeft) {
// Handle the last 1 - 7 bytes.
unsigned Offset = SizeVal - BytesLeft;
- MVT DstVT = Dst.getValueType();
- MVT SrcVT = Src.getValueType();
- MVT SizeVT = Size.getValueType();
+ EVT DstVT = Dst.getValueType();
+ EVT SrcVT = Src.getValueType();
+ EVT SizeVT = Size.getValueType();
Results.push_back(DAG.getMemcpy(Chain, dl,
DAG.getNode(ISD::ADD, dl, DstVT, Dst,
DAG.getConstant(Offset, DstVT)),
@@ -6054,8 +6155,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) {
SDValue SrcPtr = Op.getOperand(1);
SDValue SrcSV = Op.getOperand(2);
- assert(0 && "VAArgInst is not yet implemented for x86-64!");
- abort();
+ llvm_report_error("VAArgInst is not yet implemented for x86-64!");
return SDValue();
}
@@ -6179,6 +6279,36 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
DAG.getConstant(X86CC, MVT::i8), Cond);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
+  // ptest intrinsics. The intrinsics these come from are designed to return
+  // an integer value, not just an instruction, so lower them to the ptest
+  // pattern and a setcc for the result.
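+  // e.g. ptestz(a, b) lowers to (zext (x86setcc COND_E, (x86ptest a, b))).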
+ case Intrinsic::x86_sse41_ptestz:
+ case Intrinsic::x86_sse41_ptestc:
+  case Intrinsic::x86_sse41_ptestnzc: {
+ unsigned X86CC = 0;
+ switch (IntNo) {
+ default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
+ case Intrinsic::x86_sse41_ptestz:
+ // ZF = 1
+ X86CC = X86::COND_E;
+ break;
+ case Intrinsic::x86_sse41_ptestc:
+ // CF = 1
+ X86CC = X86::COND_B;
+ break;
+ case Intrinsic::x86_sse41_ptestnzc:
+ // ZF and CF = 0
+ X86CC = X86::COND_A;
+ break;
+ }
+
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+ SDValue Test = DAG.getNode(X86ISD::PTEST, dl, MVT::i32, LHS, RHS);
+ SDValue CC = DAG.getConstant(X86CC, MVT::i8);
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
+ }
// Fix vector shift instructions where the last operand is a non-immediate
// i32 value.
@@ -6203,7 +6333,7 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
return SDValue();
unsigned NewIntNo = 0;
- MVT ShAmtVT = MVT::v4i32;
+ EVT ShAmtVT = MVT::v4i32;
switch (IntNo) {
case Intrinsic::x86_sse2_pslli_w:
NewIntNo = Intrinsic::x86_sse2_psll_w;
@@ -6256,14 +6386,28 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_mmx_psrai_d:
NewIntNo = Intrinsic::x86_mmx_psra_d;
break;
- default: abort(); // Can't reach here.
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
}
break;
}
}
- MVT VT = Op.getValueType();
- ShAmt = DAG.getNode(ISD::BIT_CONVERT, dl, VT,
- DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ShAmtVT, ShAmt));
+
+  // The scalar variants of the vector shift intrinsics take 32-bit shift
+  // amounts, but the SSE2/MMX shift instructions read 64 bits. Set the
+  // upper 32 bits to zero.
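+  // e.g. for a v4i32 shift-amount type this builds <ShAmt, 0, undef, undef>,
+  // so the 64 bits the shift instruction reads are the zero-extended amount.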
+ SDValue ShOps[4];
+ ShOps[0] = ShAmt;
+ ShOps[1] = DAG.getConstant(0, MVT::i32);
+ if (ShAmtVT == MVT::v4i32) {
+ ShOps[2] = DAG.getUNDEF(MVT::i32);
+ ShOps[3] = DAG.getUNDEF(MVT::i32);
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 4);
+ } else {
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2);
+ }
+
+ EVT VT = Op.getValueType();
+ ShAmt = DAG.getNode(ISD::BIT_CONVERT, dl, VT, ShAmt);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(NewIntNo, MVT::i32),
Op.getOperand(1), ShAmt);
@@ -6295,7 +6439,7 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setFrameAddressIsTaken(true);
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP;
@@ -6401,12 +6545,12 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
} else {
const Function *Func =
cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
- unsigned CC = Func->getCallingConv();
+ CallingConv::ID CC = Func->getCallingConv();
unsigned NestReg;
switch (CC) {
default:
- assert(0 && "Unsupported calling convention");
+ llvm_unreachable("Unsupported calling convention");
case CallingConv::C:
case CallingConv::X86_StdCall: {
// Pass 'nest' parameter in ECX.
@@ -6428,8 +6572,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32;
if (InRegCount > 2) {
- cerr << "Nest register in use - reduce number of inreg parameters!\n";
- abort();
+ llvm_report_error("Nest register in use - reduce number of inreg parameters!");
}
}
break;
@@ -6499,7 +6642,7 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) {
const TargetMachine &TM = MF.getTarget();
const TargetFrameInfo &TFI = *TM.getFrameInfo();
unsigned StackAlignment = TFI.getStackAlignment();
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
// Save FP Control Word to stack slot
@@ -6537,8 +6680,8 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) {
}
SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getValueType();
- MVT OpVT = VT;
+ EVT VT = Op.getValueType();
+ EVT OpVT = VT;
unsigned NumBits = VT.getSizeInBits();
DebugLoc dl = Op.getDebugLoc();
@@ -6570,8 +6713,8 @@ SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) {
}
SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getValueType();
- MVT OpVT = VT;
+ EVT VT = Op.getValueType();
+ EVT OpVT = VT;
unsigned NumBits = VT.getSizeInBits();
DebugLoc dl = Op.getDebugLoc();
@@ -6599,7 +6742,7 @@ SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
}
SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply");
DebugLoc dl = Op.getDebugLoc();
@@ -6656,7 +6799,7 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
switch (Op.getOpcode()) {
- default: assert(0 && "Unknown ovf instruction!");
+ default: llvm_unreachable("Unknown ovf instruction!");
case ISD::SADDO:
// A subtract of one will be selected as a INC. Note that INC doesn't
// set CF, so we can't do this for UADDO.
@@ -6712,11 +6855,11 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) {
}
SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) {
- MVT T = Op.getValueType();
+ EVT T = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
unsigned Reg = 0;
unsigned size = 0;
- switch(T.getSimpleVT()) {
+ switch(T.getSimpleVT().SimpleTy) {
default:
assert(false && "Invalid value type!");
case MVT::i8: Reg = X86::AL; size = 1; break;
@@ -6763,7 +6906,7 @@ SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) {
SDNode *Node = Op.getNode();
DebugLoc dl = Node->getDebugLoc();
- MVT T = Node->getValueType(0);
+ EVT T = Node->getValueType(0);
SDValue negOp = DAG.getNode(ISD::SUB, dl, T,
DAG.getConstant(0, T), Node->getOperand(2));
return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl,
@@ -6778,7 +6921,7 @@ SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) {
///
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
switch (Op.getOpcode()) {
- default: assert(0 && "Should not custom lower this!");
+ default: llvm_unreachable("Should not custom lower this!");
case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG);
case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
@@ -6805,9 +6948,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
- case ISD::CALL: return LowerCALL(Op, DAG);
- case ISD::RET: return LowerRET(Op, DAG);
- case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::VAARG: return LowerVAARG(Op, DAG);
case ISD::VACOPY: return LowerVACOPY(Op, DAG);
@@ -6836,7 +6976,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
void X86TargetLowering::
ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG, unsigned NewOp) {
- MVT T = Node->getValueType(0);
+ EVT T = Node->getValueType(0);
DebugLoc dl = Node->getDebugLoc();
assert (T == MVT::i64 && "Only know how to expand i64 atomics");
@@ -6846,12 +6986,11 @@ ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
Node->getOperand(2), DAG.getIntPtrConstant(0));
SDValue In2H = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
Node->getOperand(2), DAG.getIntPtrConstant(1));
- // This is a generalized SDNode, not an AtomicSDNode, so it doesn't
- // have a MemOperand. Pass the info through as a normal operand.
- SDValue LSI = DAG.getMemOperand(cast<MemSDNode>(Node)->getMemOperand());
- SDValue Ops[] = { Chain, In1, In2L, In2H, LSI };
+ SDValue Ops[] = { Chain, In1, In2L, In2H };
SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
- SDValue Result = DAG.getNode(NewOp, dl, Tys, Ops, 5);
+ SDValue Result =
+ DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, 4, MVT::i64,
+ cast<MemSDNode>(Node)->getMemOperand());
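+  // getMemIntrinsicNode attaches the MachineMemOperand directly, so the
+  // memory info no longer needs to be passed through as an extra operand.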
SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)};
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
Results.push_back(Result.getValue(2));
@@ -6872,7 +7011,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
FP_TO_INTHelper(SDValue(N, 0), DAG, true);
SDValue FIST = Vals.first, StackSlot = Vals.second;
if (FIST.getNode() != 0) {
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// Return a load from the stack slot.
Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, NULL, 0));
}
@@ -6893,7 +7032,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::ATOMIC_CMP_SWAP: {
- MVT T = N->getValueType(0);
+ EVT T = N->getValueType(0);
assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap");
SDValue cpInL, cpInH;
cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(2),
@@ -6969,7 +7108,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FLD: return "X86ISD::FLD";
case X86ISD::FST: return "X86ISD::FST";
case X86ISD::CALL: return "X86ISD::CALL";
- case X86ISD::TAILCALL: return "X86ISD::TAILCALL";
case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG";
case X86ISD::BT: return "X86ISD::BT";
case X86ISD::CMP: return "X86ISD::CMP";
@@ -7027,7 +7165,12 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::UMUL: return "X86ISD::UMUL";
case X86ISD::INC: return "X86ISD::INC";
case X86ISD::DEC: return "X86ISD::DEC";
+ case X86ISD::OR: return "X86ISD::OR";
+ case X86ISD::XOR: return "X86ISD::XOR";
+ case X86ISD::AND: return "X86ISD::AND";
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
+ case X86ISD::PTEST: return "X86ISD::PTEST";
+ case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
}
}
@@ -7036,28 +7179,28 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
const Type *Ty) const {
// X86 supports extremely general addressing modes.
+ CodeModel::Model M = getTargetMachine().getCodeModel();
// X86 allows a sign-extended 32-bit immediate field as a displacement.
- if (AM.BaseOffs <= -(1LL << 32) || AM.BaseOffs >= (1LL << 32)-1)
+ if (!X86::isOffsetSuitableForCodeModel(AM.BaseOffs, M, AM.BaseGV != NULL))
return false;
if (AM.BaseGV) {
- // We can only fold this if we don't need an extra load.
- if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false))
+ unsigned GVFlags =
+ Subtarget->ClassifyGlobalReference(AM.BaseGV, getTargetMachine());
+
+ // If a reference to this global requires an extra load, we can't fold it.
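+    // (e.g. on Darwin in PIC mode, a non-local global is reached through a
+    // $non_lazy_ptr stub load, so its address cannot be folded here.)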
+ if (isGlobalStubReference(GVFlags))
return false;
- // If BaseGV requires a register, we cannot also have a BaseReg.
- if (Subtarget->GVRequiresRegister(AM.BaseGV, getTargetMachine(), false) &&
- AM.HasBaseReg)
+
+ // If BaseGV requires a register for the PIC base, we cannot also have a
+ // BaseReg specified.
+ if (AM.HasBaseReg && isGlobalRelativeToPICBase(GVFlags))
return false;
- // X86-64 only supports addr of globals in small code model.
- if (Subtarget->is64Bit()) {
- if (getTargetMachine().getCodeModel() != CodeModel::Small)
- return false;
- // If lower 4G is not available, then we must use rip-relative addressing.
- if (AM.BaseOffs || AM.Scale > 1)
- return false;
- }
+ // If lower 4G is not available, then we must use rip-relative addressing.
+ if (Subtarget->is64Bit() && (AM.BaseOffs || AM.Scale > 1))
+ return false;
}
switch (AM.Scale) {
@@ -7094,7 +7237,7 @@ bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const {
return Subtarget->is64Bit() || NumBits1 < 64;
}
-bool X86TargetLowering::isTruncateFree(MVT VT1, MVT VT2) const {
+bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
if (!VT1.isInteger() || !VT2.isInteger())
return false;
unsigned NumBits1 = VT1.getSizeInBits();
@@ -7106,15 +7249,16 @@ bool X86TargetLowering::isTruncateFree(MVT VT1, MVT VT2) const {
bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const {
// x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
- return Ty1 == Type::Int32Ty && Ty2 == Type::Int64Ty && Subtarget->is64Bit();
+ return Ty1 == Type::getInt32Ty(Ty1->getContext()) &&
+ Ty2 == Type::getInt64Ty(Ty1->getContext()) && Subtarget->is64Bit();
}
-bool X86TargetLowering::isZExtFree(MVT VT1, MVT VT2) const {
+bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
// x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget->is64Bit();
}
-bool X86TargetLowering::isNarrowingProfitable(MVT VT1, MVT VT2) const {
+bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
// i16 instructions are longer (0x66 prefix) and potentially slower.
return !(VT1 == MVT::i32 && VT2 == MVT::i16);
}
@@ -7124,8 +7268,8 @@ bool X86TargetLowering::isNarrowingProfitable(MVT VT1, MVT VT2) const {
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
bool
-X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
- MVT VT) const {
+X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
+ EVT VT) const {
// Only do shuffles on 128-bit vector types for now.
if (VT.getSizeInBits() == 64)
return false;
@@ -7146,7 +7290,7 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
bool
X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
- MVT VT) const {
+ EVT VT) const {
unsigned NumElts = VT.getVectorNumElements();
// FIXME: This collection of masks seems suspect.
if (NumElts == 2)
@@ -7254,7 +7398,8 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
(*MIB).addOperand(*argOpers[i]);
MIB.addReg(t2);
assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
- (*MIB).addMemOperand(*F, *bInstr->memoperands_begin());
+ (*MIB).setMemRefs(bInstr->memoperands_begin(),
+ bInstr->memoperands_end());
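+  // setMemRefs transfers the original instruction's memory operands onto
+  // the newly built instruction.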
MIB = BuildMI(newMBB, dl, TII->get(copyOpc), destOper.getReg());
MIB.addReg(EAXreg);
@@ -7406,7 +7551,8 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
(*MIB).addOperand(*argOpers[i]);
assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
- (*MIB).addMemOperand(*F, *bInstr->memoperands_begin());
+ (*MIB).setMemRefs(bInstr->memoperands_begin(),
+ bInstr->memoperands_end());
MIB = BuildMI(newMBB, dl, TII->get(copyOpc), t3);
MIB.addReg(X86::EAX);
@@ -7450,7 +7596,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
F->insert(MBBIter, newMBB);
F->insert(MBBIter, nextMBB);
- // Move all successors to thisMBB to nextMBB
+ // Move all successors of thisMBB to nextMBB
nextMBB->transferSuccessors(thisMBB);
// Update thisMBB to fall through to newMBB
@@ -7510,7 +7656,8 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
(*MIB).addOperand(*argOpers[i]);
MIB.addReg(t3);
assert(mInstr->hasOneMemOperand() && "Unexpected number of memoperand");
- (*MIB).addMemOperand(*F, *mInstr->memoperands_begin());
+ (*MIB).setMemRefs(mInstr->memoperands_begin(),
+ mInstr->memoperands_end());
MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), destOper.getReg());
MIB.addReg(X86::EAX);
@@ -7522,70 +7669,190 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
return nextMBB;
}
-
+// FIXME: When we get size-specific XMM0 registers, e.g. XMM0_V16I8,
+// all of this code can be replaced by patterns in the .td file.
MachineBasicBlock *
-X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB) const {
+X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned numArgs, bool memArg) const {
+
+ MachineFunction *F = BB->getParent();
DebugLoc dl = MI->getDebugLoc();
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ unsigned Opc;
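+  // numArgs == 3 selects the implicit-length form (PCMPISTRM); the
+  // explicit-length form (PCMPESTRM) takes two extra length operands.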
+ if (memArg)
+ Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm;
+ else
+ Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr;
+
+ MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(Opc));
+
+ for (unsigned i = 0; i < numArgs; ++i) {
+ MachineOperand &Op = MI->getOperand(i+1);
+
+ if (!(Op.isReg() && Op.isImplicit()))
+ MIB.addOperand(Op);
+ }
+
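+  // These instructions leave their result in XMM0; copy it into the
+  // pseudo instruction's destination register.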
+ BuildMI(BB, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg())
+ .addReg(X86::XMM0);
+
+ F->DeleteMachineInstr(MI);
+
+ return BB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
+ MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ // Emit code to save XMM registers to the stack. The ABI says that the
+ // number of registers to save is given in %al, so it's theoretically
+ // possible to do an indirect jump trick to avoid saving all of them,
+ // however this code takes a simpler approach and just executes all
+ // of the stores if %al is non-zero. It's less code, and it's probably
+ // easier on the hardware branch predictor, and stores aren't all that
+ // expensive anyway.
+
+ // Create the new basic blocks. One block contains all the XMM stores,
+ // and one block is the final destination regardless of whether any
+ // stores were performed.
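+  // Resulting CFG (non-Win64): MBB branches to EndMBB when %al is zero and
+  // otherwise falls through XMMSaveMBB into EndMBB.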
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction *F = MBB->getParent();
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+ MachineBasicBlock *XMMSaveMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *EndMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(MBBIter, XMMSaveMBB);
+ F->insert(MBBIter, EndMBB);
+
+ // Set up the CFG.
+ // Move any original successors of MBB to the end block.
+ EndMBB->transferSuccessors(MBB);
+ // The original block will now fall through to the XMM save block.
+ MBB->addSuccessor(XMMSaveMBB);
+ // The XMMSaveMBB will fall through to the end block.
+ XMMSaveMBB->addSuccessor(EndMBB);
+
+ // Now add the instructions.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ unsigned CountReg = MI->getOperand(0).getReg();
+ int64_t RegSaveFrameIndex = MI->getOperand(1).getImm();
+ int64_t VarArgsFPOffset = MI->getOperand(2).getImm();
+
+ if (!Subtarget->isTargetWin64()) {
+ // If %al is 0, branch around the XMM save block.
+ BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg);
+ BuildMI(MBB, DL, TII->get(X86::JE)).addMBB(EndMBB);
+ MBB->addSuccessor(EndMBB);
+ }
+
+ // In the XMM save block, save all the XMM argument registers.
+ for (int i = 3, e = MI->getNumOperands(); i != e; ++i) {
+ int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
+ MachineMemOperand *MMO =
+ F->getMachineMemOperand(
+ PseudoSourceValue::getFixedStack(RegSaveFrameIndex),
+ MachineMemOperand::MOStore, Offset,
+ /*Size=*/16, /*Align=*/16);
+ BuildMI(XMMSaveMBB, DL, TII->get(X86::MOVAPSmr))
+ .addFrameIndex(RegSaveFrameIndex)
+ .addImm(/*Scale=*/1)
+ .addReg(/*IndexReg=*/0)
+ .addImm(/*Disp=*/Offset)
+ .addReg(/*Segment=*/0)
+ .addReg(MI->getOperand(i).getReg())
+ .addMemOperand(MMO);
+ }
+
+ F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+
+ return EndMBB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ unsigned Opc =
+ X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
+ BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+  // Update machine-CFG edges by first adding all successors of the current
+  // block to the new block, which will contain the Phi node for the select.
+  // Also inform sdisel of the edge changes.
+ for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
+ E = BB->succ_end(); I != E; ++I) {
+ EM->insert(std::make_pair(*I, sinkMBB));
+ sinkMBB->addSuccessor(*I);
+ }
+  // Next, remove all successors of the current block.
+ while (!BB->succ_empty())
+ BB->removeSuccessor(BB->succ_begin());
+ // Add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(BB, DL, TII->get(X86::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+ F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ return BB;
+}
+
+
+MachineBasicBlock *
+X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
switch (MI->getOpcode()) {
default: assert(false && "Unexpected instr type to insert");
+ case X86::CMOV_GR8:
case X86::CMOV_V1I64:
case X86::CMOV_FR32:
case X86::CMOV_FR64:
case X86::CMOV_V4F32:
case X86::CMOV_V2F64:
- case X86::CMOV_V2I64: {
- // To "insert" a SELECT_CC instruction, we actually have to insert the
- // diamond control-flow pattern. The incoming instruction knows the
- // destination vreg to set, the condition code register to branch on, the
- // true/false values to select between, and a branch opcode to use.
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
-
- // thisMBB:
- // ...
- // TrueVal = ...
- // cmpTY ccX, r1, r2
- // bCC copy1MBB
- // fallthrough --> copy0MBB
- MachineBasicBlock *thisMBB = BB;
- MachineFunction *F = BB->getParent();
- MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- unsigned Opc =
- X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
- BuildMI(BB, dl, TII->get(Opc)).addMBB(sinkMBB);
- F->insert(It, copy0MBB);
- F->insert(It, sinkMBB);
- // Update machine-CFG edges by transferring all successors of the current
- // block to the new block which will contain the Phi node for the select.
- sinkMBB->transferSuccessors(BB);
-
- // Add the true and fallthrough blocks as its successors.
- BB->addSuccessor(copy0MBB);
- BB->addSuccessor(sinkMBB);
-
- // copy0MBB:
- // %FalseValue = ...
- // # fallthrough to sinkMBB
- BB = copy0MBB;
-
- // Update machine-CFG edges
- BB->addSuccessor(sinkMBB);
-
- // sinkMBB:
- // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
- // ...
- BB = sinkMBB;
- BuildMI(BB, dl, TII->get(X86::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
- .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
-
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
- return BB;
- }
+ case X86::CMOV_V2I64:
+ return EmitLoweredSelect(MI, BB, EM);
case X86::FP32_TO_INT16_IN_MEM:
case X86::FP32_TO_INT32_IN_MEM:
@@ -7596,33 +7863,36 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::FP80_TO_INT16_IN_MEM:
case X86::FP80_TO_INT32_IN_MEM:
case X86::FP80_TO_INT64_IN_MEM: {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
// Change the floating point control register to use "round towards zero"
// mode when truncating to an integer value.
MachineFunction *F = BB->getParent();
int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
- addFrameReference(BuildMI(BB, dl, TII->get(X86::FNSTCW16m)), CWFrameIdx);
+ addFrameReference(BuildMI(BB, DL, TII->get(X86::FNSTCW16m)), CWFrameIdx);
// Load the old value of the high byte of the control word...
unsigned OldCW =
F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass);
- addFrameReference(BuildMI(BB, dl, TII->get(X86::MOV16rm), OldCW),
+ addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16rm), OldCW),
CWFrameIdx);
// Set the high part to be round to zero...
- addFrameReference(BuildMI(BB, dl, TII->get(X86::MOV16mi)), CWFrameIdx)
+ addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16mi)), CWFrameIdx)
.addImm(0xC7F);
// Reload the modified control word now...
- addFrameReference(BuildMI(BB, dl, TII->get(X86::FLDCW16m)), CWFrameIdx);
+ addFrameReference(BuildMI(BB, DL, TII->get(X86::FLDCW16m)), CWFrameIdx);
// Restore the memory image of control word to original value
- addFrameReference(BuildMI(BB, dl, TII->get(X86::MOV16mr)), CWFrameIdx)
+ addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16mr)), CWFrameIdx)
.addReg(OldCW);
// Get the X86 opcode to use.
unsigned Opc;
switch (MI->getOpcode()) {
- default: assert(0 && "illegal opcode!");
+ default: llvm_unreachable("illegal opcode!");
case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
@@ -7655,15 +7925,26 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
} else {
AM.Disp = Op.getImm();
}
- addFullAddress(BuildMI(BB, dl, TII->get(Opc)), AM)
+ addFullAddress(BuildMI(BB, DL, TII->get(Opc)), AM)
.addReg(MI->getOperand(X86AddrNumOperands).getReg());
// Reload the original control word now.
- addFrameReference(BuildMI(BB, dl, TII->get(X86::FLDCW16m)), CWFrameIdx);
+ addFrameReference(BuildMI(BB, DL, TII->get(X86::FLDCW16m)), CWFrameIdx);
F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
return BB;
}
+ // String/text processing lowering.
+ case X86::PCMPISTRM128REG:
+    return EmitPCMP(MI, BB, 3, /* memArg */ false);
+  case X86::PCMPISTRM128MEM:
+    return EmitPCMP(MI, BB, 3, /* memArg */ true);
+  case X86::PCMPESTRM128REG:
+    return EmitPCMP(MI, BB, 5, /* memArg */ false);
+  case X86::PCMPESTRM128MEM:
+    return EmitPCMP(MI, BB, 5, /* memArg */ true);
+
+ // Atomic Lowering.
case X86::ATOMAND32:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
X86::AND32ri, X86::MOV32rm,
@@ -7825,6 +8106,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
X86::MOV32rr, X86::MOV32rr,
X86::MOV32ri, X86::MOV32ri,
false);
+ case X86::VASTART_SAVE_XMM_REGS:
+ return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
}
}
@@ -7855,6 +8138,9 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
case X86ISD::UMUL:
case X86ISD::INC:
case X86ISD::DEC:
+ case X86ISD::OR:
+ case X86ISD::XOR:
+ case X86ISD::AND:
// These nodes' second result is a boolean.
if (Op.getResNo() == 0)
break;
@@ -7891,7 +8177,7 @@ static bool isBaseAlignmentOfN(unsigned N, SDNode *Base,
}
static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
- MVT EVT, LoadSDNode *&LDBase,
+ EVT EltVT, LoadSDNode *&LDBase,
unsigned &LastLoadedElt,
SelectionDAG &DAG, MachineFrameInfo *MFI,
const TargetLowering &TLI) {
@@ -7919,7 +8205,7 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
continue;
LoadSDNode *LD = cast<LoadSDNode>(Elt);
- if (!TLI.isConsecutiveLoad(LD, LDBase, EVT.getSizeInBits()/8, i, MFI))
+ if (!TLI.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i, MFI))
return false;
LastLoadedElt = i;
}
@@ -7935,8 +8221,8 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI) {
DebugLoc dl = N->getDebugLoc();
- MVT VT = N->getValueType(0);
- MVT EVT = VT.getVectorElementType();
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
unsigned NumElems = VT.getVectorNumElements();
@@ -7947,7 +8233,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
LoadSDNode *LD = NULL;
unsigned LastLoadedElt;
- if (!EltsFromConsecutiveLoads(SVN, NumElems, EVT, LD, LastLoadedElt, DAG,
+ if (!EltsFromConsecutiveLoads(SVN, NumElems, EltVT, LD, LastLoadedElt, DAG,
MFI, TLI))
return SDValue();
@@ -7976,57 +8262,159 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// Get the LHS/RHS of the select.
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
-
- // If we have SSE[12] support, try to form min/max nodes.
+
+  // If we have SSE[12] support, try to form min/max nodes. SSE min/max
+  // instructions have the peculiarity that if either operand is a NaN,
+  // they return what we call the RHS operand (and as such are not
+  // commutative). It happens that this matches the semantics of the common
+  // C idiom x<y?x:y and related forms, so we can recognize these cases.
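+  // e.g. with x in %xmm0 and y in %xmm1, "minss %xmm1, %xmm0" computes
+  // x < y ? x : y, yielding y (the RHS) whenever either input is a NaN.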
if (Subtarget->hasSSE2() &&
(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64) &&
Cond.getOpcode() == ISD::SETCC) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
unsigned Opcode = 0;
+ // Check for x CC y ? x : y.
if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
switch (CC) {
default: break;
- case ISD::SETOLE: // (X <= Y) ? X : Y -> min
+ case ISD::SETULT:
+ // This can be a min if we can prove that at least one of the operands
+ // is not a nan.
+ if (!FiniteOnlyFPMath()) {
+ if (DAG.isKnownNeverNaN(RHS)) {
+ // Put the potential NaN in the RHS so that SSE will preserve it.
+ std::swap(LHS, RHS);
+ } else if (!DAG.isKnownNeverNaN(LHS))
+ break;
+ }
+ Opcode = X86ISD::FMIN;
+ break;
+ case ISD::SETOLE:
+ // This can be a min if we can prove that at least one of the operands
+ // is not a nan.
+ if (!FiniteOnlyFPMath()) {
+ if (DAG.isKnownNeverNaN(LHS)) {
+ // Put the potential NaN in the RHS so that SSE will preserve it.
+ std::swap(LHS, RHS);
+ } else if (!DAG.isKnownNeverNaN(RHS))
+ break;
+ }
+ Opcode = X86ISD::FMIN;
+ break;
case ISD::SETULE:
- case ISD::SETLE:
- if (!UnsafeFPMath) break;
- // FALL THROUGH.
- case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min
+        // This can be a min, but on a NaN the select yields the original LHS,
+        // so swap the operands and fall through (SSE min returns its RHS on
+        // a NaN).
+ std::swap(LHS, RHS);
+ case ISD::SETOLT:
case ISD::SETLT:
+ case ISD::SETLE:
Opcode = X86ISD::FMIN;
break;
- case ISD::SETOGT: // (X > Y) ? X : Y -> max
+ case ISD::SETOGE:
+ // This can be a max if we can prove that at least one of the operands
+ // is not a nan.
+ if (!FiniteOnlyFPMath()) {
+ if (DAG.isKnownNeverNaN(LHS)) {
+ // Put the potential NaN in the RHS so that SSE will preserve it.
+ std::swap(LHS, RHS);
+ } else if (!DAG.isKnownNeverNaN(RHS))
+ break;
+ }
+ Opcode = X86ISD::FMAX;
+ break;
case ISD::SETUGT:
+ // This can be a max if we can prove that at least one of the operands
+ // is not a nan.
+ if (!FiniteOnlyFPMath()) {
+ if (DAG.isKnownNeverNaN(RHS)) {
+ // Put the potential NaN in the RHS so that SSE will preserve it.
+ std::swap(LHS, RHS);
+ } else if (!DAG.isKnownNeverNaN(LHS))
+ break;
+ }
+ Opcode = X86ISD::FMAX;
+ break;
+ case ISD::SETUGE:
+        // This can be a max, but on a NaN the select yields the original LHS,
+        // so swap the operands and fall through (SSE max returns its RHS on
+        // a NaN).
+ std::swap(LHS, RHS);
+ case ISD::SETOGT:
case ISD::SETGT:
- if (!UnsafeFPMath) break;
- // FALL THROUGH.
- case ISD::SETUGE: // (X uge/ge Y) ? X : Y -> max
case ISD::SETGE:
Opcode = X86ISD::FMAX;
break;
}
+ // Check for x CC y ? y : x -- a min/max with reversed arms.
} else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
switch (CC) {
default: break;
- case ISD::SETOGT: // (X > Y) ? Y : X -> min
+ case ISD::SETOGE:
+ // This can be a min if we can prove that at least one of the operands
+ // is not a nan.
+ if (!FiniteOnlyFPMath()) {
+ if (DAG.isKnownNeverNaN(RHS)) {
+ // Put the potential NaN in the RHS so that SSE will preserve it.
+ std::swap(LHS, RHS);
+ } else if (!DAG.isKnownNeverNaN(LHS))
+ break;
+ }
+ Opcode = X86ISD::FMIN;
+ break;
case ISD::SETUGT:
+ // This can be a min if we can prove that at least one of the operands
+ // is not a nan.
+ if (!FiniteOnlyFPMath()) {
+ if (DAG.isKnownNeverNaN(LHS)) {
+ // Put the potential NaN in the RHS so that SSE will preserve it.
+ std::swap(LHS, RHS);
+ } else if (!DAG.isKnownNeverNaN(RHS))
+ break;
+ }
+ Opcode = X86ISD::FMIN;
+ break;
+ case ISD::SETUGE:
+        // This can be a min, but on a NaN the select yields the original LHS,
+        // so swap the operands and fall through (SSE min returns its RHS on
+        // a NaN).
+ std::swap(LHS, RHS);
+ case ISD::SETOGT:
case ISD::SETGT:
- if (!UnsafeFPMath) break;
- // FALL THROUGH.
- case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min
case ISD::SETGE:
Opcode = X86ISD::FMIN;
break;
- case ISD::SETOLE: // (X <= Y) ? Y : X -> max
+ case ISD::SETULT:
+ // This can be a max if we can prove that at least one of the operands
+ // is not a nan.
+ if (!FiniteOnlyFPMath()) {
+ if (DAG.isKnownNeverNaN(LHS)) {
+ // Put the potential NaN in the RHS so that SSE will preserve it.
+ std::swap(LHS, RHS);
+ } else if (!DAG.isKnownNeverNaN(RHS))
+ break;
+ }
+ Opcode = X86ISD::FMAX;
+ break;
+ case ISD::SETOLE:
+ // This can be a max if we can prove that at least one of the operands
+ // is not a nan.
+ if (!FiniteOnlyFPMath()) {
+ if (DAG.isKnownNeverNaN(RHS)) {
+ // Put the potential NaN in the RHS so that SSE will preserve it.
+ std::swap(LHS, RHS);
+ } else if (!DAG.isKnownNeverNaN(LHS))
+ break;
+ }
+ Opcode = X86ISD::FMAX;
+ break;
case ISD::SETULE:
- case ISD::SETLE:
- if (!UnsafeFPMath) break;
- // FALL THROUGH.
- case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max
+        // This can be a max, but on a NaN the select yields the original LHS,
+        // so swap the operands and fall through (SSE max returns its RHS on
+        // a NaN).
+ std::swap(LHS, RHS);
+ case ISD::SETOLT:
case ISD::SETLT:
+ case ISD::SETLE:
Opcode = X86ISD::FMAX;
break;
}
@@ -8035,7 +8423,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
if (Opcode)
return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS);
}
-
+
// If this is a select between two integer constants, try to do some
// optimizations.
if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(LHS)) {
@@ -8045,7 +8433,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// If this is efficiently invertible, canonicalize the LHSC/RHSC values
// so that TrueC (the true value) is larger than FalseC.
bool NeedsCondInvert = false;
-
+
if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue()) &&
// Efficiently invertible.
(Cond.getOpcode() == ISD::SETCC || // setcc -> invertible.
@@ -8054,41 +8442,41 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
NeedsCondInvert = true;
std::swap(TrueC, FalseC);
}
-
+
// Optimize C ? 8 : 0 -> zext(C) << 3. Likewise for any pow2/0.
if (FalseC->getAPIntValue() == 0 &&
TrueC->getAPIntValue().isPowerOf2()) {
if (NeedsCondInvert) // Invert the condition if needed.
Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
DAG.getConstant(1, Cond.getValueType()));
-
+
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, LHS.getValueType(), Cond);
-
+
unsigned ShAmt = TrueC->getAPIntValue().logBase2();
return DAG.getNode(ISD::SHL, DL, LHS.getValueType(), Cond,
DAG.getConstant(ShAmt, MVT::i8));
}
-
+
      // Optimize Cond ? cst+1 : cst -> zext(setcc(C)) + cst.
if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
if (NeedsCondInvert) // Invert the condition if needed.
Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
DAG.getConstant(1, Cond.getValueType()));
-
+
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
FalseC->getValueType(0), Cond);
return DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
SDValue(FalseC, 0));
}
-
+
// Optimize cases that will turn into an LEA instruction. This requires
// an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
-
+
bool isFastMultiplier = false;
if (Diff < 10) {
switch ((unsigned char)Diff) {
@@ -8104,13 +8492,13 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
break;
}
}
-
+
if (isFastMultiplier) {
APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
if (NeedsCondInvert) // Invert the condition if needed.
Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
DAG.getConstant(1, Cond.getValueType()));
-
+
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
Cond);
@@ -8118,17 +8506,17 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
if (Diff != 1)
Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
DAG.getConstant(Diff, Cond.getValueType()));
-
+
// Add the base if non-zero.
if (FalseC->getAPIntValue() != 0)
Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
SDValue(FalseC, 0));
return Cond;
}
- }
+ }
}
}
-
+
return SDValue();
}
@@ -8136,11 +8524,11 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
DebugLoc DL = N->getDebugLoc();
-
+
// If the flag operand isn't dead, don't touch this CMOV.
if (N->getNumValues() == 2 && !SDValue(N, 1).use_empty())
return SDValue();
-
+
// If this is a select between two integer constants, try to do some
// optimizations. Note that the operands are ordered the opposite of SELECT
// operands.
@@ -8149,12 +8537,12 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
// Canonicalize the TrueC/FalseC values so that TrueC (the true value) is
// larger than FalseC (the false value).
X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);
-
+
if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) {
CC = X86::GetOppositeBranchCondition(CC);
std::swap(TrueC, FalseC);
}
-
+
// Optimize C ? 8 : 0 -> zext(setcc(C)) << 3. Likewise for any pow2/0.
// This is efficient for any integer data type (including i8/i16) and
// shift amount.
@@ -8162,10 +8550,10 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
SDValue Cond = N->getOperand(3);
Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
DAG.getConstant(CC, MVT::i8), Cond);
-
+
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond);
-
+
unsigned ShAmt = TrueC->getAPIntValue().logBase2();
Cond = DAG.getNode(ISD::SHL, DL, Cond.getValueType(), Cond,
DAG.getConstant(ShAmt, MVT::i8));
@@ -8173,31 +8561,31 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
return DCI.CombineTo(N, Cond, SDValue());
return Cond;
}
-
+
      // Optimize Cond ? cst+1 : cst -> zext(setcc(C)) + cst. This is efficient
// for any integer data type, including i8/i16.
if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
SDValue Cond = N->getOperand(3);
Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
DAG.getConstant(CC, MVT::i8), Cond);
-
+
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
FalseC->getValueType(0), Cond);
Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
SDValue(FalseC, 0));
-
+
if (N->getNumValues() == 2) // Dead flag value?
return DCI.CombineTo(N, Cond, SDValue());
return Cond;
}
-
+
// Optimize cases that will turn into an LEA instruction. This requires
// an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
-
+
bool isFastMultiplier = false;
if (Diff < 10) {
switch ((unsigned char)Diff) {
@@ -8213,7 +8601,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
break;
}
}
-
+
if (isFastMultiplier) {
APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
SDValue Cond = N->getOperand(3);
@@ -8235,7 +8623,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
return DCI.CombineTo(N, Cond, SDValue());
return Cond;
}
- }
+ }
}
}
return SDValue();
@@ -8254,7 +8642,7 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
return SDValue();
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
if (VT != MVT::i64)
return SDValue();
@@ -8289,17 +8677,17 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
std::swap(MulAmt1, MulAmt2);
SDValue NewMul;
- if (isPowerOf2_64(MulAmt1))
+ if (isPowerOf2_64(MulAmt1))
NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
DAG.getConstant(Log2_64(MulAmt1), MVT::i8));
else
NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
DAG.getConstant(MulAmt1, VT));
- if (isPowerOf2_64(MulAmt2))
+ if (isPowerOf2_64(MulAmt2))
NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul,
DAG.getConstant(Log2_64(MulAmt2), MVT::i8));
- else
+ else
NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
DAG.getConstant(MulAmt2, VT));
@@ -8321,14 +8709,14 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
if (!Subtarget->hasSSE2())
return SDValue();
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16)
return SDValue();
SDValue ShAmtOp = N->getOperand(1);
- MVT EltVT = VT.getVectorElementType();
+ EVT EltVT = VT.getVectorElementType();
DebugLoc DL = N->getDebugLoc();
- SDValue BaseShAmt;
+ SDValue BaseShAmt = SDValue();
if (ShAmtOp.getOpcode() == ISD::BUILD_VECTOR) {
unsigned NumElts = VT.getVectorNumElements();
unsigned i = 0;
@@ -8347,21 +8735,40 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
}
} else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE &&
cast<ShuffleVectorSDNode>(ShAmtOp)->isSplat()) {
- BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp,
- DAG.getIntPtrConstant(0));
+ SDValue InVec = ShAmtOp.getOperand(0);
+ if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
+ unsigned NumElts = InVec.getValueType().getVectorNumElements();
+ unsigned i = 0;
+ for (; i != NumElts; ++i) {
+ SDValue Arg = InVec.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ BaseShAmt = Arg;
+ break;
+ }
+ } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
+ unsigned SplatIdx = cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex();
+ if (C->getZExtValue() == SplatIdx)
+ BaseShAmt = InVec.getOperand(1);
+ }
+ }
+ if (BaseShAmt.getNode() == 0)
+ BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp,
+ DAG.getIntPtrConstant(0));
} else
return SDValue();
+ // The shift amount is an i32.
if (EltVT.bitsGT(MVT::i32))
BaseShAmt = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, BaseShAmt);
else if (EltVT.bitsLT(MVT::i32))
- BaseShAmt = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, BaseShAmt);
+ BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, BaseShAmt);
// The shift amount is identical so we can do a vector shift.
SDValue ValOp = N->getOperand(0);
switch (N->getOpcode()) {
default:
- assert(0 && "Unknown shift opcode!");
+ llvm_unreachable("Unknown shift opcode!");
break;
case ISD::SHL:
if (VT == MVT::v2i64)
@@ -8415,13 +8822,13 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
// Similarly, turn load->store of i64 into double load/stores in 32-bit mode.
StoreSDNode *St = cast<StoreSDNode>(N);
- MVT VT = St->getValue().getValueType();
+ EVT VT = St->getValue().getValueType();
if (VT.getSizeInBits() != 64)
return SDValue();
const Function *F = DAG.getMachineFunction().getFunction();
bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
- bool F64IsLegal = !UseSoftFloat && !NoImplicitFloatOps
+ bool F64IsLegal = !UseSoftFloat && !NoImplicitFloatOps
&& Subtarget->hasSSE2();
if ((VT.isVector() ||
(VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) &&
@@ -8464,7 +8871,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
// Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store
// pair instead.
if (Subtarget->is64Bit() || F64IsLegal) {
- MVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64;
+ EVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64;
SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(),
Ld->getBasePtr(), Ld->getSrcValue(),
Ld->getSrcValueOffset(), Ld->isVolatile(),
@@ -8568,9 +8975,9 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
SDValue Op = N->getOperand(0);
if (Op.getOpcode() == ISD::BIT_CONVERT)
Op = Op.getOperand(0);
- MVT VT = N->getValueType(0), OpVT = Op.getValueType();
+ EVT VT = N->getValueType(0), OpVT = Op.getValueType();
if (Op.getOpcode() == X86ISD::VZEXT_LOAD &&
- VT.getVectorElementType().getSizeInBits() ==
+ VT.getVectorElementType().getSizeInBits() ==
OpVT.getVectorElementType().getSizeInBits()) {
return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op);
}
@@ -8580,7 +8987,7 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
// On X86 and X86-64, atomic operations are lowered to locked instructions.
// Locked instructions, in turn, have implicit fence semantics (all memory
// operations are flushed before issuing the locked instruction, and they
-// are not buffered), so we can fold away the common pattern of
+// are not buffered), so we can fold away the common pattern of
// fence-atomic-fence.
static SDValue PerformMEMBARRIERCombine(SDNode* N, SelectionDAG &DAG) {
SDValue atomic = N->getOperand(0);
@@ -8601,11 +9008,11 @@ static SDValue PerformMEMBARRIERCombine(SDNode* N, SelectionDAG &DAG) {
default:
return SDValue();
}
-
+
SDValue fence = atomic.getOperand(0);
if (fence.getOpcode() != ISD::MEMBARRIER)
return SDValue();
-
+
switch (atomic.getOpcode()) {
case ISD::ATOMIC_CMP_SWAP:
return DAG.UpdateNodeOperands(atomic, fence.getOperand(0),
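
At the source level, the fence-atomic-fence shape being folded corresponds to something like the following sketch (written with C++11 atomics, which postdate this patch; on x86 the locked RMW already acts as a full barrier, so both explicit fences are redundant):

    #include <atomic>

    std::atomic<int> Val(0);

    void fenced_rmw() {
      std::atomic_thread_fence(std::memory_order_seq_cst); // foldable on x86
      Val.fetch_add(1, std::memory_order_seq_cst);         // locked RMW, full fence
      std::atomic_thread_fence(std::memory_order_seq_cst); // foldable on x86
    }
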
@@ -8657,6 +9064,101 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
// X86 Inline Assembly Support
//===----------------------------------------------------------------------===//
+static bool LowerToBSwap(CallInst *CI) {
+  // FIXME: this should verify that we are targeting a 486 or better. If not,
+ // we will turn this bswap into something that will be lowered to logical ops
+ // instead of emitting the bswap asm. For now, we don't support 486 or lower
+ // so don't worry about this.
+
+ // Verify this is a simple bswap.
+ if (CI->getNumOperands() != 2 ||
+ CI->getType() != CI->getOperand(1)->getType() ||
+ !CI->getType()->isInteger())
+ return false;
+
+ const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+
+ // Okay, we can do this xform, do so now.
+ const Type *Tys[] = { Ty };
+ Module *M = CI->getParent()->getParent()->getParent();
+ Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
+
+ Value *Op = CI->getOperand(1);
+ Op = CallInst::Create(Int, Op, CI->getName(), CI);
+
+ CI->replaceAllUsesWith(Op);
+ CI->eraseFromParent();
+ return true;
+}
+
+bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
+ InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
+ std::vector<InlineAsm::ConstraintInfo> Constraints = IA->ParseConstraints();
+
+ std::string AsmStr = IA->getAsmString();
+
+ // TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a"
+ std::vector<std::string> AsmPieces;
+ SplitString(AsmStr, AsmPieces, "\n"); // ; as separator?
+
+ switch (AsmPieces.size()) {
+ default: return false;
+ case 1:
+ AsmStr = AsmPieces[0];
+ AsmPieces.clear();
+ SplitString(AsmStr, AsmPieces, " \t"); // Split with whitespace.
+
+ // bswap $0
+ if (AsmPieces.size() == 2 &&
+ (AsmPieces[0] == "bswap" ||
+ AsmPieces[0] == "bswapq" ||
+ AsmPieces[0] == "bswapl") &&
+ (AsmPieces[1] == "$0" ||
+ AsmPieces[1] == "${0:q}")) {
+ // No need to check constraints, nothing other than the equivalent of
+ // "=r,0" would be valid here.
+ return LowerToBSwap(CI);
+ }
+ // rorw $$8, ${0:w} --> llvm.bswap.i16
+ if (CI->getType() == Type::getInt16Ty(CI->getContext()) &&
+ AsmPieces.size() == 3 &&
+ AsmPieces[0] == "rorw" &&
+ AsmPieces[1] == "$$8," &&
+ AsmPieces[2] == "${0:w}" &&
+ IA->getConstraintString() == "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}") {
+ return LowerToBSwap(CI);
+ }
+ break;
+ case 3:
+ if (CI->getType() == Type::getInt64Ty(CI->getContext()) &&
+ Constraints.size() >= 2 &&
+ Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
+ Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
+ // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64
+ std::vector<std::string> Words;
+ SplitString(AsmPieces[0], Words, " \t");
+ if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") {
+ Words.clear();
+ SplitString(AsmPieces[1], Words, " \t");
+ if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%edx") {
+ Words.clear();
+ SplitString(AsmPieces[2], Words, " \t,");
+ if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" &&
+ Words[2] == "%edx") {
+ return LowerToBSwap(CI);
+ }
+ }
+ }
+ }
+ break;
+ }
+ return false;
+}
+
+
+
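
Source-level inline asm of the kind ExpandInlineAsm rewrites looks roughly like this sketch with GCC-style constraints (at the IR level the operand is rendered as "$0"/"${0:w}" and the front end appends the default dirflag/fpsr/flags clobbers, so the exact constraint string depends on the front end):

    #include <cstdint>

    // One piece, "bswap $0", tied "=r,0" -> replaced by llvm.bswap.i32.
    uint32_t swap32(uint32_t x) {
      __asm__("bswap %0" : "=r"(x) : "0"(x));
      return x;
    }

    // "rorw $8, ${0:w}" on an i16 -> replaced by llvm.bswap.i16.
    uint16_t swap16(uint16_t x) {
      __asm__("rorw $8, %w0" : "=r"(x) : "0"(x) : "cc");
      return x;
    }
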
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
X86TargetLowering::ConstraintType
@@ -8689,7 +9191,7 @@ X86TargetLowering::getConstraintType(const std::string &Constraint) const {
/// with another that has more specific requirements based on the type of the
/// corresponding operand.
const char *X86TargetLowering::
-LowerXConstraint(MVT ConstraintVT) const {
+LowerXConstraint(EVT ConstraintVT) const {
// FP X constraints get lowered to SSE1/2 registers if available, otherwise
// 'f' like normal targets.
if (ConstraintVT.isFloatingPoint()) {
@@ -8749,7 +9251,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
// 32-bit signed value
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
const ConstantInt *CI = C->getConstantIntValue();
- if (CI->isValueValidForType(Type::Int32Ty, C->getSExtValue())) {
+ if (CI->isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
+ C->getSExtValue())) {
// Widen to 64 bits here to get it sign extended.
Result = DAG.getTargetConstant(C->getSExtValue(), MVT::i64);
break;
@@ -8763,7 +9266,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
// 32-bit unsigned value
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
const ConstantInt *CI = C->getConstantIntValue();
- if (CI->isValueValidForType(Type::Int32Ty, C->getZExtValue())) {
+ if (CI->isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
+ C->getZExtValue())) {
Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
break;
}
@@ -8803,16 +9307,22 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
continue;
}
}
-
+
// Otherwise, this isn't something we can handle, reject it.
return;
}
+ GlobalValue *GV = GA->getGlobal();
+ // If we require an extra load to get this address, as in PIC mode, we
+ // can't accept it.
+ if (isGlobalStubReference(Subtarget->ClassifyGlobalReference(GV,
+ getTargetMachine())))
+ return;
+
if (hasMemory)
- Op = LowerGlobalAddress(GA->getGlobal(), Op.getDebugLoc(), Offset, DAG);
+ Op = LowerGlobalAddress(GV, Op.getDebugLoc(), Offset, DAG);
else
- Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
- Offset);
+ Op = DAG.getTargetGlobalAddress(GV, GA->getValueType(0), Offset);
Result = Op;
break;
}
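
A small usage sketch for the 'e' constraint validated above (64-bit only; the value must fit a sign-extended 32-bit immediate field; function name illustrative):

    // 123456789 fits in a signed 32-bit field, so 'e' accepts it.
    long add_big_imm(long x) {
      __asm__("addq %1, %0" : "+r"(x) : "e"(123456789L));
      return x;
    }
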
@@ -8828,12 +9338,42 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::vector<unsigned> X86TargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const {
+ EVT VT) const {
if (Constraint.size() == 1) {
// FIXME: not handling fp-stack yet!
switch (Constraint[0]) { // GCC X86 Constraint Letters
default: break; // Unknown constraint letter
- case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode)
+ case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
+ if (Subtarget->is64Bit()) {
+ if (VT == MVT::i32)
+ return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
+ X86::ESI, X86::EDI, X86::R8D, X86::R9D,
+ X86::R10D,X86::R11D,X86::R12D,
+ X86::R13D,X86::R14D,X86::R15D,
+ X86::EBP, X86::ESP, 0);
+ else if (VT == MVT::i16)
+ return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX,
+ X86::SI, X86::DI, X86::R8W,X86::R9W,
+ X86::R10W,X86::R11W,X86::R12W,
+ X86::R13W,X86::R14W,X86::R15W,
+ X86::BP, X86::SP, 0);
+ else if (VT == MVT::i8)
+ return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL,
+ X86::SIL, X86::DIL, X86::R8B,X86::R9B,
+ X86::R10B,X86::R11B,X86::R12B,
+ X86::R13B,X86::R14B,X86::R15B,
+ X86::BPL, X86::SPL, 0);
+
+ else if (VT == MVT::i64)
+ return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX,
+ X86::RSI, X86::RDI, X86::R8, X86::R9,
+ X86::R10, X86::R11, X86::R12,
+ X86::R13, X86::R14, X86::R15,
+ X86::RBP, X86::RSP, 0);
+
+ break;
+ }
+ // 32-bit fallthrough
case 'Q': // Q_REGS
if (VT == MVT::i32)
return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
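
The distinction matters because in 32-bit mode only EAX/EBX/ECX/EDX have byte subregisters, while in 64-bit mode every GPR does, hence the wider lists above. A usage sketch with GCC-style asm:

    // 'q' demands a byte-addressable register in 32-bit mode.
    unsigned char low_byte(unsigned v) {
      unsigned char b;
      __asm__("movb %b1, %0" : "=q"(b) : "q"(v));
      return b;
    }
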
@@ -8852,7 +9392,7 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint,
std::pair<unsigned, const TargetRegisterClass*>
X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const {
+ EVT VT) const {
// First, see if this is a constraint that directly corresponds to an LLVM
// register class.
if (Constraint.size() == 1) {
@@ -8860,7 +9400,6 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
switch (Constraint[0]) {
default: break;
case 'r': // GENERAL_REGS
- case 'R': // LEGACY_REGS
case 'l': // INDEX_REGS
if (VT == MVT::i8)
return std::make_pair(0U, X86::GR8RegisterClass);
@@ -8869,6 +9408,14 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
if (VT == MVT::i32 || !Subtarget->is64Bit())
return std::make_pair(0U, X86::GR32RegisterClass);
return std::make_pair(0U, X86::GR64RegisterClass);
+ case 'R': // LEGACY_REGS
+ if (VT == MVT::i8)
+ return std::make_pair(0U, X86::GR8_NOREXRegisterClass);
+ if (VT == MVT::i16)
+ return std::make_pair(0U, X86::GR16_NOREXRegisterClass);
+ if (VT == MVT::i32 || !Subtarget->is64Bit())
+ return std::make_pair(0U, X86::GR32_NOREXRegisterClass);
+ return std::make_pair(0U, X86::GR64_NOREXRegisterClass);
case 'f': // FP Stack registers.
// If SSE is enabled for this VT, use f80 to ensure the isel moves the
// value to the correct fpstack register class.
@@ -8886,7 +9433,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
case 'x': // SSE_REGS if SSE1 allowed
if (!Subtarget->hasSSE1()) break;
- switch (VT.getSimpleVT()) {
+ switch (VT.getSimpleVT().SimpleTy) {
default: break;
// Scalar SSE types.
case MVT::f32:
@@ -8915,15 +9462,39 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// Not found as a standard register?
if (Res.second == 0) {
- // GCC calls "st(0)" just plain "st".
+    // Map st(0) .. st(7) to registers ST0 .. ST7.
+ if (Constraint.size() == 7 && Constraint[0] == '{' &&
+ tolower(Constraint[1]) == 's' &&
+ tolower(Constraint[2]) == 't' &&
+ Constraint[3] == '(' &&
+ (Constraint[4] >= '0' && Constraint[4] <= '7') &&
+ Constraint[5] == ')' &&
+ Constraint[6] == '}') {
+
+ Res.first = X86::ST0+Constraint[4]-'0';
+ Res.second = X86::RFP80RegisterClass;
+ return Res;
+ }
+
+ // GCC allows "st(0)" to be called just plain "st".
if (StringsEqualNoCase("{st}", Constraint)) {
Res.first = X86::ST0;
Res.second = X86::RFP80RegisterClass;
+ return Res;
+ }
+
+ // flags -> EFLAGS
+ if (StringsEqualNoCase("{flags}", Constraint)) {
+ Res.first = X86::EFLAGS;
+ Res.second = X86::CCRRegisterClass;
+ return Res;
}
+
// 'A' means EAX + EDX.
if (Constraint == "A") {
Res.first = X86::EAX;
- Res.second = X86::GRADRegisterClass;
+ Res.second = X86::GR32_ADRegisterClass;
+ return Res;
}
return Res;
}
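
For illustration, the "A" constraint handled above appears in source like this sketch (32-bit mode, where "A" names the EDX:EAX pair):

    // rdtsc writes EDX:EAX; "=A" collects both halves into one 64-bit value.
    unsigned long long read_tsc() {
      unsigned long long t;
      __asm__ volatile("rdtsc" : "=A"(t));
      return t;
    }
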
@@ -9015,7 +9586,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
/// When and where to widen is target dependent based on the cost of
/// scalarizing vs using the wider vector type.
-MVT X86TargetLowering::getWidenVectorType(MVT VT) const {
+EVT X86TargetLowering::getWidenVectorType(EVT VT) const {
assert(VT.isVector());
if (isTypeLegal(VT))
return VT;
@@ -9024,7 +9595,7 @@ MVT X86TargetLowering::getWidenVectorType(MVT VT) const {
// type based on element type. This would speed up our search (though
// it may not be worth it since the size of the list is relatively
// small).
- MVT EltVT = VT.getVectorElementType();
+ EVT EltVT = VT.getVectorElementType();
unsigned NElts = VT.getVectorNumElements();
  // On X86, it makes sense to widen any vector wider than 1
@@ -9033,7 +9604,7 @@ MVT X86TargetLowering::getWidenVectorType(MVT VT) const {
for (unsigned nVT = MVT::FIRST_VECTOR_VALUETYPE;
nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
- MVT SVT = (MVT::SimpleValueType)nVT;
+ EVT SVT = (MVT::SimpleValueType)nVT;
if (isTypeLegal(SVT) &&
SVT.getVectorElementType() == EltVT &&
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index ffed46c..2f7b8ba 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -85,7 +85,7 @@ namespace llvm {
/// as.
FST,
- /// CALL/TAILCALL - These operations represent an abstract X86 call
+ /// CALL - These operations represent an abstract X86 call
/// instruction, which includes a bunch of information. In particular the
/// operands of these node are:
///
@@ -102,12 +102,8 @@ namespace llvm {
/// #1 - The first register result value (optional)
/// #2 - The second register result value (optional)
///
- /// The CALL vs TAILCALL distinction boils down to whether the callee is
- /// known not to modify the caller's stack frame, as is standard with
- /// LLVM.
CALL,
- TAILCALL,
-
+
/// RDTSC_DAG - This operation implements the lowering for
/// readcyclecounter
RDTSC_DAG,
@@ -208,17 +204,6 @@ namespace llvm {
LCMPXCHG_DAG,
LCMPXCHG8_DAG,
- // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
- // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
- // Atomic 64-bit binary operations.
- ATOMADD64_DAG,
- ATOMSUB64_DAG,
- ATOMOR64_DAG,
- ATOMXOR64_DAG,
- ATOMAND64_DAG,
- ATOMNAND64_DAG,
- ATOMSWAP64_DAG,
-
  // FNSTCW16m - Store FP control word into i16 memory.
FNSTCW16m,
@@ -241,10 +226,29 @@ namespace llvm {
// ADD, SUB, SMUL, UMUL, etc. - Arithmetic operations with FLAGS results.
ADD, SUB, SMUL, UMUL,
- INC, DEC,
+ INC, DEC, OR, XOR, AND,
// MUL_IMM - X86 specific multiply by immediate.
- MUL_IMM
+ MUL_IMM,
+
+ // PTEST - Vector bitwise comparisons
+ PTEST,
+
+ // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
+ // according to %al. An operator is needed so that this can be expanded
+ // with control flow.
+ VASTART_SAVE_XMM_REGS,
+
+ // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
+ // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
+ // Atomic 64-bit binary operations.
+ ATOMADD64_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ ATOMSUB64_DAG,
+ ATOMOR64_DAG,
+ ATOMXOR64_DAG,
+ ATOMAND64_DAG,
+ ATOMNAND64_DAG,
+ ATOMSWAP64_DAG
};
}
@@ -333,6 +337,15 @@ namespace llvm {
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
/// instructions.
unsigned getShufflePSHUFLWImmediate(SDNode *N);
+
+ /// isZeroNode - Returns true if Elt is a constant zero or a floating point
+ /// constant +0.0.
+ bool isZeroNode(SDValue Elt);
+
+  /// isOffsetSuitableForCodeModel - Returns true if the given offset can
+  /// fit into the displacement field of the instruction.
+ bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
+ bool hasSymbolicDisplacement = true);
}
//===--------------------------------------------------------------------===//
@@ -374,12 +387,17 @@ namespace llvm {
/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
- /// lowering. It returns MVT::iAny if SelectionDAG should be responsible for
+ /// lowering. It returns EVT::iAny if SelectionDAG should be responsible for
/// determining it.
- virtual
- MVT getOptimalMemOpType(uint64_t Size, unsigned Align,
- bool isSrcConst, bool isSrcStr,
- SelectionDAG &DAG) const;
+ virtual EVT getOptimalMemOpType(uint64_t Size, unsigned Align,
+ bool isSrcConst, bool isSrcStr,
+ SelectionDAG &DAG) const;
+
+ /// allowsUnalignedMemoryAccesses - Returns true if the target allows
+  /// unaligned memory accesses of the specified type.
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT) const {
+ return true;
+ }
/// LowerOperation - Provide custom lowering hooks for some operations.
///
@@ -395,7 +413,8 @@ namespace llvm {
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB) const;
+ MachineBasicBlock *MBB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
/// getTargetNodeName - This method returns the name of a target specific
@@ -403,7 +422,7 @@ namespace llvm {
virtual const char *getTargetNodeName(unsigned Opcode) const;
/// getSetCCResultType - Return the ISD::SETCC ValueType
- virtual MVT getSetCCResultType(MVT VT) const;
+ virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
/// computeMaskedBitsForTargetNode - Determine which of the bits specified
/// in Mask are known to be either zero or one and return them in the
@@ -420,13 +439,15 @@ namespace llvm {
SDValue getReturnAddressFrameIndex(SelectionDAG &DAG);
+ virtual bool ExpandInlineAsm(CallInst *CI) const;
+
ConstraintType getConstraintType(const std::string &Constraint) const;
std::vector<unsigned>
getRegClassForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const;
+ EVT VT) const;
- virtual const char *LowerXConstraint(MVT ConstraintVT) const;
+ virtual const char *LowerXConstraint(EVT ConstraintVT) const;
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops. If hasMemory is
@@ -444,7 +465,7 @@ namespace llvm {
/// error, this returns a register number of 0.
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const;
+ EVT VT) const;
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
@@ -454,7 +475,7 @@ namespace llvm {
/// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
/// register EAX to i16 by referencing its sub-register AX.
virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const;
- virtual bool isTruncateFree(MVT VT1, MVT VT2) const;
+ virtual bool isTruncateFree(EVT VT1, EVT VT2) const;
/// isZExtFree - Return true if any actual instruction that defines a
/// value of type Ty1 implicit zero-extends the value to Ty2 in the result
@@ -465,31 +486,31 @@ namespace llvm {
/// all instructions that define 32-bit values implicit zero-extend the
/// result out to 64 bits.
virtual bool isZExtFree(const Type *Ty1, const Type *Ty2) const;
- virtual bool isZExtFree(MVT VT1, MVT VT2) const;
+ virtual bool isZExtFree(EVT VT1, EVT VT2) const;
/// isNarrowingProfitable - Return true if it's profitable to narrow
/// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
/// from i32 to i8 but not from i32 to i16.
- virtual bool isNarrowingProfitable(MVT VT1, MVT VT2) const;
+ virtual bool isNarrowingProfitable(EVT VT1, EVT VT2) const;
/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask
/// values are assumed to be legal.
virtual bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask,
- MVT VT) const;
+ EVT VT) const;
    /// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. Targets can
    /// use this to indicate if there is a suitable
/// VECTOR_SHUFFLE that can be used to replace a VAND with a constant
/// pool entry.
virtual bool isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
- MVT VT) const;
+ EVT VT) const;
/// ShouldShrinkFPConstant - If true, then instruction selection should
/// seek to shrink the FP constant of the specified type to a smaller type
/// in order to save space and / or reduce runtime.
- virtual bool ShouldShrinkFPConstant(MVT VT) const {
+ virtual bool ShouldShrinkFPConstant(EVT VT) const {
// Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
// expensive than a straight movsd. On the other hand, it's important to
// shrink long double fp constant since fldt is very slow.
@@ -497,11 +518,14 @@ namespace llvm {
}
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
- /// for tail call optimization. Target which want to do tail call
+ /// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
- virtual bool IsEligibleForTailCallOptimization(CallSDNode *TheCall,
- SDValue Ret,
- SelectionDAG &DAG) const;
+ virtual bool
+ IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
virtual const X86Subtarget* getSubtarget() {
return Subtarget;
@@ -509,17 +533,17 @@ namespace llvm {
/// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
/// computed in an SSE register, not on the X87 floating point stack.
- bool isScalarFPTypeInSSEReg(MVT VT) const {
+ bool isScalarFPTypeInSSEReg(EVT VT) const {
return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
(VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
}
/// getWidenVectorType: given a vector type, returns the type to widen
/// to (e.g., v7i8 to v8i8). If the vector type is legal, it returns itself.
- /// If there is no vector type that we want to widen to, returns MVT::Other
+ /// If there is no vector type that we want to widen to, returns EVT::Other
    /// When and where to widen is target dependent based on the cost of
/// scalarizing vs using the wider vector type.
- virtual MVT getWidenVectorType(MVT VT) const;
+ virtual EVT getWidenVectorType(EVT VT) const;
/// createFastISel - This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
@@ -554,28 +578,30 @@ namespace llvm {
bool X86ScalarSSEf32;
bool X86ScalarSSEf64;
- SDNode *LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
- unsigned CallingConv, SelectionDAG &DAG);
-
- SDValue LowerMemArgument(SDValue Op, SelectionDAG &DAG,
- const CCValAssign &VA, MachineFrameInfo *MFI,
- unsigned CC, SDValue Root, unsigned i);
-
- SDValue LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
- const SDValue &StackPtr,
- const CCValAssign &VA, SDValue Chain,
- SDValue Arg, ISD::ArgFlagsTy Flags);
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+ SDValue LowerMemArgument(SDValue Chain,
+ CallingConv::ID CallConv,
+ const SmallVectorImpl<ISD::InputArg> &ArgInfo,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA, MachineFrameInfo *MFI,
+ unsigned i);
+ SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags);
// Call lowering helpers.
- bool IsCalleePop(bool isVarArg, unsigned CallingConv);
- bool CallRequiresGOTPtrInReg(bool Is64Bit, bool IsTailCall);
- bool CallRequiresFnAddressInReg(bool Is64Bit, bool IsTailCall);
+ bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv);
SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
SDValue Chain, bool IsTailCall, bool Is64Bit,
int FPDiff, DebugLoc dl);
- CCAssignFn *CCAssignFnForNode(unsigned CallingConv) const;
- NameDecorationStyle NameDecorationForFORMAL_ARGUMENTS(SDValue Op);
+ CCAssignFn *CCAssignFnForNode(CallingConv::ID CallConv) const;
+ NameDecorationStyle NameDecorationForCallConv(CallingConv::ID CallConv);
unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG &DAG);
std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
@@ -595,7 +621,7 @@ namespace llvm {
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG);
SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG);
SDValue LowerShift(SDValue Op, SelectionDAG &DAG);
- SDValue BuildFILD(SDValue Op, MVT SrcVT, SDValue Chain, SDValue StackSlot,
+ SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
SelectionDAG &DAG);
SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG);
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG);
@@ -612,10 +638,7 @@ namespace llvm {
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG);
SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG);
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG);
- SDValue LowerCALL(SDValue Op, SelectionDAG &DAG);
- SDValue LowerRET(SDValue Op, SelectionDAG &DAG);
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG);
- SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG);
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG);
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG);
SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG);
@@ -635,6 +658,26 @@ namespace llvm {
SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG);
SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG);
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG);
+
void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG, unsigned NewOp);
@@ -651,9 +694,17 @@ namespace llvm {
const Value *DstSV, uint64_t DstSVOff,
const Value *SrcSV, uint64_t SrcSVOff);
+    /// Utility function to emit SSE4.2 string-processing instructions
+    /// that return their result in xmm0.
+    /// This takes the instruction to expand, the associated machine basic
+    /// block, the number of args, and whether the second arg is in memory
+    /// or not.
+ MachineBasicBlock *EmitPCMP(MachineInstr *BInstr, MachineBasicBlock *BB,
+ unsigned argNum, bool inMem) const;
+
/// Utility function to emit atomic bitwise operations (and, or, xor).
- // It takes the bitwise instruction to expand, the associated machine basic
- // block, and the associated X86 opcodes for reg/reg and reg/imm.
+ /// It takes the bitwise instruction to expand, the associated machine basic
+ /// block, and the associated X86 opcodes for reg/reg and reg/imm.
MachineBasicBlock *EmitAtomicBitwiseWithCustomInserter(
MachineInstr *BInstr,
MachineBasicBlock *BB,
@@ -683,6 +734,15 @@ namespace llvm {
MachineBasicBlock *BB,
unsigned cmovOpc) const;
+ /// Utility function to emit the xmm reg save portion of va_start.
+ MachineBasicBlock *EmitVAStartSaveXMMRegsWithCustomInserter(
+ MachineInstr *BInstr,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *EmitLoweredSelect(MachineInstr *I,
+ MachineBasicBlock *BB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
+
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent, for use with the given x86 condition code.
SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG);
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 472ba4c..ef19823 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -28,26 +28,29 @@ def i64i32imm_pcrel : Operand<i64> {
// 64-bits but only 8 bits are significant.
-def i64i8imm : Operand<i64>;
+def i64i8imm : Operand<i64> {
+ let ParserMatchClass = ImmSExt8AsmOperand;
+}
def lea64mem : Operand<i64> {
let PrintMethod = "printlea64mem";
- let MIOperandInfo = (ops GR64, i8imm, GR64, i32imm);
+ let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm);
+ let ParserMatchClass = X86MemAsmOperand;
}
def lea64_32mem : Operand<i32> {
let PrintMethod = "printlea64_32mem";
let AsmOperandLowerMethod = "lower_lea64_32mem";
- let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm);
+ let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm);
+ let ParserMatchClass = X86MemAsmOperand;
}
//===----------------------------------------------------------------------===//
// Complex Pattern Definitions.
//
def lea64addr : ComplexPattern<i64, 4, "SelectLEAAddr",
- [add, mul, X86mul_imm, shl, or, frameindex, X86Wrapper,
- X86WrapperRIP],
- []>;
+ [add, sub, mul, X86mul_imm, shl, or, frameindex,
+ X86WrapperRIP], []>;
def tls64addr : ComplexPattern<i64, 4, "SelectTLSADDRAddr",
[tglobaltlsaddr], []>;
@@ -129,13 +132,40 @@ let isCall = 1 in
def CALL64pcrel32 : Ii32<0xE8, RawFrm,
(outs), (ins i64i32imm_pcrel:$dst, variable_ops),
"call\t$dst", []>,
- Requires<[In64BitMode]>;
+ Requires<[In64BitMode, NotWin64]>;
def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
- "call\t{*}$dst", [(X86call GR64:$dst)]>;
+ "call\t{*}$dst", [(X86call GR64:$dst)]>,
+ Requires<[NotWin64]>;
def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
- "call\t{*}$dst", [(X86call (loadi64 addr:$dst))]>;
+ "call\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
+ Requires<[NotWin64]>;
+
+ def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst),
+ "lcall{q}\t{*}$dst", []>;
}
 // FIXME: We need to teach codegen about a single list of call-clobbered registers.
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. RSP is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
+ Uses = [RSP] in {
+ def WINCALL64pcrel32 : I<0xE8, RawFrm,
+ (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
+ "call\t$dst", []>,
+ Requires<[IsWin64]>;
+ def WINCALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
+ "call\t{*}$dst",
+ [(X86call GR64:$dst)]>, Requires<[IsWin64]>;
+ def WINCALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
+ "call\t{*}$dst",
+ [(X86call (loadi64 addr:$dst))]>, Requires<[IsWin64]>;
+ }
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
@@ -162,6 +192,8 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
[(brind GR64:$dst)]>;
def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
[(brind (loadi64 addr:$dst))]>;
+ def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst),
+ "ljmp{q}\t{*}$dst", []>;
}
//===----------------------------------------------------------------------===//
@@ -182,12 +214,18 @@ let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in
def LEAVE64 : I<0xC9, RawFrm,
(outs), (ins), "leave", []>;
let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in {
-let mayLoad = 1 in
+let mayLoad = 1 in {
def POP64r : I<0x58, AddRegFrm,
(outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
-let mayStore = 1 in
+def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
+def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", []>;
+}
+let mayStore = 1 in {
def PUSH64r : I<0x50, AddRegFrm,
(outs), (ins GR64:$reg), "push{q}\t$reg", []>;
+def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
+def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>;
+}
}
let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in {
@@ -246,6 +284,14 @@ let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI] in
def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
[(X86rep_stos i64)]>, REP;
+def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scas{q}", []>;
+
+def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmps{q}", []>;
+
+// Fast system-call instructions
+def SYSEXIT64 : RI<0x35, RawFrm,
+ (outs), (ins), "sysexit", []>, TB;
+
//===----------------------------------------------------------------------===//
// Move Instructions...
//
@@ -275,6 +321,25 @@ def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[(store i64immSExt32:$src, addr:$dst)]>;
+def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins i8imm:$src),
+ "mov{q}\t{$src, %rax|%rax, $src}", []>;
+def MOV64o32a : RIi32<0xA1, RawFrm, (outs), (ins i32imm:$src),
+ "mov{q}\t{$src, %rax|%rax, $src}", []>;
+def MOV64ao8 : RIi8<0xA2, RawFrm, (outs i8imm:$dst), (ins),
+ "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
+def MOV64ao32 : RIi32<0xA3, RawFrm, (outs i32imm:$dst), (ins),
+ "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
+
+// Moves to and from segment registers
+def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>;
+def MOV64ms : RI<0x8C, MRMDestMem, (outs i64mem:$dst), (ins SEGMENT_REG:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>;
+def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>;
+def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>;
+
// Sign/Zero extenders
// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
@@ -332,13 +397,15 @@ def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
[(set GR64:$dst, (zextloadi64i32 addr:$src))]>;
// Any instruction that defines a 32-bit result zeroes the high half of the
-// register. Truncate can be lowered to EXTRACT_SUBREG, and CopyFromReg may
-// be copying from a truncate, but any other 32-bit operation will zero-extend
+// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
+// be copying from a truncate. And x86's cmov doesn't do anything if the
+// condition is false. But any other 32-bit operation will zero-extend
// up to 64 bits.
def def32 : PatLeaf<(i32 GR32:$src), [{
return N->getOpcode() != ISD::TRUNCATE &&
N->getOpcode() != TargetInstrInfo::EXTRACT_SUBREG &&
- N->getOpcode() != ISD::CopyFromReg;
+ N->getOpcode() != ISD::CopyFromReg &&
+ N->getOpcode() != X86ISD::CMOV;
}]>;
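
The architectural rule def32 exploits is that writing a 32-bit register clears bits 63:32; in C++ terms the final zero-extension below is free (illustrative sketch):

    #include <cstdint>

    uint64_t zext_is_free(uint32_t a, uint32_t b) {
      uint32_t lo = a & b; // the 32-bit 'and' already zeroed bits 63:32
      return lo;           // so this zext needs no extra instruction
    }
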
// In the case of a 32-bit def that is known to implicitly zero-extend,
@@ -361,6 +428,10 @@ let neverHasSideEffects = 1 in {
//
let Defs = [EFLAGS] in {
+
+def ADD64i32 : RI<0x05, RawFrm, (outs), (ins i32imm:$src),
+ "add{q}\t{$src, %rax|%rax, $src}", []>;
+
let isTwoAddress = 1 in {
let isConvertibleToThreeAddress = 1 in {
let isCommutable = 1 in
@@ -386,6 +457,12 @@ def ADD64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:
"add{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (add GR64:$src1, (load addr:$src2))),
(implicit EFLAGS)]>;
+
+// Register-Register Addition - Equivalent to the normal rr form (ADD64rr), but
+// differently encoded.
+def ADD64mrmrr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "add{q}\t{$src2, $dst|$dst, $src2}", []>;
+
} // isTwoAddress
// Memory-Register Addition
@@ -403,6 +480,10 @@ def ADD64mi32 : RIi32<0x81, MRM0m, (outs), (ins i64mem:$dst, i64i32imm :$src2),
(implicit EFLAGS)]>;
let Uses = [EFLAGS] in {
+
+def ADC64i32 : RI<0x15, RawFrm, (outs), (ins i32imm:$src),
+ "adc{q}\t{$src, %rax|%rax, $src}", []>;
+
let isTwoAddress = 1 in {
let isCommutable = 1 in
def ADC64rr : RI<0x11, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
@@ -458,6 +539,9 @@ def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst),
(implicit EFLAGS)]>;
} // isTwoAddress
+def SUB64i32 : RI<0x2D, RawFrm, (outs), (ins i32imm:$src),
+ "sub{q}\t{$src, %rax|%rax, $src}", []>;
+
// Memory-Register Subtraction
def SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
"sub{q}\t{$src2, $dst|$dst, $src2}",
@@ -494,6 +578,9 @@ def SBB64ri32 : RIi32<0x81, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:
[(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>;
} // isTwoAddress
+def SBB64i32 : RI<0x1D, RawFrm, (outs), (ins i32imm:$src),
+ "sbb{q}\t{$src, %rax|%rax, $src}", []>;
+
def SBB64mr : RI<0x19, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
"sbb{q}\t{$src2, $dst|$dst, $src2}",
[(store (sube (load addr:$dst), GR64:$src2), addr:$dst)]>;
@@ -665,8 +752,10 @@ let isConvertibleToThreeAddress = 1 in // Can transform into LEA.
def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
"shl{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>;
-// NOTE: We don't use shifts of a register by one, because 'add reg,reg' is
-// cheaper.
+// NOTE: We don't include patterns for shifts of a register by one, because
+// 'add reg,reg' is cheaper.
+def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
+ "shl{q}\t$dst", []>;
} // isTwoAddress
let Uses = [CL] in
@@ -729,6 +818,39 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
[(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
// Rotate instructions
+
+let isTwoAddress = 1 in {
+def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src),
+ "rcl{q}\t{1, $dst|$dst, 1}", []>;
+def RCL64m1 : RI<0xD1, MRM2m, (outs i64mem:$dst), (ins i64mem:$src),
+ "rcl{q}\t{1, $dst|$dst, 1}", []>;
+let Uses = [CL] in {
+def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src),
+ "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
+def RCL64mCL : RI<0xD3, MRM2m, (outs i64mem:$dst), (ins i64mem:$src),
+ "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
+}
+def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt),
+ "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCL64mi : RIi8<0xC1, MRM2m, (outs i64mem:$dst), (ins i64mem:$src, i8imm:$cnt),
+ "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+
+def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src),
+ "rcr{q}\t{1, $dst|$dst, 1}", []>;
+def RCR64m1 : RI<0xD1, MRM3m, (outs i64mem:$dst), (ins i64mem:$src),
+ "rcr{q}\t{1, $dst|$dst, 1}", []>;
+let Uses = [CL] in {
+def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src),
+ "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
+def RCR64mCL : RI<0xD3, MRM3m, (outs i64mem:$dst), (ins i64mem:$src),
+ "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
+}
+def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt),
+ "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCR64mi : RIi8<0xC1, MRM3m, (outs i64mem:$dst), (ins i64mem:$src, i8imm:$cnt),
+ "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+}
+
let isTwoAddress = 1 in {
let Uses = [CL] in
def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src),
@@ -839,6 +961,9 @@ def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
[(store (not (loadi64 addr:$dst)), addr:$dst)]>;
let Defs = [EFLAGS] in {
+def AND64i32 : RI<0x25, RawFrm, (outs), (ins i32imm:$src),
+ "and{q}\t{$src, %rax|%rax, $src}", []>;
+
let isTwoAddress = 1 in {
let isCommutable = 1 in
def AND64rr : RI<0x21, MRMDestReg,
@@ -912,6 +1037,9 @@ def OR64mi32 : RIi32<0x81, MRM1m, (outs), (ins i64mem:$dst, i64i32imm:$src),
[(store (or (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
(implicit EFLAGS)]>;
+def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i32imm:$src),
+ "or{q}\t{$src, %rax|%rax, $src}", []>;
+
let isTwoAddress = 1 in {
let isCommutable = 1 in
def XOR64rr : RI<0x31, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
@@ -945,6 +1073,10 @@ def XOR64mi32 : RIi32<0x81, MRM6m, (outs), (ins i64mem:$dst, i64i32imm:$src),
"xor{q}\t{$src, $dst|$dst, $src}",
[(store (xor (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
(implicit EFLAGS)]>;
+
+def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i32imm:$src),
+ "xor{q}\t{$src, %rax|%rax, $src}", []>;
+
} // Defs = [EFLAGS]
//===----------------------------------------------------------------------===//
@@ -953,6 +1085,8 @@ def XOR64mi32 : RIi32<0x81, MRM6m, (outs), (ins i64mem:$dst, i64i32imm:$src),
// Integer comparison
let Defs = [EFLAGS] in {
+def TEST64i32 : RI<0xA9, RawFrm, (outs), (ins i32imm:$src),
+ "test{q}\t{$src, %rax|%rax, $src}", []>;
let isCommutable = 1 in
def TEST64rr : RI<0x85, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"test{q}\t{$src2, $src1|$src1, $src2}",
@@ -973,10 +1107,15 @@ def TEST64mi32 : RIi32<0xF7, MRM0m, (outs),
[(X86cmp (and (loadi64 addr:$src1), i64immSExt32:$src2), 0),
(implicit EFLAGS)]>;
+
+def CMP64i32 : RI<0x3D, RawFrm, (outs), (ins i32imm:$src),
+ "cmp{q}\t{$src, %rax|%rax, $src}", []>;
def CMP64rr : RI<0x39, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"cmp{q}\t{$src2, $src1|$src1, $src2}",
[(X86cmp GR64:$src1, GR64:$src2),
(implicit EFLAGS)]>;
+def CMP64mrmrr : RI<0x3B, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "cmp{q}\t{$src2, $src1|$src1, $src2}", []>;
def CMP64mr : RI<0x39, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
"cmp{q}\t{$src2, $src1|$src1, $src2}",
[(X86cmp (loadi64 addr:$src1), GR64:$src2),
@@ -1306,14 +1445,12 @@ def Int_CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src)
// Alias instructions that map movr0 to xor. Use xorl instead of xorq; it's
// equivalent due to implicit zero-extending, and it sometimes has a smaller
// encoding.
-// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
-// FIXME: AddedComplexity gives MOV64r0 a higher priority than MOV64ri32. Remove
+// FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove
// when we have a better way to specify isel priority.
-let Defs = [EFLAGS], AddedComplexity = 1,
- isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins),
- "xor{l}\t${dst:subreg32}, ${dst:subreg32}",
- [(set GR64:$dst, 0)]>;
+let AddedComplexity = 1 in
+def : Pat<(i64 0),
+ (SUBREG_TO_REG (i64 0), (MOV32r0), x86_subreg_32bit)>;
+
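+
+// The same implicit zero-extension is why the pattern above can materialize a
+// 64-bit zero from MOV32r0: 'xorl %eax, %eax' clears all of RAX and encodes
+// shorter than a 64-bit xor or immediate move. A trivial C++ sketch:
+//
+//     #include <cstdint>
+//     uint64_t zero64() { return 0; } // typically lowers to xorl %eax, %eax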
// Materialize i64 constant where top 32-bits are zero.
let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
@@ -1343,12 +1480,12 @@ def TLS_addr64 : I<0, Pseudo, (outs), (ins lea64mem:$sym),
[(X86tlsaddr tls64addr:$sym)]>,
Requires<[In64BitMode]>;
-let AddedComplexity = 5 in
+let AddedComplexity = 5, isCodeGenOnly = 1 in
def MOV64GSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"movq\t%gs:$src, $dst",
[(set GR64:$dst, (gsload addr:$src))]>, SegGS;
-let AddedComplexity = 5 in
+let AddedComplexity = 5, isCodeGenOnly = 1 in
def MOV64FSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"movq\t%fs:$src, $dst",
[(set GR64:$dst, (fsload addr:$src))]>, SegFS;
@@ -1371,11 +1508,43 @@ def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val),
"xadd\t$val, $ptr",
[(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>,
TB, LOCK;
+
def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val),
"xchg\t$val, $ptr",
[(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))]>;
}
+// Optimized codegen when the non-memory output is not used.
+// FIXME: Use normal add / sub instructions and add lock prefix dynamically.
+def LOCK_ADD64mr : RI<0x03, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "lock\n\t"
+ "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD64mi8 : RIi8<0x83, MRM0m, (outs),
+ (ins i64mem:$dst, i64i8imm :$src2),
+ "lock\n\t"
+ "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD64mi32 : RIi32<0x81, MRM0m, (outs),
+ (ins i64mem:$dst, i64i32imm :$src2),
+ "lock\n\t"
+ "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "lock\n\t"
+ "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB64mi8 : RIi8<0x83, MRM5m, (outs),
+ (ins i64mem:$dst, i64i8imm :$src2),
+ "lock\n\t"
+ "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB64mi32 : RIi32<0x81, MRM5m, (outs),
+ (ins i64mem:$dst, i64i32imm:$src2),
+ "lock\n\t"
+ "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
+ "lock\n\t"
+ "inc{q}\t$dst", []>, LOCK;
+def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
+ "lock\n\t"
+ "dec{q}\t$dst", []>, LOCK;
+
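
These LOCK_* forms are selected when the value produced by the RMW is dead; a modern-C++ sketch (C++11 atomics, used here only for illustration) of code where the fetched result is discarded:

    #include <atomic>

    std::atomic<long> Counter(0);

    void bump() {
      Counter.fetch_add(1); // result unused: emits lock add/inc, not lock xadd
    }
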
// Atomic exchange, and, or, xor
let Constraints = "$val = $dst", Defs = [EFLAGS],
usesCustomDAGSchedInserter = 1 in {
@@ -1405,78 +1574,88 @@ def ATOMUMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
[(set GR64:$dst, (atomic_load_umax_64 addr:$ptr, GR64:$val))]>;
}
+// Segmentation support instructions
+
+// i16mem operand in LAR64rm and GR32 operand in LAR64rr is not a typo.
+def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "lar{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
+ "lar{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+// String manipulation instructions
+
+def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", []>;
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
-// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable
+// ConstantPool, GlobalAddress, ExternalSymbol, and JumpTable references, when
+// not in small code model mode, should use 'movabs'. FIXME: This is really a
+// hack, the 'movabs' predicate should handle this sort of thing.
+def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
+ (MOV64ri tconstpool :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
+ (MOV64ri tjumptable :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+ (MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+ (MOV64ri texternalsym:$dst)>, Requires<[FarData]>;
+
+// In static codegen with small code model, we can get the address of a label
+// into a register with 'movl'. FIXME: This is a hack, the 'imm' predicate of
+// the MOV64ri64i32 should accept these.
+def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
+ (MOV64ri64i32 tconstpool :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
+ (MOV64ri64i32 tjumptable :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+ (MOV64ri64i32 tglobaladdr :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+ (MOV64ri64i32 texternalsym:$dst)>, Requires<[SmallCode]>;
+
+// In kernel code model, we can get the address of a label
+// into a register with 'movq'. FIXME: This is a hack, the 'imm' predicate of
+// the MOV64ri32 should accept these.
def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
- (MOV64ri tconstpool :$dst)>, Requires<[NotSmallCode]>;
+ (MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
- (MOV64ri tjumptable :$dst)>, Requires<[NotSmallCode]>;
+ (MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
- (MOV64ri tglobaladdr :$dst)>, Requires<[NotSmallCode]>;
+ (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
- (MOV64ri texternalsym:$dst)>, Requires<[NotSmallCode]>;
+ (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
// If we have small model and -static mode, it is safe to store global addresses
// directly as immediates. FIXME: This is really a hack, the 'imm' predicate
-// should handle this sort of thing.
+// for MOV64mi32 should handle this sort of thing.
def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),
(MOV64mi32 addr:$dst, tconstpool:$src)>,
- Requires<[SmallCode, IsStatic]>;
+ Requires<[NearData, IsStatic]>;
def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),
(MOV64mi32 addr:$dst, tjumptable:$src)>,
- Requires<[SmallCode, IsStatic]>;
+ Requires<[NearData, IsStatic]>;
def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
(MOV64mi32 addr:$dst, tglobaladdr:$src)>,
- Requires<[SmallCode, IsStatic]>;
+ Requires<[NearData, IsStatic]>;
def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
(MOV64mi32 addr:$dst, texternalsym:$src)>,
- Requires<[SmallCode, IsStatic]>;
-
-// If we have small model and -static mode, it is safe to store global addresses
-// directly as immediates. FIXME: This is really a hack, the 'imm' predicate
-// should handle this sort of thing.
-def : Pat<(store (i64 (X86WrapperRIP tconstpool:$src)), addr:$dst),
- (MOV64mi32 addr:$dst, tconstpool:$src)>,
- Requires<[SmallCode, IsStatic]>;
-def : Pat<(store (i64 (X86WrapperRIP tjumptable:$src)), addr:$dst),
- (MOV64mi32 addr:$dst, tjumptable:$src)>,
- Requires<[SmallCode, IsStatic]>;
-def : Pat<(store (i64 (X86WrapperRIP tglobaladdr:$src)), addr:$dst),
- (MOV64mi32 addr:$dst, tglobaladdr:$src)>,
- Requires<[SmallCode, IsStatic]>;
-def : Pat<(store (i64 (X86WrapperRIP texternalsym:$src)), addr:$dst),
- (MOV64mi32 addr:$dst, texternalsym:$src)>,
- Requires<[SmallCode, IsStatic]>;
-
+ Requires<[NearData, IsStatic]>;
// Calls
// Direct PC relative function call for small code model. 32-bit displacement
// sign extended to 64-bit.
def : Pat<(X86call (i64 tglobaladdr:$dst)),
- (CALL64pcrel32 tglobaladdr:$dst)>;
+ (CALL64pcrel32 tglobaladdr:$dst)>, Requires<[NotWin64]>;
def : Pat<(X86call (i64 texternalsym:$dst)),
- (CALL64pcrel32 texternalsym:$dst)>;
-
-def : Pat<(X86tailcall (i64 tglobaladdr:$dst)),
- (CALL64pcrel32 tglobaladdr:$dst)>;
-def : Pat<(X86tailcall (i64 texternalsym:$dst)),
- (CALL64pcrel32 texternalsym:$dst)>;
-
-def : Pat<(X86tailcall GR64:$dst),
- (CALL64r GR64:$dst)>;
+ (CALL64pcrel32 texternalsym:$dst)>, Requires<[NotWin64]>;
+def : Pat<(X86call (i64 tglobaladdr:$dst)),
+ (WINCALL64pcrel32 tglobaladdr:$dst)>, Requires<[IsWin64]>;
+def : Pat<(X86call (i64 texternalsym:$dst)),
+ (WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>;
// tailcall stuff
-def : Pat<(X86tailcall GR32:$dst),
- (TAILCALL)>;
-def : Pat<(X86tailcall (i64 tglobaladdr:$dst)),
- (TAILCALL)>;
-def : Pat<(X86tailcall (i64 texternalsym:$dst)),
- (TAILCALL)>;
-
def : Pat<(X86tcret GR64:$dst, imm:$off),
(TCRETURNri64 GR64:$dst, imm:$off)>;
@@ -1540,30 +1719,15 @@ def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>;
// For other extloads, use subregs, since the high contents of the register are
// defined after an extload.
def : Pat<(extloadi64i32 addr:$src),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (MOV32rm addr:$src),
+ (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src),
x86_subreg_32bit)>;
-def : Pat<(extloadi16i1 addr:$src),
- (INSERT_SUBREG (i16 (IMPLICIT_DEF)), (MOV8rm addr:$src),
- x86_subreg_8bit)>,
- Requires<[In64BitMode]>;
-def : Pat<(extloadi16i8 addr:$src),
- (INSERT_SUBREG (i16 (IMPLICIT_DEF)), (MOV8rm addr:$src),
- x86_subreg_8bit)>,
- Requires<[In64BitMode]>;
-
-// anyext
-def : Pat<(i64 (anyext GR8:$src)),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src, x86_subreg_8bit)>;
-def : Pat<(i64 (anyext GR16:$src)),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>;
-def : Pat<(i64 (anyext GR32:$src)),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, x86_subreg_32bit)>;
-def : Pat<(i16 (anyext GR8:$src)),
- (INSERT_SUBREG (i16 (IMPLICIT_DEF)), GR8:$src, x86_subreg_8bit)>,
- Requires<[In64BitMode]>;
-def : Pat<(i32 (anyext GR8:$src)),
- (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, x86_subreg_8bit)>,
- Requires<[In64BitMode]>;
+
+// anyext. Define these to do an explicit zero-extend to
+// avoid partial-register updates.
+def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8 GR8 :$src)>;
+def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>;
+def : Pat<(i64 (anyext GR32:$src)),
+ (SUBREG_TO_REG (i64 0), GR32:$src, x86_subreg_32bit)>;
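
A sketch of why the zero-extending lowering is preferred here: an anyext may legally leave the upper bits undefined, but movzx also breaks the dependence on the register's stale contents:

    #include <cstdint>

    uint32_t widen(uint8_t b) {
      return b; // movzbl: no partial-register merge with old upper bits
    }
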
//===----------------------------------------------------------------------===//
// Some peepholes
@@ -1661,6 +1825,11 @@ def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
(EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
x86_subreg_8bit_hi))>,
Requires<[In64BitMode]>;
+def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ x86_subreg_8bit_hi))>,
+ Requires<[In64BitMode]>;
def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
(SUBREG_TO_REG
(i64 0),
@@ -1668,6 +1837,13 @@ def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
(EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
x86_subreg_8bit_hi)),
x86_subreg_32bit)>;
+def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
+ (SUBREG_TO_REG
+ (i64 0),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ x86_subreg_8bit_hi)),
+ x86_subreg_32bit)>;
// h-register extract and store.
def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
@@ -1906,6 +2082,102 @@ def : Pat<(parallel (store (i64 (X86dec_flag (loadi64 addr:$dst))), addr:$dst),
(implicit EFLAGS)),
(DEC64m addr:$dst)>;
+// Register-Register Logical Or with EFLAGS result
+def : Pat<(parallel (X86or_flag GR64:$src1, GR64:$src2),
+ (implicit EFLAGS)),
+ (OR64rr GR64:$src1, GR64:$src2)>;
+
+// Register-Integer Logical Or with EFLAGS result
+def : Pat<(parallel (X86or_flag GR64:$src1, i64immSExt8:$src2),
+ (implicit EFLAGS)),
+ (OR64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(parallel (X86or_flag GR64:$src1, i64immSExt32:$src2),
+ (implicit EFLAGS)),
+ (OR64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// Register-Memory Logical Or with EFLAGS result
+def : Pat<(parallel (X86or_flag GR64:$src1, (loadi64 addr:$src2)),
+ (implicit EFLAGS)),
+ (OR64rm GR64:$src1, addr:$src2)>;
+
+// Memory-Register Logical Or with EFLAGS result
+def : Pat<(parallel (store (X86or_flag (loadi64 addr:$dst), GR64:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (OR64mr addr:$dst, GR64:$src2)>;
+def : Pat<(parallel (store (X86or_flag (loadi64 addr:$dst), i64immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (OR64mi8 addr:$dst, i64immSExt8:$src2)>;
+def : Pat<(parallel (store (X86or_flag (loadi64 addr:$dst), i64immSExt32:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (OR64mi32 addr:$dst, i64immSExt32:$src2)>;
+
+// Register-Register Logical XOr with EFLAGS result
+def : Pat<(parallel (X86xor_flag GR64:$src1, GR64:$src2),
+ (implicit EFLAGS)),
+ (XOR64rr GR64:$src1, GR64:$src2)>;
+
+// Register-Integer Logical XOr with EFLAGS result
+def : Pat<(parallel (X86xor_flag GR64:$src1, i64immSExt8:$src2),
+ (implicit EFLAGS)),
+ (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(parallel (X86xor_flag GR64:$src1, i64immSExt32:$src2),
+ (implicit EFLAGS)),
+ (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// Register-Memory Logical XOr with EFLAGS result
+def : Pat<(parallel (X86xor_flag GR64:$src1, (loadi64 addr:$src2)),
+ (implicit EFLAGS)),
+ (XOR64rm GR64:$src1, addr:$src2)>;
+
+// Memory-Register Logical XOr with EFLAGS result
+def : Pat<(parallel (store (X86xor_flag (loadi64 addr:$dst), GR64:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (XOR64mr addr:$dst, GR64:$src2)>;
+def : Pat<(parallel (store (X86xor_flag (loadi64 addr:$dst), i64immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (XOR64mi8 addr:$dst, i64immSExt8:$src2)>;
+def : Pat<(parallel (store (X86xor_flag (loadi64 addr:$dst), i64immSExt32:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (XOR64mi32 addr:$dst, i64immSExt32:$src2)>;
+
+// Register-Register Logical And with EFLAGS result
+def : Pat<(parallel (X86and_flag GR64:$src1, GR64:$src2),
+ (implicit EFLAGS)),
+ (AND64rr GR64:$src1, GR64:$src2)>;
+
+// Register-Integer Logical And with EFLAGS result
+def : Pat<(parallel (X86and_flag GR64:$src1, i64immSExt8:$src2),
+ (implicit EFLAGS)),
+ (AND64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(parallel (X86and_flag GR64:$src1, i64immSExt32:$src2),
+ (implicit EFLAGS)),
+ (AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// Register-Memory Logical And with EFLAGS result
+def : Pat<(parallel (X86and_flag GR64:$src1, (loadi64 addr:$src2)),
+ (implicit EFLAGS)),
+ (AND64rm GR64:$src1, addr:$src2)>;
+
+// Memory-Register Logical And with EFLAGS result
+def : Pat<(parallel (store (X86and_flag (loadi64 addr:$dst), GR64:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (AND64mr addr:$dst, GR64:$src2)>;
+def : Pat<(parallel (store (X86and_flag (loadi64 addr:$dst), i64immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (AND64mi8 addr:$dst, i64immSExt8:$src2)>;
+def : Pat<(parallel (store (X86and_flag (loadi64 addr:$dst), i64immSExt32:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (AND64mi32 addr:$dst, i64immSExt32:$src2)>;
+
//===----------------------------------------------------------------------===//
// X86-64 SSE Instructions
//===----------------------------------------------------------------------===//
@@ -1977,3 +2249,15 @@ let isTwoAddress = 1 in {
}
defm PINSRQ : SS41I_insert64<0x22, "pinsrq">;
+
+// -disable-16bit support.
+def : Pat<(truncstorei16 (i64 imm:$src), addr:$dst),
+ (MOV16mi addr:$dst, imm:$src)>;
+def : Pat<(truncstorei16 GR64:$src, addr:$dst),
+ (MOV16mr addr:$dst, (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit))>;
+def : Pat<(i64 (sextloadi16 addr:$dst)),
+ (MOVSX64rm16 addr:$dst)>;
+def : Pat<(i64 (zextloadi16 addr:$dst)),
+ (MOVZX64rm16 addr:$dst)>;
+def : Pat<(i64 (extloadi16 addr:$dst)),
+ (MOVZX64rm16 addr:$dst)>;
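In C terms, the -disable-16bit patterns above store only the low half of a wider value and widen 16-bit loads to 64 bits, so no 16-bit arithmetic is ever required. A hedged sketch of the equivalent source-level behavior (function names are illustrative; the instruction notes assume typical x86-64 codegen):

  #include <cstdint>

  static void store16(uint16_t *P, uint64_t V) { *P = (uint16_t)V; } // MOV16mr of the 16-bit subreg
  static int64_t loadS16(const int16_t *P) { return *P; }            // MOVSX64rm16
  static uint64_t loadU16(const uint16_t *P) { return *P; }          // MOVZX64rm16

  int main() {
    uint16_t Buf = 0;
    store16(&Buf, 0x12345678u);
    return (loadU16(&Buf) == 0x5678u &&
            loadS16((const int16_t *)&Buf) == 0x5678) ? 0 : 1;
  }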
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index 6359542..c475b56 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
namespace llvm {
@@ -47,7 +48,7 @@ struct X86AddressMode {
unsigned Scale;
unsigned IndexReg;
- unsigned Disp;
+ int Disp;
GlobalValue *GV;
unsigned GVOpFlags;
@@ -61,20 +62,20 @@ struct X86AddressMode {
/// current instruction -- that is, a dereference of an address in a register,
/// with no scale, index or displacement. An example is: DWORD PTR [EAX].
///
-inline const MachineInstrBuilder &addDirectMem(const MachineInstrBuilder &MIB,
- unsigned Reg) {
+static inline const MachineInstrBuilder &
+addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg) {
// Because memory references are always represented with four
// values, this adds: Reg, [1, NoReg, 0] to the instruction.
return MIB.addReg(Reg).addImm(1).addReg(0).addImm(0);
}
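For orientation, these helpers build an x86 memory reference as a fixed tuple of operands computing Base + Scale*Index + Disp; addFullAddress below additionally appends a trailing register-0 operand. A self-contained toy model of that layout (the struct and names are mine, not an LLVM API):

  #include <cstdio>

  // Operand tuple the helpers append: Base, Scale, Index, Disp
  // (addFullAddress adds one more zero register after these).
  struct X86Mem { unsigned Base, Scale, Index; int Disp; };

  // Analogue of addDirectMem: a bare [Reg] dereference, e.g. DWORD PTR [EAX].
  static X86Mem directMem(unsigned Reg) { return X86Mem{Reg, 1, 0, 0}; }

  int main() {
    X86Mem M = directMem(/*register id, illustrative*/ 1);
    printf("base=%u scale=%u index=%u disp=%d\n", M.Base, M.Scale, M.Index, M.Disp);
    return 0;
  }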
-inline const MachineInstrBuilder &addLeaOffset(const MachineInstrBuilder &MIB,
- int Offset) {
+static inline const MachineInstrBuilder &
+addLeaOffset(const MachineInstrBuilder &MIB, int Offset) {
return MIB.addImm(1).addReg(0).addImm(Offset);
}
-inline const MachineInstrBuilder &addOffset(const MachineInstrBuilder &MIB,
- int Offset) {
+static inline const MachineInstrBuilder &
+addOffset(const MachineInstrBuilder &MIB, int Offset) {
return addLeaOffset(MIB, Offset).addReg(0);
}
@@ -82,29 +83,29 @@ inline const MachineInstrBuilder &addOffset(const MachineInstrBuilder &MIB,
/// [Reg + Offset], i.e., one with no scale or index, but with a
/// displacement. An example is: DWORD PTR [EAX + 4].
///
-inline const MachineInstrBuilder &addRegOffset(const MachineInstrBuilder &MIB,
- unsigned Reg, bool isKill,
- int Offset) {
+static inline const MachineInstrBuilder &
+addRegOffset(const MachineInstrBuilder &MIB,
+ unsigned Reg, bool isKill, int Offset) {
return addOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset);
}
-inline const MachineInstrBuilder &addLeaRegOffset(const MachineInstrBuilder &MIB,
- unsigned Reg, bool isKill,
- int Offset) {
+static inline const MachineInstrBuilder &
+addLeaRegOffset(const MachineInstrBuilder &MIB,
+ unsigned Reg, bool isKill, int Offset) {
return addLeaOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset);
}
/// addRegReg - This function is used to add a memory reference of the form:
/// [Reg + Reg].
-inline const MachineInstrBuilder &addRegReg(const MachineInstrBuilder &MIB,
+static inline const MachineInstrBuilder &addRegReg(const MachineInstrBuilder &MIB,
unsigned Reg1, bool isKill1,
unsigned Reg2, bool isKill2) {
return MIB.addReg(Reg1, getKillRegState(isKill1)).addImm(1)
.addReg(Reg2, getKillRegState(isKill2)).addImm(0);
}
-inline const MachineInstrBuilder &addLeaAddress(const MachineInstrBuilder &MIB,
- const X86AddressMode &AM) {
+static inline const MachineInstrBuilder &
+addLeaAddress(const MachineInstrBuilder &MIB, const X86AddressMode &AM) {
assert (AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8);
if (AM.BaseType == X86AddressMode::RegBase)
@@ -120,8 +121,9 @@ inline const MachineInstrBuilder &addLeaAddress(const MachineInstrBuilder &MIB,
return MIB.addImm(AM.Disp);
}
-inline const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
- const X86AddressMode &AM) {
+static inline const MachineInstrBuilder &
+addFullAddress(const MachineInstrBuilder &MIB,
+ const X86AddressMode &AM) {
return addLeaAddress(MIB, AM).addReg(0);
}
@@ -130,7 +132,7 @@ inline const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
/// reference has base register as the FrameIndex offset until it is resolved.
/// This allows a constant offset to be specified as well...
///
-inline const MachineInstrBuilder &
+static inline const MachineInstrBuilder &
addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
MachineInstr *MI = MIB;
MachineFunction &MF = *MI->getParent()->getParent();
@@ -141,11 +143,11 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
Flags |= MachineMemOperand::MOLoad;
if (TID.mayStore())
Flags |= MachineMemOperand::MOStore;
- MachineMemOperand MMO(PseudoSourceValue::getFixedStack(FI),
- Flags,
- MFI.getObjectOffset(FI) + Offset,
- MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
+ Flags, Offset,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
return addOffset(MIB.addFrameIndex(FI), Offset)
.addMemOperand(MMO);
}
@@ -157,7 +159,7 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
/// the GlobalBaseReg parameter can be used to make this a
/// GlobalBaseReg-relative reference.
///
-inline const MachineInstrBuilder &
+static inline const MachineInstrBuilder &
addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI,
unsigned GlobalBaseReg, unsigned char OpFlags) {
//FIXME: factor this
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index bc7def4..7e37373 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -303,6 +303,31 @@ def TST_Fp80 : FpI_<(outs), (ins RFP80:$src), OneArgFP, []>;
}
def TST_F : FPI<0xE4, RawFrm, (outs), (ins), "ftst">, D9;
+// Versions of FP instructions that take a single memory operand. Added for the
+// disassembler; these should be removed once they are covered by patterns elsewhere.
+def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom\t$src">;
+def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp\t$src">;
+
+def FLDENVm : FPI<0xD9, MRM4m, (outs), (ins f32mem:$src), "fldenv\t$src">;
+def FSTENVm : FPI<0xD9, MRM6m, (outs f32mem:$dst), (ins), "fstenv\t$dst">;
+
+def FICOM32m : FPI<0xDA, MRM2m, (outs), (ins i32mem:$src), "ficom{l}\t$src">;
+def FICOMP32m: FPI<0xDA, MRM3m, (outs), (ins i32mem:$src), "ficomp{l}\t$src">;
+
+def FCOM64m : FPI<0xDC, MRM2m, (outs), (ins f64mem:$src), "fcom\t$src">;
+def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp\t$src">;
+
+def FISTTP32m: FPI<0xDD, MRM1m, (outs i32mem:$dst), (ins), "fisttp{l}\t$dst">;
+def FRSTORm : FPI<0xDD, MRM4m, (outs f32mem:$dst), (ins), "frstor\t$dst">;
+def FSAVEm : FPI<0xDD, MRM6m, (outs f32mem:$dst), (ins), "fsave\t$dst">;
+def FSTSWm : FPI<0xDD, MRM7m, (outs f32mem:$dst), (ins), "fstsw\t$dst">;
+
+def FICOM16m : FPI<0xDE, MRM2m, (outs), (ins i16mem:$src), "ficom{w}\t$src">;
+def FICOMP16m: FPI<0xDE, MRM3m, (outs), (ins i16mem:$src), "ficomp{w}\t$src">;
+
+def FBLDm : FPI<0xDF, MRM4m, (outs), (ins f32mem:$src), "fbld\t$src">;
+def FBSTPm : FPI<0xDF, MRM6m, (outs f32mem:$dst), (ins), "fbstp\t$dst">;
+
// Floating point cmovs.
multiclass FPCMov<PatLeaf cc> {
def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2),
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index eeed5bd..abdb313 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -79,6 +79,7 @@ class XD { bits<4> Prefix = 11; }
class XS { bits<4> Prefix = 12; }
class T8 { bits<4> Prefix = 13; }
class TA { bits<4> Prefix = 14; }
+class TF { bits<4> Prefix = 15; }
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
string AsmStr>
@@ -142,6 +143,24 @@ class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern>
let Pattern = pattern;
}
+// Templates for instructions that use a 16- or 32-bit segmented address as
+// their only operand: lcall (FAR CALL) and ljmp (FAR JMP)
+//
+// Iseg16 - 16-bit segment selector, 16-bit offset
+// Iseg32 - 16-bit segment selector, 32-bit offset
+
+class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern> : X86Inst<o, f, NoImm, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern> : X86Inst<o, f, NoImm, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
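For reference, the two far-pointer shapes Iseg16 and Iseg32 describe carry a 16-bit segment selector plus a 16- or 32-bit offset, with the offset encoded before the selector. A C sketch of the two payloads (struct names are mine, and the structs are only a mnemonic, not the packed immediate encoding):

  #include <cstdint>

  struct FarPtr16 { uint16_t Offset; uint16_t Selector; }; // Iseg16: lcall/ljmp ptr16:16
  struct FarPtr32 { uint32_t Offset; uint16_t Selector; }; // Iseg32: lcall/ljmp ptr16:32

  int main() { return 0; }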
// SSE1 Instruction Templates:
//
// SSI - SSE1 instructions with XS prefix.
@@ -229,6 +248,16 @@ class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE42]>;
+// SS42FI - SSE 4.2 instructions with TF prefix.
+class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TF, Requires<[HasSSE42]>;
+
+// SS42AI - SSE 4.2 instructions with TA prefix.
+class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE42]>;
+
// X86-64 Instruction templates...
//
@@ -282,4 +311,3 @@ class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> patter
: Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX]>;
class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX]>;
-
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index e5d84c5..e8a39d1 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -18,8 +18,8 @@
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
-#include "llvm/GlobalVariable.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -27,24 +27,24 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
using namespace llvm;
-namespace {
- cl::opt<bool>
- NoFusing("disable-spill-fusing",
- cl::desc("Disable fusing of spill code into instructions"));
- cl::opt<bool>
- PrintFailedFusing("print-failed-fuse-candidates",
- cl::desc("Print instructions that the allocator wants to"
- " fuse, but the X86 backend currently can't"),
- cl::Hidden);
- cl::opt<bool>
- ReMatPICStubLoad("remat-pic-stub-load",
- cl::desc("Re-materialize load from stub in PIC mode"),
- cl::init(false), cl::Hidden);
-}
+static cl::opt<bool>
+NoFusing("disable-spill-fusing",
+ cl::desc("Disable fusing of spill code into instructions"));
+static cl::opt<bool>
+PrintFailedFusing("print-failed-fuse-candidates",
+ cl::desc("Print instructions that the allocator wants to"
+ " fuse, but the X86 backend currently can't"),
+ cl::Hidden);
+static cl::opt<bool>
+ReMatPICStubLoad("remat-pic-stub-load",
+ cl::desc("Re-materialize load from stub in PIC mode"),
+ cl::init(false), cl::Hidden);
X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
: TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)),
@@ -212,9 +212,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
unsigned RegOp = OpTbl2Addr[i][0];
unsigned MemOp = OpTbl2Addr[i][1];
if (!RegOp2MemOpTable2Addr.insert(std::make_pair((unsigned*)RegOp,
- MemOp)).second)
+ std::make_pair(MemOp,0))).second)
assert(false && "Duplicated entries?");
- unsigned AuxInfo = 0 | (1 << 4) | (1 << 5); // Index 0,folded load and store
+ // Index 0, folded load and store, no alignment requirement.
+ unsigned AuxInfo = 0 | (1 << 4) | (1 << 5);
if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
std::make_pair(RegOp,
AuxInfo))).second)
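The AuxInfo word computed here packs the unfolding information into a single unsigned: the low four bits hold the operand index of the folded memory reference, bit 4 marks a folded load, and bit 5 a folded store. A self-contained sketch of that encoding as this file uses it (the helper name is mine):

  #include <cassert>

  static unsigned packAuxInfo(unsigned Index, bool FoldedLoad, bool FoldedStore) {
    return Index | (unsigned(FoldedLoad) << 4) | (unsigned(FoldedStore) << 5);
  }

  int main() {
    // Two-address table: both a load and a store folded, at operand index 0.
    assert(packAuxInfo(0, true, true) == (0u | (1u << 4) | (1u << 5)));
    // OpTbl1/OpTbl2 below: only a load folded, at operand index 1 or 2.
    assert(packAuxInfo(1, true, false) == (1u | (1u << 4)));
    assert(packAuxInfo(2, true, false) == (2u | (1u << 4)));
    return 0;
  }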
@@ -222,93 +223,94 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
}
// If the third value is 1, then it's folding either a load or a store.
- static const unsigned OpTbl0[][3] = {
- { X86::BT16ri8, X86::BT16mi8, 1 },
- { X86::BT32ri8, X86::BT32mi8, 1 },
- { X86::BT64ri8, X86::BT64mi8, 1 },
- { X86::CALL32r, X86::CALL32m, 1 },
- { X86::CALL64r, X86::CALL64m, 1 },
- { X86::CMP16ri, X86::CMP16mi, 1 },
- { X86::CMP16ri8, X86::CMP16mi8, 1 },
- { X86::CMP16rr, X86::CMP16mr, 1 },
- { X86::CMP32ri, X86::CMP32mi, 1 },
- { X86::CMP32ri8, X86::CMP32mi8, 1 },
- { X86::CMP32rr, X86::CMP32mr, 1 },
- { X86::CMP64ri32, X86::CMP64mi32, 1 },
- { X86::CMP64ri8, X86::CMP64mi8, 1 },
- { X86::CMP64rr, X86::CMP64mr, 1 },
- { X86::CMP8ri, X86::CMP8mi, 1 },
- { X86::CMP8rr, X86::CMP8mr, 1 },
- { X86::DIV16r, X86::DIV16m, 1 },
- { X86::DIV32r, X86::DIV32m, 1 },
- { X86::DIV64r, X86::DIV64m, 1 },
- { X86::DIV8r, X86::DIV8m, 1 },
- { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0 },
- { X86::FsMOVAPDrr, X86::MOVSDmr, 0 },
- { X86::FsMOVAPSrr, X86::MOVSSmr, 0 },
- { X86::IDIV16r, X86::IDIV16m, 1 },
- { X86::IDIV32r, X86::IDIV32m, 1 },
- { X86::IDIV64r, X86::IDIV64m, 1 },
- { X86::IDIV8r, X86::IDIV8m, 1 },
- { X86::IMUL16r, X86::IMUL16m, 1 },
- { X86::IMUL32r, X86::IMUL32m, 1 },
- { X86::IMUL64r, X86::IMUL64m, 1 },
- { X86::IMUL8r, X86::IMUL8m, 1 },
- { X86::JMP32r, X86::JMP32m, 1 },
- { X86::JMP64r, X86::JMP64m, 1 },
- { X86::MOV16ri, X86::MOV16mi, 0 },
- { X86::MOV16rr, X86::MOV16mr, 0 },
- { X86::MOV32ri, X86::MOV32mi, 0 },
- { X86::MOV32rr, X86::MOV32mr, 0 },
- { X86::MOV64ri32, X86::MOV64mi32, 0 },
- { X86::MOV64rr, X86::MOV64mr, 0 },
- { X86::MOV8ri, X86::MOV8mi, 0 },
- { X86::MOV8rr, X86::MOV8mr, 0 },
- { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, 0 },
- { X86::MOVAPDrr, X86::MOVAPDmr, 0 },
- { X86::MOVAPSrr, X86::MOVAPSmr, 0 },
- { X86::MOVDQArr, X86::MOVDQAmr, 0 },
- { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0 },
- { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0 },
- { X86::MOVPS2SSrr, X86::MOVPS2SSmr, 0 },
- { X86::MOVSDrr, X86::MOVSDmr, 0 },
- { X86::MOVSDto64rr, X86::MOVSDto64mr, 0 },
- { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0 },
- { X86::MOVSSrr, X86::MOVSSmr, 0 },
- { X86::MOVUPDrr, X86::MOVUPDmr, 0 },
- { X86::MOVUPSrr, X86::MOVUPSmr, 0 },
- { X86::MUL16r, X86::MUL16m, 1 },
- { X86::MUL32r, X86::MUL32m, 1 },
- { X86::MUL64r, X86::MUL64m, 1 },
- { X86::MUL8r, X86::MUL8m, 1 },
- { X86::SETAEr, X86::SETAEm, 0 },
- { X86::SETAr, X86::SETAm, 0 },
- { X86::SETBEr, X86::SETBEm, 0 },
- { X86::SETBr, X86::SETBm, 0 },
- { X86::SETEr, X86::SETEm, 0 },
- { X86::SETGEr, X86::SETGEm, 0 },
- { X86::SETGr, X86::SETGm, 0 },
- { X86::SETLEr, X86::SETLEm, 0 },
- { X86::SETLr, X86::SETLm, 0 },
- { X86::SETNEr, X86::SETNEm, 0 },
- { X86::SETNOr, X86::SETNOm, 0 },
- { X86::SETNPr, X86::SETNPm, 0 },
- { X86::SETNSr, X86::SETNSm, 0 },
- { X86::SETOr, X86::SETOm, 0 },
- { X86::SETPr, X86::SETPm, 0 },
- { X86::SETSr, X86::SETSm, 0 },
- { X86::TAILJMPr, X86::TAILJMPm, 1 },
- { X86::TEST16ri, X86::TEST16mi, 1 },
- { X86::TEST32ri, X86::TEST32mi, 1 },
- { X86::TEST64ri32, X86::TEST64mi32, 1 },
- { X86::TEST8ri, X86::TEST8mi, 1 }
+ static const unsigned OpTbl0[][4] = {
+ { X86::BT16ri8, X86::BT16mi8, 1, 0 },
+ { X86::BT32ri8, X86::BT32mi8, 1, 0 },
+ { X86::BT64ri8, X86::BT64mi8, 1, 0 },
+ { X86::CALL32r, X86::CALL32m, 1, 0 },
+ { X86::CALL64r, X86::CALL64m, 1, 0 },
+ { X86::CMP16ri, X86::CMP16mi, 1, 0 },
+ { X86::CMP16ri8, X86::CMP16mi8, 1, 0 },
+ { X86::CMP16rr, X86::CMP16mr, 1, 0 },
+ { X86::CMP32ri, X86::CMP32mi, 1, 0 },
+ { X86::CMP32ri8, X86::CMP32mi8, 1, 0 },
+ { X86::CMP32rr, X86::CMP32mr, 1, 0 },
+ { X86::CMP64ri32, X86::CMP64mi32, 1, 0 },
+ { X86::CMP64ri8, X86::CMP64mi8, 1, 0 },
+ { X86::CMP64rr, X86::CMP64mr, 1, 0 },
+ { X86::CMP8ri, X86::CMP8mi, 1, 0 },
+ { X86::CMP8rr, X86::CMP8mr, 1, 0 },
+ { X86::DIV16r, X86::DIV16m, 1, 0 },
+ { X86::DIV32r, X86::DIV32m, 1, 0 },
+ { X86::DIV64r, X86::DIV64m, 1, 0 },
+ { X86::DIV8r, X86::DIV8m, 1, 0 },
+ { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0, 16 },
+ { X86::FsMOVAPDrr, X86::MOVSDmr, 0, 0 },
+ { X86::FsMOVAPSrr, X86::MOVSSmr, 0, 0 },
+ { X86::IDIV16r, X86::IDIV16m, 1, 0 },
+ { X86::IDIV32r, X86::IDIV32m, 1, 0 },
+ { X86::IDIV64r, X86::IDIV64m, 1, 0 },
+ { X86::IDIV8r, X86::IDIV8m, 1, 0 },
+ { X86::IMUL16r, X86::IMUL16m, 1, 0 },
+ { X86::IMUL32r, X86::IMUL32m, 1, 0 },
+ { X86::IMUL64r, X86::IMUL64m, 1, 0 },
+ { X86::IMUL8r, X86::IMUL8m, 1, 0 },
+ { X86::JMP32r, X86::JMP32m, 1, 0 },
+ { X86::JMP64r, X86::JMP64m, 1, 0 },
+ { X86::MOV16ri, X86::MOV16mi, 0, 0 },
+ { X86::MOV16rr, X86::MOV16mr, 0, 0 },
+ { X86::MOV32ri, X86::MOV32mi, 0, 0 },
+ { X86::MOV32rr, X86::MOV32mr, 0, 0 },
+ { X86::MOV64ri32, X86::MOV64mi32, 0, 0 },
+ { X86::MOV64rr, X86::MOV64mr, 0, 0 },
+ { X86::MOV8ri, X86::MOV8mi, 0, 0 },
+ { X86::MOV8rr, X86::MOV8mr, 0, 0 },
+ { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, 0, 0 },
+ { X86::MOVAPDrr, X86::MOVAPDmr, 0, 16 },
+ { X86::MOVAPSrr, X86::MOVAPSmr, 0, 16 },
+ { X86::MOVDQArr, X86::MOVDQAmr, 0, 16 },
+ { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 },
+ { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 },
+ { X86::MOVPS2SSrr, X86::MOVPS2SSmr, 0, 0 },
+ { X86::MOVSDrr, X86::MOVSDmr, 0, 0 },
+ { X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 },
+ { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0, 0 },
+ { X86::MOVSSrr, X86::MOVSSmr, 0, 0 },
+ { X86::MOVUPDrr, X86::MOVUPDmr, 0, 0 },
+ { X86::MOVUPSrr, X86::MOVUPSmr, 0, 0 },
+ { X86::MUL16r, X86::MUL16m, 1, 0 },
+ { X86::MUL32r, X86::MUL32m, 1, 0 },
+ { X86::MUL64r, X86::MUL64m, 1, 0 },
+ { X86::MUL8r, X86::MUL8m, 1, 0 },
+ { X86::SETAEr, X86::SETAEm, 0, 0 },
+ { X86::SETAr, X86::SETAm, 0, 0 },
+ { X86::SETBEr, X86::SETBEm, 0, 0 },
+ { X86::SETBr, X86::SETBm, 0, 0 },
+ { X86::SETEr, X86::SETEm, 0, 0 },
+ { X86::SETGEr, X86::SETGEm, 0, 0 },
+ { X86::SETGr, X86::SETGm, 0, 0 },
+ { X86::SETLEr, X86::SETLEm, 0, 0 },
+ { X86::SETLr, X86::SETLm, 0, 0 },
+ { X86::SETNEr, X86::SETNEm, 0, 0 },
+ { X86::SETNOr, X86::SETNOm, 0, 0 },
+ { X86::SETNPr, X86::SETNPm, 0, 0 },
+ { X86::SETNSr, X86::SETNSm, 0, 0 },
+ { X86::SETOr, X86::SETOm, 0, 0 },
+ { X86::SETPr, X86::SETPm, 0, 0 },
+ { X86::SETSr, X86::SETSm, 0, 0 },
+ { X86::TAILJMPr, X86::TAILJMPm, 1, 0 },
+ { X86::TEST16ri, X86::TEST16mi, 1, 0 },
+ { X86::TEST32ri, X86::TEST32mi, 1, 0 },
+ { X86::TEST64ri32, X86::TEST64mi32, 1, 0 },
+ { X86::TEST8ri, X86::TEST8mi, 1, 0 }
};
for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
unsigned RegOp = OpTbl0[i][0];
unsigned MemOp = OpTbl0[i][1];
+ unsigned Align = OpTbl0[i][3];
if (!RegOp2MemOpTable0.insert(std::make_pair((unsigned*)RegOp,
- MemOp)).second)
+ std::make_pair(MemOp,Align))).second)
assert(false && "Duplicated entries?");
unsigned FoldedLoad = OpTbl0[i][2];
// Index 0, folded load or store.
@@ -319,338 +321,342 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
AmbEntries.push_back(MemOp);
}
- static const unsigned OpTbl1[][2] = {
- { X86::CMP16rr, X86::CMP16rm },
- { X86::CMP32rr, X86::CMP32rm },
- { X86::CMP64rr, X86::CMP64rm },
- { X86::CMP8rr, X86::CMP8rm },
- { X86::CVTSD2SSrr, X86::CVTSD2SSrm },
- { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm },
- { X86::CVTSI2SDrr, X86::CVTSI2SDrm },
- { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm },
- { X86::CVTSI2SSrr, X86::CVTSI2SSrm },
- { X86::CVTSS2SDrr, X86::CVTSS2SDrm },
- { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm },
- { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm },
- { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm },
- { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm },
- { X86::FsMOVAPDrr, X86::MOVSDrm },
- { X86::FsMOVAPSrr, X86::MOVSSrm },
- { X86::IMUL16rri, X86::IMUL16rmi },
- { X86::IMUL16rri8, X86::IMUL16rmi8 },
- { X86::IMUL32rri, X86::IMUL32rmi },
- { X86::IMUL32rri8, X86::IMUL32rmi8 },
- { X86::IMUL64rri32, X86::IMUL64rmi32 },
- { X86::IMUL64rri8, X86::IMUL64rmi8 },
- { X86::Int_CMPSDrr, X86::Int_CMPSDrm },
- { X86::Int_CMPSSrr, X86::Int_CMPSSrm },
- { X86::Int_COMISDrr, X86::Int_COMISDrm },
- { X86::Int_COMISSrr, X86::Int_COMISSrm },
- { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm },
- { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm },
- { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm },
- { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm },
- { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm },
- { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm },
- { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm },
- { X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm },
- { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm },
- { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm },
- { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm },
- { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm },
- { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm },
- { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm },
- { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm },
- { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm },
- { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm },
- { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm },
- { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm },
- { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm },
- { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm },
- { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm },
- { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm },
- { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm },
- { X86::MOV16rr, X86::MOV16rm },
- { X86::MOV32rr, X86::MOV32rm },
- { X86::MOV64rr, X86::MOV64rm },
- { X86::MOV64toPQIrr, X86::MOVQI2PQIrm },
- { X86::MOV64toSDrr, X86::MOV64toSDrm },
- { X86::MOV8rr, X86::MOV8rm },
- { X86::MOVAPDrr, X86::MOVAPDrm },
- { X86::MOVAPSrr, X86::MOVAPSrm },
- { X86::MOVDDUPrr, X86::MOVDDUPrm },
- { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm },
- { X86::MOVDI2SSrr, X86::MOVDI2SSrm },
- { X86::MOVDQArr, X86::MOVDQArm },
- { X86::MOVSD2PDrr, X86::MOVSD2PDrm },
- { X86::MOVSDrr, X86::MOVSDrm },
- { X86::MOVSHDUPrr, X86::MOVSHDUPrm },
- { X86::MOVSLDUPrr, X86::MOVSLDUPrm },
- { X86::MOVSS2PSrr, X86::MOVSS2PSrm },
- { X86::MOVSSrr, X86::MOVSSrm },
- { X86::MOVSX16rr8, X86::MOVSX16rm8 },
- { X86::MOVSX32rr16, X86::MOVSX32rm16 },
- { X86::MOVSX32rr8, X86::MOVSX32rm8 },
- { X86::MOVSX64rr16, X86::MOVSX64rm16 },
- { X86::MOVSX64rr32, X86::MOVSX64rm32 },
- { X86::MOVSX64rr8, X86::MOVSX64rm8 },
- { X86::MOVUPDrr, X86::MOVUPDrm },
- { X86::MOVUPSrr, X86::MOVUPSrm },
- { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm },
- { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm },
- { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm },
- { X86::MOVZX16rr8, X86::MOVZX16rm8 },
- { X86::MOVZX32rr16, X86::MOVZX32rm16 },
- { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8 },
- { X86::MOVZX32rr8, X86::MOVZX32rm8 },
- { X86::MOVZX64rr16, X86::MOVZX64rm16 },
- { X86::MOVZX64rr32, X86::MOVZX64rm32 },
- { X86::MOVZX64rr8, X86::MOVZX64rm8 },
- { X86::PSHUFDri, X86::PSHUFDmi },
- { X86::PSHUFHWri, X86::PSHUFHWmi },
- { X86::PSHUFLWri, X86::PSHUFLWmi },
- { X86::RCPPSr, X86::RCPPSm },
- { X86::RCPPSr_Int, X86::RCPPSm_Int },
- { X86::RSQRTPSr, X86::RSQRTPSm },
- { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int },
- { X86::RSQRTSSr, X86::RSQRTSSm },
- { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int },
- { X86::SQRTPDr, X86::SQRTPDm },
- { X86::SQRTPDr_Int, X86::SQRTPDm_Int },
- { X86::SQRTPSr, X86::SQRTPSm },
- { X86::SQRTPSr_Int, X86::SQRTPSm_Int },
- { X86::SQRTSDr, X86::SQRTSDm },
- { X86::SQRTSDr_Int, X86::SQRTSDm_Int },
- { X86::SQRTSSr, X86::SQRTSSm },
- { X86::SQRTSSr_Int, X86::SQRTSSm_Int },
- { X86::TEST16rr, X86::TEST16rm },
- { X86::TEST32rr, X86::TEST32rm },
- { X86::TEST64rr, X86::TEST64rm },
- { X86::TEST8rr, X86::TEST8rm },
+ static const unsigned OpTbl1[][3] = {
+ { X86::CMP16rr, X86::CMP16rm, 0 },
+ { X86::CMP32rr, X86::CMP32rm, 0 },
+ { X86::CMP64rr, X86::CMP64rm, 0 },
+ { X86::CMP8rr, X86::CMP8rm, 0 },
+ { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 },
+ { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 },
+ { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 },
+ { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 },
+ { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 },
+ { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 },
+ { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 },
+ { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 },
+ { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 },
+ { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 },
+ { X86::FsMOVAPDrr, X86::MOVSDrm, 0 },
+ { X86::FsMOVAPSrr, X86::MOVSSrm, 0 },
+ { X86::IMUL16rri, X86::IMUL16rmi, 0 },
+ { X86::IMUL16rri8, X86::IMUL16rmi8, 0 },
+ { X86::IMUL32rri, X86::IMUL32rmi, 0 },
+ { X86::IMUL32rri8, X86::IMUL32rmi8, 0 },
+ { X86::IMUL64rri32, X86::IMUL64rmi32, 0 },
+ { X86::IMUL64rri8, X86::IMUL64rmi8, 0 },
+ { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 },
+ { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 },
+ { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 },
+ { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 },
+ { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, 16 },
+ { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm, 16 },
+ { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm, 16 },
+ { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, 16 },
+ { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, 16 },
+ { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 },
+ { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm, 0 },
+ { X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm, 0 },
+ { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
+ { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
+ { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
+ { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 },
+ { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
+ { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
+ { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm, 0 },
+ { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm, 0 },
+ { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm, 16 },
+ { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm, 16 },
+ { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
+ { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 },
+ { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 },
+ { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, 0 },
+ { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, 0 },
+ { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, 0 },
+ { X86::MOV16rr, X86::MOV16rm, 0 },
+ { X86::MOV32rr, X86::MOV32rm, 0 },
+ { X86::MOV64rr, X86::MOV64rm, 0 },
+ { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 },
+ { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 },
+ { X86::MOV8rr, X86::MOV8rm, 0 },
+ { X86::MOVAPDrr, X86::MOVAPDrm, 16 },
+ { X86::MOVAPSrr, X86::MOVAPSrm, 16 },
+ { X86::MOVDDUPrr, X86::MOVDDUPrm, 0 },
+ { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
+ { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 },
+ { X86::MOVDQArr, X86::MOVDQArm, 16 },
+ { X86::MOVSD2PDrr, X86::MOVSD2PDrm, 0 },
+ { X86::MOVSDrr, X86::MOVSDrm, 0 },
+ { X86::MOVSHDUPrr, X86::MOVSHDUPrm, 16 },
+ { X86::MOVSLDUPrr, X86::MOVSLDUPrm, 16 },
+ { X86::MOVSS2PSrr, X86::MOVSS2PSrm, 0 },
+ { X86::MOVSSrr, X86::MOVSSrm, 0 },
+ { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 },
+ { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 },
+ { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 },
+ { X86::MOVSX64rr16, X86::MOVSX64rm16, 0 },
+ { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 },
+ { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 },
+ { X86::MOVUPDrr, X86::MOVUPDrm, 16 },
+ { X86::MOVUPSrr, X86::MOVUPSrm, 16 },
+ { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm, 0 },
+ { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 },
+ { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, 16 },
+ { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 },
+ { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 },
+ { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 },
+ { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 },
+ { X86::MOVZX64rr16, X86::MOVZX64rm16, 0 },
+ { X86::MOVZX64rr32, X86::MOVZX64rm32, 0 },
+ { X86::MOVZX64rr8, X86::MOVZX64rm8, 0 },
+ { X86::PSHUFDri, X86::PSHUFDmi, 16 },
+ { X86::PSHUFHWri, X86::PSHUFHWmi, 16 },
+ { X86::PSHUFLWri, X86::PSHUFLWmi, 16 },
+ { X86::RCPPSr, X86::RCPPSm, 16 },
+ { X86::RCPPSr_Int, X86::RCPPSm_Int, 16 },
+ { X86::RSQRTPSr, X86::RSQRTPSm, 16 },
+ { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int, 16 },
+ { X86::RSQRTSSr, X86::RSQRTSSm, 0 },
+ { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 },
+ { X86::SQRTPDr, X86::SQRTPDm, 16 },
+ { X86::SQRTPDr_Int, X86::SQRTPDm_Int, 16 },
+ { X86::SQRTPSr, X86::SQRTPSm, 16 },
+ { X86::SQRTPSr_Int, X86::SQRTPSm_Int, 16 },
+ { X86::SQRTSDr, X86::SQRTSDm, 0 },
+ { X86::SQRTSDr_Int, X86::SQRTSDm_Int, 0 },
+ { X86::SQRTSSr, X86::SQRTSSm, 0 },
+ { X86::SQRTSSr_Int, X86::SQRTSSm_Int, 0 },
+ { X86::TEST16rr, X86::TEST16rm, 0 },
+ { X86::TEST32rr, X86::TEST32rm, 0 },
+ { X86::TEST64rr, X86::TEST64rm, 0 },
+ { X86::TEST8rr, X86::TEST8rm, 0 },
// FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
- { X86::UCOMISDrr, X86::UCOMISDrm },
- { X86::UCOMISSrr, X86::UCOMISSrm }
+ { X86::UCOMISDrr, X86::UCOMISDrm, 0 },
+ { X86::UCOMISSrr, X86::UCOMISSrm, 0 }
};
for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
unsigned RegOp = OpTbl1[i][0];
unsigned MemOp = OpTbl1[i][1];
+ unsigned Align = OpTbl1[i][2];
if (!RegOp2MemOpTable1.insert(std::make_pair((unsigned*)RegOp,
- MemOp)).second)
+ std::make_pair(MemOp,Align))).second)
assert(false && "Duplicated entries?");
- unsigned AuxInfo = 1 | (1 << 4); // Index 1, folded load
+ // Index 1, folded load
+ unsigned AuxInfo = 1 | (1 << 4);
if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr)
if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
std::make_pair(RegOp, AuxInfo))).second)
AmbEntries.push_back(MemOp);
}
- static const unsigned OpTbl2[][2] = {
- { X86::ADC32rr, X86::ADC32rm },
- { X86::ADC64rr, X86::ADC64rm },
- { X86::ADD16rr, X86::ADD16rm },
- { X86::ADD32rr, X86::ADD32rm },
- { X86::ADD64rr, X86::ADD64rm },
- { X86::ADD8rr, X86::ADD8rm },
- { X86::ADDPDrr, X86::ADDPDrm },
- { X86::ADDPSrr, X86::ADDPSrm },
- { X86::ADDSDrr, X86::ADDSDrm },
- { X86::ADDSSrr, X86::ADDSSrm },
- { X86::ADDSUBPDrr, X86::ADDSUBPDrm },
- { X86::ADDSUBPSrr, X86::ADDSUBPSrm },
- { X86::AND16rr, X86::AND16rm },
- { X86::AND32rr, X86::AND32rm },
- { X86::AND64rr, X86::AND64rm },
- { X86::AND8rr, X86::AND8rm },
- { X86::ANDNPDrr, X86::ANDNPDrm },
- { X86::ANDNPSrr, X86::ANDNPSrm },
- { X86::ANDPDrr, X86::ANDPDrm },
- { X86::ANDPSrr, X86::ANDPSrm },
- { X86::CMOVA16rr, X86::CMOVA16rm },
- { X86::CMOVA32rr, X86::CMOVA32rm },
- { X86::CMOVA64rr, X86::CMOVA64rm },
- { X86::CMOVAE16rr, X86::CMOVAE16rm },
- { X86::CMOVAE32rr, X86::CMOVAE32rm },
- { X86::CMOVAE64rr, X86::CMOVAE64rm },
- { X86::CMOVB16rr, X86::CMOVB16rm },
- { X86::CMOVB32rr, X86::CMOVB32rm },
- { X86::CMOVB64rr, X86::CMOVB64rm },
- { X86::CMOVBE16rr, X86::CMOVBE16rm },
- { X86::CMOVBE32rr, X86::CMOVBE32rm },
- { X86::CMOVBE64rr, X86::CMOVBE64rm },
- { X86::CMOVE16rr, X86::CMOVE16rm },
- { X86::CMOVE32rr, X86::CMOVE32rm },
- { X86::CMOVE64rr, X86::CMOVE64rm },
- { X86::CMOVG16rr, X86::CMOVG16rm },
- { X86::CMOVG32rr, X86::CMOVG32rm },
- { X86::CMOVG64rr, X86::CMOVG64rm },
- { X86::CMOVGE16rr, X86::CMOVGE16rm },
- { X86::CMOVGE32rr, X86::CMOVGE32rm },
- { X86::CMOVGE64rr, X86::CMOVGE64rm },
- { X86::CMOVL16rr, X86::CMOVL16rm },
- { X86::CMOVL32rr, X86::CMOVL32rm },
- { X86::CMOVL64rr, X86::CMOVL64rm },
- { X86::CMOVLE16rr, X86::CMOVLE16rm },
- { X86::CMOVLE32rr, X86::CMOVLE32rm },
- { X86::CMOVLE64rr, X86::CMOVLE64rm },
- { X86::CMOVNE16rr, X86::CMOVNE16rm },
- { X86::CMOVNE32rr, X86::CMOVNE32rm },
- { X86::CMOVNE64rr, X86::CMOVNE64rm },
- { X86::CMOVNO16rr, X86::CMOVNO16rm },
- { X86::CMOVNO32rr, X86::CMOVNO32rm },
- { X86::CMOVNO64rr, X86::CMOVNO64rm },
- { X86::CMOVNP16rr, X86::CMOVNP16rm },
- { X86::CMOVNP32rr, X86::CMOVNP32rm },
- { X86::CMOVNP64rr, X86::CMOVNP64rm },
- { X86::CMOVNS16rr, X86::CMOVNS16rm },
- { X86::CMOVNS32rr, X86::CMOVNS32rm },
- { X86::CMOVNS64rr, X86::CMOVNS64rm },
- { X86::CMOVO16rr, X86::CMOVO16rm },
- { X86::CMOVO32rr, X86::CMOVO32rm },
- { X86::CMOVO64rr, X86::CMOVO64rm },
- { X86::CMOVP16rr, X86::CMOVP16rm },
- { X86::CMOVP32rr, X86::CMOVP32rm },
- { X86::CMOVP64rr, X86::CMOVP64rm },
- { X86::CMOVS16rr, X86::CMOVS16rm },
- { X86::CMOVS32rr, X86::CMOVS32rm },
- { X86::CMOVS64rr, X86::CMOVS64rm },
- { X86::CMPPDrri, X86::CMPPDrmi },
- { X86::CMPPSrri, X86::CMPPSrmi },
- { X86::CMPSDrr, X86::CMPSDrm },
- { X86::CMPSSrr, X86::CMPSSrm },
- { X86::DIVPDrr, X86::DIVPDrm },
- { X86::DIVPSrr, X86::DIVPSrm },
- { X86::DIVSDrr, X86::DIVSDrm },
- { X86::DIVSSrr, X86::DIVSSrm },
- { X86::FsANDNPDrr, X86::FsANDNPDrm },
- { X86::FsANDNPSrr, X86::FsANDNPSrm },
- { X86::FsANDPDrr, X86::FsANDPDrm },
- { X86::FsANDPSrr, X86::FsANDPSrm },
- { X86::FsORPDrr, X86::FsORPDrm },
- { X86::FsORPSrr, X86::FsORPSrm },
- { X86::FsXORPDrr, X86::FsXORPDrm },
- { X86::FsXORPSrr, X86::FsXORPSrm },
- { X86::HADDPDrr, X86::HADDPDrm },
- { X86::HADDPSrr, X86::HADDPSrm },
- { X86::HSUBPDrr, X86::HSUBPDrm },
- { X86::HSUBPSrr, X86::HSUBPSrm },
- { X86::IMUL16rr, X86::IMUL16rm },
- { X86::IMUL32rr, X86::IMUL32rm },
- { X86::IMUL64rr, X86::IMUL64rm },
- { X86::MAXPDrr, X86::MAXPDrm },
- { X86::MAXPDrr_Int, X86::MAXPDrm_Int },
- { X86::MAXPSrr, X86::MAXPSrm },
- { X86::MAXPSrr_Int, X86::MAXPSrm_Int },
- { X86::MAXSDrr, X86::MAXSDrm },
- { X86::MAXSDrr_Int, X86::MAXSDrm_Int },
- { X86::MAXSSrr, X86::MAXSSrm },
- { X86::MAXSSrr_Int, X86::MAXSSrm_Int },
- { X86::MINPDrr, X86::MINPDrm },
- { X86::MINPDrr_Int, X86::MINPDrm_Int },
- { X86::MINPSrr, X86::MINPSrm },
- { X86::MINPSrr_Int, X86::MINPSrm_Int },
- { X86::MINSDrr, X86::MINSDrm },
- { X86::MINSDrr_Int, X86::MINSDrm_Int },
- { X86::MINSSrr, X86::MINSSrm },
- { X86::MINSSrr_Int, X86::MINSSrm_Int },
- { X86::MULPDrr, X86::MULPDrm },
- { X86::MULPSrr, X86::MULPSrm },
- { X86::MULSDrr, X86::MULSDrm },
- { X86::MULSSrr, X86::MULSSrm },
- { X86::OR16rr, X86::OR16rm },
- { X86::OR32rr, X86::OR32rm },
- { X86::OR64rr, X86::OR64rm },
- { X86::OR8rr, X86::OR8rm },
- { X86::ORPDrr, X86::ORPDrm },
- { X86::ORPSrr, X86::ORPSrm },
- { X86::PACKSSDWrr, X86::PACKSSDWrm },
- { X86::PACKSSWBrr, X86::PACKSSWBrm },
- { X86::PACKUSWBrr, X86::PACKUSWBrm },
- { X86::PADDBrr, X86::PADDBrm },
- { X86::PADDDrr, X86::PADDDrm },
- { X86::PADDQrr, X86::PADDQrm },
- { X86::PADDSBrr, X86::PADDSBrm },
- { X86::PADDSWrr, X86::PADDSWrm },
- { X86::PADDWrr, X86::PADDWrm },
- { X86::PANDNrr, X86::PANDNrm },
- { X86::PANDrr, X86::PANDrm },
- { X86::PAVGBrr, X86::PAVGBrm },
- { X86::PAVGWrr, X86::PAVGWrm },
- { X86::PCMPEQBrr, X86::PCMPEQBrm },
- { X86::PCMPEQDrr, X86::PCMPEQDrm },
- { X86::PCMPEQWrr, X86::PCMPEQWrm },
- { X86::PCMPGTBrr, X86::PCMPGTBrm },
- { X86::PCMPGTDrr, X86::PCMPGTDrm },
- { X86::PCMPGTWrr, X86::PCMPGTWrm },
- { X86::PINSRWrri, X86::PINSRWrmi },
- { X86::PMADDWDrr, X86::PMADDWDrm },
- { X86::PMAXSWrr, X86::PMAXSWrm },
- { X86::PMAXUBrr, X86::PMAXUBrm },
- { X86::PMINSWrr, X86::PMINSWrm },
- { X86::PMINUBrr, X86::PMINUBrm },
- { X86::PMULDQrr, X86::PMULDQrm },
- { X86::PMULHUWrr, X86::PMULHUWrm },
- { X86::PMULHWrr, X86::PMULHWrm },
- { X86::PMULLDrr, X86::PMULLDrm },
- { X86::PMULLDrr_int, X86::PMULLDrm_int },
- { X86::PMULLWrr, X86::PMULLWrm },
- { X86::PMULUDQrr, X86::PMULUDQrm },
- { X86::PORrr, X86::PORrm },
- { X86::PSADBWrr, X86::PSADBWrm },
- { X86::PSLLDrr, X86::PSLLDrm },
- { X86::PSLLQrr, X86::PSLLQrm },
- { X86::PSLLWrr, X86::PSLLWrm },
- { X86::PSRADrr, X86::PSRADrm },
- { X86::PSRAWrr, X86::PSRAWrm },
- { X86::PSRLDrr, X86::PSRLDrm },
- { X86::PSRLQrr, X86::PSRLQrm },
- { X86::PSRLWrr, X86::PSRLWrm },
- { X86::PSUBBrr, X86::PSUBBrm },
- { X86::PSUBDrr, X86::PSUBDrm },
- { X86::PSUBSBrr, X86::PSUBSBrm },
- { X86::PSUBSWrr, X86::PSUBSWrm },
- { X86::PSUBWrr, X86::PSUBWrm },
- { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm },
- { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm },
- { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm },
- { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm },
- { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm },
- { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm },
- { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm },
- { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm },
- { X86::PXORrr, X86::PXORrm },
- { X86::SBB32rr, X86::SBB32rm },
- { X86::SBB64rr, X86::SBB64rm },
- { X86::SHUFPDrri, X86::SHUFPDrmi },
- { X86::SHUFPSrri, X86::SHUFPSrmi },
- { X86::SUB16rr, X86::SUB16rm },
- { X86::SUB32rr, X86::SUB32rm },
- { X86::SUB64rr, X86::SUB64rm },
- { X86::SUB8rr, X86::SUB8rm },
- { X86::SUBPDrr, X86::SUBPDrm },
- { X86::SUBPSrr, X86::SUBPSrm },
- { X86::SUBSDrr, X86::SUBSDrm },
- { X86::SUBSSrr, X86::SUBSSrm },
+ static const unsigned OpTbl2[][3] = {
+ { X86::ADC32rr, X86::ADC32rm, 0 },
+ { X86::ADC64rr, X86::ADC64rm, 0 },
+ { X86::ADD16rr, X86::ADD16rm, 0 },
+ { X86::ADD32rr, X86::ADD32rm, 0 },
+ { X86::ADD64rr, X86::ADD64rm, 0 },
+ { X86::ADD8rr, X86::ADD8rm, 0 },
+ { X86::ADDPDrr, X86::ADDPDrm, 16 },
+ { X86::ADDPSrr, X86::ADDPSrm, 16 },
+ { X86::ADDSDrr, X86::ADDSDrm, 0 },
+ { X86::ADDSSrr, X86::ADDSSrm, 0 },
+ { X86::ADDSUBPDrr, X86::ADDSUBPDrm, 16 },
+ { X86::ADDSUBPSrr, X86::ADDSUBPSrm, 16 },
+ { X86::AND16rr, X86::AND16rm, 0 },
+ { X86::AND32rr, X86::AND32rm, 0 },
+ { X86::AND64rr, X86::AND64rm, 0 },
+ { X86::AND8rr, X86::AND8rm, 0 },
+ { X86::ANDNPDrr, X86::ANDNPDrm, 16 },
+ { X86::ANDNPSrr, X86::ANDNPSrm, 16 },
+ { X86::ANDPDrr, X86::ANDPDrm, 16 },
+ { X86::ANDPSrr, X86::ANDPSrm, 16 },
+ { X86::CMOVA16rr, X86::CMOVA16rm, 0 },
+ { X86::CMOVA32rr, X86::CMOVA32rm, 0 },
+ { X86::CMOVA64rr, X86::CMOVA64rm, 0 },
+ { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 },
+ { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 },
+ { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 },
+ { X86::CMOVB16rr, X86::CMOVB16rm, 0 },
+ { X86::CMOVB32rr, X86::CMOVB32rm, 0 },
+ { X86::CMOVB64rr, X86::CMOVB64rm, 0 },
+ { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 },
+ { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 },
+ { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 },
+ { X86::CMOVE16rr, X86::CMOVE16rm, 0 },
+ { X86::CMOVE32rr, X86::CMOVE32rm, 0 },
+ { X86::CMOVE64rr, X86::CMOVE64rm, 0 },
+ { X86::CMOVG16rr, X86::CMOVG16rm, 0 },
+ { X86::CMOVG32rr, X86::CMOVG32rm, 0 },
+ { X86::CMOVG64rr, X86::CMOVG64rm, 0 },
+ { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 },
+ { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 },
+ { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 },
+ { X86::CMOVL16rr, X86::CMOVL16rm, 0 },
+ { X86::CMOVL32rr, X86::CMOVL32rm, 0 },
+ { X86::CMOVL64rr, X86::CMOVL64rm, 0 },
+ { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 },
+ { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 },
+ { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 },
+ { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 },
+ { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 },
+ { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 },
+ { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 },
+ { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 },
+ { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 },
+ { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 },
+ { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 },
+ { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 },
+ { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 },
+ { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 },
+ { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 },
+ { X86::CMOVO16rr, X86::CMOVO16rm, 0 },
+ { X86::CMOVO32rr, X86::CMOVO32rm, 0 },
+ { X86::CMOVO64rr, X86::CMOVO64rm, 0 },
+ { X86::CMOVP16rr, X86::CMOVP16rm, 0 },
+ { X86::CMOVP32rr, X86::CMOVP32rm, 0 },
+ { X86::CMOVP64rr, X86::CMOVP64rm, 0 },
+ { X86::CMOVS16rr, X86::CMOVS16rm, 0 },
+ { X86::CMOVS32rr, X86::CMOVS32rm, 0 },
+ { X86::CMOVS64rr, X86::CMOVS64rm, 0 },
+ { X86::CMPPDrri, X86::CMPPDrmi, 16 },
+ { X86::CMPPSrri, X86::CMPPSrmi, 16 },
+ { X86::CMPSDrr, X86::CMPSDrm, 0 },
+ { X86::CMPSSrr, X86::CMPSSrm, 0 },
+ { X86::DIVPDrr, X86::DIVPDrm, 16 },
+ { X86::DIVPSrr, X86::DIVPSrm, 16 },
+ { X86::DIVSDrr, X86::DIVSDrm, 0 },
+ { X86::DIVSSrr, X86::DIVSSrm, 0 },
+ { X86::FsANDNPDrr, X86::FsANDNPDrm, 16 },
+ { X86::FsANDNPSrr, X86::FsANDNPSrm, 16 },
+ { X86::FsANDPDrr, X86::FsANDPDrm, 16 },
+ { X86::FsANDPSrr, X86::FsANDPSrm, 16 },
+ { X86::FsORPDrr, X86::FsORPDrm, 16 },
+ { X86::FsORPSrr, X86::FsORPSrm, 16 },
+ { X86::FsXORPDrr, X86::FsXORPDrm, 16 },
+ { X86::FsXORPSrr, X86::FsXORPSrm, 16 },
+ { X86::HADDPDrr, X86::HADDPDrm, 16 },
+ { X86::HADDPSrr, X86::HADDPSrm, 16 },
+ { X86::HSUBPDrr, X86::HSUBPDrm, 16 },
+ { X86::HSUBPSrr, X86::HSUBPSrm, 16 },
+ { X86::IMUL16rr, X86::IMUL16rm, 0 },
+ { X86::IMUL32rr, X86::IMUL32rm, 0 },
+ { X86::IMUL64rr, X86::IMUL64rm, 0 },
+ { X86::MAXPDrr, X86::MAXPDrm, 16 },
+ { X86::MAXPDrr_Int, X86::MAXPDrm_Int, 16 },
+ { X86::MAXPSrr, X86::MAXPSrm, 16 },
+ { X86::MAXPSrr_Int, X86::MAXPSrm_Int, 16 },
+ { X86::MAXSDrr, X86::MAXSDrm, 0 },
+ { X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 },
+ { X86::MAXSSrr, X86::MAXSSrm, 0 },
+ { X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 },
+ { X86::MINPDrr, X86::MINPDrm, 16 },
+ { X86::MINPDrr_Int, X86::MINPDrm_Int, 16 },
+ { X86::MINPSrr, X86::MINPSrm, 16 },
+ { X86::MINPSrr_Int, X86::MINPSrm_Int, 16 },
+ { X86::MINSDrr, X86::MINSDrm, 0 },
+ { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 },
+ { X86::MINSSrr, X86::MINSSrm, 0 },
+ { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 },
+ { X86::MULPDrr, X86::MULPDrm, 16 },
+ { X86::MULPSrr, X86::MULPSrm, 16 },
+ { X86::MULSDrr, X86::MULSDrm, 0 },
+ { X86::MULSSrr, X86::MULSSrm, 0 },
+ { X86::OR16rr, X86::OR16rm, 0 },
+ { X86::OR32rr, X86::OR32rm, 0 },
+ { X86::OR64rr, X86::OR64rm, 0 },
+ { X86::OR8rr, X86::OR8rm, 0 },
+ { X86::ORPDrr, X86::ORPDrm, 16 },
+ { X86::ORPSrr, X86::ORPSrm, 16 },
+ { X86::PACKSSDWrr, X86::PACKSSDWrm, 16 },
+ { X86::PACKSSWBrr, X86::PACKSSWBrm, 16 },
+ { X86::PACKUSWBrr, X86::PACKUSWBrm, 16 },
+ { X86::PADDBrr, X86::PADDBrm, 16 },
+ { X86::PADDDrr, X86::PADDDrm, 16 },
+ { X86::PADDQrr, X86::PADDQrm, 16 },
+ { X86::PADDSBrr, X86::PADDSBrm, 16 },
+ { X86::PADDSWrr, X86::PADDSWrm, 16 },
+ { X86::PADDWrr, X86::PADDWrm, 16 },
+ { X86::PANDNrr, X86::PANDNrm, 16 },
+ { X86::PANDrr, X86::PANDrm, 16 },
+ { X86::PAVGBrr, X86::PAVGBrm, 16 },
+ { X86::PAVGWrr, X86::PAVGWrm, 16 },
+ { X86::PCMPEQBrr, X86::PCMPEQBrm, 16 },
+ { X86::PCMPEQDrr, X86::PCMPEQDrm, 16 },
+ { X86::PCMPEQWrr, X86::PCMPEQWrm, 16 },
+ { X86::PCMPGTBrr, X86::PCMPGTBrm, 16 },
+ { X86::PCMPGTDrr, X86::PCMPGTDrm, 16 },
+ { X86::PCMPGTWrr, X86::PCMPGTWrm, 16 },
+ { X86::PINSRWrri, X86::PINSRWrmi, 16 },
+ { X86::PMADDWDrr, X86::PMADDWDrm, 16 },
+ { X86::PMAXSWrr, X86::PMAXSWrm, 16 },
+ { X86::PMAXUBrr, X86::PMAXUBrm, 16 },
+ { X86::PMINSWrr, X86::PMINSWrm, 16 },
+ { X86::PMINUBrr, X86::PMINUBrm, 16 },
+ { X86::PMULDQrr, X86::PMULDQrm, 16 },
+ { X86::PMULHUWrr, X86::PMULHUWrm, 16 },
+ { X86::PMULHWrr, X86::PMULHWrm, 16 },
+ { X86::PMULLDrr, X86::PMULLDrm, 16 },
+ { X86::PMULLDrr_int, X86::PMULLDrm_int, 16 },
+ { X86::PMULLWrr, X86::PMULLWrm, 16 },
+ { X86::PMULUDQrr, X86::PMULUDQrm, 16 },
+ { X86::PORrr, X86::PORrm, 16 },
+ { X86::PSADBWrr, X86::PSADBWrm, 16 },
+ { X86::PSLLDrr, X86::PSLLDrm, 16 },
+ { X86::PSLLQrr, X86::PSLLQrm, 16 },
+ { X86::PSLLWrr, X86::PSLLWrm, 16 },
+ { X86::PSRADrr, X86::PSRADrm, 16 },
+ { X86::PSRAWrr, X86::PSRAWrm, 16 },
+ { X86::PSRLDrr, X86::PSRLDrm, 16 },
+ { X86::PSRLQrr, X86::PSRLQrm, 16 },
+ { X86::PSRLWrr, X86::PSRLWrm, 16 },
+ { X86::PSUBBrr, X86::PSUBBrm, 16 },
+ { X86::PSUBDrr, X86::PSUBDrm, 16 },
+ { X86::PSUBSBrr, X86::PSUBSBrm, 16 },
+ { X86::PSUBSWrr, X86::PSUBSWrm, 16 },
+ { X86::PSUBWrr, X86::PSUBWrm, 16 },
+ { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, 16 },
+ { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, 16 },
+ { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, 16 },
+ { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, 16 },
+ { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, 16 },
+ { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, 16 },
+ { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, 16 },
+ { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, 16 },
+ { X86::PXORrr, X86::PXORrm, 16 },
+ { X86::SBB32rr, X86::SBB32rm, 0 },
+ { X86::SBB64rr, X86::SBB64rm, 0 },
+ { X86::SHUFPDrri, X86::SHUFPDrmi, 16 },
+ { X86::SHUFPSrri, X86::SHUFPSrmi, 16 },
+ { X86::SUB16rr, X86::SUB16rm, 0 },
+ { X86::SUB32rr, X86::SUB32rm, 0 },
+ { X86::SUB64rr, X86::SUB64rm, 0 },
+ { X86::SUB8rr, X86::SUB8rm, 0 },
+ { X86::SUBPDrr, X86::SUBPDrm, 16 },
+ { X86::SUBPSrr, X86::SUBPSrm, 16 },
+ { X86::SUBSDrr, X86::SUBSDrm, 0 },
+ { X86::SUBSSrr, X86::SUBSSrm, 0 },
// FIXME: TEST*rr -> swapped operand of TEST*mr.
- { X86::UNPCKHPDrr, X86::UNPCKHPDrm },
- { X86::UNPCKHPSrr, X86::UNPCKHPSrm },
- { X86::UNPCKLPDrr, X86::UNPCKLPDrm },
- { X86::UNPCKLPSrr, X86::UNPCKLPSrm },
- { X86::XOR16rr, X86::XOR16rm },
- { X86::XOR32rr, X86::XOR32rm },
- { X86::XOR64rr, X86::XOR64rm },
- { X86::XOR8rr, X86::XOR8rm },
- { X86::XORPDrr, X86::XORPDrm },
- { X86::XORPSrr, X86::XORPSrm }
+ { X86::UNPCKHPDrr, X86::UNPCKHPDrm, 16 },
+ { X86::UNPCKHPSrr, X86::UNPCKHPSrm, 16 },
+ { X86::UNPCKLPDrr, X86::UNPCKLPDrm, 16 },
+ { X86::UNPCKLPSrr, X86::UNPCKLPSrm, 16 },
+ { X86::XOR16rr, X86::XOR16rm, 0 },
+ { X86::XOR32rr, X86::XOR32rm, 0 },
+ { X86::XOR64rr, X86::XOR64rm, 0 },
+ { X86::XOR8rr, X86::XOR8rm, 0 },
+ { X86::XORPDrr, X86::XORPDrm, 16 },
+ { X86::XORPSrr, X86::XORPSrm, 16 }
};
for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
unsigned RegOp = OpTbl2[i][0];
unsigned MemOp = OpTbl2[i][1];
+ unsigned Align = OpTbl2[i][2];
if (!RegOp2MemOpTable2.insert(std::make_pair((unsigned*)RegOp,
- MemOp)).second)
+ std::make_pair(MemOp,Align))).second)
assert(false && "Duplicated entries?");
- unsigned AuxInfo = 2 | (1 << 4); // Index 2, folded load
+ // Index 2, folded load
+ unsigned AuxInfo = 2 | (1 << 4);
if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
std::make_pair(RegOp, AuxInfo))).second)
AmbEntries.push_back(MemOp);
@@ -760,7 +766,6 @@ unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
return 0;
}
-
/// regIsPICBase - Return true if the register is a PIC base (i.e., defined by
/// X86::MOVPC32r).
static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
@@ -776,37 +781,9 @@ static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
return isPICBase;
}
-/// isGVStub - Return true if the GV requires an extra load to get the
-/// real address.
-static inline bool isGVStub(GlobalValue *GV, X86TargetMachine &TM) {
- return TM.getSubtarget<X86Subtarget>().GVRequiresExtraLoad(GV, TM, false);
-}
-
-/// CanRematLoadWithDispOperand - Return true if a load with the specified
-/// operand is a candidate for remat: for this to be true we need to know that
-/// the load will always return the same value, even if moved.
-static bool CanRematLoadWithDispOperand(const MachineOperand &MO,
- X86TargetMachine &TM) {
- // Loads from constant pool entries can be remat'd.
- if (MO.isCPI()) return true;
-
- // We can remat globals in some cases.
- if (MO.isGlobal()) {
- // If this is a load of a stub, not of the global, we can remat it. This
- // access will always return the address of the global.
- if (isGVStub(MO.getGlobal(), TM))
- return true;
-
- // If the global itself is constant, we can remat the load.
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(MO.getGlobal()))
- if (GV->isConstant())
- return true;
- }
- return false;
-}
-
bool
-X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const {
+X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
+ AliasAnalysis *AA) const {
switch (MI->getOpcode()) {
default: break;
case X86::MOV8rm:
@@ -825,7 +802,7 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const {
if (MI->getOperand(1).isReg() &&
MI->getOperand(2).isImm() &&
MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
- CanRematLoadWithDispOperand(MI->getOperand(4), TM)) {
+ MI->isInvariantLoad(AA)) {
unsigned BaseReg = MI->getOperand(1).getReg();
if (BaseReg == 0 || BaseReg == X86::RIP)
return true;
@@ -876,7 +853,7 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const {
/// isSafeToClobberEFLAGS - Return true if it's safe to insert an instruction that
/// would clobber the EFLAGS condition register. Note the result may be
/// conservative. If it cannot definitely determine the safety after visiting
-/// two instructions it assumes it's not safe.
+/// a few instructions in each direction, it assumes it's not safe.
static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) {
// It's always safe to clobber EFLAGS at the end of a block.
@@ -884,11 +861,13 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
return true;
// For compile time consideration, if we are not able to determine the
- // safety after visiting 2 instructions, we will assume it's not safe.
- for (unsigned i = 0; i < 2; ++i) {
+ // safety after visiting 4 instructions in each direction, we will assume
+ // it's not safe.
+ MachineBasicBlock::iterator Iter = I;
+ for (unsigned i = 0; i < 4; ++i) {
bool SeenDef = false;
- for (unsigned j = 0, e = I->getNumOperands(); j != e; ++j) {
- MachineOperand &MO = I->getOperand(j);
+ for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
+ MachineOperand &MO = Iter->getOperand(j);
if (!MO.isReg())
continue;
if (MO.getReg() == X86::EFLAGS) {
@@ -901,10 +880,33 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
if (SeenDef)
// This instruction defines EFLAGS, no need to look any further.
return true;
- ++I;
+ ++Iter;
// If we make it to the end of the block, it's safe to clobber EFLAGS.
- if (I == MBB.end())
+ if (Iter == MBB.end())
+ return true;
+ }
+
+ Iter = I;
+ for (unsigned i = 0; i < 4; ++i) {
+ // If we make it to the beginning of the block, it's safe to clobber
+ // EFLAGS iff EFLAGS is not live-in.
+ if (Iter == MBB.begin())
+ return !MBB.isLiveIn(X86::EFLAGS);
+
+ --Iter;
+ bool SawKill = false;
+ for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
+ MachineOperand &MO = Iter->getOperand(j);
+ if (MO.isReg() && MO.getReg() == X86::EFLAGS) {
+ if (MO.isDef()) return MO.isDead();
+ if (MO.isKill()) SawKill = true;
+ }
+ }
+
+ if (SawKill)
+ // This instruction kills EFLAGS and doesn't redefine it, so
+ // there's no need to look further.
return true;
}
@@ -914,14 +916,11 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- unsigned DestReg,
+ unsigned DestReg, unsigned SubIdx,
const MachineInstr *Orig) const {
DebugLoc DL = DebugLoc::getUnknownLoc();
if (I != MBB.end()) DL = I->getDebugLoc();
- unsigned SubIdx = Orig->getOperand(0).isReg()
- ? Orig->getOperand(0).getSubReg() : 0;
- bool ChangeSubIdx = SubIdx != 0;
if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
DestReg = RI.getSubReg(DestReg, SubIdx);
SubIdx = 0;
@@ -929,76 +928,36 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
// MOV32r0 etc. are implemented with xor which clobbers condition code.
// Re-materialize them as movri instructions to avoid side effects.
- bool Emitted = false;
- switch (Orig->getOpcode()) {
+ bool Clone = true;
+ unsigned Opc = Orig->getOpcode();
+ switch (Opc) {
default: break;
case X86::MOV8r0:
case X86::MOV16r0:
- case X86::MOV32r0:
- case X86::MOV64r0: {
+ case X86::MOV32r0: {
if (!isSafeToClobberEFLAGS(MBB, I)) {
- unsigned Opc = 0;
- switch (Orig->getOpcode()) {
+ switch (Opc) {
default: break;
case X86::MOV8r0: Opc = X86::MOV8ri; break;
case X86::MOV16r0: Opc = X86::MOV16ri; break;
case X86::MOV32r0: Opc = X86::MOV32ri; break;
- case X86::MOV64r0: Opc = X86::MOV64ri32; break;
}
- BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0);
- Emitted = true;
+ Clone = false;
}
break;
}
}
- if (!Emitted) {
+ if (Clone) {
MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
MI->getOperand(0).setReg(DestReg);
MBB.insert(I, MI);
+ } else {
+ BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0);
}
- if (ChangeSubIdx) {
- MachineInstr *NewMI = prior(I);
- NewMI->getOperand(0).setSubReg(SubIdx);
- }
-}
-
-/// isInvariantLoad - Return true if the specified instruction (which is marked
-/// mayLoad) is loading from a location whose value is invariant across the
-/// function. For example, loading a value from the constant pool or from
-/// from the argument area of a function if it does not change. This should
-/// only return true of *all* loads the instruction does are invariant (if it
-/// does multiple loads).
-bool X86InstrInfo::isInvariantLoad(const MachineInstr *MI) const {
- // This code cares about loads from three cases: constant pool entries,
- // invariant argument slots, and global stubs. In order to handle these cases
- // for all of the myriad of X86 instructions, we just scan for a CP/FI/GV
- // operand and base our analysis on it. This is safe because the address of
- // none of these three cases is ever used as anything other than a load base
- // and X86 doesn't have any instructions that load from multiple places.
-
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- // Loads from constant pools are trivially invariant.
- if (MO.isCPI())
- return true;
-
- if (MO.isGlobal())
- return isGVStub(MO.getGlobal(), TM);
-
- // If this is a load from an invariant stack slot, the load is a constant.
- if (MO.isFI()) {
- const MachineFrameInfo &MFI =
- *MI->getParent()->getParent()->getFrameInfo();
- int Idx = MO.getIndex();
- return MFI.isFixedObjectIndex(Idx) && MFI.isImmutableObjectIndex(Idx);
- }
- }
-
- // All other instances of these instructions are presumed to have other
- // issues.
- return false;
+ MachineInstr *NewMI = prior(I);
+ NewMI->getOperand(0).setSubReg(SubIdx);
}
/// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that
@@ -1304,7 +1263,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
unsigned Opc;
unsigned Size;
switch (MI->getOpcode()) {
- default: assert(0 && "Unreachable!");
+ default: llvm_unreachable("Unreachable!");
case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break;
case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break;
case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break;
@@ -1459,7 +1418,7 @@ static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) {
unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
switch (CC) {
- default: assert(0 && "Illegal condition code!");
+ default: llvm_unreachable("Illegal condition code!");
case X86::COND_E: return X86::JE;
case X86::COND_NE: return X86::JNE;
case X86::COND_L: return X86::JL;
@@ -1483,7 +1442,7 @@ unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
/// e.g. turning COND_E to COND_NE.
X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
switch (CC) {
- default: assert(0 && "Illegal condition code!");
+ default: llvm_unreachable("Illegal condition code!");
case X86::COND_E: return X86::COND_NE;
case X86::COND_NE: return X86::COND_E;
case X86::COND_L: return X86::COND_GE;
@@ -1699,14 +1658,26 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
/* Source and destination have the same register class. */;
else if (CommonRC->hasSuperClass(SrcRC))
CommonRC = SrcRC;
- else if (!DestRC->hasSubClass(SrcRC))
- CommonRC = 0;
+ else if (!DestRC->hasSubClass(SrcRC)) {
+ // Neither GR64_NOREX nor GR64_NOSP is a superclass of the other, but
+ // we want to copy them as GR64. Similarly, for GR32_NOREX and
+ // GR32_NOSP, copy as GR32.
+ if (SrcRC->hasSuperClass(&X86::GR64RegClass) &&
+ DestRC->hasSuperClass(&X86::GR64RegClass))
+ CommonRC = &X86::GR64RegClass;
+ else if (SrcRC->hasSuperClass(&X86::GR32RegClass) &&
+ DestRC->hasSuperClass(&X86::GR32RegClass))
+ CommonRC = &X86::GR32RegClass;
+ else
+ CommonRC = 0;
+ }
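
A minimal model of that fallback, with illustrative enums rather than LLVM's register-class objects: GR64_NOREX and GR64_NOSP each exclude different registers, so neither contains the other, yet both are subclasses of GR64, and a plain 64-bit move is legal between them.

enum RC { GR32, GR32_NOREX, GR32_NOSP, GR64, GR64_NOREX, GR64_NOSP, NONE };

bool inGR64(RC C) { return C == GR64 || C == GR64_NOREX || C == GR64_NOSP; }
bool inGR32(RC C) { return C == GR32 || C == GR32_NOREX || C == GR32_NOSP; }

// Copies between sibling subclasses go through the shared superclass.
RC commonCopyClass(RC Src, RC Dst) {
  if (inGR64(Src) && inGR64(Dst)) return GR64;
  if (inGR32(Src) && inGR32(Dst)) return GR32;
  return NONE; // no common class: copyRegToReg gives up
}
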
if (CommonRC) {
unsigned Opc;
- if (CommonRC == &X86::GR64RegClass) {
+ if (CommonRC == &X86::GR64RegClass || CommonRC == &X86::GR64_NOSPRegClass) {
Opc = X86::MOV64rr;
- } else if (CommonRC == &X86::GR32RegClass) {
+ } else if (CommonRC == &X86::GR32RegClass ||
+ CommonRC == &X86::GR32_NOSPRegClass) {
Opc = X86::MOV32rr;
} else if (CommonRC == &X86::GR16RegClass) {
Opc = X86::MOV16rr;
@@ -1731,7 +1702,8 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
Opc = X86::MOV8rr_NOREX;
else
Opc = X86::MOV8rr;
- } else if (CommonRC == &X86::GR64_NOREXRegClass) {
+ } else if (CommonRC == &X86::GR64_NOREXRegClass ||
+ CommonRC == &X86::GR64_NOREX_NOSPRegClass) {
Opc = X86::MOV64rr;
} else if (CommonRC == &X86::GR32_NOREXRegClass) {
Opc = X86::MOV32rr;
@@ -1759,16 +1731,17 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
BuildMI(MBB, MI, DL, get(Opc), DestReg).addReg(SrcReg);
return true;
}
-
+
// Moving EFLAGS to / from another register requires a push and a pop.
if (SrcRC == &X86::CCRRegClass) {
if (SrcReg != X86::EFLAGS)
return false;
- if (DestRC == &X86::GR64RegClass) {
+ if (DestRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) {
BuildMI(MBB, MI, DL, get(X86::PUSHFQ));
BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
return true;
- } else if (DestRC == &X86::GR32RegClass) {
+ } else if (DestRC == &X86::GR32RegClass ||
+ DestRC == &X86::GR32_NOSPRegClass) {
BuildMI(MBB, MI, DL, get(X86::PUSHFD));
BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
return true;
@@ -1776,11 +1749,12 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
} else if (DestRC == &X86::CCRRegClass) {
if (DestReg != X86::EFLAGS)
return false;
- if (SrcRC == &X86::GR64RegClass) {
+ if (SrcRC == &X86::GR64RegClass || SrcRC == &X86::GR64_NOSPRegClass) {
BuildMI(MBB, MI, DL, get(X86::PUSH64r)).addReg(SrcReg);
BuildMI(MBB, MI, DL, get(X86::POPFQ));
return true;
- } else if (SrcRC == &X86::GR32RegClass) {
+ } else if (SrcRC == &X86::GR32RegClass ||
+ SrcRC == &X86::GR32_NOSPRegClass) {
BuildMI(MBB, MI, DL, get(X86::PUSH32r)).addReg(SrcReg);
BuildMI(MBB, MI, DL, get(X86::POPFD));
return true;
@@ -1838,9 +1812,9 @@ static unsigned getStoreRegOpcode(unsigned SrcReg,
bool isStackAligned,
TargetMachine &TM) {
unsigned Opc = 0;
- if (RC == &X86::GR64RegClass) {
+ if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) {
Opc = X86::MOV64mr;
- } else if (RC == &X86::GR32RegClass) {
+ } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) {
Opc = X86::MOV32mr;
} else if (RC == &X86::GR16RegClass) {
Opc = X86::MOV16mr;
@@ -1865,7 +1839,8 @@ static unsigned getStoreRegOpcode(unsigned SrcReg,
Opc = X86::MOV8mr_NOREX;
else
Opc = X86::MOV8mr;
- } else if (RC == &X86::GR64_NOREXRegClass) {
+ } else if (RC == &X86::GR64_NOREXRegClass ||
+ RC == &X86::GR64_NOREX_NOSPRegClass) {
Opc = X86::MOV64mr;
} else if (RC == &X86::GR32_NOREXRegClass) {
Opc = X86::MOV32mr;
@@ -1889,8 +1864,7 @@ static unsigned getStoreRegOpcode(unsigned SrcReg,
} else if (RC == &X86::VR64RegClass) {
Opc = X86::MMX_MOVQ64mr;
} else {
- assert(0 && "Unknown regclass");
- abort();
+ llvm_unreachable("Unknown regclass");
}
return Opc;
@@ -1914,6 +1888,8 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
bool isKill,
SmallVectorImpl<MachineOperand> &Addr,
const TargetRegisterClass *RC,
+ MachineInstr::mmo_iterator MMOBegin,
+ MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
bool isAligned = (RI.getStackAlignment() >= 16) ||
RI.needsStackRealignment(MF);
@@ -1923,6 +1899,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
for (unsigned i = 0, e = Addr.size(); i != e; ++i)
MIB.addOperand(Addr[i]);
MIB.addReg(SrcReg, getKillRegState(isKill));
+ (*MIB).setMemRefs(MMOBegin, MMOEnd);
NewMIs.push_back(MIB);
}
@@ -1931,9 +1908,9 @@ static unsigned getLoadRegOpcode(unsigned DestReg,
bool isStackAligned,
const TargetMachine &TM) {
unsigned Opc = 0;
- if (RC == &X86::GR64RegClass) {
+ if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) {
Opc = X86::MOV64rm;
- } else if (RC == &X86::GR32RegClass) {
+ } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) {
Opc = X86::MOV32rm;
} else if (RC == &X86::GR16RegClass) {
Opc = X86::MOV16rm;
@@ -1958,7 +1935,8 @@ static unsigned getLoadRegOpcode(unsigned DestReg,
Opc = X86::MOV8rm_NOREX;
else
Opc = X86::MOV8rm;
- } else if (RC == &X86::GR64_NOREXRegClass) {
+ } else if (RC == &X86::GR64_NOREXRegClass ||
+ RC == &X86::GR64_NOREX_NOSPRegClass) {
Opc = X86::MOV64rm;
} else if (RC == &X86::GR32_NOREXRegClass) {
Opc = X86::MOV32rm;
@@ -1982,8 +1960,7 @@ static unsigned getLoadRegOpcode(unsigned DestReg,
} else if (RC == &X86::VR64RegClass) {
Opc = X86::MMX_MOVQ64rm;
} else {
- assert(0 && "Unknown regclass");
- abort();
+ llvm_unreachable("Unknown regclass");
}
return Opc;
@@ -2005,6 +1982,8 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
SmallVectorImpl<MachineOperand> &Addr,
const TargetRegisterClass *RC,
+ MachineInstr::mmo_iterator MMOBegin,
+ MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
bool isAligned = (RI.getStackAlignment() >= 16) ||
RI.needsStackRealignment(MF);
@@ -2013,6 +1992,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
for (unsigned i = 0, e = Addr.size(); i != e; ++i)
MIB.addOperand(Addr[i]);
+ (*MIB).setMemRefs(MMOBegin, MMOEnd);
NewMIs.push_back(MIB);
}
@@ -2026,9 +2006,11 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
if (MI != MBB.end()) DL = MI->getDebugLoc();
bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
+ bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64();
unsigned SlotSize = is64Bit ? 8 : 4;
MachineFunction &MF = *MBB.getParent();
+ unsigned FPReg = RI.getFrameRegister(MF);
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
unsigned CalleeFrameSize = 0;
@@ -2038,10 +2020,12 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
const TargetRegisterClass *RegClass = CSI[i-1].getRegClass();
// Add the callee-saved register as live-in. It's killed at the spill.
MBB.addLiveIn(Reg);
- if (RegClass != &X86::VR128RegClass) {
+ if (Reg == FPReg)
+ // X86RegisterInfo::emitPrologue will handle spilling the frame register.
+ continue;
+ if (RegClass != &X86::VR128RegClass && !isWin64) {
CalleeFrameSize += SlotSize;
- BuildMI(MBB, MI, DL, get(Opc))
- .addReg(Reg, RegState::Kill);
+ BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill);
} else {
storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass);
}
@@ -2060,13 +2044,18 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
DebugLoc DL = DebugLoc::getUnknownLoc();
if (MI != MBB.end()) DL = MI->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ unsigned FPReg = RI.getFrameRegister(MF);
bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
-
+ bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64();
unsigned Opc = is64Bit ? X86::POP64r : X86::POP32r;
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
+ if (Reg == FPReg)
+ // X86RegisterInfo::emitEpilogue will handle restoring the frame register.
+ continue;
const TargetRegisterClass *RegClass = CSI[i].getRegClass();
- if (RegClass != &X86::VR128RegClass) {
+ if (RegClass != &X86::VR128RegClass && !isWin64) {
BuildMI(MBB, MI, DL, get(Opc), Reg);
} else {
loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass);
@@ -2143,8 +2132,9 @@ static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
MachineInstr*
X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI, unsigned i,
- const SmallVectorImpl<MachineOperand> &MOs) const{
- const DenseMap<unsigned*, unsigned> *OpcodeTablePtr = NULL;
+ const SmallVectorImpl<MachineOperand> &MOs,
+ unsigned Size, unsigned Align) const {
+ const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL;
bool isTwoAddrFold = false;
unsigned NumOps = MI->getDesc().getNumOperands();
bool isTwoAddr = NumOps > 1 &&
@@ -2165,8 +2155,6 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI);
else if (MI->getOpcode() == X86::MOV32r0)
NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI);
- else if (MI->getOpcode() == X86::MOV64r0)
- NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI);
else if (MI->getOpcode() == X86::MOV8r0)
NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI);
if (NewMI)
@@ -2182,60 +2170,82 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// If table selected...
if (OpcodeTablePtr) {
// Find the Opcode to fuse
- DenseMap<unsigned*, unsigned>::iterator I =
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
OpcodeTablePtr->find((unsigned*)MI->getOpcode());
if (I != OpcodeTablePtr->end()) {
+ unsigned Opcode = I->second.first;
+ unsigned MinAlign = I->second.second;
+ if (Align < MinAlign)
+ return NULL;
+ bool NarrowToMOV32rm = false;
+ if (Size) {
+ unsigned RCSize = MI->getDesc().OpInfo[i].getRegClass(&RI)->getSize();
+ if (Size < RCSize) {
+ // Check if it's safe to fold the load. If the size of the object is
+ // narrower than the load width, then it's not.
+ if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
+ return NULL;
+ // If this is a 64-bit load, but the spill slot is only 32 bits wide,
+ // we can do a 32-bit load, which is implicitly zero-extended. This is
+ // likely due to LiveIntervalAnalysis rematerializing a load from a
+ // stack slot.
+ if (MI->getOperand(0).getSubReg() || MI->getOperand(1).getSubReg())
+ return NULL;
+ Opcode = X86::MOV32rm;
+ NarrowToMOV32rm = true;
+ }
+ }
+
if (isTwoAddrFold)
- NewMI = FuseTwoAddrInst(MF, I->second, MOs, MI, *this);
+ NewMI = FuseTwoAddrInst(MF, Opcode, MOs, MI, *this);
else
- NewMI = FuseInst(MF, I->second, i, MOs, MI, *this);
+ NewMI = FuseInst(MF, Opcode, i, MOs, MI, *this);
+
+ if (NarrowToMOV32rm) {
+ // This is the special case where we use a MOV32rm to load a 32-bit
+ // value and zero-extend the top bits; change the destination register
+ // to a 32-bit one.
+ unsigned DstReg = NewMI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ NewMI->getOperand(0).setReg(RI.getSubReg(DstReg,
+ 4/*x86_subreg_32bit*/));
+ else
+ NewMI->getOperand(0).setSubReg(4/*x86_subreg_32bit*/);
+ }
return NewMI;
}
}
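
The narrowing rule added above can be summarized as a standalone sketch (illustrative names, not LLVM's API; in the patch the check is skipped entirely when the object size is unknown, i.e. Size is 0). Folding is refused whenever the stack object is narrower than the register, except for the one safe case of a plain MOV64rm over a 4-byte slot, which is rewritten to MOV32rm and relies on the implicit zero-extension of 32-bit loads on x86-64:

#include <cassert>

enum FoldAction { FoldAsIs, NarrowToMOV32rmAction, Reject };

FoldAction classifyFold(bool IsMOV64rm, unsigned RCSize, unsigned ObjSize,
                        bool HasSubReg) {
  if (ObjSize >= RCSize)
    return FoldAsIs;      // object is wide enough: fold directly
  if (!IsMOV64rm || RCSize != 8 || ObjSize != 4 || HasSubReg)
    return Reject;        // narrower object: unsafe in general
  return NarrowToMOV32rmAction; // 64->32 narrowing, implicitly zero-extended
}

int main() {
  assert(classifyFold(true, 8, 4, false) == NarrowToMOV32rmAction);
  assert(classifyFold(false, 8, 4, false) == Reject);
}
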
// No fusion
if (PrintFailedFusing)
- cerr << "We failed to fuse operand " << i << " in " << *MI;
+ errs() << "We failed to fuse operand " << i << " in " << *MI;
return NULL;
}
MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
+ const SmallVectorImpl<unsigned> &Ops,
int FrameIndex) const {
// Check switch flag
if (NoFusing) return NULL;
const MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned Size = MFI->getObjectSize(FrameIndex);
unsigned Alignment = MFI->getObjectAlignment(FrameIndex);
- // FIXME: Move alignment requirement into tables?
- if (Alignment < 16) {
- switch (MI->getOpcode()) {
- default: break;
- // Not always safe to fold movsd into these instructions since their load
- // folding variants expects the address to be 16 byte aligned.
- case X86::FsANDNPDrr:
- case X86::FsANDNPSrr:
- case X86::FsANDPDrr:
- case X86::FsANDPSrr:
- case X86::FsORPDrr:
- case X86::FsORPSrr:
- case X86::FsXORPDrr:
- case X86::FsXORPSrr:
- return NULL;
- }
- }
-
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
unsigned NewOpc = 0;
+ unsigned RCSize = 0;
switch (MI->getOpcode()) {
default: return NULL;
- case X86::TEST8rr: NewOpc = X86::CMP8ri; break;
- case X86::TEST16rr: NewOpc = X86::CMP16ri; break;
- case X86::TEST32rr: NewOpc = X86::CMP32ri; break;
- case X86::TEST64rr: NewOpc = X86::CMP64ri32; break;
+ case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break;
+ case X86::TEST16rr: NewOpc = X86::CMP16ri; RCSize = 2; break;
+ case X86::TEST32rr: NewOpc = X86::CMP32ri; RCSize = 4; break;
+ case X86::TEST64rr: NewOpc = X86::CMP64ri32; RCSize = 8; break;
}
+ // Check if it's safe to fold the load. If the size of the object is
+ // narrower than the load width, then it's not.
+ if (Size < RCSize)
+ return NULL;
// Change to CMPXXri r, 0 first.
MI->setDesc(get(NewOpc));
MI->getOperand(1).ChangeToImmediate(0);
@@ -2244,12 +2254,12 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
SmallVector<MachineOperand,4> MOs;
MOs.push_back(MachineOperand::CreateFI(FrameIndex));
- return foldMemoryOperandImpl(MF, MI, Ops[0], MOs);
+ return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment);
}
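
An illustrative example of the TEST-to-CMP rewrite performed just above (the assembly is for explanation only; the size check mirrors the new "Size < RCSize" rejection):

#include <cstdio>

// Before folding (register form):      After folding (memory form):
//   movl  8(%esp), %eax                  cmpl  $0, 8(%esp)
//   testl %eax, %eax
bool canFoldTestOverSlot(unsigned RegBytes, unsigned SlotBytes) {
  return SlotBytes >= RegBytes; // the slot must be at least register-width
}

int main() {
  std::printf("%d\n", canFoldTestOverSlot(4, 4)); // 1: TEST32rr -> CMP32mi 0
}
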
MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
+ const SmallVectorImpl<unsigned> &Ops,
MachineInstr *LoadMI) const {
// Check switch flag
if (NoFusing) return NULL;
@@ -2257,26 +2267,22 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// Determine the alignment of the load.
unsigned Alignment = 0;
if (LoadMI->hasOneMemOperand())
- Alignment = LoadMI->memoperands_begin()->getAlignment();
-
- // FIXME: Move alignment requirement into tables?
- if (Alignment < 16) {
- switch (MI->getOpcode()) {
- default: break;
- // Not always safe to fold movsd into these instructions since their load
- // folding variants expects the address to be 16 byte aligned.
- case X86::FsANDNPDrr:
- case X86::FsANDNPSrr:
- case X86::FsANDPDrr:
- case X86::FsANDPSrr:
- case X86::FsORPDrr:
- case X86::FsORPSrr:
- case X86::FsXORPDrr:
- case X86::FsXORPSrr:
- return NULL;
+ Alignment = (*LoadMI->memoperands_begin())->getAlignment();
+ else
+ switch (LoadMI->getOpcode()) {
+ case X86::V_SET0:
+ case X86::V_SETALLONES:
+ Alignment = 16;
+ break;
+ case X86::FsFLD0SD:
+ Alignment = 8;
+ break;
+ case X86::FsFLD0SS:
+ Alignment = 4;
+ break;
+ default:
+ llvm_unreachable("Don't know how to fold this instruction!");
}
- }
-
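
The fallback alignments chosen above follow directly from the constant each pseudo will be materialized as; a standalone sketch with an illustrative enum (not LLVM's opcode values):

enum PseudoLoad { V_SET0, V_SETALLONES, FsFLD0SD, FsFLD0SS };

// The alignment of a constant-materializing pseudo is the size of the
// constant it becomes: a 16-byte all-zero/all-ones vector, an 8-byte
// double, or a 4-byte float.
unsigned pseudoLoadAlignment(PseudoLoad P) {
  switch (P) {
  case V_SET0:
  case V_SETALLONES: return 16;
  case FsFLD0SD:     return 8;
  case FsFLD0SS:     return 4;
  }
  return 0; // unreachable
}
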
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
unsigned NewOpc = 0;
switch (MI->getOpcode()) {
@@ -2293,28 +2299,40 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
return NULL;
SmallVector<MachineOperand,X86AddrNumOperands> MOs;
- if (LoadMI->getOpcode() == X86::V_SET0 ||
- LoadMI->getOpcode() == X86::V_SETALLONES) {
+ switch (LoadMI->getOpcode()) {
+ case X86::V_SET0:
+ case X86::V_SETALLONES:
+ case X86::FsFLD0SD:
+ case X86::FsFLD0SS: {
// Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
// Create a constant-pool entry and operands to load from it.
// x86-32 PIC requires a PIC base register for constant pools.
unsigned PICBase = 0;
- if (TM.getRelocationModel() == Reloc::PIC_ &&
- !TM.getSubtarget<X86Subtarget>().is64Bit())
- // FIXME: PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF);
- // This doesn't work for several reasons.
- // 1. GlobalBaseReg may have been spilled.
- // 2. It may not be live at MI.
- return false;
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ PICBase = X86::RIP;
+ else
+ // FIXME: PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF);
+ // This doesn't work for several reasons.
+ // 1. GlobalBaseReg may have been spilled.
+ // 2. It may not be live at MI.
+ return NULL;
+ }
- // Create a v4i32 constant-pool entry.
+ // Create a constant-pool entry.
MachineConstantPool &MCP = *MF.getConstantPool();
- const VectorType *Ty = VectorType::get(Type::Int32Ty, 4);
- Constant *C = LoadMI->getOpcode() == X86::V_SET0 ?
- ConstantVector::getNullValue(Ty) :
- ConstantVector::getAllOnesValue(Ty);
- unsigned CPI = MCP.getConstantPoolIndex(C, 16);
+ const Type *Ty;
+ if (LoadMI->getOpcode() == X86::FsFLD0SS)
+ Ty = Type::getFloatTy(MF.getFunction()->getContext());
+ else if (LoadMI->getOpcode() == X86::FsFLD0SD)
+ Ty = Type::getDoubleTy(MF.getFunction()->getContext());
+ else
+ Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
+ Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ?
+ Constant::getAllOnesValue(Ty) :
+ Constant::getNullValue(Ty);
+ unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
// Create operands to load from the constant pool entry.
MOs.push_back(MachineOperand::CreateReg(PICBase, false));
@@ -2322,13 +2340,17 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MOs.push_back(MachineOperand::CreateReg(0, false));
MOs.push_back(MachineOperand::CreateCPI(CPI, 0));
MOs.push_back(MachineOperand::CreateReg(0, false));
- } else {
+ break;
+ }
+ default: {
// Folding a normal load. Just copy the load's address operands.
unsigned NumOps = LoadMI->getDesc().getNumOperands();
for (unsigned i = NumOps - X86AddrNumOperands; i != NumOps; ++i)
MOs.push_back(LoadMI->getOperand(i));
+ break;
+ }
}
- return foldMemoryOperandImpl(MF, MI, Ops[0], MOs);
+ return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment);
}
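
For reference, the five operands pushed onto MOs above form a standard x86 memory reference, base + scale * index + displacement plus a segment override. A sketch of the layout with illustrative types (not LLVM's MachineOperand):

struct X86Address {
  unsigned BaseReg;  // PICBase: X86::RIP in 64-bit PIC mode, 0 otherwise
  unsigned Scale;    // 1
  unsigned IndexReg; // 0 (none)
  int      Disp;     // constant-pool index, relocated to the entry's address
  unsigned SegReg;   // 0 (default segment)
};
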
@@ -2360,15 +2382,14 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
// Folding a memory location into the two-address part of a two-address
// instruction is different than folding it other places. It requires
// replacing the *two* registers with the memory location.
- const DenseMap<unsigned*, unsigned> *OpcodeTablePtr = NULL;
+ const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL;
if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
OpcodeTablePtr = &RegOp2MemOpTable2Addr;
} else if (OpNum == 0) { // If operand 0
switch (Opc) {
+ case X86::MOV8r0:
case X86::MOV16r0:
case X86::MOV32r0:
- case X86::MOV64r0:
- case X86::MOV8r0:
return true;
default: break;
}
@@ -2381,7 +2402,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
if (OpcodeTablePtr) {
// Find the Opcode to fuse
- DenseMap<unsigned*, unsigned>::iterator I =
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
OpcodeTablePtr->find((unsigned*)Opc);
if (I != OpcodeTablePtr->end())
return true;
@@ -2410,8 +2431,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
const TargetInstrDesc &TID = get(Opc);
const TargetOperandInfo &TOI = TID.OpInfo[Index];
- const TargetRegisterClass *RC = TOI.isLookupPtrRegClass()
- ? RI.getPointerRegClass() : RI.getRegClass(TOI.RegClass);
+ const TargetRegisterClass *RC = TOI.getRegClass(&RI);
SmallVector<MachineOperand, X86AddrNumOperands> AddrOps;
SmallVector<MachineOperand,2> BeforeOps;
SmallVector<MachineOperand,2> AfterOps;
@@ -2430,7 +2450,11 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
// Emit the load instruction.
if (UnfoldLoad) {
- loadRegFromAddr(MF, Reg, AddrOps, RC, NewMIs);
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractLoadMemRefs(MI->memoperands_begin(),
+ MI->memoperands_end());
+ loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs);
if (UnfoldStore) {
// Address operands cannot be marked isKill.
for (unsigned i = 1; i != 1 + X86AddrNumOperands; ++i) {
@@ -2489,10 +2513,12 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
// Emit the store instruction.
if (UnfoldStore) {
- const TargetOperandInfo &DstTOI = TID.OpInfo[0];
- const TargetRegisterClass *DstRC = DstTOI.isLookupPtrRegClass()
- ? RI.getPointerRegClass() : RI.getRegClass(DstTOI.RegClass);
- storeRegToAddr(MF, Reg, true, AddrOps, DstRC, NewMIs);
+ const TargetRegisterClass *DstRC = TID.OpInfo[0].getRegClass(&RI);
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractStoreMemRefs(MI->memoperands_begin(),
+ MI->memoperands_end());
+ storeRegToAddr(MF, Reg, true, AddrOps, DstRC,
+ MMOs.first, MMOs.second, NewMIs);
}
return true;
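
A conceptual model of the extractLoadMemRefs / extractStoreMemRefs calls used above (not LLVM's API): the folded instruction carries memory references for both its load and its store, and unfolding must hand each new instruction only the references describing its own accesses.

#include <utility>
#include <vector>

struct MemRef { bool IsLoad, IsStore; };

std::pair<std::vector<MemRef>, std::vector<MemRef>>
splitMemRefs(const std::vector<MemRef> &Refs) {
  std::vector<MemRef> Loads, Stores;
  for (const MemRef &R : Refs) {
    if (R.IsLoad)  Loads.push_back(R);  // re-attached to the new load
    if (R.IsStore) Stores.push_back(R); // re-attached to the new store
  }
  return {Loads, Stores};
}
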
@@ -2513,9 +2539,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
bool FoldedLoad = I->second.second & (1 << 4);
bool FoldedStore = I->second.second & (1 << 5);
const TargetInstrDesc &TID = get(Opc);
- const TargetOperandInfo &TOI = TID.OpInfo[Index];
- const TargetRegisterClass *RC = TOI.isLookupPtrRegClass()
- ? RI.getPointerRegClass() : RI.getRegClass(TOI.RegClass);
+ const TargetRegisterClass *RC = TID.OpInfo[Index].getRegClass(&RI);
unsigned NumDefs = TID.NumDefs;
std::vector<SDValue> AddrOps;
std::vector<SDValue> BeforeOps;
@@ -2536,35 +2560,40 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
// Emit the load instruction.
SDNode *Load = 0;
- const MachineFunction &MF = DAG.getMachineFunction();
+ MachineFunction &MF = DAG.getMachineFunction();
if (FoldedLoad) {
- MVT VT = *RC->vt_begin();
+ EVT VT = *RC->vt_begin();
bool isAligned = (RI.getStackAlignment() >= 16) ||
RI.needsStackRealignment(MF);
- Load = DAG.getTargetNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
- VT, MVT::Other, &AddrOps[0], AddrOps.size());
+ Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
+ VT, MVT::Other, &AddrOps[0], AddrOps.size());
NewNodes.push_back(Load);
+
+ // Preserve memory reference information.
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
+ cast<MachineSDNode>(N)->memoperands_end());
+ cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
}
// Emit the data processing instruction.
- std::vector<MVT> VTs;
+ std::vector<EVT> VTs;
const TargetRegisterClass *DstRC = 0;
if (TID.getNumDefs() > 0) {
- const TargetOperandInfo &DstTOI = TID.OpInfo[0];
- DstRC = DstTOI.isLookupPtrRegClass()
- ? RI.getPointerRegClass() : RI.getRegClass(DstTOI.RegClass);
+ DstRC = TID.OpInfo[0].getRegClass(&RI);
VTs.push_back(*DstRC->vt_begin());
}
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
- MVT VT = N->getValueType(i);
+ EVT VT = N->getValueType(i);
if (VT != MVT::Other && i >= (unsigned)TID.getNumDefs())
VTs.push_back(VT);
}
if (Load)
BeforeOps.push_back(SDValue(Load, 0));
std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps));
- SDNode *NewNode= DAG.getTargetNode(Opc, dl, VTs, &BeforeOps[0],
- BeforeOps.size());
+ SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, &BeforeOps[0],
+ BeforeOps.size());
NewNodes.push_back(NewNode);
// Emit the store instruction.
@@ -2574,11 +2603,18 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
AddrOps.push_back(Chain);
bool isAligned = (RI.getStackAlignment() >= 16) ||
RI.needsStackRealignment(MF);
- SDNode *Store = DAG.getTargetNode(getStoreRegOpcode(0, DstRC,
- isAligned, TM),
- dl, MVT::Other,
- &AddrOps[0], AddrOps.size());
+ SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
+ isAligned, TM),
+ dl, MVT::Other,
+ &AddrOps[0], AddrOps.size());
NewNodes.push_back(Store);
+
+ // Preserve memory reference information.
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
+ cast<MachineSDNode>(N)->memoperands_end());
+ cast<MachineSDNode>(Store)->setMemRefs(MMOs.first, MMOs.second);
}
return true;
@@ -2644,7 +2680,7 @@ unsigned X86InstrInfo::sizeOfImm(const TargetInstrDesc *Desc) {
case X86II::Imm16: return 2;
case X86II::Imm32: return 4;
case X86II::Imm64: return 8;
- default: assert(0 && "Immediate size not set!");
+ default: llvm_unreachable("Immediate size not set!");
return 0;
}
}
@@ -2829,7 +2865,7 @@ static unsigned getDisplacementFieldSize(const MachineOperand *RelocOp) {
} else if (RelocOp->isJTI()) {
FinalSize += sizeJumpTableAddress(false);
} else {
- assert(0 && "Unknown value to relocate!");
+ llvm_unreachable("Unknown value to relocate!");
}
return FinalSize;
}
@@ -2926,7 +2962,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
case X86II::GS:
++FinalSize;
break;
- default: assert(0 && "Invalid segment!");
+ default: llvm_unreachable("Invalid segment!");
case 0: break; // No segment override!
}
@@ -2946,6 +2982,10 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
case X86II::TA: // 0F 3A
Need0FPrefix = true;
break;
+ case X86II::TF: // F2 0F 38
+ ++FinalSize;
+ Need0FPrefix = true;
+ break;
case X86II::REP: break; // already handled.
case X86II::XS: // F3 0F
++FinalSize;
@@ -2959,7 +2999,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
++FinalSize;
break; // Two-byte opcode prefix
- default: assert(0 && "Invalid prefix!");
+ default: llvm_unreachable("Invalid prefix!");
case 0: break; // No prefix!
}
@@ -2981,6 +3021,9 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
case X86II::TA: // 0F 3A
++FinalSize;
break;
+ case X86II::TF: // F2 0F 38
+ ++FinalSize;
+ break;
}
// If this is a two-address instruction, skip one of the register operands.
@@ -2993,7 +3036,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
--NumOps;
switch (Desc->TSFlags & X86II::FormMask) {
- default: assert(0 && "Unknown FormMask value in X86 MachineCodeEmitter!");
+ default: llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!");
case X86II::Pseudo:
// Remember the current PC offset, this is the PIC relocation
// base address.
@@ -3002,16 +3045,16 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
break;
case TargetInstrInfo::INLINEASM: {
const MachineFunction *MF = MI.getParent()->getParent();
- const char *AsmStr = MI.getOperand(0).getSymbolName();
- const TargetAsmInfo* AI = MF->getTarget().getTargetAsmInfo();
- FinalSize += AI->getInlineAsmLength(AsmStr);
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+ FinalSize += TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(),
+ *MF->getTarget().getMCAsmInfo());
break;
}
case TargetInstrInfo::DBG_LABEL:
case TargetInstrInfo::EH_LABEL:
break;
case TargetInstrInfo::IMPLICIT_DEF:
- case TargetInstrInfo::DECLARE:
+ case TargetInstrInfo::KILL:
case X86::DWARF_LOC:
case X86::FP_REG_KILL:
break;
@@ -3038,7 +3081,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
} else if (MO.isImm()) {
FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
} else {
- assert(0 && "Unknown RawFrm operand!");
+ llvm_unreachable("Unknown RawFrm operand!");
}
}
break;
@@ -3196,10 +3239,10 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
}
if (!Desc->isVariadic() && CurOp != NumOps) {
- cerr << "Cannot determine size: ";
- MI.dump();
- cerr << '\n';
- abort();
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Cannot determine size: " << MI;
+ llvm_report_error(Msg.str());
}
@@ -3209,7 +3252,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
unsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
const TargetInstrDesc &Desc = MI->getDesc();
- bool IsPIC = (TM.getRelocationModel() == Reloc::PIC_);
+ bool IsPIC = TM.getRelocationModel() == Reloc::PIC_;
bool Is64BitMode = TM.getSubtargetImpl()->is64Bit();
unsigned Size = GetInstSizeWithDesc(*MI, &Desc, IsPIC, Is64BitMode);
if (Desc.getOpcode() == X86::MOVPC32r)
@@ -3245,12 +3288,11 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
// If we're using vanilla 'GOT' PIC style, we should use relative addressing
// not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
- if (TM.getRelocationModel() == Reloc::PIC_ &&
- TM.getSubtarget<X86Subtarget>().isPICStyleGOT()) {
+ if (TM.getSubtarget<X86Subtarget>().isPICStyleGOT()) {
GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
// Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
- .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_", 0,
+ .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
X86II::MO_GOT_ABSOLUTE_ADDRESS);
} else {
GlobalBaseReg = PC;
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 83f0194..2237c8b 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -69,35 +69,36 @@ namespace X86 {
/// instruction info tracks.
///
namespace X86II {
- enum {
+ /// Target Operand Flag enum.
+ enum TOF {
//===------------------------------------------------------------------===//
// X86 Specific MachineOperand flags.
- MO_NO_FLAG = 0,
+ MO_NO_FLAG,
/// MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a
/// relocation of:
/// SYMBOL_LABEL + [. - PICBASELABEL]
- MO_GOT_ABSOLUTE_ADDRESS = 1,
+ MO_GOT_ABSOLUTE_ADDRESS,
/// MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the
/// immediate should get the value of the symbol minus the PIC base label:
/// SYMBOL_LABEL - PICBASELABEL
- MO_PIC_BASE_OFFSET = 2,
+ MO_PIC_BASE_OFFSET,
/// MO_GOT - On a symbol operand this indicates that the immediate is the
/// offset to the GOT entry for the symbol name from the base of the GOT.
///
/// See the X86-64 ELF ABI supplement for more details.
/// SYMBOL_LABEL @GOT
- MO_GOT = 3,
+ MO_GOT,
/// MO_GOTOFF - On a symbol operand this indicates that the immediate is
/// the offset to the location of the symbol name from the base of the GOT.
///
/// See the X86-64 ELF ABI supplement for more details.
/// SYMBOL_LABEL @GOTOFF
- MO_GOTOFF = 4,
+ MO_GOTOFF,
/// MO_GOTPCREL - On a symbol operand this indicates that the immediate is
/// offset to the GOT entry for the symbol name from the current code
@@ -105,50 +106,115 @@ namespace X86II {
///
/// See the X86-64 ELF ABI supplement for more details.
/// SYMBOL_LABEL @GOTPCREL
- MO_GOTPCREL = 5,
+ MO_GOTPCREL,
/// MO_PLT - On a symbol operand this indicates that the immediate is
/// offset to the PLT entry of symbol name from the current code location.
///
/// See the X86-64 ELF ABI supplement for more details.
/// SYMBOL_LABEL @PLT
- MO_PLT = 6,
+ MO_PLT,
/// MO_TLSGD - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///
/// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @TLSGD
- MO_TLSGD = 7,
+ MO_TLSGD,
/// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///
/// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @GOTTPOFF
- MO_GOTTPOFF = 8,
+ MO_GOTTPOFF,
/// MO_INDNTPOFF - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///
/// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @INDNTPOFF
- MO_INDNTPOFF = 9,
+ MO_INDNTPOFF,
/// MO_TPOFF - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///
/// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @TPOFF
- MO_TPOFF = 10,
+ MO_TPOFF,
/// MO_NTPOFF - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///
/// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @NTPOFF
- MO_NTPOFF = 11,
+ MO_NTPOFF,
+
+ /// MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "__imp_FOO" symbol. This is used for
+ /// dllimport linkage on windows.
+ MO_DLLIMPORT,
+
+ /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "FOO$stub" symbol. This is used for calls
+ /// and jumps to external functions on Tiger and before.
+ MO_DARWIN_STUB,
+ /// MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "FOO$non_lazy_ptr" symbol, which is a
+ /// non-PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
+ MO_DARWIN_NONLAZY,
+
+ /// MO_DARWIN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this indicates
+ /// that the reference is actually to "FOO$non_lazy_ptr - PICBASE", which is
+ /// a PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
+ MO_DARWIN_NONLAZY_PIC_BASE,
+
+ /// MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this
+ /// indicates that the reference is actually to "FOO$non_lazy_ptr -PICBASE",
+ /// which is a PIC-base-relative reference to a hidden dyld lazy pointer
+ /// stub.
+ MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE
+ };
+}
+
+/// isGlobalStubReference - Return true if the specified TargetFlag operand is
+/// a reference to a stub for a global, not the global itself.
+inline static bool isGlobalStubReference(unsigned char TargetFlag) {
+ switch (TargetFlag) {
+ case X86II::MO_DLLIMPORT: // dllimport stub.
+ case X86II::MO_GOTPCREL: // rip-relative GOT reference.
+ case X86II::MO_GOT: // normal GOT reference.
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE: // Normal $non_lazy_ptr ref.
+ case X86II::MO_DARWIN_NONLAZY: // Normal $non_lazy_ptr ref.
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: // Hidden $non_lazy_ptr ref.
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// isGlobalRelativeToPICBase - Return true if the specified global value
+/// reference is relative to a 32-bit PIC base (X86ISD::GlobalBaseReg). If this
+/// is true, the addressing mode has the PIC base register added in (e.g. EBX).
+inline static bool isGlobalRelativeToPICBase(unsigned char TargetFlag) {
+ switch (TargetFlag) {
+ case X86II::MO_GOTOFF: // isPICStyleGOT: local global.
+ case X86II::MO_GOT: // isPICStyleGOT: other global.
+ case X86II::MO_PIC_BASE_OFFSET: // Darwin local global.
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE: // Darwin/32 external global.
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: // Darwin/32 hidden global.
+ return true;
+ default:
+ return false;
+ }
+}
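
A hedged usage sketch of the two predicates just defined: MachineOperand::getTargetFlags() is real LLVM API of this era, but the wrapper below is illustrative only. Addressing code uses (a) to decide whether the global's address must be loaded through a pointer slot, and (b) to decide whether the 32-bit PIC base register must be added into the address.

void classifyGlobalRef(unsigned char Flags, bool &ViaStub, bool &AddPICBase) {
  ViaStub    = isGlobalStubReference(Flags);     // GOT / __imp_ / $non_lazy_ptr
  AddPICBase = isGlobalRelativeToPICBase(Flags); // fold in the PIC base (e.g. EBX)
}
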
+
+/// X86II - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace X86II {
+ enum {
//===------------------------------------------------------------------===//
// Instruction encodings. These are the standard/most common forms for X86
// instructions.
@@ -249,6 +315,9 @@ namespace X86II {
// T8, TA - Prefix after the 0x0F prefix.
T8 = 13 << Op0Shift, TA = 14 << Op0Shift,
+
+ // TF - Prefix before and after 0x0F
+ TF = 15 << Op0Shift,
//===------------------------------------------------------------------===//
// REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
@@ -355,10 +424,10 @@ class X86InstrInfo : public TargetInstrInfoImpl {
/// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1,
/// RegOp2MemOpTable2 - Load / store folding opcode maps.
///
- DenseMap<unsigned*, unsigned> RegOp2MemOpTable2Addr;
- DenseMap<unsigned*, unsigned> RegOp2MemOpTable0;
- DenseMap<unsigned*, unsigned> RegOp2MemOpTable1;
- DenseMap<unsigned*, unsigned> RegOp2MemOpTable2;
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable2Addr;
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable0;
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable1;
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable2;
/// MemOp2RegOpTable - Load / store unfolding opcode map.
///
@@ -382,11 +451,11 @@ public:
unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
- bool isReallyTriviallyReMaterializable(const MachineInstr *MI) const;
+ bool isReallyTriviallyReMaterializable(const MachineInstr *MI,
+ AliasAnalysis *AA) const;
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- unsigned DestReg, const MachineInstr *Orig) const;
-
- bool isInvariantLoad(const MachineInstr *MI) const;
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig) const;
/// convertToThreeAddress - This method must be implemented by targets that
/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
@@ -430,6 +499,8 @@ public:
virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
SmallVectorImpl<MachineOperand> &Addr,
const TargetRegisterClass *RC,
+ MachineInstr::mmo_iterator MMOBegin,
+ MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const;
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
@@ -440,6 +511,8 @@ public:
virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
SmallVectorImpl<MachineOperand> &Addr,
const TargetRegisterClass *RC,
+ MachineInstr::mmo_iterator MMOBegin,
+ MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const;
virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
@@ -530,9 +603,10 @@ public:
private:
MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- unsigned OpNum,
- const SmallVectorImpl<MachineOperand> &MOs) const;
+ MachineInstr* MI,
+ unsigned OpNum,
+ const SmallVectorImpl<MachineOperand> &MOs,
+ unsigned Size, unsigned Alignment) const;
};
} // End llvm namespace
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 03df10d..30b57d8 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -56,6 +56,10 @@ def SDT_X86CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>,
def SDT_X86Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
+def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>,
+ SDTCisVT<1, iPTR>,
+ SDTCisVT<2, iPTR>]>;
+
def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
def SDTX86RdTsc : SDTypeProfile<0, 0, []>;
@@ -114,6 +118,11 @@ def X86AtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary,
def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
[SDNPHasChain, SDNPOptInFlag]>;
+def X86vastart_save_xmm_regs :
+ SDNode<"X86ISD::VASTART_SAVE_XMM_REGS",
+ SDT_X86VASTART_SAVE_XMM_REGS,
+ [SDNPHasChain]>;
+
def X86callseq_start :
SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart,
[SDNPHasChain, SDNPOutFlag]>;
@@ -124,9 +133,6 @@ def X86callseq_end :
def X86call : SDNode<"X86ISD::CALL", SDT_X86Call,
[SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>;
-def X86tailcall: SDNode<"X86ISD::TAILCALL", SDT_X86Call,
- [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>;
-
def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr,
[SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore]>;
def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr,
@@ -156,6 +162,9 @@ def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags>;
def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags>;
def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>;
def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>;
+def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags>;
+def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags>;
+def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags>;
def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
@@ -167,57 +176,80 @@ def i32imm_pcrel : Operand<i32> {
let PrintMethod = "print_pcrel_imm";
}
+// A version of ptr_rc which excludes SP, ESP, and RSP. This is used for
+// the index operand of an address, to conform to x86 encoding restrictions.
+def ptr_rc_nosp : PointerLikeRegClass<1>;
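
The encoding restriction motivating ptr_rc_nosp, as a standalone sketch: in an x86 SIB byte, the index field value 4 (the ESP/RSP encoding) means "no index register", so the stack pointer can never appear as an index.

#include <cstdint>

uint8_t encodeSIB(uint8_t scaleLog2, uint8_t index, uint8_t base) {
  // Callers must never pass 4 (ESP/RSP) as 'index'; hardware reads it as
  // "no index", which is why the register class above excludes SP/ESP/RSP.
  return uint8_t((scaleLog2 << 6) | ((index & 7) << 3) | (base & 7));
}
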
// *mem - Operand definitions for the funky X86 addressing mode operands.
//
+def X86MemAsmOperand : AsmOperandClass {
+ let Name = "Mem";
+ let SuperClass = ?;
+}
class X86MemOperand<string printMethod> : Operand<iPTR> {
let PrintMethod = printMethod;
- let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm, i8imm);
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
}
+def opaque32mem : X86MemOperand<"printopaquemem">;
+def opaque48mem : X86MemOperand<"printopaquemem">;
+def opaque80mem : X86MemOperand<"printopaquemem">;
+
def i8mem : X86MemOperand<"printi8mem">;
def i16mem : X86MemOperand<"printi16mem">;
def i32mem : X86MemOperand<"printi32mem">;
def i64mem : X86MemOperand<"printi64mem">;
def i128mem : X86MemOperand<"printi128mem">;
-def i256mem : X86MemOperand<"printi256mem">;
+//def i256mem : X86MemOperand<"printi256mem">;
def f32mem : X86MemOperand<"printf32mem">;
def f64mem : X86MemOperand<"printf64mem">;
def f80mem : X86MemOperand<"printf80mem">;
def f128mem : X86MemOperand<"printf128mem">;
-def f256mem : X86MemOperand<"printf256mem">;
+//def f256mem : X86MemOperand<"printf256mem">;
// A version of i8mem for use on x86-64 that uses GR64_NOREX instead of
// plain GR64, so that it doesn't potentially require a REX prefix.
def i8mem_NOREX : Operand<i64> {
let PrintMethod = "printi8mem";
- let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX, i32imm, i8imm);
+ let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX_NOSP, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
}
def lea32mem : Operand<i32> {
let PrintMethod = "printlea32mem";
- let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm);
+ let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm);
+ let ParserMatchClass = X86MemAsmOperand;
}
def SSECC : Operand<i8> {
let PrintMethod = "printSSECC";
}
-def piclabel: Operand<i32> {
- let PrintMethod = "printPICLabel";
+def ImmSExt8AsmOperand : AsmOperandClass {
+ let Name = "ImmSExt8";
+ let SuperClass = ImmAsmOperand;
}
// A couple of more descriptive operand definitions.
// 16-bits but only 8 bits are significant.
-def i16i8imm : Operand<i16>;
+def i16i8imm : Operand<i16> {
+ let ParserMatchClass = ImmSExt8AsmOperand;
+}
// 32-bits but only 8 bits are significant.
-def i32i8imm : Operand<i32>;
+def i32i8imm : Operand<i32> {
+ let ParserMatchClass = ImmSExt8AsmOperand;
+}
// Branch targets have OtherVT type and print as pc-relative values.
def brtarget : Operand<OtherVT> {
let PrintMethod = "print_pcrel_imm";
}
+def brtarget8 : Operand<OtherVT> {
+ let PrintMethod = "print_pcrel_imm";
+}
+
//===----------------------------------------------------------------------===//
// X86 Complex Pattern Definitions.
//
@@ -225,7 +257,8 @@ def brtarget : Operand<OtherVT> {
// Define X86 specific addressing mode.
def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], []>;
def lea32addr : ComplexPattern<i32, 4, "SelectLEAAddr",
- [add, sub, mul, shl, or, frameindex], []>;
+ [add, sub, mul, X86mul_imm, shl, or, frameindex],
+ []>;
def tls32addr : ComplexPattern<i32, 4, "SelectTLSADDRAddr",
[tglobaltlsaddr], []>;
@@ -246,8 +279,14 @@ def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
def In32BitMode : Predicate<"!Subtarget->is64Bit()">;
def In64BitMode : Predicate<"Subtarget->is64Bit()">;
+def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
+def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;
def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
-def NotSmallCode : Predicate<"TM.getCodeModel() != CodeModel::Small">;
+def KernelCode : Predicate<"TM.getCodeModel() == CodeModel::Kernel">;
+def FarData : Predicate<"TM.getCodeModel() != CodeModel::Small &&"
+ "TM.getCodeModel() != CodeModel::Kernel">;
+def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||"
+ "TM.getCodeModel() == CodeModel::Kernel">;
def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
def OptForSpeed : Predicate<"!OptForSize">;
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
@@ -484,15 +523,35 @@ def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
Requires<[In32BitMode]>;
}
+// x86-64 va_start lowering magic.
+let usesCustomDAGSchedInserter = 1 in
+def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
+ (outs),
+ (ins GR8:$al,
+ i64imm:$regsavefi, i64imm:$offset,
+ variable_ops),
+ "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
+ [(X86vastart_save_xmm_regs GR8:$al,
+ imm:$regsavefi,
+ imm:$offset)]>;
+
// Nop
-let neverHasSideEffects = 1 in
+let neverHasSideEffects = 1 in {
def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>;
+ def NOOPL : I<0x1f, MRM0m, (outs), (ins i32mem:$zero),
+ "nopl\t$zero", []>, TB;
+}
-// PIC base
+// Trap
+def INT3 : I<0xcc, RawFrm, (outs), (ins), "int 3", []>;
+def INT : I<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>;
+
+// PIC base construction. This expands to code that looks like this:
+// call $next_inst
+// popl %destreg
let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in
- def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins piclabel:$label),
- "call\t$label\n\t"
- "pop{l}\t$reg", []>;
+ def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label),
+ "", []>;
//===----------------------------------------------------------------------===//
// Control Flow Instructions...
@@ -506,7 +565,11 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
[(X86retflag 0)]>;
def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
"ret\t$amt",
- [(X86retflag imm:$amt)]>;
+ [(X86retflag timm:$amt)]>;
+ def LRET : I <0xCB, RawFrm, (outs), (ins),
+ "lret", []>;
+ def LRETI : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
+ "lret\t$amt", []>;
}
// All branches are RawFrm, Void, Branch, and Terminators
@@ -514,8 +577,10 @@ let isBranch = 1, isTerminator = 1 in
class IBr<bits<8> opcode, dag ins, string asm, list<dag> pattern> :
I<opcode, RawFrm, (outs), ins, asm, pattern>;
-let isBranch = 1, isBarrier = 1 in
+let isBranch = 1, isBarrier = 1 in {
def JMP : IBr<0xE9, (ins brtarget:$dst), "jmp\t$dst", [(br bb:$dst)]>;
+ def JMP8 : IBr<0xEB, (ins brtarget8:$dst), "jmp\t$dst", []>;
+}
// Indirect branches
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
@@ -523,10 +588,42 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
[(brind GR32:$dst)]>;
def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
[(brind (loadi32 addr:$dst))]>;
+
+ def FARJMP16i : Iseg16<0xEA, RawFrm, (outs),
+ (ins i16imm:$seg, i16imm:$off),
+ "ljmp{w}\t$seg, $off", []>, OpSize;
+ def FARJMP32i : Iseg32<0xEA, RawFrm, (outs),
+ (ins i16imm:$seg, i32imm:$off),
+ "ljmp{l}\t$seg, $off", []>;
+
+ def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst),
+ "ljmp{w}\t{*}$dst", []>, OpSize;
+ def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst),
+ "ljmp{l}\t{*}$dst", []>;
}
// Conditional branches
let Uses = [EFLAGS] in {
+// Short conditional jumps
+def JO8 : IBr<0x70, (ins brtarget8:$dst), "jo\t$dst", []>;
+def JNO8 : IBr<0x71, (ins brtarget8:$dst), "jno\t$dst", []>;
+def JB8 : IBr<0x72, (ins brtarget8:$dst), "jb\t$dst", []>;
+def JAE8 : IBr<0x73, (ins brtarget8:$dst), "jae\t$dst", []>;
+def JE8 : IBr<0x74, (ins brtarget8:$dst), "je\t$dst", []>;
+def JNE8 : IBr<0x75, (ins brtarget8:$dst), "jne\t$dst", []>;
+def JBE8 : IBr<0x76, (ins brtarget8:$dst), "jbe\t$dst", []>;
+def JA8 : IBr<0x77, (ins brtarget8:$dst), "ja\t$dst", []>;
+def JS8 : IBr<0x78, (ins brtarget8:$dst), "js\t$dst", []>;
+def JNS8 : IBr<0x79, (ins brtarget8:$dst), "jns\t$dst", []>;
+def JP8 : IBr<0x7A, (ins brtarget8:$dst), "jp\t$dst", []>;
+def JNP8 : IBr<0x7B, (ins brtarget8:$dst), "jnp\t$dst", []>;
+def JL8 : IBr<0x7C, (ins brtarget8:$dst), "jl\t$dst", []>;
+def JGE8 : IBr<0x7D, (ins brtarget8:$dst), "jge\t$dst", []>;
+def JLE8 : IBr<0x7E, (ins brtarget8:$dst), "jle\t$dst", []>;
+def JG8 : IBr<0x7F, (ins brtarget8:$dst), "jg\t$dst", []>;
+
+def JCXZ8 : IBr<0xE3, (ins brtarget8:$dst), "jcxz\t$dst", []>;
+
def JE : IBr<0x84, (ins brtarget:$dst), "je\t$dst",
[(X86brcond bb:$dst, X86_COND_E, EFLAGS)]>, TB;
def JNE : IBr<0x85, (ins brtarget:$dst), "jne\t$dst",
@@ -563,6 +660,12 @@ def JNO : IBr<0x81, (ins brtarget:$dst), "jno\t$dst",
[(X86brcond bb:$dst, X86_COND_NO, EFLAGS)]>, TB;
} // Uses = [EFLAGS]
+// Loop instructions
+
+def LOOP : I<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>;
+def LOOPE : I<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>;
+def LOOPNE : I<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>;
+
//===----------------------------------------------------------------------===//
// Call Instructions...
//
@@ -583,13 +686,26 @@ let isCall = 1 in
"call\t{*}$dst", [(X86call GR32:$dst)]>;
def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
"call\t{*}$dst", [(X86call (loadi32 addr:$dst))]>;
+
+ def FARCALL16i : Iseg16<0x9A, RawFrm, (outs),
+ (ins i16imm:$seg, i16imm:$off),
+ "lcall{w}\t$seg, $off", []>, OpSize;
+ def FARCALL32i : Iseg32<0x9A, RawFrm, (outs),
+ (ins i16imm:$seg, i32imm:$off),
+ "lcall{l}\t$seg, $off", []>;
+
+ def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst),
+ "lcall{w}\t{*}$dst", []>, OpSize;
+ def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst),
+ "lcall{l}\t{*}$dst", []>;
}
-// Tail call stuff.
+// Constructing a stack frame.
+
+def ENTER : I<0xC8, RawFrm, (outs), (ins i16imm:$len, i8imm:$lvl),
+ "enter\t$len, $lvl", []>;
-def TAILCALL : I<0, Pseudo, (outs), (ins),
- "#TAILCALL",
- []>;
+// Tail call stuff.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
def TCRETURNdi : I<0, Pseudo, (outs), (ins i32imm:$dst, i32imm:$offset, variable_ops),
@@ -620,11 +736,29 @@ def LEAVE : I<0xC9, RawFrm,
(outs), (ins), "leave", []>;
let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in {
-let mayLoad = 1 in
-def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>;
+let mayLoad = 1 in {
+def POP16r : I<0x58, AddRegFrm, (outs GR16:$reg), (ins), "pop{w}\t$reg", []>,
+ OpSize;
+def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>;
+def POP16rmr: I<0x8F, MRM0r, (outs GR16:$reg), (ins), "pop{w}\t$reg", []>,
+ OpSize;
+def POP16rmm: I<0x8F, MRM0m, (outs i16mem:$dst), (ins), "pop{w}\t$dst", []>,
+ OpSize;
+def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>;
+def POP32rmm: I<0x8F, MRM0m, (outs i32mem:$dst), (ins), "pop{l}\t$dst", []>;
+}
-let mayStore = 1 in
+let mayStore = 1 in {
+def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[]>,
+ OpSize;
def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>;
+def PUSH16rmr: I<0xFF, MRM6r, (outs), (ins GR16:$reg), "push{w}\t$reg",[]>,
+ OpSize;
+def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[]>,
+ OpSize;
+def PUSH32rmr: I<0xFF, MRM6r, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>;
+def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[]>;
+}
}
let Defs = [ESP], Uses = [ESP], neverHasSideEffects = 1, mayStore = 1 in {
@@ -710,6 +844,14 @@ let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI] in
def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
[(X86rep_stos i32)]>, REP;
+def SCAS8 : I<0xAE, RawFrm, (outs), (ins), "scas{b}", []>;
+def SCAS16 : I<0xAF, RawFrm, (outs), (ins), "scas{w}", []>, OpSize;
+def SCAS32 : I<0xAF, RawFrm, (outs), (ins), "scas{l}", []>;
+
+def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmps{b}", []>;
+def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmps{w}", []>, OpSize;
+def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l}", []>;
+
let Defs = [RAX, RDX] in
def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>,
TB;
@@ -718,6 +860,18 @@ let isBarrier = 1, hasCtrlDep = 1 in {
def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB;
}
+def SYSCALL : I<0x05, RawFrm,
+ (outs), (ins), "syscall", []>, TB;
+def SYSRET : I<0x07, RawFrm,
+ (outs), (ins), "sysret", []>, TB;
+def SYSENTER : I<0x34, RawFrm,
+ (outs), (ins), "sysenter", []>, TB;
+def SYSEXIT : I<0x35, RawFrm,
+ (outs), (ins), "sysexit", []>, TB;
+
+def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>;
+
+
//===----------------------------------------------------------------------===//
// Input/Output Instructions...
//
@@ -793,6 +947,30 @@ def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
[(store (i32 imm:$src), addr:$dst)]>;
+def MOV8o8a : Ii8 <0xA0, RawFrm, (outs), (ins i8imm:$src),
+ "mov{b}\t{$src, %al|%al, $src}", []>;
+def MOV16o16a : Ii16 <0xA1, RawFrm, (outs), (ins i16imm:$src),
+ "mov{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins i32imm:$src),
+ "mov{l}\t{$src, %eax|%eax, $src}", []>;
+
+def MOV8ao8 : Ii8 <0xA2, RawFrm, (outs i8imm:$dst), (ins),
+ "mov{b}\t{%al, $dst|$dst, %al}", []>;
+def MOV16ao16 : Ii16 <0xA3, RawFrm, (outs i16imm:$dst), (ins),
+ "mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize;
+def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs i32imm:$dst), (ins),
+ "mov{l}\t{%eax, $dst|$dst, %eax}", []>;
+
+// Moves to and from segment registers
+def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>;
+def MOV16ms : I<0x8C, MRMDestMem, (outs i16mem:$dst), (ins SEGMENT_REG:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>;
+def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>;
+def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>;
+
let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in {
def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
@@ -950,6 +1128,20 @@ let isTwoAddress = 1 in {
// Conditional moves
let Uses = [EFLAGS] in {
+
+// X86 doesn't have 8-bit conditional moves. Use a customDAGSchedInserter to
+// emit control flow. An alternative to this is to mark i8 SELECT as Promote,
+// however that requires promoting the operands, and can induce additional
+// i8 register pressure. Note that CMOV_GR8 is conservatively considered to
+// clobber EFLAGS, because if one of the operands is zero, the expansion
+// could involve an xor.
+let usesCustomDAGSchedInserter = 1, isTwoAddress = 0, Defs = [EFLAGS] in
+def CMOV_GR8 : I<0, Pseudo,
+ (outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond),
+ "#CMOV_GR8 PSEUDO!",
+ [(set GR8:$dst, (X86cmov GR8:$src1, GR8:$src2,
+ imm:$cond, EFLAGS))]>;
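
What the custom inserter conceptually expands CMOV_GR8 into, as a sketch (the actual MachineBasicBlock surgery is elsewhere): since x86 has no 8-bit cmov, the select becomes a branch diamond, and because an operand produced by an EFLAGS-clobbering zero idiom may force an xor into the expansion, the pseudo conservatively lists EFLAGS as a def.

unsigned char cmov_gr8(bool CondHolds, unsigned char TrueVal,
                       unsigned char FalseVal) {
  if (CondHolds)   // becomes a jCC around the copy in the emitted diamond
    return TrueVal;
  return FalseVal; // fall-through copy block
}
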
+
let isCommutable = 1 in {
def CMOVB16rr : I<0x42, MRMSrcReg, // if <u, GR16 = GR16
(outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
@@ -1549,6 +1741,14 @@ let isTwoAddress = 0 in {
"and{l}\t{$src, $dst|$dst, $src}",
[(store (and (load addr:$dst), i32immSExt8:$src), addr:$dst),
(implicit EFLAGS)]>;
+
+ def AND8i8 : Ii8<0x24, RawFrm, (outs), (ins i8imm:$src),
+ "and{b}\t{$src, %al|%al, $src}", []>;
+ def AND16i16 : Ii16<0x25, RawFrm, (outs), (ins i16imm:$src),
+ "and{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+ def AND32i32 : Ii32<0x25, RawFrm, (outs), (ins i32imm:$src),
+ "and{l}\t{$src, %eax|%eax, $src}", []>;
+
}
@@ -1635,6 +1835,13 @@ let isTwoAddress = 0 in {
"or{l}\t{$src, $dst|$dst, $src}",
[(store (or (load addr:$dst), i32immSExt8:$src), addr:$dst),
(implicit EFLAGS)]>;
+
+ def OR8i8 : Ii8 <0x0C, RawFrm, (outs), (ins i8imm:$src),
+ "or{b}\t{$src, %al|%al, $src}", []>;
+ def OR16i16 : Ii16 <0x0D, RawFrm, (outs), (ins i16imm:$src),
+ "or{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+ def OR32i32 : Ii32 <0x0D, RawFrm, (outs), (ins i32imm:$src),
+ "or{l}\t{$src, %eax|%eax, $src}", []>;
} // isTwoAddress = 0
@@ -1744,6 +1951,13 @@ let isTwoAddress = 0 in {
"xor{l}\t{$src, $dst|$dst, $src}",
[(store (xor (load addr:$dst), i32immSExt8:$src), addr:$dst),
(implicit EFLAGS)]>;
+
+ def XOR8i8 : Ii8 <0x34, RawFrm, (outs), (ins i8imm:$src),
+ "xor{b}\t{$src, %al|%al, $src}", []>;
+ def XOR16i16 : Ii16 <0x35, RawFrm, (outs), (ins i16imm:$src),
+ "xor{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+ def XOR32i32 : Ii32 <0x35, RawFrm, (outs), (ins i32imm:$src),
+ "xor{l}\t{$src, %eax|%eax, $src}", []>;
} // isTwoAddress = 0
} // Defs = [EFLAGS]
@@ -1771,8 +1985,17 @@ def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
"shl{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))]>;
-// NOTE: We don't use shifts of a register by one, because 'add reg,reg' is
-// cheaper.
+
+// NOTE: We don't include patterns for shifts of a register by one, because
+// 'add reg,reg' is cheaper.
+
+def SHL8r1 : I<0xD0, MRM4r, (outs GR8:$dst), (ins GR8:$src1),
+ "shl{b}\t$dst", []>;
+def SHL16r1 : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
+ "shl{w}\t$dst", []>, OpSize;
+def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
+ "shl{l}\t$dst", []>;
+
} // isConvertibleToThreeAddress = 1
let isTwoAddress = 0 in {
@@ -1951,6 +2174,97 @@ let isTwoAddress = 0 in {
}
// Rotate instructions
+
+def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src),
+ "rcl{b}\t{1, $dst|$dst, 1}", []>;
+def RCL8m1 : I<0xD0, MRM2m, (outs i8mem:$dst), (ins i8mem:$src),
+ "rcl{b}\t{1, $dst|$dst, 1}", []>;
+let Uses = [CL] in {
+def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src),
+ "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
+def RCL8mCL : I<0xD2, MRM2m, (outs i8mem:$dst), (ins i8mem:$src),
+ "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
+}
+def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt),
+ "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCL8mi : Ii8<0xC0, MRM2m, (outs i8mem:$dst), (ins i8mem:$src, i8imm:$cnt),
+ "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+
+def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src),
+ "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize;
+def RCL16m1 : I<0xD1, MRM2m, (outs i16mem:$dst), (ins i16mem:$src),
+ "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize;
+let Uses = [CL] in {
+def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src),
+ "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+def RCL16mCL : I<0xD3, MRM2m, (outs i16mem:$dst), (ins i16mem:$src),
+ "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+}
+def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt),
+ "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+def RCL16mi : Ii8<0xC1, MRM2m, (outs i16mem:$dst), (ins i16mem:$src, i8imm:$cnt),
+ "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+
+def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src),
+ "rcl{l}\t{1, $dst|$dst, 1}", []>;
+def RCL32m1 : I<0xD1, MRM2m, (outs i32mem:$dst), (ins i32mem:$src),
+ "rcl{l}\t{1, $dst|$dst, 1}", []>;
+let Uses = [CL] in {
+def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src),
+ "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
+def RCL32mCL : I<0xD3, MRM2m, (outs i32mem:$dst), (ins i32mem:$src),
+ "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
+}
+def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt),
+ "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCL32mi : Ii8<0xC1, MRM2m, (outs i32mem:$dst), (ins i32mem:$src, i8imm:$cnt),
+ "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+
+def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src),
+ "rcr{b}\t{1, $dst|$dst, 1}", []>;
+def RCR8m1 : I<0xD0, MRM3m, (outs i8mem:$dst), (ins i8mem:$src),
+ "rcr{b}\t{1, $dst|$dst, 1}", []>;
+let Uses = [CL] in {
+def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src),
+ "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
+def RCR8mCL : I<0xD2, MRM3m, (outs i8mem:$dst), (ins i8mem:$src),
+ "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
+}
+def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt),
+ "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCR8mi : Ii8<0xC0, MRM3m, (outs i8mem:$dst), (ins i8mem:$src, i8imm:$cnt),
+ "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+
+def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src),
+ "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize;
+def RCR16m1 : I<0xD1, MRM3m, (outs i16mem:$dst), (ins i16mem:$src),
+ "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize;
+let Uses = [CL] in {
+def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src),
+ "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+def RCR16mCL : I<0xD3, MRM3m, (outs i16mem:$dst), (ins i16mem:$src),
+ "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+}
+def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt),
+ "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+def RCR16mi : Ii8<0xC1, MRM3m, (outs i16mem:$dst), (ins i16mem:$src, i8imm:$cnt),
+ "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+
+def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src),
+ "rcr{l}\t{1, $dst|$dst, 1}", []>;
+def RCR32m1 : I<0xD1, MRM3m, (outs i32mem:$dst), (ins i32mem:$src),
+ "rcr{l}\t{1, $dst|$dst, 1}", []>;
+let Uses = [CL] in {
+def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src),
+ "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
+def RCR32mCL : I<0xD3, MRM3m, (outs i32mem:$dst), (ins i32mem:$src),
+ "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
+}
+def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt),
+ "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCR32mi : Ii8<0xC1, MRM3m, (outs i32mem:$dst), (ins i32mem:$src, i8imm:$cnt),
+ "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+
// FIXME: provide shorter instructions when imm8 == 1
let Uses = [CL] in {
def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src),
@@ -2228,6 +2542,15 @@ def ADD32rm : I<0x03, MRMSrcMem, (outs GR32:$dst),
"add{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, (add GR32:$src1, (load addr:$src2))),
(implicit EFLAGS)]>;
+
+// Register-Register Addition - Equivalent to the normal rr forms (ADD8rr,
+// ADD16rr, and ADD32rr), but differently encoded.
+def ADD8mrmrr: I<0x02, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ "add{b}\t{$src2, $dst|$dst, $src2}", []>;
+def ADD16mrmrr: I<0x03, MRMSrcReg,(outs GR16:$dst),(ins GR16:$src1, GR16:$src2),
+ "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
+def ADD32mrmrr: I<0x03, MRMSrcReg,(outs GR32:$dst),(ins GR32:$src1, GR32:$src2),
+ "add{l}\t{$src2, $dst|$dst, $src2}", []>;
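+
+// Illustrative note: x86 encodes reg-reg adds two ways, differing only in the
+// opcode's direction bit and the ModR/M operand roles:
+//   00 /r  add r/m8, r8   (MRMDestReg form, ADD8rr)
+//   02 /r  add r8, r/m8   (MRMSrcReg form, ADD8mrmrr above)
+// Both forms compute the same result.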
// Register-Integer Addition
def ADD8ri : Ii8<0x80, MRM0r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
@@ -2295,6 +2618,14 @@ let isTwoAddress = 0 in {
[(store (add (load addr:$dst), i32immSExt8:$src2),
addr:$dst),
(implicit EFLAGS)]>;
+
+ // addition to rAX
+ def ADD8i8 : Ii8<0x04, RawFrm, (outs), (ins i8imm:$src),
+ "add{b}\t{$src, %al|%al, $src}", []>;
+ def ADD16i16 : Ii16<0x05, RawFrm, (outs), (ins i16imm:$src),
+ "add{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+ def ADD32i32 : Ii32<0x05, RawFrm, (outs), (ins i32imm:$src),
+ "add{l}\t{$src, %eax|%eax, $src}", []>;
}
let Uses = [EFLAGS] in {
@@ -2373,6 +2704,13 @@ let isTwoAddress = 0 in {
def ADC32mi8 : Ii8<0x83, MRM2m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
"adc{l}\t{$src2, $dst|$dst, $src2}",
[(store (adde (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
+
+ def ADC8i8 : Ii8<0x14, RawFrm, (outs), (ins i8imm:$src),
+ "adc{b}\t{$src, %al|%al, $src}", []>;
+ def ADC16i16 : Ii16<0x15, RawFrm, (outs), (ins i16imm:$src),
+ "adc{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+ def ADC32i32 : Ii32<0x15, RawFrm, (outs), (ins i32imm:$src),
+ "adc{l}\t{$src, %eax|%eax, $src}", []>;
}
} // Uses = [EFLAGS]
@@ -2472,6 +2810,13 @@ let isTwoAddress = 0 in {
[(store (sub (load addr:$dst), i32immSExt8:$src2),
addr:$dst),
(implicit EFLAGS)]>;
+
+ def SUB8i8 : Ii8<0x2C, RawFrm, (outs), (ins i8imm:$src),
+ "sub{b}\t{$src, %al|%al, $src}", []>;
+ def SUB16i16 : Ii16<0x2D, RawFrm, (outs), (ins i16imm:$src),
+ "sub{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+ def SUB32i32 : Ii32<0x2D, RawFrm, (outs), (ins i32imm:$src),
+ "sub{l}\t{$src, %eax|%eax, $src}", []>;
}
let Uses = [EFLAGS] in {
@@ -2516,6 +2861,13 @@ let isTwoAddress = 0 in {
def SBB32mi8 : Ii8<0x83, MRM3m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
"sbb{l}\t{$src2, $dst|$dst, $src2}",
[(store (sube (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
+
+ def SBB8i8 : Ii8<0x1C, RawFrm, (outs), (ins i8imm:$src),
+ "sbb{b}\t{$src, %al|%al, $src}", []>;
+ def SBB16i16 : Ii16<0x1D, RawFrm, (outs), (ins i16imm:$src),
+ "sbb{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+ def SBB32i32 : Ii32<0x1D, RawFrm, (outs), (ins i32imm:$src),
+ "sbb{l}\t{$src, %eax|%eax, $src}", []>;
}
def SBB8rm : I<0x1A, MRMSrcMem, (outs GR8:$dst), (ins GR8:$src1, i8mem:$src2),
"sbb{b}\t{$src2, $dst|$dst, $src2}",
@@ -2647,6 +2999,13 @@ def TEST32rr : I<0x85, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
(implicit EFLAGS)]>;
}
+def TEST8i8 : Ii8<0xA8, RawFrm, (outs), (ins i8imm:$src),
+ "test{b}\t{$src, %al|%al, $src}", []>;
+def TEST16i16 : Ii16<0xA9, RawFrm, (outs), (ins i16imm:$src),
+ "test{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+def TEST32i32 : Ii32<0xA9, RawFrm, (outs), (ins i32imm:$src),
+ "test{l}\t{$src, %eax|%eax, $src}", []>;
+
def TEST8rm : I<0x84, MRMSrcMem, (outs), (ins GR8 :$src1, i8mem :$src2),
"test{b}\t{$src2, $src1|$src1, $src2}",
[(X86cmp (and GR8:$src1, (loadi8 addr:$src2)), 0),
@@ -2878,6 +3237,13 @@ def SETNOm : I<0x91, MRM0m,
// Integer comparisons
let Defs = [EFLAGS] in {
+def CMP8i8 : Ii8<0x3C, RawFrm, (outs), (ins i8imm:$src),
+ "cmp{b}\t{$src, %al|%al, $src}", []>;
+def CMP16i16 : Ii16<0x3D, RawFrm, (outs), (ins i16imm:$src),
+ "cmp{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+def CMP32i32 : Ii32<0x3D, RawFrm, (outs), (ins i32imm:$src),
+ "cmp{l}\t{$src, %eax|%eax, $src}", []>;
+
def CMP8rr : I<0x38, MRMDestReg,
(outs), (ins GR8 :$src1, GR8 :$src2),
"cmp{b}\t{$src2, $src1|$src1, $src2}",
@@ -2920,6 +3286,12 @@ def CMP32rm : I<0x3B, MRMSrcMem,
"cmp{l}\t{$src2, $src1|$src1, $src2}",
[(X86cmp GR32:$src1, (loadi32 addr:$src2)),
(implicit EFLAGS)]>;
+def CMP8mrmrr : I<0x3A, MRMSrcReg, (outs), (ins GR8:$src1, GR8:$src2),
+ "cmp{b}\t{$src2, $src1|$src1, $src2}", []>;
+def CMP16mrmrr : I<0x3B, MRMSrcReg, (outs), (ins GR16:$src1, GR16:$src2),
+ "cmp{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize;
+def CMP32mrmrr : I<0x3B, MRMSrcReg, (outs), (ins GR32:$src1, GR32:$src2),
+ "cmp{l}\t{$src2, $src1|$src1, $src2}", []>;
def CMP8ri : Ii8<0x80, MRM7r,
(outs), (ins GR8:$src1, i8imm:$src2),
"cmp{b}\t{$src2, $src1|$src1, $src2}",
@@ -3095,7 +3467,8 @@ let neverHasSideEffects = 1 in {
// Alias instructions that map movr0 to xor.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
-let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
+ isCodeGenOnly = 1 in {
def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins),
"xor{b}\t$dst, $dst",
[(set GR8:$dst, 0)]>;
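+// Illustrative note: "xor reg, reg" is preferred over "mov reg, 0" because it
+// has a shorter encoding (2 bytes vs. 5 for the 32-bit mov) and is a widely
+// recognized zeroing idiom; the cost is that it clobbers EFLAGS, which is why
+// these defs carry Defs = [EFLAGS].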
@@ -3127,12 +3500,12 @@ def TLS_addr32 : I<0, Pseudo, (outs), (ins lea32mem:$sym),
[(X86tlsaddr tls32addr:$sym)]>,
Requires<[In32BitMode]>;
-let AddedComplexity = 5 in
+let AddedComplexity = 5, isCodeGenOnly = 1 in
def GS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"movl\t%gs:$src, $dst",
[(set GR32:$dst, (gsload addr:$src))]>, SegGS;
-let AddedComplexity = 5 in
+let AddedComplexity = 5, isCodeGenOnly = 1 in
def FS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"movl\t%fs:$src, $dst",
[(set GR32:$dst, (fsload addr:$src))]>, SegFS;
@@ -3143,7 +3516,7 @@ def FS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
def DWARF_LOC : I<0, Pseudo, (outs),
(ins i32imm:$line, i32imm:$col, i32imm:$file),
- ".loc\t${file:debug} ${line:debug} ${col:debug}",
+ ".loc\t$file $line $col",
[(dwarf_loc (i32 imm:$line), (i32 imm:$col),
(i32 imm:$file))]>;
@@ -3151,7 +3524,7 @@ def DWARF_LOC : I<0, Pseudo, (outs),
// EH Pseudo Instructions
//
let isTerminator = 1, isReturn = 1, isBarrier = 1,
- hasCtrlDep = 1 in {
+ hasCtrlDep = 1, isCodeGenOnly = 1 in {
def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
"ret\t#eh_return, addr: $addr",
[(X86ehret GR32:$addr)]>;
@@ -3223,6 +3596,78 @@ def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins i8mem:$ptr, GR8:$val),
TB, LOCK;
}
+// Optimized codegen when the non-memory output is not used.
+// FIXME: Use normal add / sub instructions and add lock prefix dynamically.
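+// Illustrative sketch (assumption): when the value returned by an atomic add
+// is unused, instead of an xadd/cmpxchg sequence we can emit just the
+// memory-destination form with a lock prefix, e.g.
+//   lock
+//   addl $1, (%ecx)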
+def LOCK_ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
+ "lock\n\t"
+ "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD16mr : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "lock\n\t"
+ "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_ADD32mr : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "lock\n\t"
+ "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD8mi : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2),
+ "lock\n\t"
+ "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD16mi : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2),
+ "lock\n\t"
+                     "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_ADD32mi : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2),
+ "lock\n\t"
+ "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
+ "lock\n\t"
+ "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
+ "lock\n\t"
+ "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+
+def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
+ "lock\n\t"
+ "inc{b}\t$dst", []>, LOCK;
+def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
+ "lock\n\t"
+ "inc{w}\t$dst", []>, OpSize, LOCK;
+def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
+ "lock\n\t"
+ "inc{l}\t$dst", []>, LOCK;
+
+def LOCK_SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
+ "lock\n\t"
+ "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB16mr : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "lock\n\t"
+ "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_SUB32mr : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "lock\n\t"
+ "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB8mi : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2),
+ "lock\n\t"
+ "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB16mi : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2),
+ "lock\n\t"
+ "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_SUB32mi : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2),
+ "lock\n\t"
+ "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
+ "lock\n\t"
+ "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
+ "lock\n\t"
+ "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+
+def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst),
+ "lock\n\t"
+ "dec{b}\t$dst", []>, LOCK;
+def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
+ "lock\n\t"
+ "dec{w}\t$dst", []>, OpSize, LOCK;
+def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
+ "lock\n\t"
+ "dec{l}\t$dst", []>, LOCK;
+
// Atomic exchange, and, or, xor
let Constraints = "$val = $dst", Defs = [EFLAGS],
usesCustomDAGSchedInserter = 1 in {
@@ -3318,6 +3763,25 @@ def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
"#ATOMSWAP6432 PSEUDO!", []>;
}
+// Segmentation support instructions.
+
+def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+
+// The i16mem operand in LAR32rm and the GR32 operand in LAR32rr are not typos.
+def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
+ "lar{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "lar{l}\t{$src, $dst|$dst, $src}", []>, TB;
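+
+// Illustrative note: lar loads the access-rights field of the descriptor named
+// by a 16-bit segment selector. The selector is 16 bits even for the 32-bit
+// form (only the low 16 bits of a GR32 source are used), which is why LAR32rm
+// takes an i16mem operand.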
+
+// String manipulation instructions
+
+def LODSB : I<0xAC, RawFrm, (outs), (ins), "lodsb", []>;
+def LODSW : I<0xAD, RawFrm, (outs), (ins), "lodsw", []>, OpSize;
+def LODSD : I<0xAD, RawFrm, (outs), (ins), "lodsd", []>;
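+
+// Illustrative note: lodsb loads the byte at DS:(E)SI into AL and then
+// advances (E)SI by the operand size (or decrements it when DF is set);
+// lodsw/lodsd do the same for AX/EAX.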
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
@@ -3345,14 +3809,6 @@ def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
// Calls
// tailcall stuff
-def : Pat<(X86tailcall GR32:$dst),
- (TAILCALL)>;
-
-def : Pat<(X86tailcall (i32 tglobaladdr:$dst)),
- (TAILCALL)>;
-def : Pat<(X86tailcall (i32 texternalsym:$dst)),
- (TAILCALL)>;
-
def : Pat<(X86tcret GR32:$dst, imm:$off),
(TCRETURNri GR32:$dst, imm:$off)>;
@@ -3362,6 +3818,7 @@ def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
(TCRETURNdi texternalsym:$dst, imm:$off)>;
+// Normal calls, with various flavors of addresses.
def : Pat<(X86call (i32 tglobaladdr:$dst)),
(CALLpcrel32 tglobaladdr:$dst)>;
def : Pat<(X86call (i32 texternalsym:$dst)),
@@ -3472,21 +3929,17 @@ def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
// extload bool -> extload byte
def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;
-def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>,
- Requires<[In32BitMode]>;
+def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
-def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>,
- Requires<[In32BitMode]>;
+def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
-// anyext
-def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>,
- Requires<[In32BitMode]>;
-def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>,
- Requires<[In32BitMode]>;
-def : Pat<(i32 (anyext GR16:$src)),
- (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>;
+// anyext. Define these to do an explicit zero-extend to
+// avoid partial-register updates.
+def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>;
+def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
+def : Pat<(i32 (anyext GR16:$src)), (MOVZX32rr16 GR16:$src)>;
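+
+// Illustrative sketch: a write to only part of a register leaves the rest of
+// it live and forces a merge on a later full-width read, e.g.
+//   movb   %cl, %al       ; partial write, merges with the old %eax
+//   movzbl %cl, %eax      ; full write, no merge / stall
+// so anyext is implemented with movzx rather than a subregister insert.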
// (and (i32 load), 255) -> (zextload i8)
def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 255))),
@@ -3567,6 +4020,10 @@ def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
(MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
x86_subreg_8bit_hi))>,
Requires<[In32BitMode]>;
+def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ x86_subreg_8bit_hi))>,
+ Requires<[In32BitMode]>;
def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
(MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD),
x86_subreg_8bit_hi))>,
@@ -3961,6 +4418,243 @@ def : Pat<(parallel (store (i32 (X86dec_flag (loadi32 addr:$dst))), addr:$dst),
(implicit EFLAGS)),
(DEC32m addr:$dst)>, Requires<[In32BitMode]>;
+// Register-Register Or with EFLAGS result
+def : Pat<(parallel (X86or_flag GR8:$src1, GR8:$src2),
+ (implicit EFLAGS)),
+ (OR8rr GR8:$src1, GR8:$src2)>;
+def : Pat<(parallel (X86or_flag GR16:$src1, GR16:$src2),
+ (implicit EFLAGS)),
+ (OR16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(parallel (X86or_flag GR32:$src1, GR32:$src2),
+ (implicit EFLAGS)),
+ (OR32rr GR32:$src1, GR32:$src2)>;
+
+// Register-Memory Or with EFLAGS result
+def : Pat<(parallel (X86or_flag GR8:$src1, (loadi8 addr:$src2)),
+ (implicit EFLAGS)),
+ (OR8rm GR8:$src1, addr:$src2)>;
+def : Pat<(parallel (X86or_flag GR16:$src1, (loadi16 addr:$src2)),
+ (implicit EFLAGS)),
+ (OR16rm GR16:$src1, addr:$src2)>;
+def : Pat<(parallel (X86or_flag GR32:$src1, (loadi32 addr:$src2)),
+ (implicit EFLAGS)),
+ (OR32rm GR32:$src1, addr:$src2)>;
+
+// Register-Integer Or with EFLAGS result
+def : Pat<(parallel (X86or_flag GR8:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (OR8ri GR8:$src1, imm:$src2)>;
+def : Pat<(parallel (X86or_flag GR16:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (OR16ri GR16:$src1, imm:$src2)>;
+def : Pat<(parallel (X86or_flag GR32:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (OR32ri GR32:$src1, imm:$src2)>;
+def : Pat<(parallel (X86or_flag GR16:$src1, i16immSExt8:$src2),
+ (implicit EFLAGS)),
+ (OR16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(parallel (X86or_flag GR32:$src1, i32immSExt8:$src2),
+ (implicit EFLAGS)),
+ (OR32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// Memory-Register Or with EFLAGS result
+def : Pat<(parallel (store (X86or_flag (loadi8 addr:$dst), GR8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (OR8mr addr:$dst, GR8:$src2)>;
+def : Pat<(parallel (store (X86or_flag (loadi16 addr:$dst), GR16:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (OR16mr addr:$dst, GR16:$src2)>;
+def : Pat<(parallel (store (X86or_flag (loadi32 addr:$dst), GR32:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (OR32mr addr:$dst, GR32:$src2)>;
+
+// Memory-Integer Or with EFLAGS result
+def : Pat<(parallel (store (X86or_flag (loadi8 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (OR8mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86or_flag (loadi16 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (OR16mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86or_flag (loadi32 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (OR32mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86or_flag (loadi16 addr:$dst), i16immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (OR16mi8 addr:$dst, i16immSExt8:$src2)>;
+def : Pat<(parallel (store (X86or_flag (loadi32 addr:$dst), i32immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (OR32mi8 addr:$dst, i32immSExt8:$src2)>;
+
+// Register-Register XOr with EFLAGS result
+def : Pat<(parallel (X86xor_flag GR8:$src1, GR8:$src2),
+ (implicit EFLAGS)),
+ (XOR8rr GR8:$src1, GR8:$src2)>;
+def : Pat<(parallel (X86xor_flag GR16:$src1, GR16:$src2),
+ (implicit EFLAGS)),
+ (XOR16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(parallel (X86xor_flag GR32:$src1, GR32:$src2),
+ (implicit EFLAGS)),
+ (XOR32rr GR32:$src1, GR32:$src2)>;
+
+// Register-Memory XOr with EFLAGS result
+def : Pat<(parallel (X86xor_flag GR8:$src1, (loadi8 addr:$src2)),
+ (implicit EFLAGS)),
+ (XOR8rm GR8:$src1, addr:$src2)>;
+def : Pat<(parallel (X86xor_flag GR16:$src1, (loadi16 addr:$src2)),
+ (implicit EFLAGS)),
+ (XOR16rm GR16:$src1, addr:$src2)>;
+def : Pat<(parallel (X86xor_flag GR32:$src1, (loadi32 addr:$src2)),
+ (implicit EFLAGS)),
+ (XOR32rm GR32:$src1, addr:$src2)>;
+
+// Register-Integer XOr with EFLAGS result
+def : Pat<(parallel (X86xor_flag GR8:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (XOR8ri GR8:$src1, imm:$src2)>;
+def : Pat<(parallel (X86xor_flag GR16:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (XOR16ri GR16:$src1, imm:$src2)>;
+def : Pat<(parallel (X86xor_flag GR32:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (XOR32ri GR32:$src1, imm:$src2)>;
+def : Pat<(parallel (X86xor_flag GR16:$src1, i16immSExt8:$src2),
+ (implicit EFLAGS)),
+ (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(parallel (X86xor_flag GR32:$src1, i32immSExt8:$src2),
+ (implicit EFLAGS)),
+ (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// Memory-Register XOr with EFLAGS result
+def : Pat<(parallel (store (X86xor_flag (loadi8 addr:$dst), GR8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (XOR8mr addr:$dst, GR8:$src2)>;
+def : Pat<(parallel (store (X86xor_flag (loadi16 addr:$dst), GR16:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (XOR16mr addr:$dst, GR16:$src2)>;
+def : Pat<(parallel (store (X86xor_flag (loadi32 addr:$dst), GR32:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (XOR32mr addr:$dst, GR32:$src2)>;
+
+// Memory-Integer XOr with EFLAGS result
+def : Pat<(parallel (store (X86xor_flag (loadi8 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (XOR8mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86xor_flag (loadi16 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (XOR16mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86xor_flag (loadi32 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (XOR32mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86xor_flag (loadi16 addr:$dst), i16immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (XOR16mi8 addr:$dst, i16immSExt8:$src2)>;
+def : Pat<(parallel (store (X86xor_flag (loadi32 addr:$dst), i32immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (XOR32mi8 addr:$dst, i32immSExt8:$src2)>;
+
+// Register-Register And with EFLAGS result
+def : Pat<(parallel (X86and_flag GR8:$src1, GR8:$src2),
+ (implicit EFLAGS)),
+ (AND8rr GR8:$src1, GR8:$src2)>;
+def : Pat<(parallel (X86and_flag GR16:$src1, GR16:$src2),
+ (implicit EFLAGS)),
+ (AND16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(parallel (X86and_flag GR32:$src1, GR32:$src2),
+ (implicit EFLAGS)),
+ (AND32rr GR32:$src1, GR32:$src2)>;
+
+// Register-Memory And with EFLAGS result
+def : Pat<(parallel (X86and_flag GR8:$src1, (loadi8 addr:$src2)),
+ (implicit EFLAGS)),
+ (AND8rm GR8:$src1, addr:$src2)>;
+def : Pat<(parallel (X86and_flag GR16:$src1, (loadi16 addr:$src2)),
+ (implicit EFLAGS)),
+ (AND16rm GR16:$src1, addr:$src2)>;
+def : Pat<(parallel (X86and_flag GR32:$src1, (loadi32 addr:$src2)),
+ (implicit EFLAGS)),
+ (AND32rm GR32:$src1, addr:$src2)>;
+
+// Register-Integer And with EFLAGS result
+def : Pat<(parallel (X86and_flag GR8:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (AND8ri GR8:$src1, imm:$src2)>;
+def : Pat<(parallel (X86and_flag GR16:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (AND16ri GR16:$src1, imm:$src2)>;
+def : Pat<(parallel (X86and_flag GR32:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (AND32ri GR32:$src1, imm:$src2)>;
+def : Pat<(parallel (X86and_flag GR16:$src1, i16immSExt8:$src2),
+ (implicit EFLAGS)),
+ (AND16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(parallel (X86and_flag GR32:$src1, i32immSExt8:$src2),
+ (implicit EFLAGS)),
+ (AND32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// Memory-Register And with EFLAGS result
+def : Pat<(parallel (store (X86and_flag (loadi8 addr:$dst), GR8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (AND8mr addr:$dst, GR8:$src2)>;
+def : Pat<(parallel (store (X86and_flag (loadi16 addr:$dst), GR16:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (AND16mr addr:$dst, GR16:$src2)>;
+def : Pat<(parallel (store (X86and_flag (loadi32 addr:$dst), GR32:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (AND32mr addr:$dst, GR32:$src2)>;
+
+// Memory-Integer And with EFLAGS result
+def : Pat<(parallel (store (X86and_flag (loadi8 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (AND8mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86and_flag (loadi16 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (AND16mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86and_flag (loadi32 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (AND32mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86and_flag (loadi16 addr:$dst), i16immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (AND16mi8 addr:$dst, i16immSExt8:$src2)>;
+def : Pat<(parallel (store (X86and_flag (loadi32 addr:$dst), i32immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (AND32mi8 addr:$dst, i32immSExt8:$src2)>;
+
+// -disable-16bit support.
+def : Pat<(truncstorei16 (i32 imm:$src), addr:$dst),
+ (MOV16mi addr:$dst, imm:$src)>;
+def : Pat<(truncstorei16 GR32:$src, addr:$dst),
+ (MOV16mr addr:$dst, (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit))>;
+def : Pat<(i32 (sextloadi16 addr:$dst)),
+ (MOVSX32rm16 addr:$dst)>;
+def : Pat<(i32 (zextloadi16 addr:$dst)),
+ (MOVZX32rm16 addr:$dst)>;
+def : Pat<(i32 (extloadi16 addr:$dst)),
+ (MOVZX32rm16 addr:$dst)>;
+
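+// Illustrative note (assumption): under -disable-16bit, i16 is not a legal
+// type, so i16 values live in 32-bit registers; the patterns above express the
+// remaining i16 memory traffic as truncating stores and extending loads.
+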
//===----------------------------------------------------------------------===//
// Floating Point Stack Support
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index b79a006..ce76b4e 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -1,10 +1,10 @@
//====- X86InstrMMX.td - Describe the X86 Instruction Set --*- tablegen -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 MMX instruction set, defining the instructions,
@@ -67,16 +67,18 @@ def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs),
// MMX Multiclasses
//===----------------------------------------------------------------------===//
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
// MMXI_binop_rm - Simple MMX binary operator.
multiclass MMXI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT, bit Commutable = 0> {
- def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
+ def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (OpVT (OpNode VR64:$src1, VR64:$src2)))]> {
let isCommutable = Commutable;
}
- def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
+ def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (OpVT (OpNode VR64:$src1,
(bitconvert
@@ -85,12 +87,14 @@ let isTwoAddress = 1 in {
multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
bit Commutable = 0> {
- def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
+ def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]> {
let isCommutable = Commutable;
}
- def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
+ def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>;
@@ -139,8 +143,10 @@ let isTwoAddress = 1 in {
// MMX EMMS & FEMMS Instructions
//===----------------------------------------------------------------------===//
-def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms", [(int_x86_mmx_emms)]>;
-def MMX_FEMMS : MMXI<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>;
+def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms",
+ [(int_x86_mmx_emms)]>;
+def MMX_FEMMS : MMXI<0x0E, RawFrm, (outs), (ins), "femms",
+ [(int_x86_mmx_femms)]>;
//===----------------------------------------------------------------------===//
// MMX Scalar Instructions
@@ -149,12 +155,14 @@ def MMX_FEMMS : MMXI<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]
// Data Transfer Instructions
def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (v2i32 (scalar_to_vector GR32:$src)))]>;
+ [(set VR64:$dst,
+ (v2i32 (scalar_to_vector GR32:$src)))]>;
let canFoldAsLoad = 1, isReMaterializable = 1 in
def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (v2i32 (scalar_to_vector (loadi32 addr:$src))))]>;
-let mayStore = 1 in
+ [(set VR64:$dst,
+ (v2i32 (scalar_to_vector (loadi32 addr:$src))))]>;
+let mayStore = 1 in
def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
"movd\t{$src, $dst|$dst, $src}", []>;
@@ -164,9 +172,16 @@ def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
[]>;
let neverHasSideEffects = 1 in
-def MMX_MOVD64from64rr : MMXRI<0x7E, MRMSrcReg,
+// These are 64-bit moves, but since the OS X assembler doesn't
+// recognize a register-register movq, we write them as
+// movd.
+def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
(outs GR64:$dst), (ins VR64:$src),
"movd\t{$src, $dst|$dst, $src}", []>;
+def MMX_MOVD64rrv164 : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (v1i64 (scalar_to_vector GR64:$src)))]>;
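+// Illustrative note (assumption): with 64-bit register operands, movd and
+// movq share the same opcode bytes (0F 6E / 0F 7E plus REX.W), so emitting
+// the movd spelling keeps this era's OS X assembler happy without changing
+// the encoding.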
let neverHasSideEffects = 1 in
def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
@@ -179,21 +194,21 @@ def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (v1i64 VR64:$src), addr:$dst)]>;
-def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMDestMem, (outs VR64:$dst), (ins VR128:$src),
+def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
"movdq2q\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(v1i64 (bitconvert
(i64 (vector_extract (v2i64 VR128:$src),
(iPTR 0))))))]>;
-def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMDestMem, (outs VR128:$dst), (ins VR64:$src),
+def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
"movq2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(movl immAllZerosV,
(v2i64 (scalar_to_vector (i64 (bitconvert VR64:$src))))))]>;
let neverHasSideEffects = 1 in
-def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMDestMem, (outs FR64:$dst), (ins VR64:$src),
+def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src),
"movq2dq\t{$src, $dst|$dst, $src}", []>;
def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
@@ -207,7 +222,8 @@ def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
[(set VR64:$dst,
(v2i32 (X86vzmovl (v2i32 (scalar_to_vector GR32:$src)))))]>;
let AddedComplexity = 20 in
-def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
+def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst),
+ (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(v2i32 (X86vzmovl (v2i32
@@ -265,7 +281,7 @@ defm MMX_PAND : MMXI_binop_rm_v1i64<0xDB, "pand", and, 1>;
defm MMX_POR : MMXI_binop_rm_v1i64<0xEB, "por" , or, 1>;
defm MMX_PXOR : MMXI_binop_rm_v1i64<0xEF, "pxor", xor, 1>;
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
def MMX_PANDNrr : MMXI<0xDF, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
"pandn\t{$src2, $dst|$dst, $src2}",
@@ -316,33 +332,33 @@ defm MMX_PCMPGTD : MMXI_binop_rm_int<0x66, "pcmpgtd", int_x86_mmx_pcmpgt_d>;
// Conversion Instructions
// -- Unpack Instructions
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
// Unpack High Packed Data Instructions
- def MMX_PUNPCKHBWrr : MMXI<0x68, MRMSrcReg,
+ def MMX_PUNPCKHBWrr : MMXI<0x68, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
"punpckhbw\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
(v8i8 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
- def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem,
+ def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem,
(outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
"punpckhbw\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
(v8i8 (mmx_unpckh VR64:$src1,
(bc_v8i8 (load_mmx addr:$src2)))))]>;
- def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg,
+ def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
"punpckhwd\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
(v4i16 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
- def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem,
+ def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem,
(outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
"punpckhwd\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
(v4i16 (mmx_unpckh VR64:$src1,
(bc_v4i16 (load_mmx addr:$src2)))))]>;
- def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg,
+ def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
"punpckhdq\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
@@ -379,12 +395,12 @@ let isTwoAddress = 1 in {
(v4i16 (mmx_unpckl VR64:$src1,
(bc_v4i16 (load_mmx addr:$src2)))))]>;
- def MMX_PUNPCKLDQrr : MMXI<0x62, MRMSrcReg,
+ def MMX_PUNPCKLDQrr : MMXI<0x62, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
"punpckldq\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
(v2i32 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
- def MMX_PUNPCKLDQrm : MMXI<0x62, MRMSrcMem,
+ def MMX_PUNPCKLDQrm : MMXI<0x62, MRMSrcMem,
(outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
"punpckldq\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
@@ -415,19 +431,22 @@ let neverHasSideEffects = 1 in {
def MMX_CVTPD2PIrr : MMX2I<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
"cvtpd2pi\t{$src, $dst|$dst, $src}", []>;
let mayLoad = 1 in
-def MMX_CVTPD2PIrm : MMX2I<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
+def MMX_CVTPD2PIrm : MMX2I<0x2D, MRMSrcMem, (outs VR64:$dst),
+ (ins f128mem:$src),
"cvtpd2pi\t{$src, $dst|$dst, $src}", []>;
def MMX_CVTPI2PDrr : MMX2I<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
"cvtpi2pd\t{$src, $dst|$dst, $src}", []>;
let mayLoad = 1 in
-def MMX_CVTPI2PDrm : MMX2I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+def MMX_CVTPI2PDrm : MMX2I<0x2A, MRMSrcMem, (outs VR128:$dst),
+ (ins i64mem:$src),
"cvtpi2pd\t{$src, $dst|$dst, $src}", []>;
def MMX_CVTPI2PSrr : MMXI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
"cvtpi2ps\t{$src, $dst|$dst, $src}", []>;
let mayLoad = 1 in
-def MMX_CVTPI2PSrm : MMXI<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+def MMX_CVTPI2PSrm : MMXI<0x2A, MRMSrcMem, (outs VR128:$dst),
+ (ins i64mem:$src),
"cvtpi2ps\t{$src, $dst|$dst, $src}", []>;
def MMX_CVTPS2PIrr : MMXI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
@@ -439,7 +458,8 @@ def MMX_CVTPS2PIrm : MMXI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
def MMX_CVTTPD2PIrr : MMX2I<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
"cvttpd2pi\t{$src, $dst|$dst, $src}", []>;
let mayLoad = 1 in
-def MMX_CVTTPD2PIrm : MMX2I<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
+def MMX_CVTTPD2PIrm : MMX2I<0x2C, MRMSrcMem, (outs VR64:$dst),
+ (ins f128mem:$src),
"cvttpd2pi\t{$src, $dst|$dst, $src}", []>;
def MMX_CVTTPS2PIrr : MMXI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
@@ -459,14 +479,16 @@ def MMX_PEXTRWri : MMXIi8<0xC5, MRMSrcReg,
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (MMX_X86pextrw (v4i16 VR64:$src1),
(iPTR imm:$src2)))]>;
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
def MMX_PINSRWrri : MMXIi8<0xC4, MRMSrcReg,
- (outs VR64:$dst), (ins VR64:$src1, GR32:$src2, i16i8imm:$src3),
+ (outs VR64:$dst), (ins VR64:$src1, GR32:$src2,
+ i16i8imm:$src3),
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR64:$dst, (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1),
- GR32:$src2, (iPTR imm:$src3))))]>;
+ GR32:$src2,(iPTR imm:$src3))))]>;
def MMX_PINSRWrmi : MMXIi8<0xC4, MRMSrcMem,
- (outs VR64:$dst), (ins VR64:$src1, i16mem:$src2, i16i8imm:$src3),
+ (outs VR64:$dst), (ins VR64:$src1, i16mem:$src2,
+ i16i8imm:$src3),
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR64:$dst,
(v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1),
@@ -494,7 +516,7 @@ def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
//===----------------------------------------------------------------------===//
// Alias instructions that map zero vector to pxor.
-let isReMaterializable = 1 in {
+let isReMaterializable = 1, isCodeGenOnly = 1 in {
def MMX_V_SET0 : MMXI<0xEF, MRMInitReg, (outs VR64:$dst), (ins),
"pxor\t$dst, $dst",
[(set VR64:$dst, (v2i32 immAllZerosV))]>;
@@ -579,7 +601,7 @@ def : Pat<(f64 (bitconvert (v8i8 VR64:$src))),
let AddedComplexity = 20 in {
def : Pat<(v2i32 (X86vzmovl (bc_v2i32 (load_mmx addr:$src)))),
- (MMX_MOVZDI2PDIrm addr:$src)>;
+ (MMX_MOVZDI2PDIrm addr:$src)>;
}
// Clear top half.
@@ -657,6 +679,33 @@ def : Pat<(v8i8 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
(iPTR 0))))),
(v8i8 (MMX_MOVDQ2Qrr VR128:$src))>;
+// Patterns for vector comparisons
+def : Pat<(v8i8 (X86pcmpeqb VR64:$src1, VR64:$src2)),
+ (MMX_PCMPEQBrr VR64:$src1, VR64:$src2)>;
+def : Pat<(v8i8 (X86pcmpeqb VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
+ (MMX_PCMPEQBrm VR64:$src1, addr:$src2)>;
+def : Pat<(v4i16 (X86pcmpeqw VR64:$src1, VR64:$src2)),
+ (MMX_PCMPEQWrr VR64:$src1, VR64:$src2)>;
+def : Pat<(v4i16 (X86pcmpeqw VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
+ (MMX_PCMPEQWrm VR64:$src1, addr:$src2)>;
+def : Pat<(v2i32 (X86pcmpeqd VR64:$src1, VR64:$src2)),
+ (MMX_PCMPEQDrr VR64:$src1, VR64:$src2)>;
+def : Pat<(v2i32 (X86pcmpeqd VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
+ (MMX_PCMPEQDrm VR64:$src1, addr:$src2)>;
+
+def : Pat<(v8i8 (X86pcmpgtb VR64:$src1, VR64:$src2)),
+ (MMX_PCMPGTBrr VR64:$src1, VR64:$src2)>;
+def : Pat<(v8i8 (X86pcmpgtb VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
+ (MMX_PCMPGTBrm VR64:$src1, addr:$src2)>;
+def : Pat<(v4i16 (X86pcmpgtw VR64:$src1, VR64:$src2)),
+ (MMX_PCMPGTWrr VR64:$src1, VR64:$src2)>;
+def : Pat<(v4i16 (X86pcmpgtw VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
+ (MMX_PCMPGTWrm VR64:$src1, addr:$src2)>;
+def : Pat<(v2i32 (X86pcmpgtd VR64:$src1, VR64:$src2)),
+ (MMX_PCMPGTDrr VR64:$src1, VR64:$src2)>;
+def : Pat<(v2i32 (X86pcmpgtd VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
+ (MMX_PCMPGTDrm VR64:$src1, addr:$src2)>;
+
// CMOV* - Used to implement the SELECT DAG operation. Expanded by the
// scheduler into a branch sequence.
// These are expanded by the scheduler.
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 5d6ef36..96fc932 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -1,10 +1,10 @@
//====- X86InstrSSE.td - Describe the X86 Instruction Set --*- tablegen -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 SSE instruction set, defining the instructions,
@@ -36,22 +36,22 @@ def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
-def X86pshufb : SDNode<"X86ISD::PSHUFB",
+def X86pshufb : SDNode<"X86ISD::PSHUFB",
SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
def X86pextrb : SDNode<"X86ISD::PEXTRB",
SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
def X86pextrw : SDNode<"X86ISD::PEXTRW",
SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
-def X86pinsrb : SDNode<"X86ISD::PINSRB",
+def X86pinsrb : SDNode<"X86ISD::PINSRB",
SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
-def X86pinsrw : SDNode<"X86ISD::PINSRW",
+def X86pinsrw : SDNode<"X86ISD::PINSRW",
SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
-def X86insrtps : SDNode<"X86ISD::INSERTPS",
+def X86insrtps : SDNode<"X86ISD::INSERTPS",
SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
- SDTCisVT<2, f32>, SDTCisPtrTy<3>]>>;
+ SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>;
def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
@@ -69,6 +69,10 @@ def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>;
def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;
+def SDTX86CmpPTest : SDTypeProfile<0, 2, [SDTCisVT<0, v4f32>,
+ SDTCisVT<1, v4f32>]>;
+def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
+
//===----------------------------------------------------------------------===//
// SSE Complex Patterns
//===----------------------------------------------------------------------===//
@@ -83,11 +87,13 @@ def sse_load_f64 : ComplexPattern<v2f64, 5, "SelectScalarSSELoad", [],
def ssmem : Operand<v4f32> {
let PrintMethod = "printf32mem";
- let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm, i8imm);
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
}
def sdmem : Operand<v2f64> {
let PrintMethod = "printf64mem";
- let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm, i8imm);
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
}
//===----------------------------------------------------------------------===//
@@ -179,13 +185,13 @@ def SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
return getI8Imm(X86::getShuffleSHUFImmediate(N));
}]>;
-// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
+// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
// PSHUFHW imm.
def SHUFFLE_get_pshufhw_imm : SDNodeXForm<vector_shuffle, [{
return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
}]>;
-// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
+// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
// PSHUFLW imm.
def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{
return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
@@ -360,25 +366,25 @@ def Int_CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
[(set VR64:$dst, (int_x86_sse_cvtps2pi VR128:$src))]>;
def Int_CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
"cvtps2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvtps2pi
+ [(set VR64:$dst, (int_x86_sse_cvtps2pi
(load addr:$src)))]>;
def Int_CVTTPS2PIrr: PSI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
"cvttps2pi\t{$src, $dst|$dst, $src}",
[(set VR64:$dst, (int_x86_sse_cvttps2pi VR128:$src))]>;
def Int_CVTTPS2PIrm: PSI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
"cvttps2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvttps2pi
+ [(set VR64:$dst, (int_x86_sse_cvttps2pi
(load addr:$src)))]>;
let Constraints = "$src1 = $dst" in {
- def Int_CVTPI2PSrr : PSI<0x2A, MRMSrcReg,
+ def Int_CVTPI2PSrr : PSI<0x2A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR64:$src2),
"cvtpi2ps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1,
VR64:$src2))]>;
- def Int_CVTPI2PSrm : PSI<0x2A, MRMSrcMem,
+ def Int_CVTPI2PSrm : PSI<0x2A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i64mem:$src2),
"cvtpi2ps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1,
+ [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1,
(load addr:$src2)))]>;
}
@@ -407,11 +413,11 @@ let Constraints = "$src1 = $dst" in {
// Comparison instructions
let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
- def CMPSSrr : SSIi8<0xC2, MRMSrcReg,
+ def CMPSSrr : SSIi8<0xC2, MRMSrcReg,
(outs FR32:$dst), (ins FR32:$src1, FR32:$src, SSECC:$cc),
"cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
let mayLoad = 1 in
- def CMPSSrm : SSIi8<0xC2, MRMSrcMem,
+ def CMPSSrm : SSIi8<0xC2, MRMSrcMem,
(outs FR32:$dst), (ins FR32:$src1, f32mem:$src, SSECC:$cc),
"cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
}
@@ -428,13 +434,15 @@ def UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs), (ins FR32:$src1, f32mem:$src2),
// Aliases to match intrinsics which expect XMM operand(s).
let Constraints = "$src1 = $dst" in {
- def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
+ def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src,
+ SSECC:$cc),
"cmp${cc}ss\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1,
- VR128:$src, imm:$cc))]>;
- def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f32mem:$src, SSECC:$cc),
+ VR128:$src, imm:$cc))]>;
+ def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f32mem:$src,
+ SSECC:$cc),
"cmp${cc}ss\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1,
(load addr:$src), imm:$cc))]>;
@@ -460,18 +468,19 @@ def Int_COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
(implicit EFLAGS)]>;
} // Defs = [EFLAGS]
-// Aliases of packed SSE1 instructions for scalar use. These all have names that
-// start with 'Fs'.
+// Aliases of packed SSE1 instructions for scalar use. These all have names
+// that start with 'Fs'.
// Alias instructions that map fld0 to pxor for sse.
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
+ canFoldAsLoad = 1 in
def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins),
"pxor\t$dst, $dst", [(set FR32:$dst, fp32imm0)]>,
Requires<[HasSSE1]>, TB, OpSize;
// Alias instruction to do FR32 reg-to-reg copy using movaps. Upper bits are
// disregarded.
-let neverHasSideEffects = 1 in
+let neverHasSideEffects = 1 in
def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
"movaps\t{$src, $dst|$dst, $src}", []>;
@@ -552,7 +561,7 @@ multiclass basic_sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
(ins FR32:$src1, f32mem:$src2),
!strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
[(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;
-
+
// Vector operation, reg+reg.
def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
@@ -616,7 +625,7 @@ multiclass sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
(ins FR32:$src1, f32mem:$src2),
!strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
[(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;
-
+
// Vector operation, reg+reg.
def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
@@ -671,7 +680,7 @@ defm MIN : sse1_fp_binop_rm<0x5D, "min", X86fmin,
// SSE packed FP Instructions
// Move Instructions
-let neverHasSideEffects = 1 in
+let neverHasSideEffects = 1 in
def MOVAPSrr : PSI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movaps\t{$src, $dst|$dst, $src}", []>;
let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
@@ -708,13 +717,13 @@ let Constraints = "$src1 = $dst" in {
def MOVLPSrm : PSI<0x12, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
"movlps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
+ [(set VR128:$dst,
(movlp VR128:$src1,
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
def MOVHPSrm : PSI<0x16, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
"movhps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
+ [(set VR128:$dst,
(movhp VR128:$src1,
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
} // AddedComplexity
@@ -789,7 +798,7 @@ multiclass sse1_fp_unop_rm<bits<8> opc, string OpcodeStr,
def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
[(set FR32:$dst, (OpNode (load addr:$src)))]>;
-
+
// Vector operation, reg.
def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
@@ -890,12 +899,12 @@ let Constraints = "$src1 = $dst" in {
}
let Constraints = "$src1 = $dst" in {
- def CMPPSrri : PSIi8<0xC2, MRMSrcReg,
+ def CMPPSrri : PSIi8<0xC2, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
"cmp${cc}ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
VR128:$src, imm:$cc))]>;
- def CMPPSrmi : PSIi8<0xC2, MRMSrcMem,
+ def CMPPSrmi : PSIi8<0xC2, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc),
"cmp${cc}ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
@@ -909,13 +918,13 @@ def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
// Shuffle and unpack instructions
let Constraints = "$src1 = $dst" in {
let isConvertibleToThreeAddress = 1 in // Convert to pshufd
- def SHUFPSrri : PSIi8<0xC6, MRMSrcReg,
+ def SHUFPSrri : PSIi8<0xC6, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1,
VR128:$src2, i8imm:$src3),
"shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst,
(v4f32 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
- def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem,
+ def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1,
f128mem:$src2, i8imm:$src3),
"shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
@@ -924,24 +933,24 @@ let Constraints = "$src1 = $dst" in {
VR128:$src1, (memopv4f32 addr:$src2))))]>;
let AddedComplexity = 10 in {
- def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
+ def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"unpckhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (unpckh VR128:$src1, VR128:$src2)))]>;
- def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
+ def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
"unpckhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (unpckh VR128:$src1,
(memopv4f32 addr:$src2))))]>;
- def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
+ def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"unpcklps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (unpckl VR128:$src1, VR128:$src2)))]>;
- def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
+ def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
"unpcklps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
@@ -984,7 +993,8 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
// Alias instructions that map zero vector to pxor / xorp* for sse.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1 in
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isCodeGenOnly = 1 in
def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins),
"xorps\t$dst, $dst",
[(set VR128:$dst, (v4i32 immAllZerosV))]>;
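+// Illustrative note: xorps/pxor of a register with itself is a recognized
+// zeroing idiom with no input dependency, which is why V_SET0 can be marked
+// rematerializable and as cheap as a move.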
@@ -1046,14 +1056,14 @@ let AddedComplexity = 20 in
def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src),
"movss\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v4f32 (X86vzmovl (v4f32 (scalar_to_vector
- (loadf32 addr:$src))))))]>;
+ (loadf32 addr:$src))))))]>;
def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
(MOVZSS2PSrm addr:$src)>;
-//===----------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
// SSE2 Instructions
-//===----------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
// Move Instructions
let neverHasSideEffects = 1 in
@@ -1077,7 +1087,7 @@ def CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f64mem:$src),
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fround FR64:$src))]>;
-def CVTSD2SSrm : SDI<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
+def CVTSD2SSrm : SDI<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fround (loadf64 addr:$src)))]>;
def CVTSI2SDrr : SDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR32:$src),
@@ -1087,6 +1097,27 @@ def CVTSI2SDrm : SDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i32mem:$src),
"cvtsi2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
+def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
+def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
+def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
+def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
+def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
+def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
+def CVTDQ2PSrr : PSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtdq2ps\t{$src, $dst|$dst, $src}", []>;
+def CVTDQ2PSrm : PSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtdq2ps\t{$src, $dst|$dst, $src}", []>;
+def COMISDrr: PDI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+ "comisd\t{$src2, $src1|$src1, $src2}", []>;
+def COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
+ "comisd\t{$src2, $src1|$src1, $src2}", []>;
+
// SSE2 instructions with XS prefix
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
@@ -1112,21 +1143,21 @@ def Int_CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
[(set VR64:$dst, (int_x86_sse_cvtpd2pi VR128:$src))]>;
def Int_CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
"cvtpd2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvtpd2pi
+ [(set VR64:$dst, (int_x86_sse_cvtpd2pi
(memop addr:$src)))]>;
def Int_CVTTPD2PIrr: PDI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
"cvttpd2pi\t{$src, $dst|$dst, $src}",
[(set VR64:$dst, (int_x86_sse_cvttpd2pi VR128:$src))]>;
def Int_CVTTPD2PIrm: PDI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
"cvttpd2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvttpd2pi
+ [(set VR64:$dst, (int_x86_sse_cvttpd2pi
(memop addr:$src)))]>;
def Int_CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
"cvtpi2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse_cvtpi2pd VR64:$src))]>;
def Int_CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"cvtpi2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_cvtpi2pd
+ [(set VR128:$dst, (int_x86_sse_cvtpi2pd
(load addr:$src)))]>;
// Aliases for intrinsics
@@ -1141,11 +1172,11 @@ def Int_CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src),
// Comparison instructions
let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
- def CMPSDrr : SDIi8<0xC2, MRMSrcReg,
+ def CMPSDrr : SDIi8<0xC2, MRMSrcReg,
(outs FR64:$dst), (ins FR64:$src1, FR64:$src, SSECC:$cc),
"cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
let mayLoad = 1 in
- def CMPSDrm : SDIi8<0xC2, MRMSrcMem,
+ def CMPSDrm : SDIi8<0xC2, MRMSrcMem,
(outs FR64:$dst), (ins FR64:$src1, f64mem:$src, SSECC:$cc),
"cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
}
@@ -1162,13 +1193,15 @@ def UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs), (ins FR64:$src1, f64mem:$src2),
// Aliases to match intrinsics which expect XMM operand(s).
let Constraints = "$src1 = $dst" in {
- def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
+ def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src,
+ SSECC:$cc),
"cmp${cc}sd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
VR128:$src, imm:$cc))]>;
- def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f64mem:$src, SSECC:$cc),
+ def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f64mem:$src,
+ SSECC:$cc),
"cmp${cc}sd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
(load addr:$src), imm:$cc))]>;
@@ -1194,11 +1227,12 @@ def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
(implicit EFLAGS)]>;
} // Defs = [EFLAGS]
-// Aliases of packed SSE2 instructions for scalar use. These all have names that
-// start with 'Fs'.
+// Aliases of packed SSE2 instructions for scalar use. These all have names
+// that start with 'Fs'.
// Alias instructions that map fld0 to pxor for sse.
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
+ canFoldAsLoad = 1 in
def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins),
"pxor\t$dst, $dst", [(set FR64:$dst, fpimm0)]>,
Requires<[HasSSE2]>, TB, OpSize;
@@ -1286,7 +1320,7 @@ multiclass basic_sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
(ins FR64:$src1, f64mem:$src2),
!strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
[(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
-
+
// Vector operation, reg+reg.
def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
@@ -1350,7 +1384,7 @@ multiclass sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
(ins FR64:$src1, f64mem:$src2),
!strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
[(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
-
+
// Vector operation, reg+reg.
def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
@@ -1402,7 +1436,7 @@ defm MAX : sse2_fp_binop_rm<0x5F, "max", X86fmax,
defm MIN : sse2_fp_binop_rm<0x5D, "min", X86fmin,
int_x86_sse2_min_sd, int_x86_sse2_min_pd>;
-//===----------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
// SSE packed FP Instructions
// Move Instructions
@@ -1442,13 +1476,13 @@ let Constraints = "$src1 = $dst" in {
def MOVLPDrm : PDI<0x12, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
"movlpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
+ [(set VR128:$dst,
(v2f64 (movlp VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))))]>;
def MOVHPDrm : PDI<0x16, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
"movhpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
+ [(set VR128:$dst,
(v2f64 (movhp VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))))]>;
} // AddedComplexity
@@ -1564,7 +1598,7 @@ def Int_CVTSD2SSrr: SDI<0x5A, MRMSrcReg,
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
VR128:$src2))]>;
def Int_CVTSD2SSrm: SDI<0x5A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
+ (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
"cvtsd2ss\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
(load addr:$src2)))]>;
@@ -1612,7 +1646,7 @@ multiclass sse2_fp_unop_rm<bits<8> opc, string OpcodeStr,
def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
[(set FR64:$dst, (OpNode (load addr:$src)))]>;
-
+
// Vector operation, reg.
def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
@@ -1712,12 +1746,12 @@ let Constraints = "$src1 = $dst" in {
}
let Constraints = "$src1 = $dst" in {
- def CMPPDrri : PDIi8<0xC2, MRMSrcReg,
+ def CMPPDrri : PDIi8<0xC2, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
"cmp${cc}pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
VR128:$src, imm:$cc))]>;
- def CMPPDrmi : PDIi8<0xC2, MRMSrcMem,
+ def CMPPDrmi : PDIi8<0xC2, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc),
"cmp${cc}pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
@@ -1730,12 +1764,12 @@ def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
// Shuffle and unpack instructions
let Constraints = "$src1 = $dst" in {
- def SHUFPDrri : PDIi8<0xC6, MRMSrcReg,
+ def SHUFPDrri : PDIi8<0xC6, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3),
"shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst,
(v2f64 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
- def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem,
+ def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1,
f128mem:$src2, i8imm:$src3),
"shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
@@ -1744,24 +1778,24 @@ let Constraints = "$src1 = $dst" in {
VR128:$src1, (memopv2f64 addr:$src2))))]>;
let AddedComplexity = 10 in {
- def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
+ def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"unpckhpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v2f64 (unpckh VR128:$src1, VR128:$src2)))]>;
- def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
+ def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
"unpckhpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v2f64 (unpckh VR128:$src1,
(memopv2f64 addr:$src2))))]>;
- def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
+ def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"unpcklpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v2f64 (unpckl VR128:$src1, VR128:$src2)))]>;
- def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
+ def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
"unpcklpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
@@ -1770,7 +1804,7 @@ let Constraints = "$src1 = $dst" in {
} // Constraints = "$src1 = $dst"
-//===----------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
// SSE integer instructions
// Move Instructions
@@ -1825,14 +1859,17 @@ multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
string OpcodeStr,
Intrinsic IntId, Intrinsic IntId2> {
- def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1,
+ VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
- def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1,
+ i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId VR128:$src1,
- (bitconvert (memopv2i64 addr:$src2))))]>;
- def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ (bitconvert (memopv2i64 addr:$src2))))]>;
+ def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst), (ins VR128:$src1,
+ i32i8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
}
@@ -1840,15 +1877,17 @@ multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
/// PDI_binop_rm - Simple SSE2 binary operator.
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT, bit Commutable = 0> {
- def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1,
+ VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]> {
let isCommutable = Commutable;
}
- def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1,
+ i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (OpVT (OpNode VR128:$src1,
- (bitconvert (memopv2i64 addr:$src2)))))]>;
+ (bitconvert (memopv2i64 addr:$src2)))))]>;
}
/// PDI_binop_rm_v2i64 - Simple SSE2 binary operator whose type is v2i64.
@@ -1858,14 +1897,17 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
///
multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
bit Commutable = 0> {
- def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]> {
let isCommutable = Commutable;
}
- def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpNode VR128:$src1,(memopv2i64 addr:$src2)))]>;
+ [(set VR128:$dst, (OpNode VR128:$src1,
+ (memopv2i64 addr:$src2)))]>;
}
} // Constraints = "$src1 = $dst"
@@ -2029,8 +2071,8 @@ def PSHUFDmi : PDIi8<0x70, MRMSrcMem,
(outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
"pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v4i32 (pshufd:$src2
- (bc_v4i32(memopv2i64 addr:$src1)),
- (undef))))]>;
+ (bc_v4i32(memopv2i64 addr:$src1)),
+ (undef))))]>;
// SSE2 with ImmT == Imm8 and XS prefix.
def PSHUFHWri : Ii8<0x70, MRMSrcReg,
@@ -2043,8 +2085,8 @@ def PSHUFHWmi : Ii8<0x70, MRMSrcMem,
(outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
"pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v8i16 (pshufhw:$src2
- (bc_v8i16 (memopv2i64 addr:$src1)),
- (undef))))]>,
+ (bc_v8i16 (memopv2i64 addr:$src1)),
+ (undef))))]>,
XS, Requires<[HasSSE2]>;
// SSE2 with ImmT == Imm8 and XD prefix.
@@ -2064,90 +2106,90 @@ def PSHUFLWmi : Ii8<0x70, MRMSrcMem,
let Constraints = "$src1 = $dst" in {
- def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
+ def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpcklbw\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v16i8 (unpckl VR128:$src1, VR128:$src2)))]>;
- def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
+ def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpcklbw\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(unpckl VR128:$src1,
(bc_v16i8 (memopv2i64 addr:$src2))))]>;
- def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
+ def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpcklwd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v8i16 (unpckl VR128:$src1, VR128:$src2)))]>;
- def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
+ def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpcklwd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(unpckl VR128:$src1,
(bc_v8i16 (memopv2i64 addr:$src2))))]>;
- def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
+ def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpckldq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4i32 (unpckl VR128:$src1, VR128:$src2)))]>;
- def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
+ def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpckldq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(unpckl VR128:$src1,
(bc_v4i32 (memopv2i64 addr:$src2))))]>;
- def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
+ def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpcklqdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v2i64 (unpckl VR128:$src1, VR128:$src2)))]>;
- def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
+ def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpcklqdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v2i64 (unpckl VR128:$src1,
(memopv2i64 addr:$src2))))]>;
-
- def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
+
+ def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpckhbw\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v16i8 (unpckh VR128:$src1, VR128:$src2)))]>;
- def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
+ def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpckhbw\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (unpckh VR128:$src1,
+ [(set VR128:$dst,
+ (unpckh VR128:$src1,
(bc_v16i8 (memopv2i64 addr:$src2))))]>;
- def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
+ def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpckhwd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v8i16 (unpckh VR128:$src1, VR128:$src2)))]>;
- def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
+ def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpckhwd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(unpckh VR128:$src1,
(bc_v8i16 (memopv2i64 addr:$src2))))]>;
- def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
+ def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpckhdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4i32 (unpckh VR128:$src1, VR128:$src2)))]>;
- def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
+ def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpckhdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(unpckh VR128:$src1,
(bc_v4i32 (memopv2i64 addr:$src2))))]>;
- def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
+ def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpckhqdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v2i64 (unpckh VR128:$src1, VR128:$src2)))]>;
- def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
+ def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpckhqdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
@@ -2172,7 +2214,7 @@ let Constraints = "$src1 = $dst" in {
(outs VR128:$dst), (ins VR128:$src1,
i16mem:$src2, i32i8imm:$src3),
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst,
+ [(set VR128:$dst,
(X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
imm:$src3))]>;
}
@@ -2202,7 +2244,7 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
[(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"movnti\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
+ [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
TB, Requires<[HasSSE2]>;
// Flush cache
@@ -2217,17 +2259,18 @@ def MFENCE : I<0xAE, MRM6r, (outs), (ins),
"mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
//TODO: custom lower this so as to never even generate the noop
-def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss),
+def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss),
(i8 0)), (NOOP)>;
def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
-def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss),
+def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss),
(i8 1)), (MFENCE)>;
// Alias instructions that map zero vector to pxor / xorp* for sse.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1 in
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isCodeGenOnly = 1 in
def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins),
"pcmpeqd\t$dst, $dst",
[(set VR128:$dst, (v4i32 immAllOnesV))]>;
@@ -2240,7 +2283,7 @@ def MOVSD2PDrr : SDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR64:$src),
(v2f64 (scalar_to_vector FR64:$src)))]>;
def MOVSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"movsd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
+ [(set VR128:$dst,
(v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
@@ -2399,9 +2442,9 @@ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
(MOVZPQILo2PQIrm addr:$src)>;
}
-//===----------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
// SSE3 Instructions
-//===----------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
// Move Instructions
def MOVSHDUPrr : S3SI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -2525,9 +2568,9 @@ let AddedComplexity = 20 in
def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
(MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>;
-//===----------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
// SSSE3 Instructions
-//===----------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
/// SS3I_unop_rm_int_8 - Simple SSSE3 unary operator whose type is v*i8.
multiclass SS3I_unop_rm_int_8<bits<8> opc, string OpcodeStr,
@@ -2801,12 +2844,13 @@ def : Pat<(X86pshufb VR128:$src, VR128:$mask),
def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
(PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>;
-//===----------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
// Non-Instruction Patterns
-//===----------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
-// extload f32 -> f64. This matches load+fextend because we have a hack in
-// the isel (PreprocessForFPConvert) that can introduce loads after dag combine.
+// extload f32 -> f64. This matches load+fextend because we have a hack in
+// the isel (PreprocessForFPConvert) that can introduce loads after dag
+// combine.
// Since these loads aren't folded into the fextend, we have to match it
// explicitly here.
let Predicates = [HasSSE2] in
@@ -2884,12 +2928,12 @@ def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))),
Requires<[HasSSE2]>;
// Special unary SHUFPDrri case.
def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))),
- (SHUFPDrri VR128:$src1, VR128:$src1,
+ (SHUFPDrri VR128:$src1, VR128:$src1,
(SHUFFLE_get_shuf_imm VR128:$src3))>,
Requires<[HasSSE2]>;
// Special unary SHUFPDrri case.
def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))),
- (SHUFPDrri VR128:$src1, VR128:$src1,
+ (SHUFPDrri VR128:$src1, VR128:$src1,
(SHUFFLE_get_shuf_imm VR128:$src3))>,
Requires<[HasSSE2]>;
// Unary v4f32 shuffle with PSHUF* in order to fold a load.
@@ -2899,16 +2943,16 @@ def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)),
// Special binary v4i32 shuffle cases with SHUFPS.
def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))),
- (SHUFPSrri VR128:$src1, VR128:$src2,
+ (SHUFPSrri VR128:$src1, VR128:$src2,
(SHUFFLE_get_shuf_imm VR128:$src3))>,
Requires<[HasSSE2]>;
def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
- (SHUFPSrmi VR128:$src1, addr:$src2,
+ (SHUFPSrmi VR128:$src1, addr:$src2,
(SHUFFLE_get_shuf_imm VR128:$src3))>,
Requires<[HasSSE2]>;
// Special binary v2i64 shuffle cases using SHUFPDrri.
def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)),
- (SHUFPDrri VR128:$src1, VR128:$src2,
+ (SHUFPDrri VR128:$src1, VR128:$src2,
(SHUFFLE_get_shuf_imm VR128:$src3))>,
Requires<[HasSSE2]>;
@@ -3030,7 +3074,7 @@ def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
// vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but
// fall back to this for SSE1)
def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
- (SHUFPSrri VR128:$src2, VR128:$src1,
+ (SHUFPSrri VR128:$src2, VR128:$src1,
(SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE1]>;
// Set lowest element and zero upper elements.
@@ -3097,7 +3141,7 @@ def : Pat<(store (v8i16 VR128:$src), addr:$dst),
(MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(store (v16i8 VR128:$src), addr:$dst),
(MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
-
+
//===----------------------------------------------------------------------===//
// SSE4.1 Instructions
//===----------------------------------------------------------------------===//
@@ -3108,7 +3152,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd,
Intrinsic V2F64Int> {
// Intrinsic operation, reg.
// Vector intrinsic operation, reg
- def PSr_Int : SS4AIi8<opcps, MRMSrcReg,
+ def PSr_Int : SS4AIi8<opcps, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -3149,41 +3193,41 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
Intrinsic F64Int> {
// Intrinsic operation, reg.
def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
- (outs VR128:$dst),
+ (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
+ [(set VR128:$dst,
(F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
OpSize;
// Intrinsic operation, mem.
- def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
- (outs VR128:$dst),
+ def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
+ (outs VR128:$dst),
(ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr,
"ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
+ [(set VR128:$dst,
(F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
OpSize;
// Intrinsic operation, reg.
def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
- (outs VR128:$dst),
+ (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
+ [(set VR128:$dst,
(F64Int VR128:$src1, VR128:$src2, imm:$src3))]>,
OpSize;
// Intrinsic operation, mem.
def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
- (outs VR128:$dst),
+ (outs VR128:$dst),
(ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
+ [(set VR128:$dst,
(F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
OpSize;
}
@@ -3302,9 +3346,9 @@ let Constraints = "$src1 = $dst" in {
Intrinsic IntId128, bit Commutable = 0> {
def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
+ [(set VR128:$dst,
(IntId128 VR128:$src1, VR128:$src2, imm:$src3))]>,
OpSize {
let isCommutable = Commutable;
@@ -3339,7 +3383,7 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr,
"\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
[(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>,
OpSize;
@@ -3471,13 +3515,13 @@ def : Pat<(int_x86_sse41_pmovzxbq
multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
(ins VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set GR32:$dst, (X86pextrb (v16i8 VR128:$src1), imm:$src2))]>,
OpSize;
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, OpSize;
// FIXME:
@@ -3492,7 +3536,7 @@ defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, OpSize;
// FIXME:
@@ -3507,13 +3551,13 @@ defm PEXTRW : SS41I_extract16<0x15, "pextrw">;
multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
(ins VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set GR32:$dst,
(extractelt (v4i32 VR128:$src1), imm:$src2))]>, OpSize;
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i32mem:$dst, VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(store (extractelt (v4i32 VR128:$src1), imm:$src2),
addr:$dst)]>, OpSize;
@@ -3527,14 +3571,14 @@ defm PEXTRD : SS41I_extract32<0x16, "pextrd">;
multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
(ins VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set GR32:$dst,
(extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
OpSize;
- def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
addr:$dst)]>, OpSize;
@@ -3553,15 +3597,15 @@ let Constraints = "$src1 = $dst" in {
multiclass SS41I_insert8<bits<8> opc, string OpcodeStr> {
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
+ [(set VR128:$dst,
(X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>, OpSize;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
+ [(set VR128:$dst,
(X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
imm:$src3))]>, OpSize;
}
@@ -3573,16 +3617,16 @@ let Constraints = "$src1 = $dst" in {
multiclass SS41I_insert32<bits<8> opc, string OpcodeStr> {
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
+ [(set VR128:$dst,
(v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
OpSize;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i32mem:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
+ [(set VR128:$dst,
(v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
imm:$src3)))]>, OpSize;
}
@@ -3590,37 +3634,57 @@ let Constraints = "$src1 = $dst" in {
defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
+// insertps has a few different modes. The first two below are optimized
+// inserts that won't zero arbitrary elements in the destination vector. The
+// next one matches the intrinsic and could zero arbitrary elements in the
+// target vector (a usage sketch follows these definitions).
let Constraints = "$src1 = $dst" in {
multiclass SS41I_insertf32<bits<8> opc, string OpcodeStr> {
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, FR32:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
+ (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (X86insrtps VR128:$src1, FR32:$src2, imm:$src3))]>, OpSize;
+ [(set VR128:$dst,
+ (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f32mem:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (X86insrtps VR128:$src1, (loadf32 addr:$src2),
+ [(set VR128:$dst,
+ (X86insrtps VR128:$src1,
+ (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
imm:$src3))]>, OpSize;
}
}
defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
+def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
+ (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>;
+
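For reference, a minimal C++ sketch of the immediate the insert forms above
consume, using the standard SSE4.1 intrinsics from <smmintrin.h> (the helper
name is illustrative, nothing here comes from this patch):

#include <smmintrin.h>  // SSE4.1

// insertps imm8 = (src_elem << 6) | (dst_elem << 4) | zero_mask.
// Copy element 0 of src into element 2 of dst, zeroing nothing.
__m128 insert_elem2(__m128 dst, __m128 src) {
  return _mm_insert_ps(dst, src, _MM_MK_INSERTPS_NDX(0, 2, 0));
}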
+// ptest instruction. We'll lower to this in X86ISelLowering, primarily from
+// the Intel intrinsic that corresponds to it (see the sketch after this
+// block).
let Defs = [EFLAGS] in {
def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "ptest \t{$src2, $src1|$src1, $src2}", []>, OpSize;
+ "ptest \t{$src2, $src1|$src1, $src2}",
+ [(X86ptest VR128:$src1, VR128:$src2),
+ (implicit EFLAGS)]>, OpSize;
def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
- "ptest \t{$src2, $src1|$src1, $src2}", []>, OpSize;
+ "ptest \t{$src2, $src1|$src1, $src2}",
+ [(X86ptest VR128:$src1, (load addr:$src2)),
+ (implicit EFLAGS)]>, OpSize;
}
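A hedged illustration of the EFLAGS contract being modeled here: the SSE4.1
intrinsics expose ptest's ZF result directly, for example via
_mm_testz_si128 from the standard <smmintrin.h> (not something this patch
adds):

#include <smmintrin.h>  // SSE4.1

// ptest sets ZF when (v & mask) is all zeros; the intrinsic returns that
// flag as an int.
bool masked_bits_all_clear(__m128i v, __m128i mask) {
  return _mm_testz_si128(v, mask) != 0;
}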
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movntdqa\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>;
+
+//===----------------------------------------------------------------------===//
+// SSE4.2 Instructions
+//===----------------------------------------------------------------------===//
+
/// SS42I_binop_rm_int - Simple SSE 4.2 binary operator
let Constraints = "$src1 = $dst" in {
multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
@@ -3647,3 +3711,171 @@ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
(PCMPGTQrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
(PCMPGTQrm VR128:$src1, addr:$src2)>;
+
+// crc intrinsic instructions
+// This set of instructions comes only in rr and rm forms; the only
+// difference between variants is the size of r and m (see the sketch after
+// this block).
+let Constraints = "$src1 = $dst" in {
+ def CRC32m8 : SS42FI<0xF0, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src1, i8mem:$src2),
+ "crc32 \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_8 GR32:$src1,
+ (load addr:$src2)))]>, OpSize;
+ def CRC32r8 : SS42FI<0xF0, MRMSrcReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR8:$src2),
+ "crc32 \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_8 GR32:$src1, GR8:$src2))]>,
+ OpSize;
+ def CRC32m16 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src1, i16mem:$src2),
+ "crc32 \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_16 GR32:$src1,
+ (load addr:$src2)))]>,
+ OpSize;
+ def CRC32r16 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR16:$src2),
+ "crc32 \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_16 GR32:$src1, GR16:$src2))]>,
+ OpSize;
+ def CRC32m32 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src1, i32mem:$src2),
+ "crc32 \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_32 GR32:$src1,
+ (load addr:$src2)))]>, OpSize;
+ def CRC32r32 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2),
+ "crc32 \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_32 GR32:$src1, GR32:$src2))]>,
+ OpSize;
+ def CRC64m64 : SS42FI<0xF0, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$src1, i64mem:$src2),
+ "crc32 \t{$src2, $src1|$src1, $src2}",
+ [(set GR64:$dst,
+ (int_x86_sse42_crc32_64 GR64:$src1,
+ (load addr:$src2)))]>,
+ OpSize, REX_W;
+ def CRC64r64 : SS42FI<0xF0, MRMSrcReg, (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2),
+ "crc32 \t{$src2, $src1|$src1, $src2}",
+ [(set GR64:$dst,
+ (int_x86_sse42_crc32_64 GR64:$src1, GR64:$src2))]>,
+ OpSize, REX_W;
+}
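A small usage sketch of the tied-operand ("$src1 = $dst") accumulator these
definitions model, using the standard SSE4.2 intrinsics from <nmmintrin.h>
(the function name here is illustrative, not from this patch):

#include <nmmintrin.h>  // SSE4.2
#include <stddef.h>
#include <stdint.h>

// crc32 reads and writes the same register: crc is both source and result.
uint32_t crc32c_bytes(const uint8_t *p, size_t n, uint32_t crc) {
  for (size_t i = 0; i < n; ++i)
    crc = _mm_crc32_u8(crc, p[i]);
  return crc;
}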
+
+// String/text processing instructions.
+let Defs = [EFLAGS], usesCustomDAGSchedInserter = 1 in {
+def PCMPISTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "#PCMPISTRM128rr PSEUDO!",
+ [(set VR128:$dst,
+ (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
+ imm:$src3))]>, OpSize;
+def PCMPISTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "#PCMPISTRM128rm PSEUDO!",
+ [(set VR128:$dst,
+ (int_x86_sse42_pcmpistrm128 VR128:$src1,
+ (load addr:$src2),
+ imm:$src3))]>, OpSize;
+}
+
+let Defs = [XMM0, EFLAGS] in {
+def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}",
+ []>, OpSize;
+def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}",
+ []>, OpSize;
+}
+
+let Defs = [EFLAGS], Uses = [EAX, EDX],
+ usesCustomDAGSchedInserter = 1 in {
+def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ "#PCMPESTRM128rr PSEUDO!",
+ [(set VR128:$dst,
+ (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX,
+ VR128:$src3,
+ EDX, imm:$src5))]>, OpSize;
+def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ "#PCMPESTRM128rm PSEUDO!",
+ [(set VR128:$dst,
+ (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX,
+ (load addr:$src3),
+ EDX, imm:$src5))]>, OpSize;
+}
+
+let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
+def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}",
+ []>, OpSize;
+def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}",
+ []>, OpSize;
+}
+
+let Defs = [ECX, EFLAGS] in {
+ multiclass SS42AI_pcmpistri<Intrinsic IntId128> {
+ def rr : SS42AI<0x63, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}",
+ [(set ECX,
+ (IntId128 VR128:$src1, VR128:$src2, imm:$src3)),
+ (implicit EFLAGS)]>,
+ OpSize;
+ def rm : SS42AI<0x63, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}",
+ [(set ECX,
+ (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)),
+ (implicit EFLAGS)]>,
+ OpSize;
+ }
+}
+
+defm PCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128>;
+defm PCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128>;
+defm PCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128>;
+defm PCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128>;
+defm PCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128>;
+defm PCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128>;
+
+let Defs = [ECX, EFLAGS] in {
+let Uses = [EAX, EDX] in {
+ multiclass SS42AI_pcmpestri<Intrinsic IntId128> {
+ def rr : SS42AI<0x61, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}",
+ [(set ECX,
+ (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)),
+ (implicit EFLAGS)]>,
+ OpSize;
+ def rm : SS42AI<0x61, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}",
+ [(set ECX,
+ (IntId128 VR128:$src1, EAX, (load addr:$src3),
+ EDX, imm:$src5)),
+ (implicit EFLAGS)]>,
+ OpSize;
+ }
+}
+}
+
+defm PCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128>;
+defm PCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128>;
+defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>;
+defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>;
+defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>;
+defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>;
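A hedged sketch of how the pcmpistri family is typically reached from C++,
via the SSE4.2 string intrinsics in <nmmintrin.h> (helper name illustrative;
assumes c != 0 so the one-byte implicit-length needle string is non-empty):

#include <nmmintrin.h>  // SSE4.2

// pcmpistri leaves its result index in ECX; the intrinsic returns it
// (16 when no byte of the 16-byte block matches).
int find_byte16(const char *block16, char c) {
  __m128i hay    = _mm_loadu_si128((const __m128i *)block16);
  __m128i needle = _mm_cvtsi32_si128((unsigned char)c);  // "c" + NUL
  return _mm_cmpistri(needle, hay, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY);
}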
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index f923106..62ca47f 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -15,15 +15,16 @@
#include "X86JITInfo.h"
#include "X86Relocations.h"
#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
#include "llvm/Function.h"
-#include "llvm/Config/alloca.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
#include <cstdlib>
#include <cstring>
using namespace llvm;
// Determine the platform we're running on
-#if defined (__x86_64__) || defined (_M_AMD64)
+#if defined (__x86_64__) || defined (_M_AMD64) || defined (_M_X64)
# define X86_64_JIT
#elif defined(__i386__) || defined(i386) || defined(_M_IX86)
# define X86_32_JIT
@@ -51,13 +52,6 @@ static TargetJITInfo::JITCompilerFn JITCompilerFunction;
#define GETASMPREFIX(X) GETASMPREFIX2(X)
#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
-// Check if building with -fPIC
-#if defined(__PIC__) && __PIC__ && defined(__linux__)
-#define ASMCALLSUFFIX "@PLT"
-#else
-#define ASMCALLSUFFIX
-#endif
-
// For ELF targets, use a .size and .type directive, to let tools
// know the extent of functions defined in assembler.
#if defined(__ELF__)
@@ -130,7 +124,7 @@ extern "C" {
// JIT callee
"movq %rbp, %rdi\n" // Pass prev frame and return address
"movq 8(%rbp), %rsi\n"
- "call " ASMPREFIX "X86CompilationCallback2" ASMCALLSUFFIX "\n"
+ "call " ASMPREFIX "X86CompilationCallback2\n"
// Restore all XMM arg registers
"movaps 112(%rsp), %xmm7\n"
"movaps 96(%rsp), %xmm6\n"
@@ -206,7 +200,7 @@ extern "C" {
"movl 4(%ebp), %eax\n" // Pass prev frame and return address
"movl %eax, 4(%esp)\n"
"movl %ebp, (%esp)\n"
- "call " ASMPREFIX "X86CompilationCallback2" ASMCALLSUFFIX "\n"
+ "call " ASMPREFIX "X86CompilationCallback2\n"
"movl %ebp, %esp\n" // Restore ESP
CFI(".cfi_def_cfa_register %esp\n")
"subl $12, %esp\n"
@@ -262,7 +256,7 @@ extern "C" {
"movl 4(%ebp), %eax\n" // Pass prev frame and return address
"movl %eax, 4(%esp)\n"
"movl %ebp, (%esp)\n"
- "call " ASMPREFIX "X86CompilationCallback2" ASMCALLSUFFIX "\n"
+ "call " ASMPREFIX "X86CompilationCallback2\n"
"addl $16, %esp\n"
"movaps 48(%esp), %xmm3\n"
CFI(".cfi_restore %xmm3\n")
@@ -321,8 +315,7 @@ extern "C" {
#else // Not an i386 host
void X86CompilationCallback() {
- assert(0 && "Cannot call X86CompilationCallback() on a non-x86 arch!\n");
- abort();
+ llvm_unreachable("Cannot call X86CompilationCallback() on a non-x86 arch!");
}
#endif
}
@@ -331,14 +324,21 @@ extern "C" {
/// function stub when we did not know the real target of a call. This function
/// must locate the start of the stub or call site and pass it into the JIT
/// compiler function.
-extern "C" void ATTRIBUTE_USED
+extern "C" {
+#if !(defined (X86_64_JIT) && defined(_MSC_VER))
+  // The following function is called only from this translation unit,
+  // unless we are on 64-bit Windows with MSVC, where there is no support
+  // for inline assembly.
+static
+#endif
+void ATTRIBUTE_USED
X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
intptr_t *RetAddrLoc = &StackPtr[1];
assert(*RetAddrLoc == RetAddr &&
"Could not find return address on the stack!");
// It's a stub if there is an interrupt marker after the call.
- bool isStub = ((unsigned char*)RetAddr)[0] == 0xCD;
+ bool isStub = ((unsigned char*)RetAddr)[0] == 0xCE;
// The call instruction should have pushed the return value onto the stack...
#if defined (X86_64_JIT)
@@ -348,10 +348,10 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
#endif
#if 0
- DOUT << "In callback! Addr=" << (void*)RetAddr
- << " ESP=" << (void*)StackPtr
- << ": Resolving call to function: "
- << TheVM->getFunctionReferencedName((void*)RetAddr) << "\n";
+ DEBUG(errs() << "In callback! Addr=" << (void*)RetAddr
+ << " ESP=" << (void*)StackPtr
+ << ": Resolving call to function: "
+ << TheVM->getFunctionReferencedName((void*)RetAddr) << "\n");
#endif
// Sanity check to make sure this really is a call instruction.
@@ -377,7 +377,7 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
// If this is a stub, rewrite the call into an unconditional branch
// instruction so that two return addresses are not pushed onto the stack
// when the requested function finally gets called. This also makes the
- // 0xCD byte (interrupt) dead, so the marker doesn't effect anything.
+  // 0xCE byte (interrupt) dead, so the marker doesn't affect anything.
#if defined (X86_64_JIT)
// If the target address is within 32-bit range of the stub, use a
// PC-relative branch instead of loading the actual address. (This is
@@ -403,31 +403,26 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
*RetAddrLoc -= 5;
#endif
}
+}
TargetJITInfo::LazyResolverFn
X86JITInfo::getLazyResolverFunction(JITCompilerFn F) {
JITCompilerFunction = F;
#if defined (X86_32_JIT) && !defined (_MSC_VER)
- unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
- union {
- unsigned u[3];
- char c[12];
- } text;
-
- if (!X86::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1)) {
- // FIXME: support for AMD family of processors.
- if (memcmp(text.c, "GenuineIntel", 12) == 0) {
- X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
- if ((EDX >> 25) & 0x1)
- return X86CompilationCallback_SSE;
- }
- }
+ if (Subtarget->hasSSE1())
+ return X86CompilationCallback_SSE;
#endif
return X86CompilationCallback;
}
+X86JITInfo::X86JITInfo(X86TargetMachine &tm) : TM(tm) {
+ Subtarget = &TM.getSubtarget<X86Subtarget>();
+ useGOT = 0;
+ TLSOffset = 0;
+}
+
void *X86JITInfo::emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
JITCodeEmitter &JCE) {
#if defined (X86_64_JIT)
@@ -485,7 +480,10 @@ void *X86JITInfo::emitFunctionStub(const Function* F, void *Fn,
JCE.emitWordLE((intptr_t)Fn-JCE.getCurrentPCValue()-4);
#endif
- JCE.emitByte(0xCD); // Interrupt - Just a marker identifying the stub!
+ // This used to use 0xCD, but that value is used by JITMemoryManager to
+ // initialize the buffer with garbage, which means it may follow a
+ // noreturn function call, confusing X86CompilationCallback2. PR 4929.
+ JCE.emitByte(0xCE); // Interrupt - Just a marker identifying the stub!
return JCE.finishGVStub(F);
}
@@ -495,9 +493,11 @@ void X86JITInfo::emitFunctionStubAtAddr(const Function* F, void *Fn, void *Stub,
// complains about casting a function pointer to a normal pointer.
JCE.startGVStub(F, Stub, 5);
JCE.emitByte(0xE9);
-#if defined (X86_64_JIT)
- assert(((((intptr_t)Fn-JCE.getCurrentPCValue()-5) << 32) >> 32) ==
- ((intptr_t)Fn-JCE.getCurrentPCValue()-5)
+#if defined (X86_64_JIT) && !defined (NDEBUG)
+ // Yes, we need both of these casts, or some broken versions of GCC (4.2.4)
+ // get the signed-ness of the expression wrong. Go figure.
+ intptr_t Displacement = (intptr_t)Fn - (intptr_t)JCE.getCurrentPCValue() - 5;
+ assert(((Displacement << 32) >> 32) == Displacement
&& "PIC displacement does not fit in displacement field!");
#endif
JCE.emitWordLE((intptr_t)Fn-JCE.getCurrentPCValue()-4);
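The shift pair in that assert is the usual "fits in a signed 32-bit
displacement" idiom; an equivalent cast-based spelling, assuming a 64-bit
intptr_t, would be:

#include <stdint.h>

static bool fitsInRel32(intptr_t Displacement) {
  // Sign-extending the low 32 bits must reproduce the value exactly.
  return (intptr_t)(int32_t)Displacement == Displacement;
}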
@@ -538,6 +538,7 @@ void X86JITInfo::relocate(void *Function, MachineRelocation *MR,
break;
}
case X86::reloc_absolute_word:
+ case X86::reloc_absolute_word_sext:
// Absolute relocation, just add the relocated value to the value already
// in memory.
*((unsigned*)RelocPos) += (unsigned)ResultPtr;
@@ -554,7 +555,7 @@ char* X86JITInfo::allocateThreadLocalMemory(size_t size) {
TLSOffset -= size;
return TLSOffset;
#else
- assert(0 && "Cannot allocate thread local storage on this arch!\n");
+ llvm_unreachable("Cannot allocate thread local storage on this arch!");
return 0;
#endif
}
diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h
index 6a4e214..c381433 100644
--- a/lib/Target/X86/X86JITInfo.h
+++ b/lib/Target/X86/X86JITInfo.h
@@ -20,16 +20,15 @@
namespace llvm {
class X86TargetMachine;
+ class X86Subtarget;
class X86JITInfo : public TargetJITInfo {
X86TargetMachine &TM;
+ const X86Subtarget *Subtarget;
uintptr_t PICBase;
char* TLSOffset;
public:
- explicit X86JITInfo(X86TargetMachine &tm) : TM(tm) {
- useGOT = 0;
- TLSOffset = 0;
- }
+ explicit X86JITInfo(X86TargetMachine &tm);
/// replaceMachineCodeForFunction - Make it so that calling the function
/// whose machine code is at OLD turns into a call to NEW, perhaps by
diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp
new file mode 100644
index 0000000..9d7e66d
--- /dev/null
+++ b/lib/Target/X86/X86MCAsmInfo.cpp
@@ -0,0 +1,123 @@
+//===-- X86MCAsmInfo.cpp - X86 asm properties -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the X86MCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86MCAsmInfo.h"
+#include "X86TargetMachine.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+enum AsmWriterFlavorTy {
+ // Note: This numbering has to match the GCC assembler dialects for inline
+ // asm alternatives to work right.
+ ATT = 0, Intel = 1
+};
+
+static cl::opt<AsmWriterFlavorTy>
+AsmWriterFlavor("x86-asm-syntax", cl::init(ATT),
+ cl::desc("Choose style of code to emit from X86 backend:"),
+ cl::values(clEnumValN(ATT, "att", "Emit AT&T-style assembly"),
+ clEnumValN(Intel, "intel", "Emit Intel-style assembly"),
+ clEnumValEnd));
+
+
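The cl::opt above is the knob users actually flip; assuming the standard llc
driver (which forwards backend cl::opts), usage would look like:

  llc -x86-asm-syntax=intel foo.ll -o foo.s    # Intel-style output
  llc -x86-asm-syntax=att foo.ll -o foo.s      # AT&T-style output (default)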
+static const char *const x86_asm_table[] = {
+ "{si}", "S",
+ "{di}", "D",
+ "{ax}", "a",
+ "{cx}", "c",
+ "{memory}", "memory",
+ "{flags}", "",
+ "{dirflag}", "",
+ "{fpsr}", "",
+ "{cc}", "cc",
+ 0,0};
+
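What the table above translates, sketched with GNU-style inline asm (this
snippet is illustrative, not from the patch): register names written as
"{di}", "{si}", "{cx}" in constraints and clobbers map to the single-letter
x86 constraints "D", "S", "c".

#include <stddef.h>

static void copy_bytes(char *dst, const char *src, size_t n) {
  asm volatile("rep movsb"
               : "+D"(dst), "+S"(src), "+c"(n)   // {di}, {si}, {cx}
               :
               : "memory");
}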
+X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) {
+ AsmTransCBE = x86_asm_table;
+ AssemblerDialect = AsmWriterFlavor;
+
+ bool is64Bit = Triple.getArch() == Triple::x86_64;
+
+ TextAlignFillValue = 0x90;
+
+ if (!is64Bit)
+ Data64bitsDirective = 0; // we can't emit a 64-bit unit
+
+ // Leopard and above support aligned common symbols.
+ COMMDirectiveTakesAlignment = Triple.getDarwinMajorNumber() >= 9;
+
+ CommentString = "##";
+ PCSymbol = ".";
+
+ SupportsDebugInformation = true;
+ DwarfUsesInlineInfoSection = true;
+
+ // Exceptions handling
+ ExceptionsType = ExceptionHandling::Dwarf;
+ AbsoluteEHSectionOffsets = false;
+}
+
+X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &Triple) {
+ AsmTransCBE = x86_asm_table;
+ AssemblerDialect = AsmWriterFlavor;
+
+ PrivateGlobalPrefix = ".L";
+ WeakRefDirective = "\t.weak\t";
+ SetDirective = "\t.set\t";
+ PCSymbol = ".";
+
+ // Set up DWARF directives
+ HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
+
+ // Debug Information
+ AbsoluteDebugSectionOffsets = true;
+ SupportsDebugInformation = true;
+
+ // Exceptions handling
+ ExceptionsType = ExceptionHandling::Dwarf;
+ AbsoluteEHSectionOffsets = false;
+
+ // On Linux we must declare when we can use a non-executable stack.
+ if (Triple.getOS() == Triple::Linux)
+ NonexecutableStackDirective = "\t.section\t.note.GNU-stack,\"\",@progbits";
+}
+
+X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) {
+ AsmTransCBE = x86_asm_table;
+ AssemblerDialect = AsmWriterFlavor;
+}
+
+
+X86WinMCAsmInfo::X86WinMCAsmInfo(const Triple &Triple) {
+ AsmTransCBE = x86_asm_table;
+ AssemblerDialect = AsmWriterFlavor;
+
+ GlobalPrefix = "_";
+ CommentString = ";";
+
+ PrivateGlobalPrefix = "$";
+ AlignDirective = "\tALIGN\t";
+ ZeroDirective = "\tdb\t";
+ ZeroDirectiveSuffix = " dup(0)";
+ AsciiDirective = "\tdb\t";
+ AscizDirective = 0;
+ Data8bitsDirective = "\tdb\t";
+ Data16bitsDirective = "\tdw\t";
+ Data32bitsDirective = "\tdd\t";
+ Data64bitsDirective = "\tdq\t";
+ HasDotTypeDotSizeDirective = false;
+ HasSingleParameterDotFile = false;
+
+ AlignmentIsInBytes = true;
+}
diff --git a/lib/Target/X86/X86MCAsmInfo.h b/lib/Target/X86/X86MCAsmInfo.h
new file mode 100644
index 0000000..18e2bdb
--- /dev/null
+++ b/lib/Target/X86/X86MCAsmInfo.h
@@ -0,0 +1,42 @@
+//=====-- X86MCAsmInfo.h - X86 asm properties -----------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the X86MCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86TARGETASMINFO_H
+#define X86TARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmInfoCOFF.h"
+#include "llvm/MC/MCAsmInfoDarwin.h"
+
+namespace llvm {
+ class Triple;
+
+ struct X86MCAsmInfoDarwin : public MCAsmInfoDarwin {
+ explicit X86MCAsmInfoDarwin(const Triple &Triple);
+ };
+
+ struct X86ELFMCAsmInfo : public MCAsmInfo {
+ explicit X86ELFMCAsmInfo(const Triple &Triple);
+ };
+
+ struct X86MCAsmInfoCOFF : public MCAsmInfoCOFF {
+ explicit X86MCAsmInfoCOFF(const Triple &Triple);
+ };
+
+ struct X86WinMCAsmInfo : public MCAsmInfo {
+ explicit X86WinMCAsmInfo(const Triple &Triple);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index a2f319f..f03723a 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -30,14 +30,16 @@
#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
@@ -54,6 +56,7 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
Is64Bit = Subtarget->is64Bit();
IsWin64 = Subtarget->isTargetWin64();
StackAlign = TM.getFrameInfo()->getStackAlignment();
+
if (Is64Bit) {
SlotSize = 8;
StackPtr = X86::RSP;
@@ -65,12 +68,12 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
}
}
-// getDwarfRegNum - This function maps LLVM register identifiers to the
-// Dwarf specific numbering, used in debug info and exception tables.
-
+/// getDwarfRegNum - This function maps LLVM register identifiers to the DWARF
+/// specific numbering, used in debug info and exception tables.
int X86RegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const {
const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
unsigned Flavour = DWARFFlavour::X86_64;
+
if (!Subtarget->is64Bit()) {
if (Subtarget->isTargetDarwin()) {
if (isEH)
@@ -88,9 +91,8 @@ int X86RegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const {
return X86GenRegisterInfo::getDwarfRegNumFull(RegNo, Flavour);
}
-// getX86RegNum - This function maps LLVM register identifiers to their X86
-// specific numbering, which is used in various places encoding instructions.
-//
+/// getX86RegNum - This function maps LLVM register identifiers to their X86
+/// specific numbering, which is used in various places encoding instructions.
unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
switch(RegNo) {
case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX;
@@ -146,17 +148,131 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
default:
assert(isVirtualRegister(RegNo) && "Unknown physical register!");
- assert(0 && "Register allocator hasn't allocated reg correctly yet!");
+ llvm_unreachable("Register allocator hasn't allocated reg correctly yet!");
return 0;
}
}
-const TargetRegisterClass *X86RegisterInfo::getPointerRegClass() const {
- const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
- if (Subtarget->is64Bit())
- return &X86::GR64RegClass;
- else
+const TargetRegisterClass *
+X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B,
+ unsigned SubIdx) const {
+ switch (SubIdx) {
+ default: return 0;
+ case 1:
+ // 8-bit
+ if (B == &X86::GR8RegClass) {
+ if (A->getSize() == 2 || A->getSize() == 4 || A->getSize() == 8)
+ return A;
+ } else if (B == &X86::GR8_ABCD_LRegClass || B == &X86::GR8_ABCD_HRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
+ A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass ||
+ A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_ABCDRegClass;
+ else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
+ A == &X86::GR32_NOREXRegClass ||
+ A == &X86::GR32_NOSPRegClass)
+ return &X86::GR32_ABCDRegClass;
+ else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass ||
+ A == &X86::GR16_NOREXRegClass)
+ return &X86::GR16_ABCDRegClass;
+ } else if (B == &X86::GR8_NOREXRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_NOREXRegClass;
+ else if (A == &X86::GR64_ABCDRegClass)
+ return &X86::GR64_ABCDRegClass;
+ else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass ||
+ A == &X86::GR32_NOSPRegClass)
+ return &X86::GR32_NOREXRegClass;
+ else if (A == &X86::GR32_ABCDRegClass)
+ return &X86::GR32_ABCDRegClass;
+ else if (A == &X86::GR16RegClass || A == &X86::GR16_NOREXRegClass)
+ return &X86::GR16_NOREXRegClass;
+ else if (A == &X86::GR16_ABCDRegClass)
+ return &X86::GR16_ABCDRegClass;
+ }
+ break;
+ case 2:
+ // 8-bit hi
+ if (B == &X86::GR8_ABCD_HRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
+ A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass ||
+ A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_ABCDRegClass;
+ else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
+ A == &X86::GR32_NOREXRegClass || A == &X86::GR32_NOSPRegClass)
+ return &X86::GR32_ABCDRegClass;
+ else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass ||
+ A == &X86::GR16_NOREXRegClass)
+ return &X86::GR16_ABCDRegClass;
+ }
+ break;
+ case 3:
+ // 16-bit
+ if (B == &X86::GR16RegClass) {
+ if (A->getSize() == 4 || A->getSize() == 8)
+ return A;
+ } else if (B == &X86::GR16_ABCDRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
+ A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass ||
+ A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_ABCDRegClass;
+ else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
+ A == &X86::GR32_NOREXRegClass || A == &X86::GR32_NOSPRegClass)
+ return &X86::GR32_ABCDRegClass;
+ } else if (B == &X86::GR16_NOREXRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_NOREXRegClass;
+ else if (A == &X86::GR64_ABCDRegClass)
+ return &X86::GR64_ABCDRegClass;
+ else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass ||
+ A == &X86::GR32_NOSPRegClass)
+ return &X86::GR32_NOREXRegClass;
+ else if (A == &X86::GR32_ABCDRegClass)
+      return &X86::GR32_ABCDRegClass;
+ }
+ break;
+ case 4:
+ // 32-bit
+ if (B == &X86::GR32RegClass || B == &X86::GR32_NOSPRegClass) {
+ if (A->getSize() == 8)
+ return A;
+ } else if (B == &X86::GR32_ABCDRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
+ A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass ||
+ A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_ABCDRegClass;
+ } else if (B == &X86::GR32_NOREXRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_NOREXRegClass;
+ else if (A == &X86::GR64_ABCDRegClass)
+ return &X86::GR64_ABCDRegClass;
+ }
+ break;
+ }
+ return 0;
+}
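A worked example of the contract, under the assumption that sub-index 4 is
the 32-bit sub-register index (as the case labels above suggest): asking
which subclass of GR64 has all of its 32-bit sub-registers inside GR32_ABCD
picks out exactly {RAX, RBX, RCX, RDX}.

// Hypothetical call site; TRI is a const X86RegisterInfo *.
const TargetRegisterClass *RC =
    TRI->getMatchingSuperRegClass(&X86::GR64RegClass,
                                  &X86::GR32_ABCDRegClass, /*SubIdx=*/4);
// RC == &X86::GR64_ABCDRegClass: the low 32 bits are {EAX,EBX,ECX,EDX}.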
+
+const TargetRegisterClass *
+X86RegisterInfo::getPointerRegClass(unsigned Kind) const {
+ switch (Kind) {
+ default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
+ case 0: // Normal GPRs.
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ return &X86::GR64RegClass;
return &X86::GR32RegClass;
+ case 1: // Normal GPRs except the stack pointer (for encoding reasons).
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ return &X86::GR64_NOSPRegClass;
+ return &X86::GR32_NOSPRegClass;
+ }
}
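// Illustrative use (assumed call site, not from this patch): Kind selects
// how restrictive the pointer class is.
//
//   const TargetRegisterClass *PtrRC = TRI->getPointerRegClass(/*Kind=*/1);
//   // 64-bit subtarget: &X86::GR64_NOSPRegClass
//   // 32-bit subtarget: &X86::GR32_NOSPRegClass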
const TargetRegisterClass *
@@ -276,6 +392,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(X86::ESP);
Reserved.set(X86::SP);
Reserved.set(X86::SPL);
+
// Set the frame-pointer register and its aliases as reserved if needed.
if (hasFP(MF)) {
Reserved.set(X86::RBP);
@@ -283,10 +400,10 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(X86::BP);
Reserved.set(X86::BPL);
}
- // Mark the x87 stack registers as reserved, since they don't
- // behave normally with respect to liveness. We don't fully
- // model the effects of x87 stack pushes and pops after
- // stackification.
+
+ // Mark the x87 stack registers as reserved, since they don't behave normally
+ // with respect to liveness. We don't fully model the effects of x87 stack
+ // pushes and pops after stackification.
Reserved.set(X86::ST0);
Reserved.set(X86::ST1);
Reserved.set(X86::ST2);
@@ -304,10 +421,12 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) {
unsigned MaxAlign = 0;
+
for (int i = FFI->getObjectIndexBegin(),
e = FFI->getObjectIndexEnd(); i != e; ++i) {
if (FFI->isDeadObjectIndex(i))
continue;
+
unsigned Align = FFI->getObjectAlignment(i);
MaxAlign = std::max(MaxAlign, Align);
}
@@ -315,10 +434,9 @@ static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) {
return MaxAlign;
}
-// hasFP - Return true if the specified function should have a dedicated frame
-// pointer register. This is true if the function has variable sized allocas or
-// if frame pointer elimination is disabled.
-//
+/// hasFP - Return true if the specified function should have a dedicated frame
+/// pointer register. This is true if the function has variable sized allocas
+/// or if frame pointer elimination is disabled.
bool X86RegisterInfo::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
@@ -335,7 +453,7 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
// FIXME: Currently we don't support stack realignment for functions with
- // variable-sized allocas
+ // variable-sized allocas
return (RealignStack &&
(MFI->getMaxAlignment() > StackAlign &&
!MFI->hasVarSizedObjects()));
@@ -345,34 +463,45 @@ bool X86RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
return !MF.getFrameInfo()->hasVarSizedObjects();
}
+bool X86RegisterInfo::hasReservedSpillSlot(MachineFunction &MF, unsigned Reg,
+ int &FrameIdx) const {
+ if (Reg == FramePtr && hasFP(MF)) {
+ FrameIdx = MF.getFrameInfo()->getObjectIndexBegin();
+ return true;
+ }
+ return false;
+}
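// Sketch of the intended caller (assumed, based on the hook's contract): the
// PrologEpilogInserter asks before allocating a spill slot for each
// callee-saved register, so the frame pointer reuses the fixed slot created
// in processFunctionBeforeCalleeSavedScan instead of getting a second one.
//
//   int FrameIdx;
//   if (!TRI->hasReservedSpillSlot(MF, Reg, FrameIdx))
//     FrameIdx = MFI->CreateStackObject(RC->getSize(), RC->getAlignment());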
+
int
X86RegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const {
- int Offset = MF.getFrameInfo()->getObjectOffset(FI) + SlotSize;
- uint64_t StackSize = MF.getFrameInfo()->getStackSize();
+ const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ int Offset = MFI->getObjectOffset(FI) - TFI.getOffsetOfLocalArea();
+ uint64_t StackSize = MFI->getStackSize();
if (needsStackRealignment(MF)) {
- if (FI < 0)
- // Skip the saved EBP
+ if (FI < 0) {
+ // Skip the saved EBP.
Offset += SlotSize;
- else {
- unsigned Align = MF.getFrameInfo()->getObjectAlignment(FI);
+ } else {
+ unsigned Align = MFI->getObjectAlignment(FI);
assert( (-(Offset + StackSize)) % Align == 0);
Align = 0;
return Offset + StackSize;
}
-
// FIXME: Support tail calls
} else {
if (!hasFP(MF))
return Offset + StackSize;
- // Skip the saved EBP
+ // Skip the saved EBP.
Offset += SlotSize;
// Skip the RETADDR move area
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
- if (TailCallReturnAddrDelta < 0) Offset -= TailCallReturnAddrDelta;
+ if (TailCallReturnAddrDelta < 0)
+ Offset -= TailCallReturnAddrDelta;
}
return Offset;
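// Worked example (hypothetical numbers; 32-bit, frame pointer present, no
// realignment or tail-call delta): SlotSize == 4,
// TFI.getOffsetOfLocalArea() == -4, and a local at object offset -12:
//
//   Offset = -12 - (-4);  // -8
//   Offset += SlotSize;   // skip the saved EBP -> -4
//
// so the local is addressed as -4(%ebp).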
@@ -392,24 +521,29 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
// alignment boundary.
- Amount = (Amount+StackAlign-1)/StackAlign*StackAlign;
+ Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
MachineInstr *New = 0;
if (Old->getOpcode() == getCallFrameSetupOpcode()) {
New = BuildMI(MF, Old->getDebugLoc(),
TII.get(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri),
- StackPtr).addReg(StackPtr).addImm(Amount);
+ StackPtr)
+ .addReg(StackPtr)
+ .addImm(Amount);
} else {
assert(Old->getOpcode() == getCallFrameDestroyOpcode());
- // factor out the amount the callee already popped.
+
+ // Factor out the amount the callee already popped.
uint64_t CalleeAmt = Old->getOperand(1).getImm();
Amount -= CalleeAmt;
- if (Amount) {
+
+ if (Amount) {
unsigned Opc = (Amount < 128) ?
(Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) :
(Is64Bit ? X86::ADD64ri32 : X86::ADD32ri);
New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), StackPtr)
- .addReg(StackPtr).addImm(Amount);
+ .addReg(StackPtr)
+ .addImm(Amount);
}
}
@@ -417,7 +551,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// The EFLAGS implicit def is dead.
New->getOperand(3).setIsDead();
- // Replace the pseudo instruction with a new instruction...
+ // Replace the pseudo instruction with a new instruction.
MBB.insert(I, New);
}
}
@@ -432,10 +566,12 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineInstr *Old = I;
MachineInstr *New =
BuildMI(MF, Old->getDebugLoc(), TII.get(Opc),
- StackPtr).addReg(StackPtr).addImm(CalleeAmt);
+ StackPtr)
+ .addReg(StackPtr)
+ .addImm(CalleeAmt);
+
// The EFLAGS implicit def is dead.
New->getOperand(3).setIsDead();
-
MBB.insert(I, New);
}
}
@@ -443,21 +579,24 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MBB.erase(I);
}
-void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const{
+unsigned
+X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, int *Value,
+ RegScavenger *RS) const{
assert(SPAdj == 0 && "Unexpected");
unsigned i = 0;
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
+
while (!MI.getOperand(i).isFI()) {
++i;
assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
}
int FrameIndex = MI.getOperand(i).getIndex();
-
unsigned BasePtr;
+
if (needsStackRealignment(MF))
BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr);
else
@@ -471,34 +610,33 @@ void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (MI.getOperand(i+3).isImm()) {
// Offset is a 32-bit integer.
int Offset = getFrameIndexOffset(MF, FrameIndex) +
- (int)(MI.getOperand(i+3).getImm());
+ (int)(MI.getOperand(i + 3).getImm());
- MI.getOperand(i+3).ChangeToImmediate(Offset);
+ MI.getOperand(i + 3).ChangeToImmediate(Offset);
} else {
// Offset is symbolic. This is extremely rare.
uint64_t Offset = getFrameIndexOffset(MF, FrameIndex) +
(uint64_t)MI.getOperand(i+3).getOffset();
MI.getOperand(i+3).setOffset(Offset);
}
+ return 0;
}
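// Before/after sketch (hypothetical instruction): the frame-index operand is
// rewritten to the chosen base register, and the displacement at operand i+3
// absorbs getFrameIndexOffset(). E.g. with an FP-relative offset of -16:
//
//   MOV32rm %eax, <fi#2>, 1, %reg0, 8    ; before
//   MOV32rm %eax, %ebp, 1, %reg0, -8     ; after (-16 + 8)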
void
X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
- MachineFrameInfo *FFI = MF.getFrameInfo();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
// Calculate and set max stack object alignment early, so we can decide
// whether we will need stack realignment (and thus FP).
- unsigned MaxAlign = std::max(FFI->getMaxAlignment(),
- calculateMaxStackAlignment(FFI));
+ unsigned MaxAlign = std::max(MFI->getMaxAlignment(),
+ calculateMaxStackAlignment(MFI));
- FFI->setMaxAlignment(MaxAlign);
-}
+ MFI->setMaxAlignment(MaxAlign);
-void
-X86RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) const{
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+
if (TailCallReturnAddrDelta < 0) {
// create RETURNADDR area
// arg
@@ -509,18 +647,21 @@ X86RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) const{
// ...
// }
// [EBP]
- MF.getFrameInfo()->
- CreateFixedObject(-TailCallReturnAddrDelta,
- (-1*SlotSize)+TailCallReturnAddrDelta);
+ MFI->CreateFixedObject(-TailCallReturnAddrDelta,
+ (-1U*SlotSize)+TailCallReturnAddrDelta);
}
+
if (hasFP(MF)) {
assert((TailCallReturnAddrDelta <= 0) &&
"The Delta should always be zero or negative");
+ const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
+
// Create a frame entry for the EBP register that must be saved.
- int FrameIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize,
- (int)SlotSize * -2+
- TailCallReturnAddrDelta);
- assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
+ int FrameIdx = MFI->CreateFixedObject(SlotSize,
+ -(int)SlotSize +
+ TFI.getOffsetOfLocalArea() +
+ TailCallReturnAddrDelta);
+ assert(FrameIdx == MFI->getObjectIndexBegin() &&
"Slot for EBP register must be last in order to be found!");
FrameIdx = 0;
}
@@ -549,14 +690,14 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
MachineInstr *MI =
BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr).addImm(ThisVal);
- // The EFLAGS implicit def is dead.
- MI->getOperand(3).setIsDead();
+ .addReg(StackPtr)
+ .addImm(ThisVal);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
Offset -= ThisVal;
}
}
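// Chunking example (hypothetical values): a 64-bit adjustment that does not
// fit a signed 32-bit immediate is split by the loop above. For
// Offset == 0x140000000 and Chunk == 0x7fffffff this emits:
//
//   subq $0x7fffffff, %rsp
//   subq $0x7fffffff, %rsp
//   subq $0x40000002, %rsp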
-// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator.
+/// mergeSPUpdatesUp - Merge two stack-manipulating instructions, working
+/// upward from the iterator.
static
void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
unsigned StackPtr, uint64_t *NumBytes = NULL) {
@@ -579,11 +720,12 @@ void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
}
}
-// mergeSPUpdatesUp - Merge two stack-manipulating instructions lower iterator.
+/// mergeSPUpdatesDown - Merge two stack-manipulating instructions, working
+/// downward from the iterator.
static
void mergeSPUpdatesDown(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
unsigned StackPtr, uint64_t *NumBytes = NULL) {
+ // FIXME: THIS ISN'T RUN!!!
return;
if (MBBI == MBB.end()) return;
@@ -610,23 +752,22 @@ void mergeSPUpdatesDown(MachineBasicBlock &MBB,
}
/// mergeSPUpdates - Checks the instruction before/after the passed
-/// instruction. If it is an ADD/SUB instruction it is deleted
-/// argument and the stack adjustment is returned as a positive value for ADD
-/// and a negative for SUB.
+/// instruction. If it is an ADD/SUB instruction it is deleted and the stack
+/// adjustment is returned as a positive value for ADD and a negative one for
+/// SUB.
static int mergeSPUpdates(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
unsigned StackPtr,
bool doMergeWithPrevious) {
-
if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
(!doMergeWithPrevious && MBBI == MBB.end()))
return 0;
- int Offset = 0;
-
MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : next(MBBI);
unsigned Opc = PI->getOpcode();
+ int Offset = 0;
+
if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
PI->getOperand(0).getReg() == StackPtr){
@@ -644,122 +785,116 @@ static int mergeSPUpdates(MachineBasicBlock &MBB,
return Offset;
}
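// Example (assumed instruction stream): given
//
//   addl $12, %esp   ; earlier SP adjustment
//   <MBBI>
//
// mergeSPUpdates(MBB, MBBI, StackPtr, /*doMergeWithPrevious=*/true) erases
// the ADD and returns +12, which the caller folds into its own SP update
// instead of emitting two adjacent adjustments.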
-void X86RegisterInfo::emitFrameMoves(MachineFunction &MF,
- unsigned FrameLabelId,
- unsigned ReadyLabelId) const {
+void X86RegisterInfo::emitCalleeSavedFrameMoves(MachineFunction &MF,
+ unsigned LabelId,
+ unsigned FramePtr) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
- if (!MMI)
- return;
+ if (!MMI) return;
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ if (CSI.empty()) return;
- uint64_t StackSize = MFI->getStackSize();
std::vector<MachineMove> &Moves = MMI->getFrameMoves();
const TargetData *TD = MF.getTarget().getTargetData();
+ bool HasFP = hasFP(MF);
- // Calculate amount of bytes used for return address storing
+ // Calculate the number of bytes used for storing the return address.
int stackGrowth =
(MF.getTarget().getFrameInfo()->getStackGrowthDirection() ==
TargetFrameInfo::StackGrowsUp ?
TD->getPointerSize() : -TD->getPointerSize());
- MachineLocation FPDst(hasFP(MF) ? FramePtr : StackPtr);
- MachineLocation FPSrc(MachineLocation::VirtualFP);
- Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
-
- if (StackSize) {
- // Show update of SP.
- if (hasFP(MF)) {
- // Adjust SP
- MachineLocation SPDst(MachineLocation::VirtualFP);
- MachineLocation SPSrc(MachineLocation::VirtualFP, 2*stackGrowth);
- Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
- } else {
- MachineLocation SPDst(MachineLocation::VirtualFP);
- MachineLocation SPSrc(MachineLocation::VirtualFP,
- -StackSize+stackGrowth);
- Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
- }
- } else {
- // FIXME: Verify & implement for FP
- MachineLocation SPDst(StackPtr);
- MachineLocation SPSrc(StackPtr, stackGrowth);
- Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
- }
-
- // Add callee saved registers to move list.
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
-
// FIXME: This is a dirty hack. The code itself is a mess right now.
// It should be rewritten from scratch and generalized sometime.
- // Determine maximum offset (minumum due to stack growth)
+ // Determine maximum offset (minimum due to stack growth).
int64_t MaxOffset = 0;
- for (unsigned I = 0, E = CSI.size(); I!=E; ++I)
+ for (std::vector<CalleeSavedInfo>::const_iterator
+ I = CSI.begin(), E = CSI.end(); I != E; ++I)
MaxOffset = std::min(MaxOffset,
- MFI->getObjectOffset(CSI[I].getFrameIdx()));
-
- // Calculate offsets
- int64_t saveAreaOffset = (hasFP(MF) ? 3 : 2)*stackGrowth;
- for (unsigned I = 0, E = CSI.size(); I!=E; ++I) {
- int64_t Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
- unsigned Reg = CSI[I].getReg();
- Offset = (MaxOffset-Offset+saveAreaOffset);
+ MFI->getObjectOffset(I->getFrameIdx()));
+
+ // Calculate offsets.
+ int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth;
+ for (std::vector<CalleeSavedInfo>::const_iterator
+ I = CSI.begin(), E = CSI.end(); I != E; ++I) {
+ int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
+ unsigned Reg = I->getReg();
+ Offset = MaxOffset - Offset + saveAreaOffset;
+
+ // Don't output a new machine move if we're re-saving the frame
+ // pointer. This happens when the PrologEpilogInserter has inserted an extra
+ // "PUSH" of the frame pointer -- the "emitPrologue" method automatically
+ // generates one when frame pointers are used. If we generate a "machine
+ // move" for this extra "PUSH", the linker will lose track of the fact that
+ // the frame pointer should have the value of the first "PUSH" when it's
+ // trying to unwind.
+ //
+ // FIXME: This looks inelegant. It's possibly correct, but it's covering up
+ // another bug. I.e., one where we generate a prolog like this:
+ //
+ // pushl %ebp
+ // movl %esp, %ebp
+ // pushl %ebp
+ // pushl %esi
+ // ...
+ //
+ // The immediate re-push of EBP is unnecessary. At the least, it's an
+ // optimization bug. EBP can be used as a scratch register in certain
+ // cases, but probably not when we have a frame pointer.
+ if (HasFP && FramePtr == Reg)
+ continue;
+
MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
MachineLocation CSSrc(Reg);
- Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc));
- }
-
- if (hasFP(MF)) {
- // Save FP
- MachineLocation FPDst(MachineLocation::VirtualFP, 2*stackGrowth);
- MachineLocation FPSrc(FramePtr);
- Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
+ Moves.push_back(MachineMove(LabelId, CSDst, CSSrc));
}
}
-
+/// emitPrologue - Push callee-saved registers onto the stack, which
+/// automatically adjusts the stack pointer. Adjust the stack pointer to
+/// allocate space for local variables. Also emit the labels used by the
+/// exception handler to generate the exception handling frames.
void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
+ MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
- const Function* Fn = MF.getFunction();
- const X86Subtarget* Subtarget = &MF.getTarget().getSubtarget<X86Subtarget>();
+ const Function *Fn = MF.getFunction();
+ const X86Subtarget *Subtarget = &MF.getTarget().getSubtarget<X86Subtarget>();
MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- MachineBasicBlock::iterator MBBI = MBB.begin();
bool needsFrameMoves = (MMI && MMI->hasDebugInfo()) ||
- !Fn->doesNotThrow() ||
- UnwindTablesMandatory;
+ !Fn->doesNotThrow() || UnwindTablesMandatory;
+ uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
+ uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
+ bool HasFP = hasFP(MF);
DebugLoc DL;
- // Prepare for frame info.
- unsigned FrameLabelId = 0;
-
- // Get the number of bytes to allocate from the FrameInfo.
- uint64_t StackSize = MFI->getStackSize();
-
- // Get desired stack alignment
- uint64_t MaxAlign = MFI->getMaxAlignment();
-
// Add RETADDR move area to callee saved frame size.
int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
if (TailCallReturnAddrDelta < 0)
X86FI->setCalleeSavedFrameSize(
- X86FI->getCalleeSavedFrameSize() +(-TailCallReturnAddrDelta));
+ X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
// If this is x86-64 and the Red Zone is not disabled, if we are a leaf
// function, and use up to 128 bytes of stack space, don't have a frame
// pointer, calls, or dynamic alloca then we do not need to adjust the
// stack pointer (we fit in the Red Zone).
- bool DisableRedZone = Fn->hasFnAttr(Attribute::NoRedZone);
- if (Is64Bit && !DisableRedZone &&
+ if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
!needsStackRealignment(MF) &&
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->hasCalls() && // No calls.
!Subtarget->isTargetWin64()) { // Win64 has no Red Zone
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
- if (hasFP(MF)) MinSize += SlotSize;
- StackSize = std::max(MinSize,
- StackSize > 128 ? StackSize - 128 : 0);
+ if (HasFP) MinSize += SlotSize;
+ StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
+ MFI->setStackSize(StackSize);
+ } else if (Subtarget->isTargetWin64()) {
+ // We always need to allocate 32 bytes as the register spill area.
+ // FIXME: We might reuse these 32 bytes for leaf functions.
+ StackSize += 32;
MFI->setStackSize(StackSize);
}
@@ -769,33 +904,73 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
if (TailCallReturnAddrDelta < 0) {
MachineInstr *MI =
BuildMI(MBB, MBBI, DL, TII.get(Is64Bit? X86::SUB64ri32 : X86::SUB32ri),
- StackPtr).addReg(StackPtr).addImm(-TailCallReturnAddrDelta);
- // The EFLAGS implicit def is dead.
- MI->getOperand(3).setIsDead();
+ StackPtr)
+ .addReg(StackPtr)
+ .addImm(-TailCallReturnAddrDelta);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
}
+ // Mapping for machine moves:
+ //
+ // DST: VirtualFP AND
+ // SRC: VirtualFP => DW_CFA_def_cfa_offset
+ // ELSE => DW_CFA_def_cfa
+ //
+ // SRC: VirtualFP AND
+ // DST: Register => DW_CFA_def_cfa_register
+ //
+ // ELSE
+ // OFFSET < 0 => DW_CFA_offset_extended_sf
+ // REG < 64 => DW_CFA_offset + Reg
+ // ELSE => DW_CFA_offset_extended
+
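// For instance (illustrative lowerings of the mapping above):
//
//   Moves.push_back(MachineMove(L, MachineLocation(MachineLocation::VirtualFP),
//                               MachineLocation(MachineLocation::VirtualFP, -24)));
//   // -> DW_CFA_def_cfa_offset 24
//   Moves.push_back(MachineMove(L, MachineLocation(FramePtr),
//                               MachineLocation(MachineLocation::VirtualFP)));
//   // -> DW_CFA_def_cfa_register %ebp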
+ std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+ const TargetData *TD = MF.getTarget().getTargetData();
uint64_t NumBytes = 0;
- if (hasFP(MF)) {
- // Calculate required stack adjustment
+ int stackGrowth =
+ (MF.getTarget().getFrameInfo()->getStackGrowthDirection() ==
+ TargetFrameInfo::StackGrowsUp ?
+ TD->getPointerSize() : -TD->getPointerSize());
+
+ if (HasFP) {
+ // Calculate required stack adjustment.
uint64_t FrameSize = StackSize - SlotSize;
if (needsStackRealignment(MF))
- FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;
+ FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
- // Get the offset of the stack slot for the EBP register... which is
+ // Get the offset of the stack slot for the EBP register, which is
// guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
// Update the frame offset adjustment.
MFI->setOffsetAdjustment(-NumBytes);
- // Save EBP into the appropriate stack slot...
+ // Save EBP/RBP into the appropriate stack slot.
BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
.addReg(FramePtr, RegState::Kill);
if (needsFrameMoves) {
- // Mark effective beginning of when frame pointer becomes valid.
- FrameLabelId = MMI->NextLabelID();
+ // Mark the place where EBP/RBP was saved.
+ unsigned FrameLabelId = MMI->NextLabelID();
BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(FrameLabelId);
+
+ // Define the current CFA rule to use the provided offset.
+ if (StackSize) {
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, 2 * stackGrowth);
+ Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+ } else {
+ // FIXME: Verify & implement for FP
+ MachineLocation SPDst(StackPtr);
+ MachineLocation SPSrc(StackPtr, stackGrowth);
+ Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+ }
+
+ // Change the rule for the FramePtr to be an "offset" rule.
+ MachineLocation FPDst(MachineLocation::VirtualFP,
+ 2 * stackGrowth);
+ MachineLocation FPSrc(FramePtr);
+ Moves.push_back(MachineMove(FrameLabelId, FPDst, FPSrc));
}
// Update EBP with the new base value...
@@ -803,6 +978,17 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
.addReg(StackPtr);
+ if (needsFrameMoves) {
+ // Mark effective beginning of when frame pointer becomes valid.
+ unsigned FrameLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(FrameLabelId);
+
+ // Define the current CFA to use the EBP/RBP register.
+ MachineLocation FPDst(FramePtr);
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(FrameLabelId, FPDst, FPSrc));
+ }
+
// Mark the FramePtr as live-in in every block except the entry.
for (MachineFunction::iterator I = next(MF.begin()), E = MF.end();
I != E; ++I)
@@ -814,6 +1000,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, DL,
TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri),
StackPtr).addReg(StackPtr).addImm(-MaxAlign);
+
// The EFLAGS implicit def is dead.
MI->getOperand(3).setIsDead();
}
@@ -822,11 +1009,30 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
}
// Skip the callee-saved push instructions.
+ bool PushedRegs = false;
+ int StackOffset = 2 * stackGrowth;
+
while (MBBI != MBB.end() &&
(MBBI->getOpcode() == X86::PUSH32r ||
- MBBI->getOpcode() == X86::PUSH64r))
+ MBBI->getOpcode() == X86::PUSH64r)) {
+ PushedRegs = true;
++MBBI;
+ if (!HasFP && needsFrameMoves) {
+ // Mark callee-saved push instruction.
+ unsigned LabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(LabelId);
+
+ // Define the current CFA rule to use the provided offset.
+ unsigned Ptr = StackSize ?
+ MachineLocation::VirtualFP : StackPtr;
+ MachineLocation SPDst(Ptr);
+ MachineLocation SPSrc(Ptr, StackOffset);
+ Moves.push_back(MachineMove(LabelId, SPDst, SPSrc));
+ StackOffset += stackGrowth;
+ }
+ }
+
if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();
@@ -883,12 +1089,29 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
}
- if (needsFrameMoves) {
- unsigned ReadyLabelId = 0;
- // Mark effective beginning of when frame pointer is ready.
- ReadyLabelId = MMI->NextLabelID();
- BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId);
- emitFrameMoves(MF, FrameLabelId, ReadyLabelId);
+ if ((NumBytes || PushedRegs) && needsFrameMoves) {
+ // Mark end of stack pointer adjustment.
+ unsigned LabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(LabelId);
+
+ if (!HasFP && NumBytes) {
+ // Define the current CFA rule to use the provided offset.
+ if (StackSize) {
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP,
+ -StackSize + stackGrowth);
+ Moves.push_back(MachineMove(LabelId, SPDst, SPSrc));
+ } else {
+ // FIXME: Verify & implement for FP
+ MachineLocation SPDst(StackPtr);
+ MachineLocation SPSrc(StackPtr, stackGrowth);
+ Moves.push_back(MachineMove(LabelId, SPDst, SPSrc));
+ }
+ }
+
+ // Emit DWARF info specifying the offsets of the callee-saved registers.
+ if (PushedRegs)
+ emitCalleeSavedFrameMoves(MF, LabelId, HasFP ? FramePtr : StackPtr);
}
}
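// Resulting prologue shape (illustrative; 32-bit, frame pointer in use,
// frame moves enabled, hypothetical sizes):
//
//   pushl %ebp         ; label: CFA = VirtualFP + 8, EBP saved at CFA - 8
//   movl %esp, %ebp    ; label: CFA register = %ebp
//   pushl %esi         ; callee-saved push
//   subl $24, %esp     ; allocate locals (NumBytes)
//                      ; label: callee-saved register offsets emitted here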
@@ -901,6 +1124,8 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
DebugLoc DL = MBBI->getDebugLoc();
switch (RetOpcode) {
+ default:
+ llvm_unreachable("Can only insert epilog into returning blocks");
case X86::RET:
case X86::RETI:
case X86::TCRETURNdi:
@@ -911,26 +1136,25 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
case X86::EH_RETURN64:
case X86::TAILJMPd:
case X86::TAILJMPr:
- case X86::TAILJMPm: break; // These are ok
- default:
- assert(0 && "Can only insert epilog into returning blocks");
+ case X86::TAILJMPm:
+ break; // These are ok
}
- // Get the number of bytes to allocate from the FrameInfo
+ // Get the number of bytes to allocate from the FrameInfo.
uint64_t StackSize = MFI->getStackSize();
uint64_t MaxAlign = MFI->getMaxAlignment();
unsigned CSSize = X86FI->getCalleeSavedFrameSize();
uint64_t NumBytes = 0;
if (hasFP(MF)) {
- // Calculate required stack adjustment
+ // Calculate required stack adjustment.
uint64_t FrameSize = StackSize - SlotSize;
if (needsStackRealignment(MF))
FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;
NumBytes = FrameSize - CSSize;
- // pop EBP.
+ // Pop EBP.
BuildMI(MBB, MBBI, DL,
TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr);
} else {
@@ -942,9 +1166,11 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
while (MBBI != MBB.begin()) {
MachineBasicBlock::iterator PI = prior(MBBI);
unsigned Opc = PI->getOpcode();
+
if (Opc != X86::POP32r && Opc != X86::POP64r &&
!PI->getDesc().isTerminator())
break;
+
--MBBI;
}
@@ -957,10 +1183,10 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
// If dynamic alloca is used, then reset esp to point to the last callee-saved
// slot before popping them off! Same applies for the case, when stack was
- // realigned
+ // realigned.
if (needsStackRealignment(MF)) {
// We cannot use LEA here, because stack pointer was realigned. We need to
- // deallocate local frame back
+ // deallocate local frame back.
if (CSSize) {
emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
MBBI = prior(LastCSPop);
@@ -972,17 +1198,18 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
} else if (MFI->hasVarSizedObjects()) {
if (CSSize) {
unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
- MachineInstr *MI = addLeaRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
- FramePtr, false, -CSSize);
+ MachineInstr *MI =
+ addLeaRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
+ FramePtr, false, -CSSize);
MBB.insert(MBBI, MI);
- } else
- BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
- StackPtr).addReg(FramePtr);
-
- } else {
- // adjust stack pointer back: ESP += numbytes
- if (NumBytes)
- emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
+ } else {
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr)
+ .addReg(FramePtr);
+ }
+ } else if (NumBytes) {
+ // Adjust stack pointer back: ESP += numbytes.
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
}
// We're returning from function via eh_return.
@@ -993,9 +1220,9 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
BuildMI(MBB, MBBI, DL,
TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
StackPtr).addReg(DestAddr.getReg());
- // Tail call return: adjust the stack pointer and jump to callee
} else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
RetOpcode== X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64) {
+ // Tail call return: adjust the stack pointer and jump to callee.
MBBI = prior(MBB.end());
MachineOperand &JumpTarget = MBBI->getOperand(0);
MachineOperand &StackAdjust = MBBI->getOperand(1);
@@ -1006,6 +1233,7 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
int MaxTCDelta = X86FI->getTCReturnAddrDelta();
int Offset = 0;
assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
+
// Incorporate the retaddr area.
Offset = StackAdj-MaxTCDelta;
assert(Offset >= 0 && "Offset should never be negative");
@@ -1032,6 +1260,7 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
// Add the return addr area delta back since we are not tail calling.
int delta = -1*X86FI->getTCReturnAddrDelta();
MBBI = prior(MBB.end());
+
// Check for possible merge with preceding ADD instruction.
delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII);
@@ -1039,18 +1268,16 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
}
unsigned X86RegisterInfo::getRARegister() const {
- if (Is64Bit)
- return X86::RIP; // Should have dwarf #16
- else
- return X86::EIP; // Should have dwarf #8
+ return Is64Bit ? X86::RIP // Should have dwarf #16.
+ : X86::EIP; // Should have dwarf #8.
}
unsigned X86RegisterInfo::getFrameRegister(MachineFunction &MF) const {
return hasFP(MF) ? FramePtr : StackPtr;
}
-void X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves)
- const {
+void
+X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const {
// Calculate amount of bytes used for return address storing
int stackGrowth = (Is64Bit ? -8 : -4);
@@ -1066,18 +1293,18 @@ void X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves)
}
unsigned X86RegisterInfo::getEHExceptionRegister() const {
- assert(0 && "What is the exception register");
+ llvm_unreachable("What is the exception register");
return 0;
}
unsigned X86RegisterInfo::getEHHandlerRegister() const {
- assert(0 && "What is the exception handler register");
+ llvm_unreachable("What is the exception handler register");
return 0;
}
namespace llvm {
-unsigned getX86SubSuperRegister(unsigned Reg, MVT VT, bool High) {
- switch (VT.getSimpleVT()) {
+unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
+ switch (VT.getSimpleVT().SimpleTy) {
default: return Reg;
case MVT::i8:
if (High) {
@@ -1264,14 +1491,21 @@ namespace {
RegNum < RI.getLastVirtReg(); ++RegNum)
MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment());
- FFI->setMaxAlignment(MaxAlign);
+ if (FFI->getMaxAlignment() == MaxAlign)
+ return false;
- return false;
+ FFI->setMaxAlignment(MaxAlign);
+ return true;
}
virtual const char *getPassName() const {
return "X86 Maximal Stack Alignment Calculator";
}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
};
char MSAC::ID = 0;
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 33b9f5e..f635707 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -93,9 +93,16 @@ public:
/// Code Generation virtual methods...
///
+ /// getMatchingSuperRegClass - Return a subclass of the specified register
+ /// class A so that each register in it has a sub-register of the
+ /// specified sub-register index which is in the specified register class B.
+ virtual const TargetRegisterClass *
+ getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B, unsigned Idx) const;
+
/// getPointerRegClass - Returns a TargetRegisterClass used for pointer
/// values.
- const TargetRegisterClass *getPointerRegClass() const;
+ const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
/// getCrossCopyRegClass - Returns a legal register class to copy a register
/// in the specified class to or from. Returns NULL if it is possible to copy
@@ -125,23 +132,25 @@ public:
bool hasReservedCallFrame(MachineFunction &MF) const;
+ bool hasReservedSpillSlot(MachineFunction &MF, unsigned Reg,
+ int &FrameIdx) const;
+
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const;
- void eliminateFrameIndex(MachineBasicBlock::iterator MI,
- int SPAdj, RegScavenger *RS = NULL) const;
+ unsigned eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj, int *Value = NULL,
+ RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
+ void emitCalleeSavedFrameMoves(MachineFunction &MF, unsigned LabelId,
+ unsigned FramePtr) const;
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
- void emitFrameMoves(MachineFunction &MF,
- unsigned FrameLabelId, unsigned ReadyLabelId) const;
-
// Debug information queries.
unsigned getRARegister() const;
unsigned getFrameRegister(MachineFunction &MF) const;
@@ -155,8 +164,8 @@ public:
// getX86SubSuperRegister - X86 utility function. It returns the sub or super
// register of a specific X86 register.
-// e.g. getX86SubSuperRegister(X86::EAX, MVT::i16) return X86:AX
-unsigned getX86SubSuperRegister(unsigned, MVT, bool High=false);
+// e.g. getX86SubSuperRegister(X86::EAX, EVT::i16) returns X86::AX
+unsigned getX86SubSuperRegister(unsigned, EVT, bool High=false);
} // End llvm namespace
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 2e6f017..7bf074d 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -270,42 +270,27 @@ def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
// require a REX prefix. For example, "addb %ah, %dil" and "movzbl %ah, %r8d"
// cannot be encoded.
def GR8 : RegisterClass<"X86", [i8], 8,
- [AL, CL, DL, BL, AH, CH, DH, BH, SIL, DIL, BPL, SPL,
+ [AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL,
R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B]> {
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
- // Does the function dedicate RBP / EBP to being a frame ptr?
- // If so, don't allocate SPL or BPL.
- static const unsigned X86_GR8_AO_64_fp[] = {
- X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL,
- X86::R8B, X86::R9B, X86::R10B, X86::R11B,
- X86::BL, X86::R14B, X86::R15B, X86::R12B, X86::R13B
- };
- // If not, just don't allocate SPL.
static const unsigned X86_GR8_AO_64[] = {
X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL,
X86::R8B, X86::R9B, X86::R10B, X86::R11B,
X86::BL, X86::R14B, X86::R15B, X86::R12B, X86::R13B, X86::BPL
};
- // In 32-mode, none of the 8-bit registers aliases EBP or ESP.
- static const unsigned X86_GR8_AO_32[] = {
- X86::AL, X86::CL, X86::DL, X86::AH, X86::CH, X86::DH, X86::BL, X86::BH
- };
GR8Class::iterator
GR8Class::allocation_order_begin(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- if (!Subtarget.is64Bit())
- return X86_GR8_AO_32;
- else if (RI->hasFP(MF))
- return X86_GR8_AO_64_fp;
- else
+ if (Subtarget.is64Bit())
return X86_GR8_AO_64;
+ else
+ return begin();
}
GR8Class::iterator
@@ -313,17 +298,20 @@ def GR8 : RegisterClass<"X86", [i8], 8,
const TargetMachine &TM = MF.getTarget();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ // Does the function dedicate RBP / EBP to being a frame ptr?
if (!Subtarget.is64Bit())
- return X86_GR8_AO_32 + (sizeof(X86_GR8_AO_32) / sizeof(unsigned));
+ // In 32-bit mode, none of the 8-bit registers alias EBP or ESP.
+ return begin() + 8;
else if (RI->hasFP(MF))
- return X86_GR8_AO_64_fp + (sizeof(X86_GR8_AO_64_fp) / sizeof(unsigned));
+ // If so, don't allocate SPL or BPL.
+ return array_endof(X86_GR8_AO_64) - 1;
else
- return X86_GR8_AO_64 + (sizeof(X86_GR8_AO_64) / sizeof(unsigned));
+ // If not, just don't allocate SPL.
+ return array_endof(X86_GR8_AO_64);
}
}];
}
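// The idiom above (as used throughout this file): allocation_order_begin/end
// return a sub-range of the class's register list, so trimming allocatable
// registers is plain pointer arithmetic. With the GR8 order
//
//   AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL, ...
//
// begin() + 8 stops 32-bit allocation before SIL (none of the first eight
// alias ESP or EBP), and array_endof(X86_GR8_AO_64) - 1 drops BPL when a
// frame pointer is in use.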
-
def GR16 : RegisterClass<"X86", [i16], 16,
[AX, CX, DX, SI, DI, BX, BP, SP,
R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W]> {
@@ -333,42 +321,20 @@ def GR16 : RegisterClass<"X86", [i16], 16,
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
- // Does the function dedicate RBP / EBP to being a frame ptr?
- // If so, don't allocate SP or BP.
- static const unsigned X86_GR16_AO_64_fp[] = {
- X86::AX, X86::CX, X86::DX, X86::SI, X86::DI,
- X86::R8W, X86::R9W, X86::R10W, X86::R11W,
- X86::BX, X86::R14W, X86::R15W, X86::R12W, X86::R13W
- };
- static const unsigned X86_GR16_AO_32_fp[] = {
- X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX
- };
- // If not, just don't allocate SP.
static const unsigned X86_GR16_AO_64[] = {
X86::AX, X86::CX, X86::DX, X86::SI, X86::DI,
X86::R8W, X86::R9W, X86::R10W, X86::R11W,
X86::BX, X86::R14W, X86::R15W, X86::R12W, X86::R13W, X86::BP
};
- static const unsigned X86_GR16_AO_32[] = {
- X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX, X86::BP
- };
GR16Class::iterator
GR16Class::allocation_order_begin(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- if (Subtarget.is64Bit()) {
- if (RI->hasFP(MF))
- return X86_GR16_AO_64_fp;
- else
- return X86_GR16_AO_64;
- } else {
- if (RI->hasFP(MF))
- return X86_GR16_AO_32_fp;
- else
- return X86_GR16_AO_32;
- }
+ if (Subtarget.is64Bit())
+ return X86_GR16_AO_64;
+ else
+ return begin();
}
GR16Class::iterator
@@ -377,21 +343,26 @@ def GR16 : RegisterClass<"X86", [i16], 16,
const TargetRegisterInfo *RI = TM.getRegisterInfo();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
if (Subtarget.is64Bit()) {
+ // Does the function dedicate RBP to being a frame ptr?
if (RI->hasFP(MF))
- return X86_GR16_AO_64_fp+(sizeof(X86_GR16_AO_64_fp)/sizeof(unsigned));
+ // If so, don't allocate SP or BP.
+ return array_endof(X86_GR16_AO_64) - 1;
else
- return X86_GR16_AO_64 + (sizeof(X86_GR16_AO_64) / sizeof(unsigned));
+ // If not, just don't allocate SP.
+ return array_endof(X86_GR16_AO_64);
} else {
+ // Does the function dedicate EBP to being a frame ptr?
if (RI->hasFP(MF))
- return X86_GR16_AO_32_fp+(sizeof(X86_GR16_AO_32_fp)/sizeof(unsigned));
+ // If so, don't allocate SP or BP.
+ return begin() + 6;
else
- return X86_GR16_AO_32 + (sizeof(X86_GR16_AO_32) / sizeof(unsigned));
+ // If not, just don't allocate SP.
+ return begin() + 7;
}
}
}];
}
-
def GR32 : RegisterClass<"X86", [i32], 32,
[EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
@@ -401,42 +372,20 @@ def GR32 : RegisterClass<"X86", [i32], 32,
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
- // Does the function dedicate RBP / EBP to being a frame ptr?
- // If so, don't allocate ESP or EBP.
- static const unsigned X86_GR32_AO_64_fp[] = {
- X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI,
- X86::R8D, X86::R9D, X86::R10D, X86::R11D,
- X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D
- };
- static const unsigned X86_GR32_AO_32_fp[] = {
- X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX
- };
- // If not, just don't allocate ESP.
static const unsigned X86_GR32_AO_64[] = {
X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI,
X86::R8D, X86::R9D, X86::R10D, X86::R11D,
X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D, X86::EBP
};
- static const unsigned X86_GR32_AO_32[] = {
- X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP
- };
GR32Class::iterator
GR32Class::allocation_order_begin(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- if (Subtarget.is64Bit()) {
- if (RI->hasFP(MF))
- return X86_GR32_AO_64_fp;
- else
- return X86_GR32_AO_64;
- } else {
- if (RI->hasFP(MF))
- return X86_GR32_AO_32_fp;
- else
- return X86_GR32_AO_32;
- }
+ if (Subtarget.is64Bit())
+ return X86_GR32_AO_64;
+ else
+ return begin();
}
GR32Class::iterator
@@ -445,21 +394,29 @@ def GR32 : RegisterClass<"X86", [i32], 32,
const TargetRegisterInfo *RI = TM.getRegisterInfo();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
if (Subtarget.is64Bit()) {
+ // Does the function dedicate RBP to being a frame ptr?
if (RI->hasFP(MF))
- return X86_GR32_AO_64_fp+(sizeof(X86_GR32_AO_64_fp)/sizeof(unsigned));
+ // If so, don't allocate ESP or EBP.
+ return array_endof(X86_GR32_AO_64) - 1;
else
- return X86_GR32_AO_64 + (sizeof(X86_GR32_AO_64) / sizeof(unsigned));
+ // If not, just don't allocate ESP.
+ return array_endof(X86_GR32_AO_64);
} else {
+ // Does the function dedicate EBP to being a frame ptr?
if (RI->hasFP(MF))
- return X86_GR32_AO_32_fp+(sizeof(X86_GR32_AO_32_fp)/sizeof(unsigned));
+ // If so, don't allocate ESP or EBP.
+ return begin() + 6;
else
- return X86_GR32_AO_32 + (sizeof(X86_GR32_AO_32) / sizeof(unsigned));
+ // If not, just don't allocate ESP.
+ return begin() + 7;
}
}
}];
}
-
+// GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since
+// RIP isn't really a register and it can't be used anywhere except in an
+// address, but it doesn't cause trouble.
def GR64 : RegisterClass<"X86", [i64], 64,
[RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
RBX, R14, R15, R12, R13, RBP, RSP, RIP]> {
@@ -483,6 +440,11 @@ def GR64 : RegisterClass<"X86", [i64], 64,
}];
}
+// Segment registers for use by MOV instructions (and others) that have a
+// segment register as one operand. Always contain a 16-bit segment
+// descriptor.
+def SEGMENT_REG : RegisterClass<"X86", [i16], 16, [CS, DS, SS, ES, FS, GS]> {
+}
// GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of
// GR8, GR16, GR32, and GR64 which contain just the "a" "b", "c", and "d"
@@ -509,38 +471,25 @@ def GR64_ABCD : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> {
// On x86-64, GR64_NOREX, GR32_NOREX and GR16_NOREX are the classes
// of registers which do not by themselves require a REX prefix.
def GR8_NOREX : RegisterClass<"X86", [i8], 8,
- [AL, CL, DL, BL, AH, CH, DH, BH,
+ [AL, CL, DL, AH, CH, DH, BL, BH,
SIL, DIL, BPL, SPL]> {
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
- // Does the function dedicate RBP / EBP to being a frame ptr?
- // If so, don't allocate SPL or BPL.
- static const unsigned X86_GR8_NOREX_AO_64_fp[] = {
- X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL, X86::BL
- };
- // If not, just don't allocate SPL.
static const unsigned X86_GR8_NOREX_AO_64[] = {
X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL, X86::BL, X86::BPL
};
- // In 32-mode, none of the 8-bit registers aliases EBP or ESP.
- static const unsigned X86_GR8_NOREX_AO_32[] = {
- X86::AL, X86::CL, X86::DL, X86::AH, X86::CH, X86::DH, X86::BL, X86::BH
- };
GR8_NOREXClass::iterator
GR8_NOREXClass::allocation_order_begin(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- if (!Subtarget.is64Bit())
- return X86_GR8_NOREX_AO_32;
- else if (RI->hasFP(MF))
- return X86_GR8_NOREX_AO_64_fp;
- else
+ if (Subtarget.is64Bit())
return X86_GR8_NOREX_AO_64;
+ else
+ return begin();
}
GR8_NOREXClass::iterator
@@ -548,15 +497,16 @@ def GR8_NOREX : RegisterClass<"X86", [i8], 8,
const TargetMachine &TM = MF.getTarget();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ // Does the function dedicate RBP / EBP to being a frame ptr?
if (!Subtarget.is64Bit())
- return X86_GR8_NOREX_AO_32 +
- (sizeof(X86_GR8_NOREX_AO_32) / sizeof(unsigned));
+ // In 32-bit mode, none of the 8-bit registers alias EBP or ESP.
+ return begin() + 8;
else if (RI->hasFP(MF))
- return X86_GR8_NOREX_AO_64_fp +
- (sizeof(X86_GR8_NOREX_AO_64_fp) / sizeof(unsigned));
+ // If so, don't allocate SPL or BPL.
+ return array_endof(X86_GR8_NOREX_AO_64) - 1;
else
- return X86_GR8_NOREX_AO_64 +
- (sizeof(X86_GR8_NOREX_AO_64) / sizeof(unsigned));
+ // If not, just don't allocate SPL.
+ return array_endof(X86_GR8_NOREX_AO_64);
}
}];
}
@@ -564,38 +514,20 @@ def GR16_NOREX : RegisterClass<"X86", [i16], 16,
[AX, CX, DX, SI, DI, BX, BP, SP]> {
let SubRegClassList = [GR8_NOREX, GR8_NOREX];
let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
- // Does the function dedicate RBP / EBP to being a frame ptr?
- // If so, don't allocate SP or BP.
- static const unsigned X86_GR16_AO_fp[] = {
- X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX
- };
- // If not, just don't allocate SP.
- static const unsigned X86_GR16_AO[] = {
- X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX, X86::BP
- };
-
- GR16_NOREXClass::iterator
- GR16_NOREXClass::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
- if (RI->hasFP(MF))
- return X86_GR16_AO_fp;
- else
- return X86_GR16_AO;
- }
-
GR16_NOREXClass::iterator
GR16_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ // Does the function dedicate RBP / EBP to being a frame ptr?
if (RI->hasFP(MF))
- return X86_GR16_AO_fp+(sizeof(X86_GR16_AO_fp)/sizeof(unsigned));
+ // If so, don't allocate SP or BP.
+ return end() - 2;
else
- return X86_GR16_AO + (sizeof(X86_GR16_AO) / sizeof(unsigned));
+ // If not, just don't allocate SP.
+ return end() - 1;
}
}];
}
@@ -604,89 +536,149 @@ def GR32_NOREX : RegisterClass<"X86", [i32], 32,
[EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]> {
let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX];
let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
- // Does the function dedicate RBP / EBP to being a frame ptr?
- // If so, don't allocate ESP or EBP.
- static const unsigned X86_GR32_NOREX_AO_fp[] = {
- X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX
- };
- // If not, just don't allocate ESP.
- static const unsigned X86_GR32_NOREX_AO[] = {
- X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP
- };
-
GR32_NOREXClass::iterator
- GR32_NOREXClass::allocation_order_begin(const MachineFunction &MF) const {
+ GR32_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ // Does the function dedicate RBP / EBP to being a frame ptr?
if (RI->hasFP(MF))
- return X86_GR32_NOREX_AO_fp;
+ // If so, don't allocate ESP or EBP.
+ return end() - 2;
else
- return X86_GR32_NOREX_AO;
+ // If not, just don't allocate ESP.
+ return end() - 1;
}
-
- GR32_NOREXClass::iterator
- GR32_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
+ }];
+}
+// GR64_NOREX - GR64 registers which do not require a REX prefix.
+def GR64_NOREX : RegisterClass<"X86", [i64], 64,
+ [RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP]> {
+ let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX];
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ GR64_NOREXClass::iterator
+ GR64_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ // Does the function dedicate RBP to being a frame ptr?
if (RI->hasFP(MF))
- return X86_GR32_NOREX_AO_fp +
- (sizeof(X86_GR32_NOREX_AO_fp) / sizeof(unsigned));
+ // If so, don't allocate RIP, RSP or RBP.
+ return end() - 3;
else
- return X86_GR32_NOREX_AO +
- (sizeof(X86_GR32_NOREX_AO) / sizeof(unsigned));
+ // If not, just don't allocate RIP or RSP.
+ return end() - 2;
}
}];
}
-// GR64_NOREX - GR64 registers which do not require a REX prefix.
-def GR64_NOREX : RegisterClass<"X86", [i64], 64,
- [RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP]> {
- let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX];
+// GR32_NOSP - GR32 registers except ESP.
+def GR32_NOSP : RegisterClass<"X86", [i32], 32,
+ [EAX, ECX, EDX, ESI, EDI, EBX, EBP,
+ R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
+ let SubRegClassList = [GR8, GR8, GR16];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
- // Does the function dedicate RBP / EBP to being a frame ptr?
- // If so, don't allocate RSP or RBP.
- static const unsigned X86_GR64_NOREX_AO_fp[] = {
- X86::RAX, X86::RCX, X86::RDX, X86::RSI, X86::RDI, X86::RBX
- };
- // If not, just don't allocate RSP.
- static const unsigned X86_GR64_NOREX_AO[] = {
- X86::RAX, X86::RCX, X86::RDX, X86::RSI, X86::RDI, X86::RBX, X86::RBP
+ static const unsigned X86_GR32_NOSP_AO_64[] = {
+ X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI,
+ X86::R8D, X86::R9D, X86::R10D, X86::R11D,
+ X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D, X86::EBP
};
- GR64_NOREXClass::iterator
- GR64_NOREXClass::allocation_order_begin(const MachineFunction &MF) const {
+ GR32_NOSPClass::iterator
+ GR32_NOSPClass::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (Subtarget.is64Bit())
+ return X86_GR32_NOSP_AO_64;
+ else
+ return begin();
+ }
+
+ GR32_NOSPClass::iterator
+ GR32_NOSPClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
- if (RI->hasFP(MF))
- return X86_GR64_NOREX_AO_fp;
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (Subtarget.is64Bit()) {
+ // Does the function dedicate RBP to being a frame ptr?
+ if (RI->hasFP(MF))
+ // If so, don't allocate EBP.
+ return array_endof(X86_GR32_NOSP_AO_64) - 1;
+ else
+ // If not, any reg in this class is ok.
+ return array_endof(X86_GR32_NOSP_AO_64);
+ } else {
+ // Does the function dedicate EBP to being a frame ptr?
+ if (RI->hasFP(MF))
+ // If so, don't allocate EBP.
+ return begin() + 6;
+ else
+ // If not, any reg in this class is ok.
+ return begin() + 7;
+ }
+ }
+ }];
+}
+
+// GR64_NOSP - GR64 registers except RSP (and RIP).
+def GR64_NOSP : RegisterClass<"X86", [i64], 64,
+ [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ RBX, R14, R15, R12, R13, RBP]> {
+ let SubRegClassList = [GR8, GR8, GR16, GR32_NOSP];
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ GR64_NOSPClass::iterator
+ GR64_NOSPClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (!Subtarget.is64Bit())
+ return begin(); // None of these are allocatable in 32-bit.
+ if (RI->hasFP(MF)) // Does the function dedicate RBP to being a frame ptr?
+ return end()-1; // If so, don't allocate RBP
else
- return X86_GR64_NOREX_AO;
+ return end(); // If not, any reg in this class is ok.
}
+ }];
+}
- GR64_NOREXClass::iterator
- GR64_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
+// GR64_NOREX_NOSP - GR64_NOREX registers except RSP.
+def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
+ [RAX, RCX, RDX, RSI, RDI, RBX, RBP]> {
+ let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX];
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ GR64_NOREX_NOSPClass::iterator
+ GR64_NOREX_NOSPClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ // Does the function dedicate RBP to being a frame ptr?
if (RI->hasFP(MF))
- return X86_GR64_NOREX_AO_fp +
- (sizeof(X86_GR64_NOREX_AO_fp) / sizeof(unsigned));
+ // If so, don't allocate RBP.
+ return end() - 1;
else
- return X86_GR64_NOREX_AO +
- (sizeof(X86_GR64_NOREX_AO) / sizeof(unsigned));
+ // If not, any reg in this class is ok.
+ return end();
}
}];
}
// A class to support the 'A' assembler constraint: EAX then EDX.
-def GRAD : RegisterClass<"X86", [i32], 32, [EAX, EDX]>;
+def GR32_AD : RegisterClass<"X86", [i32], 32, [EAX, EDX]> {
+ let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD];
+}
// Scalar SSE2 floating point registers.
def FR32 : RegisterClass<"X86", [f32], 32,
diff --git a/lib/Target/X86/X86Relocations.h b/lib/Target/X86/X86Relocations.h
index b225f48..990962d 100644
--- a/lib/Target/X86/X86Relocations.h
+++ b/lib/Target/X86/X86Relocations.h
@@ -20,21 +20,31 @@ namespace llvm {
namespace X86 {
/// RelocationType - An enum for the x86 relocation codes. Note that
/// the terminology here doesn't follow x86 convention - word means
- /// 32-bit and dword means 64-bit.
+ /// 32-bit and dword means 64-bit. The relocations are handled by the JIT
+ /// or ObjectCode emitters; this is transparent to the x86 code emitter,
+ /// but the JIT and ObjectCode emitters treat them differently.
enum RelocationType {
- // reloc_pcrel_word - PC relative relocation, add the relocated value to
- // the value already in memory, after we adjust it for where the PC is.
+ /// reloc_pcrel_word - PC relative relocation, add the relocated value to
+ /// the value already in memory, after we adjust it for where the PC is.
reloc_pcrel_word = 0,
- // reloc_picrel_word - PIC base relative relocation, add the relocated
- // value to the value already in memory, after we adjust it for where the
- // PIC base is.
+ /// reloc_picrel_word - PIC base relative relocation, add the relocated
+ /// value to the value already in memory, after we adjust it for where the
+ /// PIC base is.
reloc_picrel_word = 1,
-
- // reloc_absolute_word, reloc_absolute_dword - Absolute relocation, just
- // add the relocated value to the value already in memory.
+
+ /// reloc_absolute_word - absolute relocation, just add the relocated
+ /// value to the value already in memory.
reloc_absolute_word = 2,
- reloc_absolute_dword = 3
+
+ /// reloc_absolute_word_sext - absolute relocation, just add the relocated
+ /// value to the value already in memory. In object files, it represents a
+ /// value which must be sign-extended when resolving the relocation.
+ reloc_absolute_word_sext = 3,
+
+ /// reloc_absolute_dword - absolute relocation, just add the relocated
+ /// value to the value already in memory.
+ reloc_absolute_dword = 4
};
}
}
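// Illustrative emitter-side sketch (assumed API of this era): the code
// emitter records a relocation against a global, and the JIT or object-code
// writer later resolves it according to the enum above.
//
//   MCE.addRelocation(MachineRelocation::getGV(
//       MCE.getCurrentPCOffset(), X86::reloc_pcrel_word, GV, 0));
//
// For reloc_pcrel_word the resolver adds (TargetAddress - PC) into the
// 32-bit word already in memory, as the enumerator comment describes.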
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 8506fa6..fb76aeb 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -13,80 +13,111 @@
#define DEBUG_TYPE "subtarget"
#include "X86Subtarget.h"
+#include "X86InstrInfo.h"
#include "X86GenSubtarget.inc"
-#include "llvm/Module.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/GlobalValue.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
#if defined(_MSC_VER)
- #include <intrin.h>
+#include <intrin.h>
#endif
-static cl::opt<X86Subtarget::AsmWriterFlavorTy>
-AsmWriterFlavor("x86-asm-syntax", cl::init(X86Subtarget::Unset),
- cl::desc("Choose style of code to emit from X86 backend:"),
- cl::values(
- clEnumValN(X86Subtarget::ATT, "att", "Emit AT&T-style assembly"),
- clEnumValN(X86Subtarget::Intel, "intel", "Emit Intel-style assembly"),
- clEnumValEnd));
-
-
-/// True if accessing the GV requires an extra load. For Windows, dllimported
-/// symbols are indirect, loading the value at address GV rather then the
-/// value of GV itself. This means that the GlobalAddress must be in the base
-/// or index register of the address, not the GV offset field.
-bool X86Subtarget::GVRequiresExtraLoad(const GlobalValue* GV,
- const TargetMachine& TM,
- bool isDirectCall) const
-{
- // FIXME: PIC
- if (TM.getRelocationModel() != Reloc::Static &&
- TM.getCodeModel() != CodeModel::Large) {
+/// ClassifyGlobalReference - Classify a global variable reference for the
+/// current subtarget according to how we should reference it in a non-pcrel
+/// context.
+unsigned char X86Subtarget::
+ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
+ // DLLImport exists only on Windows; it is implemented as a load from a
+ // DLLIMPORT stub.
+ if (GV->hasDLLImportLinkage())
+ return X86II::MO_DLLIMPORT;
+
+ // GVs with ghost linkage (in JIT lazy compilation mode) do not require an
+ // extra load from a stub.
+ bool isDecl = GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode();
+
+ // X86-64 in PIC mode.
+ if (isPICStyleRIPRel()) {
+ // Large model never uses stubs.
+ if (TM.getCodeModel() == CodeModel::Large)
+ return X86II::MO_NO_FLAG;
+
if (isTargetDarwin()) {
- if (isDirectCall)
- return false;
- bool isDecl = GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode();
- if (GV->hasHiddenVisibility() &&
- (Is64Bit || (!isDecl && !GV->hasCommonLinkage())))
- // If symbol visibility is hidden, the extra load is not needed if
- // target is x86-64 or the symbol is definitely defined in the current
- // translation unit.
- return false;
- return !isDirectCall && (isDecl || GV->isWeakForLinker());
- } else if (isTargetELF()) {
+ // If symbol visibility is hidden, the extra load is not needed if
+ // target is x86-64 or the symbol is definitely defined in the current
+ // translation unit.
+ if (GV->hasDefaultVisibility() &&
+ (isDecl || GV->isWeakForLinker()))
+ return X86II::MO_GOTPCREL;
+ } else {
+ assert(isTargetELF() && "Unknown rip-relative target");
+
// Extra load is needed for all externally visible.
- if (isDirectCall)
- return false;
- if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
- return false;
- return true;
- } else if (isTargetCygMing() || isTargetWindows()) {
- return (GV->hasDLLImportLinkage());
+ if (!GV->hasLocalLinkage() && GV->hasDefaultVisibility())
+ return X86II::MO_GOTPCREL;
}
+
+ return X86II::MO_NO_FLAG;
}
- return false;
-}
+
+ if (isPICStyleGOT()) { // 32-bit ELF targets.
+ // Extra load is needed for all externally visible.
+ if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
+ return X86II::MO_GOTOFF;
+ return X86II::MO_GOT;
+ }
+
+ if (isPICStyleStubPIC()) { // Darwin/32 in PIC mode.
+ // Determine whether we have a stub reference and/or whether the reference
+ // is relative to the PIC base or not.
+
+ // If this is a strong reference to a definition, it is definitely not
+ // through a stub.
+ if (!isDecl && !GV->isWeakForLinker())
+ return X86II::MO_PIC_BASE_OFFSET;
-/// True if accessing the GV requires a register. This is a superset of the
-/// cases where GVRequiresExtraLoad is true. Some variations of PIC require
-/// a register, but not an extra load.
-bool X86Subtarget::GVRequiresRegister(const GlobalValue *GV,
- const TargetMachine& TM,
- bool isDirectCall) const
-{
- if (GVRequiresExtraLoad(GV, TM, isDirectCall))
- return true;
- // Code below here need only consider cases where GVRequiresExtraLoad
- // returns false.
- if (TM.getRelocationModel() == Reloc::PIC_)
- return !isDirectCall &&
- (GV->hasLocalLinkage() || GV->hasExternalLinkage());
- return false;
+ // Unless we have a symbol with hidden visibility, we have to go through a
+ // normal $non_lazy_ptr stub because this symbol might be resolved late.
+ if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
+ return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
+
+ // If symbol visibility is hidden, we have a stub for common symbol
+ // references and external declarations.
+ if (isDecl || GV->hasCommonLinkage()) {
+ // Hidden $non_lazy_ptr reference.
+ return X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE;
+ }
+
+ // Otherwise, no stub.
+ return X86II::MO_PIC_BASE_OFFSET;
+ }
+
+ if (isPICStyleStubNoDynamic()) { // Darwin/32 in -mdynamic-no-pic mode.
+ // Determine whether we have a stub reference.
+
+ // If this is a strong reference to a definition, it is definitely not
+ // through a stub.
+ if (!isDecl && !GV->isWeakForLinker())
+ return X86II::MO_NO_FLAG;
+
+ // Unless we have a symbol with hidden visibility, we have to go through a
+ // normal $non_lazy_ptr stub because this symbol might be resolved late.
+ if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
+ return X86II::MO_DARWIN_NONLAZY;
+
+ // Otherwise, no stub.
+ return X86II::MO_NO_FLAG;
+ }
+
+ // Direct static reference to global.
+ return X86II::MO_NO_FLAG;
}
+
/// getBZeroEntry - This function returns the name of a function which has an
/// interface like the non-standard bzero function, if such a function exists on
/// the current subtarget and it is considered preferable to memset with zero
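A sketch of the consuming side, under the assumption (not shown in this patch) that instruction lowering attaches the returned X86II::MO_* flag to the global's machine operand:

// Hypothetical caller; "Subtarget", "GV" and "TM" are assumed in scope.
unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
if (OpFlags == X86II::MO_GOTPCREL) {
  // x86-64 PIC: the reference becomes "sym@GOTPCREL(%rip)" and the extra
  // load through the GOT is folded into the addressing mode.
} else if (OpFlags == X86II::MO_DARWIN_NONLAZY_PIC_BASE) {
  // Darwin/32 PIC: load from "sym$non_lazy_ptr" relative to the PIC base.
}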
@@ -120,9 +151,9 @@ unsigned X86Subtarget::getSpecialAddressLatency() const {
/// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
/// specified arguments. If we can't run cpuid on the host, return true.
-bool X86::GetCpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
- unsigned *rECX, unsigned *rEDX) {
-#if defined(__x86_64__) || defined(_M_AMD64)
+static bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
+ unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
+#if defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)
#if defined(__GNUC__)
// gcc doesn't know that cpuid clobbers ebx/rbx. Preserve it manually.
asm ("movq\t%%rbx, %%rsi\n\t"
@@ -192,18 +223,19 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
char c[12];
} text;
- if (X86::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1))
+ if (GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1))
return;
- X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
+ GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
- if ((EDX >> 23) & 0x1) X86SSELevel = MMX;
- if ((EDX >> 25) & 0x1) X86SSELevel = SSE1;
- if ((EDX >> 26) & 0x1) X86SSELevel = SSE2;
- if (ECX & 0x1) X86SSELevel = SSE3;
- if ((ECX >> 9) & 0x1) X86SSELevel = SSSE3;
- if ((ECX >> 19) & 0x1) X86SSELevel = SSE41;
- if ((ECX >> 20) & 0x1) X86SSELevel = SSE42;
+ if ((EDX >> 15) & 1) HasCMov = true;
+ if ((EDX >> 23) & 1) X86SSELevel = MMX;
+ if ((EDX >> 25) & 1) X86SSELevel = SSE1;
+ if ((EDX >> 26) & 1) X86SSELevel = SSE2;
+ if (ECX & 0x1) X86SSELevel = SSE3;
+ if ((ECX >> 9) & 1) X86SSELevel = SSSE3;
+ if ((ECX >> 19) & 1) X86SSELevel = SSE41;
+ if ((ECX >> 20) & 1) X86SSELevel = SSE42;
bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
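The argument order in the leaf-0 query above is deliberate: cpuid returns the vendor string in EBX, EDX, ECX order, so storing EBX, ECX and EDX into text.u+0, text.u+2 and text.u+1 reassembles the string contiguously. A standalone illustration of the same trick:

// Sketch: the union overlays three 32-bit registers on a 12-byte string.
union {
  unsigned u[3];
  char     c[12];
} text;
unsigned EAX;
GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1);
// u[0] <- EBX ("Genu"), u[1] <- EDX ("ineI"), u[2] <- ECX ("ntel")
bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;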
@@ -218,7 +250,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
DetectFamilyModel(EAX, Family, Model);
IsBTMemSlow = IsAMD || (Family == 6 && Model >= 13);
- X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+ GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
HasX86_64 = (EDX >> 29) & 0x1;
HasSSE4A = IsAMD && ((ECX >> 6) & 0x1);
HasFMA4 = IsAMD && ((ECX >> 16) & 0x1);
@@ -227,13 +259,13 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
static const char *GetCurrentX86CPU() {
unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
- if (X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX))
+ if (GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX))
return "generic";
unsigned Family = 0;
unsigned Model = 0;
DetectFamilyModel(EAX, Family, Model);
- X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+ GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
bool Em64T = (EDX >> 29) & 0x1;
bool HasSSE3 = (ECX & 0x1);
@@ -242,7 +274,7 @@ static const char *GetCurrentX86CPU() {
char c[12];
} text;
- X86::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1);
+ GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1);
if (memcmp(text.c, "GenuineIntel", 12) == 0) {
switch (Family) {
case 3:
@@ -319,9 +351,7 @@ static const char *GetCurrentX86CPU() {
}
case 15:
if (HasSSE3) {
- switch (Model) {
- default: return "k8-sse3";
- }
+ return "k8-sse3";
} else {
switch (Model) {
case 1: return "opteron";
@@ -330,9 +360,7 @@ static const char *GetCurrentX86CPU() {
}
}
case 16:
- switch (Model) {
- default: return "amdfam10";
- }
+ return "amdfam10";
default:
return "generic";
}
@@ -341,11 +369,12 @@ static const char *GetCurrentX86CPU() {
}
}
-X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit)
- : AsmFlavor(AsmWriterFlavor)
- , PICStyle(PICStyles::None)
+X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
+ bool is64Bit)
+ : PICStyle(PICStyles::None)
, X86SSELevel(NoMMXSSE)
, X863DNowLevel(NoThreeDNow)
+ , HasCMov(false)
, HasX86_64(false)
, HasSSE4A(false)
, HasAVX(false)
@@ -384,15 +413,14 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit)
if (Is64Bit)
HasX86_64 = true;
- DOUT << "Subtarget features: SSELevel " << X86SSELevel
- << ", 3DNowLevel " << X863DNowLevel
- << ", 64bit " << HasX86_64 << "\n";
+ DEBUG(errs() << "Subtarget features: SSELevel " << X86SSELevel
+ << ", 3DNowLevel " << X863DNowLevel
+ << ", 64bit " << HasX86_64 << "\n");
assert((!Is64Bit || HasX86_64) &&
"64-bit code requested on a subtarget that doesn't support it!");
// Set the boolean corresponding to the current target triple, or the default
// if one cannot be determined, to true.
- const std::string& TT = M.getTargetTriple();
if (TT.length() > 5) {
size_t Pos;
if ((Pos = TT.find("-darwin")) != std::string::npos) {
@@ -415,38 +443,10 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit)
TargetType = isWindows;
} else if (TT.find("windows") != std::string::npos) {
TargetType = isWindows;
- }
- else if (TT.find("-cl") != std::string::npos) {
+ } else if (TT.find("-cl") != std::string::npos) {
TargetType = isDarwin;
DarwinVers = 9;
}
- } else if (TT.empty()) {
-#if defined(__CYGWIN__)
- TargetType = isCygwin;
-#elif defined(__MINGW32__) || defined(__MINGW64__)
- TargetType = isMingw;
-#elif defined(__APPLE__)
- TargetType = isDarwin;
-#if __APPLE_CC__ > 5400
- DarwinVers = 9; // GCC 5400+ is Leopard.
-#else
- DarwinVers = 8; // Minimum supported darwin is Tiger.
-#endif
-
-#elif defined(_WIN32) || defined(_WIN64)
- TargetType = isWindows;
-#elif defined(__linux__)
- // Linux doesn't imply ELF, but we don't currently support anything else.
- TargetType = isELF;
- IsLinux = true;
-#endif
- }
-
- // If the asm syntax hasn't been overridden on the command line, use whatever
- // the target wants.
- if (AsmFlavor == X86Subtarget::Unset) {
- AsmFlavor = (TargetType == isWindows)
- ? X86Subtarget::Intel : X86Subtarget::ATT;
}
// Stack alignment is 16 bytes on Darwin (both 32 and 64 bit) and for all 64
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 0d1434f..a2e368d 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -18,23 +18,22 @@
#include <string>
namespace llvm {
-class Module;
class GlobalValue;
class TargetMachine;
+/// PICStyles - The X86 backend supports a number of different styles of PIC.
+///
namespace PICStyles {
enum Style {
- Stub, GOT, RIPRel, WinPIC, None
+ StubPIC, // Used on i386-darwin in -fPIC mode.
+ StubDynamicNoPIC, // Used on i386-darwin in -mdynamic-no-pic mode.
+ GOT, // Used on many 32-bit unices in -fPIC mode.
+ RIPRel, // Used on X86-64 when not in -static mode.
+ None // Set when in -static mode (not PIC or DynamicNoPIC mode).
};
}
class X86Subtarget : public TargetSubtarget {
-public:
- enum AsmWriterFlavorTy {
- // Note: This numbering has to match the GCC assembler dialects for inline
- // asm alternatives to work right.
- ATT = 0, Intel = 1, Unset
- };
protected:
enum X86SSEEnum {
NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42
@@ -44,10 +43,6 @@ protected:
NoThreeDNow, ThreeDNow, ThreeDNowA
};
- /// AsmFlavor - Which x86 asm dialect to use.
- ///
- AsmWriterFlavorTy AsmFlavor;
-
/// PICStyle - Which PIC style to use
///
PICStyles::Style PICStyle;
@@ -60,6 +55,10 @@ protected:
///
X863DNowEnum X863DNowLevel;
+ /// HasCMov - True if this processor has conditional move instructions
+ /// (generally pentium pro+).
+ bool HasCMov;
+
/// HasX86_64 - True if the processor supports X86-64 instructions.
///
bool HasX86_64;
@@ -95,7 +94,7 @@ protected:
unsigned MaxInlineSizeThreshold;
private:
- /// Is64Bit - True if the processor supports 64-bit instructions and module
+ /// Is64Bit - True if the processor supports 64-bit instructions and
/// pointer size is 64 bit.
bool Is64Bit;
@@ -105,9 +104,9 @@ public:
} TargetType;
/// This constructor initializes the data members to match that
- /// of the specified module.
+ /// of the specified triple.
///
- X86Subtarget(const Module &M, const std::string &FS, bool is64Bit);
+ X86Subtarget(const std::string &TT, const std::string &FS, bool is64Bit);
/// getStackAlignment - Returns the minimum alignment known to hold of the
/// stack frame on entry to the function and which must be maintained by every
@@ -145,66 +144,67 @@ public:
bool hasAVX() const { return HasAVX; }
bool hasFMA3() const { return HasFMA3; }
bool hasFMA4() const { return HasFMA4; }
-
bool isBTMemSlow() const { return IsBTMemSlow; }
- unsigned getAsmFlavor() const {
- return AsmFlavor != Unset ? unsigned(AsmFlavor) : 0;
- }
-
- bool isFlavorAtt() const { return AsmFlavor == ATT; }
- bool isFlavorIntel() const { return AsmFlavor == Intel; }
-
bool isTargetDarwin() const { return TargetType == isDarwin; }
- bool isTargetELF() const {
- return TargetType == isELF;
- }
+ bool isTargetELF() const { return TargetType == isELF; }
+
bool isTargetWindows() const { return TargetType == isWindows; }
bool isTargetMingw() const { return TargetType == isMingw; }
- bool isTargetCygMing() const { return (TargetType == isMingw ||
- TargetType == isCygwin); }
bool isTargetCygwin() const { return TargetType == isCygwin; }
+ bool isTargetCygMing() const {
+ return TargetType == isMingw || TargetType == isCygwin;
+ }
+
+ /// isTargetCOFF - Return true if this is any COFF/Windows target variant.
+ bool isTargetCOFF() const {
+ return TargetType == isMingw || TargetType == isCygwin ||
+ TargetType == isWindows;
+ }
+
bool isTargetWin64() const {
- return (Is64Bit && (TargetType == isMingw || TargetType == isWindows));
+ return Is64Bit && (TargetType == isMingw || TargetType == isWindows);
}
std::string getDataLayout() const {
const char *p;
if (is64Bit())
p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128";
- else {
- if (isTargetDarwin())
- p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128";
- else
- p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32";
- }
+ else if (isTargetDarwin())
+ p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128";
+ else
+ p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32";
return std::string(p);
}
bool isPICStyleSet() const { return PICStyle != PICStyles::None; }
bool isPICStyleGOT() const { return PICStyle == PICStyles::GOT; }
- bool isPICStyleStub() const { return PICStyle == PICStyles::Stub; }
bool isPICStyleRIPRel() const { return PICStyle == PICStyles::RIPRel; }
- bool isPICStyleWinPIC() const { return PICStyle == PICStyles::WinPIC; }
+
+ bool isPICStyleStubPIC() const {
+ return PICStyle == PICStyles::StubPIC;
+ }
+
+ bool isPICStyleStubNoDynamic() const {
+ return PICStyle == PICStyles::StubDynamicNoPIC;
+ }
+ bool isPICStyleStubAny() const {
+ return PICStyle == PICStyles::StubDynamicNoPIC ||
+           PICStyle == PICStyles::StubPIC;
+  }
- /// getDarwinVers - Return the darwin version number, 8 = tiger, 9 = leopard.
+ /// getDarwinVers - Return the darwin version number, 8 = Tiger, 9 = Leopard,
+ /// 10 = Snow Leopard, etc.
unsigned getDarwinVers() const { return DarwinVers; }
/// isLinux - Return true if the target is "Linux".
bool isLinux() const { return IsLinux; }
- /// True if accessing the GV requires an extra load. For Windows, dllimported
- /// symbols are indirect, loading the value at address GV rather then the
- /// value of GV itself. This means that the GlobalAddress must be in the base
- /// or index register of the address, not the GV offset field.
- bool GVRequiresExtraLoad(const GlobalValue* GV, const TargetMachine& TM,
- bool isDirectCall) const;
-
- /// True if accessing the GV requires a register. This is a superset of the
- /// cases where GVRequiresExtraLoad is true. Some variations of PIC require
- /// a register, but not an extra load.
- bool GVRequiresRegister(const GlobalValue* GV, const TargetMachine& TM,
- bool isDirectCall) const;
+
+ /// ClassifyGlobalReference - Classify a global variable reference for the
+ /// current subtarget according to how we should reference it in a non-pcrel
+ /// context.
+ unsigned char ClassifyGlobalReference(const GlobalValue *GV,
+                                        const TargetMachine &TM) const;
/// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls
/// to immediate address.
@@ -224,13 +224,6 @@ public:
unsigned getSpecialAddressLatency() const;
};
-namespace X86 {
- /// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in
- /// the specified arguments. If we can't run cpuid on the host, return true.
- bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
- unsigned *rECX, unsigned *rEDX);
-}
-
} // End llvm namespace
#endif
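As a reading aid for the getDataLayout() strings above, here is the 32-bit Darwin variant decoded field by field, plus a typical consumer (a sketch assuming the TargetData API of this era):

// e               little-endian
// p:32:32         pointers: 32 bits wide, 32-bit aligned
// f64:32:64       f64: 32-bit ABI alignment, 64-bit preferred alignment
// i64:32:64       i64: 32-bit ABI alignment, 64-bit preferred alignment
// f80:128:128     x87 long double occupies/aligns to 16 bytes
//
// Typical consumer (assumes llvm/Target/TargetData.h):
//   TargetData TD(Subtarget.getDataLayout());
//   unsigned PtrBytes = TD.getPointerSize();   // 4 here, 8 on x86-64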
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index b000914..a61de1c 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -11,172 +11,134 @@
//
//===----------------------------------------------------------------------===//
-#include "X86TargetAsmInfo.h"
+#include "X86MCAsmInfo.h"
#include "X86TargetMachine.h"
#include "X86.h"
-#include "llvm/Module.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
-/// X86TargetMachineModule - Note that this is used on hosts that cannot link
-/// in a library unless there are references into the library. In particular,
-/// it seems that it is not possible to get things to work on Win32 without
-/// this. Though it is unused, do not remove it.
-extern "C" int X86TargetMachineModule;
-int X86TargetMachineModule = 0;
-
-// Register the target.
-static RegisterTarget<X86_32TargetMachine>
-X("x86", "32-bit X86: Pentium-Pro and above");
-static RegisterTarget<X86_64TargetMachine>
-Y("x86-64", "64-bit X86: EM64T and AMD64");
-
-// Force static initialization.
-extern "C" void LLVMInitializeX86Target() { }
-
-// No assembler printer by default
-X86TargetMachine::AsmPrinterCtorFn X86TargetMachine::AsmPrinterCtor = 0;
-
-const TargetAsmInfo *X86TargetMachine::createTargetAsmInfo() const {
- if (Subtarget.isFlavorIntel())
- return new X86WinTargetAsmInfo(*this);
- else
- switch (Subtarget.TargetType) {
- case X86Subtarget::isDarwin:
- return new X86DarwinTargetAsmInfo(*this);
- case X86Subtarget::isELF:
- return new X86ELFTargetAsmInfo(*this);
- case X86Subtarget::isMingw:
- case X86Subtarget::isCygwin:
- return new X86COFFTargetAsmInfo(*this);
- case X86Subtarget::isWindows:
- return new X86WinTargetAsmInfo(*this);
- default:
- return new X86GenericTargetAsmInfo(*this);
- }
-}
-
-unsigned X86_32TargetMachine::getJITMatchQuality() {
-#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
- return 10;
-#endif
- return 0;
-}
-
-unsigned X86_64TargetMachine::getJITMatchQuality() {
-#if defined(__x86_64__) || defined(_M_AMD64)
- return 10;
-#endif
- return 0;
+static const MCAsmInfo *createMCAsmInfo(const Target &T,
+ const StringRef &TT) {
+ Triple TheTriple(TT);
+ switch (TheTriple.getOS()) {
+ case Triple::Darwin:
+ return new X86MCAsmInfoDarwin(TheTriple);
+ case Triple::MinGW32:
+ case Triple::MinGW64:
+ case Triple::Cygwin:
+ return new X86MCAsmInfoCOFF(TheTriple);
+ case Triple::Win32:
+ return new X86WinMCAsmInfo(TheTriple);
+ default:
+ return new X86ELFMCAsmInfo(TheTriple);
+ }
}
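The dispatch above keys purely off the OS field of the target triple. A usage sketch of the Triple class (assumes llvm/ADT/Triple.h and <cassert>; the triples are illustrative):

// Which MCAsmInfo flavor a given triple string selects:
Triple T1("i686-apple-darwin9");       // -> X86MCAsmInfoDarwin
Triple T2("i686-pc-mingw32");          // -> X86MCAsmInfoCOFF
Triple T3("x86_64-unknown-linux-gnu"); // -> X86ELFMCAsmInfo (default case)
assert(T1.getOS() == Triple::Darwin);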
-unsigned X86_32TargetMachine::getModuleMatchQuality(const Module &M) {
- // We strongly match "i[3-9]86-*".
- std::string TT = M.getTargetTriple();
- if (TT.size() >= 5 && TT[0] == 'i' && TT[2] == '8' && TT[3] == '6' &&
- TT[4] == '-' && TT[1] - '3' < 6)
- return 20;
- // If the target triple is something non-X86, we don't match.
- if (!TT.empty()) return 0;
+extern "C" void LLVMInitializeX86Target() {
+ // Register the target.
+ RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target);
+ RegisterTargetMachine<X86_64TargetMachine> Y(TheX86_64Target);
- if (M.getEndianness() == Module::LittleEndian &&
- M.getPointerSize() == Module::Pointer32)
- return 10; // Weak match
- else if (M.getEndianness() != Module::AnyEndianness ||
- M.getPointerSize() != Module::AnyPointerSize)
- return 0; // Match for some other target
+ // Register the target asm info.
+ RegisterAsmInfoFn A(TheX86_32Target, createMCAsmInfo);
+ RegisterAsmInfoFn B(TheX86_64Target, createMCAsmInfo);
- return getJITMatchQuality()/2;
+ // Register the code emitter.
+ TargetRegistry::RegisterCodeEmitter(TheX86_32Target, createX86MCCodeEmitter);
+ TargetRegistry::RegisterCodeEmitter(TheX86_64Target, createX86MCCodeEmitter);
}
-unsigned X86_64TargetMachine::getModuleMatchQuality(const Module &M) {
- // We strongly match "x86_64-*".
- std::string TT = M.getTargetTriple();
- if (TT.size() >= 7 && TT[0] == 'x' && TT[1] == '8' && TT[2] == '6' &&
- TT[3] == '_' && TT[4] == '6' && TT[5] == '4' && TT[6] == '-')
- return 20;
-
- // We strongly match "amd64-*".
- if (TT.size() >= 6 && TT[0] == 'a' && TT[1] == 'm' && TT[2] == 'd' &&
- TT[3] == '6' && TT[4] == '4' && TT[5] == '-')
- return 20;
-
- // If the target triple is something non-X86-64, we don't match.
- if (!TT.empty()) return 0;
-
- if (M.getEndianness() == Module::LittleEndian &&
- M.getPointerSize() == Module::Pointer64)
- return 10; // Weak match
- else if (M.getEndianness() != Module::AnyEndianness ||
- M.getPointerSize() != Module::AnyPointerSize)
- return 0; // Match for some other target
- return getJITMatchQuality()/2;
-}
-
-X86_32TargetMachine::X86_32TargetMachine(const Module &M, const std::string &FS)
- : X86TargetMachine(M, FS, false) {
+X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS)
+ : X86TargetMachine(T, TT, FS, false) {
}
-X86_64TargetMachine::X86_64TargetMachine(const Module &M, const std::string &FS)
- : X86TargetMachine(M, FS, true) {
+X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS)
+ : X86TargetMachine(T, TT, FS, true) {
}
-/// X86TargetMachine ctor - Create an ILP32 architecture model
+/// X86TargetMachine ctor - Create an X86 target.
///
-X86TargetMachine::X86TargetMachine(const Module &M, const std::string &FS,
- bool is64Bit)
- : Subtarget(M, FS, is64Bit),
+X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS, bool is64Bit)
+ : LLVMTargetMachine(T, TT),
+ Subtarget(TT, FS, is64Bit),
DataLayout(Subtarget.getDataLayout()),
FrameInfo(TargetFrameInfo::StackGrowsDown,
- Subtarget.getStackAlignment(), Subtarget.is64Bit() ? -8 : -4),
+ Subtarget.getStackAlignment(),
+ (Subtarget.isTargetWin64() ? -40 :
+ (Subtarget.is64Bit() ? -8 : -4))),
InstrInfo(*this), JITInfo(*this), TLInfo(*this), ELFWriterInfo(*this) {
DefRelocModel = getRelocationModel();
- // FIXME: Correctly select PIC model for Win64 stuff
+
+ // If no relocation model was picked, default as appropriate for the target.
if (getRelocationModel() == Reloc::Default) {
- if (Subtarget.isTargetDarwin() ||
- (Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64()))
- setRelocationModel(Reloc::DynamicNoPIC);
- else
+ if (!Subtarget.isTargetDarwin())
setRelocationModel(Reloc::Static);
+ else if (Subtarget.is64Bit())
+ setRelocationModel(Reloc::PIC_);
+ else
+ setRelocationModel(Reloc::DynamicNoPIC);
}
- // ELF doesn't have a distinct dynamic-no-PIC model. Dynamic-no-PIC
- // is defined as a model for code which may be used in static or
- // dynamic executables but not necessarily a shared library. On ELF
- // implement this by using the Static model.
- if (Subtarget.isTargetELF() &&
- getRelocationModel() == Reloc::DynamicNoPIC)
- setRelocationModel(Reloc::Static);
-
- if (Subtarget.is64Bit()) {
- // No DynamicNoPIC support under X86-64.
- if (getRelocationModel() == Reloc::DynamicNoPIC)
+ assert(getRelocationModel() != Reloc::Default &&
+ "Relocation mode not picked");
+
+ // If no code model is picked, default to small.
+ if (getCodeModel() == CodeModel::Default)
+ setCodeModel(CodeModel::Small);
+
+ // ELF and X86-64 don't have a distinct DynamicNoPIC model. DynamicNoPIC
+ // is defined as a model for code which may be used in static or dynamic
+ // executables but not necessarily a shared library. On X86-32 we just
+ // compile in -static mode, in x86-64 we use PIC.
+ if (getRelocationModel() == Reloc::DynamicNoPIC) {
+ if (is64Bit)
setRelocationModel(Reloc::PIC_);
- // Default X86-64 code model is small.
- if (getCodeModel() == CodeModel::Default)
- setCodeModel(CodeModel::Small);
+ else if (!Subtarget.isTargetDarwin())
+ setRelocationModel(Reloc::Static);
}
- if (Subtarget.isTargetCygMing())
- Subtarget.setPICStyle(PICStyles::WinPIC);
- else if (Subtarget.isTargetDarwin()) {
+ // If we are on Darwin, disallow static relocation model in X86-64 mode, since
+ // the Mach-O file format doesn't support it.
+ if (getRelocationModel() == Reloc::Static &&
+ Subtarget.isTargetDarwin() &&
+ is64Bit)
+ setRelocationModel(Reloc::PIC_);
+
+ // Determine the PICStyle based on the target selected.
+ if (getRelocationModel() == Reloc::Static) {
+ // Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
+ Subtarget.setPICStyle(PICStyles::None);
+ } else if (Subtarget.isTargetCygMing()) {
+ Subtarget.setPICStyle(PICStyles::None);
+ } else if (Subtarget.isTargetDarwin()) {
if (Subtarget.is64Bit())
Subtarget.setPICStyle(PICStyles::RIPRel);
- else
- Subtarget.setPICStyle(PICStyles::Stub);
+ else if (getRelocationModel() == Reloc::PIC_)
+ Subtarget.setPICStyle(PICStyles::StubPIC);
+ else {
+ assert(getRelocationModel() == Reloc::DynamicNoPIC);
+ Subtarget.setPICStyle(PICStyles::StubDynamicNoPIC);
+ }
} else if (Subtarget.isTargetELF()) {
if (Subtarget.is64Bit())
Subtarget.setPICStyle(PICStyles::RIPRel);
else
Subtarget.setPICStyle(PICStyles::GOT);
}
+
+ // Finally, if we have "none" as our PIC style, force to static mode.
+ if (Subtarget.getPICStyle() == PICStyles::None)
+ setRelocationModel(Reloc::Static);
}
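The net effect of the relocation-model and PIC-style logic above, summarized (this table is an editorial reading of the code, not part of the patch):

// Reloc::Static (or forced static)   -> PICStyles::None
// Cygwin/MinGW, any model            -> PICStyles::None, model -> Static
// Darwin, 64-bit                     -> PICStyles::RIPRel
// Darwin, 32-bit, PIC_               -> PICStyles::StubPIC
// Darwin, 32-bit, DynamicNoPIC       -> PICStyles::StubDynamicNoPIC
// ELF, 64-bit, PIC_                  -> PICStyles::RIPRel
// ELF, 32-bit, PIC_                  -> PICStyles::GOT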
//===----------------------------------------------------------------------===//
@@ -212,33 +174,16 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM,
return true; // -print-machineinstr should print after this.
}
-bool X86TargetMachine::addAssemblyEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
- bool Verbose,
- raw_ostream &Out) {
- // FIXME: Move this somewhere else!
- // On Darwin, override 64-bit static relocation to pic_ since the
- // assembler doesn't support it.
- if (DefRelocModel == Reloc::Static &&
- Subtarget.isTargetDarwin() && Subtarget.is64Bit() &&
- getCodeModel() == CodeModel::Small)
- setRelocationModel(Reloc::PIC_);
-
- assert(AsmPrinterCtor && "AsmPrinter was not linked in");
- if (AsmPrinterCtor)
- PM.add(AsmPrinterCtor(Out, *this, Verbose));
- return false;
-}
-
bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
- bool DumpAsm,
MachineCodeEmitter &MCE) {
// FIXME: Move this to TargetJITInfo!
// On Darwin, do not override 64-bit setting made in X86TargetMachine().
if (DefRelocModel == Reloc::Default &&
- (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit()))
+ (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit())) {
setRelocationModel(Reloc::Static);
+ Subtarget.setPICStyle(PICStyles::None);
+ }
// 64-bit JIT places everything in the same buffer except external functions.
// On Darwin, use small code model but hack the call instruction for
@@ -251,24 +196,20 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
}
PM.add(createX86CodeEmitterPass(*this, MCE));
- if (DumpAsm) {
- assert(AsmPrinterCtor && "AsmPrinter was not linked in");
- if (AsmPrinterCtor)
- PM.add(AsmPrinterCtor(errs(), *this, true));
- }
return false;
}
bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
- bool DumpAsm,
JITCodeEmitter &JCE) {
// FIXME: Move this to TargetJITInfo!
// On Darwin, do not override 64-bit setting made in X86TargetMachine().
if (DefRelocModel == Reloc::Default &&
- (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit()))
+ (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit())) {
setRelocationModel(Reloc::Static);
+ Subtarget.setPICStyle(PICStyles::None);
+ }
// 64-bit JIT places everything in the same buffer except external functions.
// On Darwin, use small code model but hack the call instruction for
@@ -281,40 +222,34 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
}
PM.add(createX86JITCodeEmitterPass(*this, JCE));
- if (DumpAsm) {
- assert(AsmPrinterCtor && "AsmPrinter was not linked in");
- if (AsmPrinterCtor)
- PM.add(AsmPrinterCtor(errs(), *this, true));
- }
return false;
}
+bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ ObjectCodeEmitter &OCE) {
+ PM.add(createX86ObjectCodeEmitterPass(*this, OCE));
+ return false;
+}
+
bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
- bool DumpAsm,
MachineCodeEmitter &MCE) {
PM.add(createX86CodeEmitterPass(*this, MCE));
- if (DumpAsm) {
- assert(AsmPrinterCtor && "AsmPrinter was not linked in");
- if (AsmPrinterCtor)
- PM.add(AsmPrinterCtor(errs(), *this, true));
- }
-
return false;
}
bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
- bool DumpAsm,
JITCodeEmitter &JCE) {
PM.add(createX86JITCodeEmitterPass(*this, JCE));
- if (DumpAsm) {
- assert(AsmPrinterCtor && "AsmPrinter was not linked in");
- if (AsmPrinterCtor)
- PM.add(AsmPrinterCtor(errs(), *this, true));
- }
-
return false;
}
+bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ ObjectCodeEmitter &OCE) {
+ PM.add(createX86ObjectCodeEmitterPass(*this, OCE));
+ return false;
+}
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 90a5cc2..b538408 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -26,7 +26,7 @@
namespace llvm {
-class raw_ostream;
+class formatted_raw_ostream;
class X86TargetMachine : public LLVMTargetMachine {
X86Subtarget Subtarget;
@@ -38,18 +38,9 @@ class X86TargetMachine : public LLVMTargetMachine {
X86ELFWriterInfo ELFWriterInfo;
Reloc::Model DefRelocModel; // Reloc model before it's overridden.
-protected:
- virtual const TargetAsmInfo *createTargetAsmInfo() const;
-
- // To avoid having target depend on the asmprinter stuff libraries, asmprinter
- // set this functions to ctor pointer at startup time if they are linked in.
- typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
- X86TargetMachine &tm,
- bool verbose);
- static AsmPrinterCtorFn AsmPrinterCtor;
-
public:
- X86TargetMachine(const Module &M, const std::string &FS, bool is64Bit);
+ X86TargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS, bool is64Bit);
virtual const X86InstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
@@ -66,50 +57,41 @@ public:
return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
}
- static unsigned getModuleMatchQuality(const Module &M);
- static unsigned getJITMatchQuality();
-
- static void registerAsmPrinter(AsmPrinterCtorFn F) {
- AsmPrinterCtor = F;
- }
-
// Set up the pass pipeline.
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addAssemblyEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
- bool Verbose, raw_ostream &Out);
virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
- bool DumpAsm, MachineCodeEmitter &MCE);
+ MachineCodeEmitter &MCE);
virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
- bool DumpAsm, JITCodeEmitter &JCE);
+ JITCodeEmitter &JCE);
+ virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ ObjectCodeEmitter &OCE);
+ virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ MachineCodeEmitter &MCE);
virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
- bool DumpAsm, MachineCodeEmitter &MCE);
+ JITCodeEmitter &JCE);
virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
- bool DumpAsm, JITCodeEmitter &JCE);
+ ObjectCodeEmitter &OCE);
};
/// X86_32TargetMachine - X86 32-bit target machine.
///
class X86_32TargetMachine : public X86TargetMachine {
public:
- X86_32TargetMachine(const Module &M, const std::string &FS);
-
- static unsigned getJITMatchQuality();
- static unsigned getModuleMatchQuality(const Module &M);
+  X86_32TargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS);
};
/// X86_64TargetMachine - X86 64-bit target machine.
///
class X86_64TargetMachine : public X86TargetMachine {
public:
- X86_64TargetMachine(const Module &M, const std::string &FS);
-
- static unsigned getJITMatchQuality();
- static unsigned getModuleMatchQuality(const Module &M);
+ X86_64TargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS);
};
} // End llvm namespace
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
new file mode 100644
index 0000000..d39b3c4
--- /dev/null
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -0,0 +1,65 @@
+//===-- llvm/Target/X86/X86TargetObjectFile.cpp - X86 Object Info ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86TargetObjectFile.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+using namespace llvm;
+
+const MCExpr *X8632_MachoTargetObjectFile::
+getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI,
+ bool &IsIndirect, bool &IsPCRel) const {
+ // The mach-o version of this method defaults to returning a stub reference.
+ IsIndirect = true;
+ IsPCRel = false;
+
+ MachineModuleInfoMachO &MachOMMI =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ SmallString<128> Name;
+ Mang->getNameWithPrefix(Name, GV, true);
+ Name += "$non_lazy_ptr";
+
+ // Add information about the stub reference to MachOMMI so that the stub gets
+ // emitted by the asmprinter.
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str());
+ const MCSymbol *&StubSym = MachOMMI.getGVStubEntry(Sym);
+ if (StubSym == 0) {
+ Name.clear();
+ Mang->getNameWithPrefix(Name, GV, false);
+ StubSym = getContext().GetOrCreateSymbol(Name.str());
+ }
+
+ return MCSymbolRefExpr::Create(Sym, getContext());
+}
+
+const MCExpr *X8664_MachoTargetObjectFile::
+getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI,
+ bool &IsIndirect, bool &IsPCRel) const {
+
+ // On Darwin/X86-64, we can reference dwarf symbols with foo@GOTPCREL+4, which
+ // is an indirect pc-relative reference.
+ IsIndirect = true;
+ IsPCRel = true;
+
+ SmallString<128> Name;
+ Mang->getNameWithPrefix(Name, GV, false);
+ Name += "@GOTPCREL";
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(Name.str(), getContext());
+ const MCExpr *Four = MCConstantExpr::Create(4, getContext());
+ return MCBinaryExpr::CreateAdd(Res, Four, getContext());
+}
+
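For illustration (assuming a global named "foo", mangled to "_foo"): the expression built above prints in the emitted debug info as

  _foo@GOTPCREL+4

an indirect, pc-relative reference resolved through the GOT; the +4 appears to compensate for the Mach-O linker measuring pc-relative fixups from the end of the 4-byte field rather than its start.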
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
new file mode 100644
index 0000000..377a93b
--- /dev/null
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -0,0 +1,40 @@
+//===-- llvm/Target/X86/X86TargetObjectFile.h - X86 Object Info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_X86_TARGETOBJECTFILE_H
+#define LLVM_TARGET_X86_TARGETOBJECTFILE_H
+
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+namespace llvm {
+
+ /// X8632_MachoTargetObjectFile - This TLOF implementation is used for
+ /// Darwin/x86-32.
+ class X8632_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
+ public:
+
+ virtual const MCExpr *
+ getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI,
+ bool &IsIndirect, bool &IsPCRel) const;
+ };
+
+ /// X8664_MachoTargetObjectFile - This TLOF implementation is used for
+ /// Darwin/x86-64.
+ class X8664_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
+ public:
+
+ virtual const MCExpr *
+ getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI,
+ bool &IsIndirect, bool &IsPCRel) const;
+ };
+} // end namespace llvm
+
+#endif