Diffstat (limited to 'lib/Target/X86')
58 files changed, 9798 insertions, 3677 deletions
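The common currency of this patch is the MCInst/MCOperand pair: the new AsmParser produces MCInsts and the new InstPrinters consume them. A minimal sketch of that API as it is used throughout the diff below (the opcode and register values are placeholders, not real X86 enum constants):

    #include "llvm/MC/MCInst.h"
    using namespace llvm;

    MCInst makeExample(unsigned Opcode, unsigned DstReg, int64_t ImmVal) {
      MCInst Inst;
      Inst.setOpcode(Opcode);                        // an X86::* opcode enum
      Inst.addOperand(MCOperand::CreateReg(DstReg)); // register operand
      Inst.addOperand(MCOperand::CreateImm(ImmVal)); // immediate operand
      return Inst;
    }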
diff --git a/lib/Target/X86/AsmParser/CMakeLists.txt b/lib/Target/X86/AsmParser/CMakeLists.txt new file mode 100644 index 0000000..034d5ab --- /dev/null +++ b/lib/Target/X86/AsmParser/CMakeLists.txt @@ -0,0 +1,6 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMX86AsmParser + X86AsmParser.cpp + ) +add_dependencies(LLVMX86AsmParser X86CodeGenTable_gen) diff --git a/lib/Target/X86/AsmParser/Makefile b/lib/Target/X86/AsmParser/Makefile new file mode 100644 index 0000000..25fb0a2 --- /dev/null +++ b/lib/Target/X86/AsmParser/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/X86/AsmParser/Makefile -------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMX86AsmParser + +# Hack: we need to include 'main' x86 target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp new file mode 100644 index 0000000..c357b4d --- /dev/null +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -0,0 +1,479 @@ +//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAsmLexer.h" +#include "llvm/MC/MCAsmParser.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Target/TargetAsmParser.h" +using namespace llvm; + +namespace { +struct X86Operand; + +class X86ATTAsmParser : public TargetAsmParser { + MCAsmParser &Parser; + +private: + MCAsmParser &getParser() const { return Parser; } + + MCAsmLexer &getLexer() const { return Parser.getLexer(); } + + void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } + + bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } + + bool ParseRegister(X86Operand &Op); + + bool ParseOperand(X86Operand &Op); + + bool ParseMemOperand(X86Operand &Op); + + bool ParseDirectiveWord(unsigned Size, SMLoc L); + + /// @name Auto-generated Match Functions + /// { + + bool MatchInstruction(SmallVectorImpl<X86Operand> &Operands, + MCInst &Inst); + + /// MatchRegisterName - Match the given string to a register name, or 0 if + /// there is no match. + unsigned MatchRegisterName(const StringRef &Name); + + /// } + +public: + X86ATTAsmParser(const Target &T, MCAsmParser &_Parser) + : TargetAsmParser(T), Parser(_Parser) {} + + virtual bool ParseInstruction(const StringRef &Name, MCInst &Inst); + + virtual bool ParseDirective(AsmToken DirectiveID); +}; + +} // end anonymous namespace + + +namespace { + +/// X86Operand - Instances of this class represent a parsed X86 machine +/// instruction. 
+struct X86Operand { + enum { + Token, + Register, + Immediate, + Memory + } Kind; + + union { + struct { + const char *Data; + unsigned Length; + } Tok; + + struct { + unsigned RegNo; + } Reg; + + struct { + const MCExpr *Val; + } Imm; + + struct { + unsigned SegReg; + const MCExpr *Disp; + unsigned BaseReg; + unsigned IndexReg; + unsigned Scale; + } Mem; + }; + + StringRef getToken() const { + assert(Kind == Token && "Invalid access!"); + return StringRef(Tok.Data, Tok.Length); + } + + unsigned getReg() const { + assert(Kind == Register && "Invalid access!"); + return Reg.RegNo; + } + + const MCExpr *getImm() const { + assert(Kind == Immediate && "Invalid access!"); + return Imm.Val; + } + + const MCExpr *getMemDisp() const { + assert(Kind == Memory && "Invalid access!"); + return Mem.Disp; + } + unsigned getMemSegReg() const { + assert(Kind == Memory && "Invalid access!"); + return Mem.SegReg; + } + unsigned getMemBaseReg() const { + assert(Kind == Memory && "Invalid access!"); + return Mem.BaseReg; + } + unsigned getMemIndexReg() const { + assert(Kind == Memory && "Invalid access!"); + return Mem.IndexReg; + } + unsigned getMemScale() const { + assert(Kind == Memory && "Invalid access!"); + return Mem.Scale; + } + + bool isToken() const {return Kind == Token; } + + bool isImm() const { return Kind == Immediate; } + + bool isImmSExt8() const { + // Accept immediates which fit in 8 bits when sign extended, and + // non-absolute immediates. + if (!isImm()) + return false; + + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) { + int64_t Value = CE->getValue(); + return Value == (int64_t) (int8_t) Value; + } + + return true; + } + + bool isMem() const { return Kind == Memory; } + + bool isReg() const { return Kind == Register; } + + void addRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(getReg())); + } + + void addImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateExpr(getImm())); + } + + void addImmSExt8Operands(MCInst &Inst, unsigned N) const { + // FIXME: Support user customization of the render method. + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateExpr(getImm())); + } + + void addMemOperands(MCInst &Inst, unsigned N) const { + assert((N == 4 || N == 5) && "Invalid number of operands!"); + + Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); + Inst.addOperand(MCOperand::CreateImm(getMemScale())); + Inst.addOperand(MCOperand::CreateReg(getMemIndexReg())); + Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); + + // FIXME: What a hack. + if (N == 5) + Inst.addOperand(MCOperand::CreateReg(getMemSegReg())); + } + + static X86Operand CreateToken(StringRef Str) { + X86Operand Res; + Res.Kind = Token; + Res.Tok.Data = Str.data(); + Res.Tok.Length = Str.size(); + return Res; + } + + static X86Operand CreateReg(unsigned RegNo) { + X86Operand Res; + Res.Kind = Register; + Res.Reg.RegNo = RegNo; + return Res; + } + + static X86Operand CreateImm(const MCExpr *Val) { + X86Operand Res; + Res.Kind = Immediate; + Res.Imm.Val = Val; + return Res; + } + + static X86Operand CreateMem(unsigned SegReg, const MCExpr *Disp, + unsigned BaseReg, unsigned IndexReg, + unsigned Scale) { + // We should never just have a displacement, that would be an immediate. + assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!"); + + // The scale should always be one of {1,2,4,8}. 
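+  // (Illustrative example, not part of the patch: in AT&T syntax
+  // "movl 8(%ebx,%esi,4), %eax" this operand decomposes into Disp=8,
+  // BaseReg=%ebx, IndexReg=%esi, Scale=4; x86's SIB byte can only encode
+  // scales of 1, 2, 4 or 8.)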
+  assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
+         "Invalid scale!");
+  X86Operand Res;
+  Res.Kind = Memory;
+  Res.Mem.SegReg = SegReg;
+  Res.Mem.Disp = Disp;
+  Res.Mem.BaseReg = BaseReg;
+  Res.Mem.IndexReg = IndexReg;
+  Res.Mem.Scale = Scale;
+  return Res;
+  }
+};
+
+} // end anonymous namespace.
+
+
+bool X86ATTAsmParser::ParseRegister(X86Operand &Op) {
+  const AsmToken &TokPercent = getLexer().getTok();
+  (void)TokPercent; // Avoid warning when assertions are disabled.
+  assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!");
+  getLexer().Lex(); // Eat percent token.
+
+  const AsmToken &Tok = getLexer().getTok();
+  if (Tok.isNot(AsmToken::Identifier))
+    return Error(Tok.getLoc(), "invalid register name");
+
+  // FIXME: Validate register for the current architecture; we have to do
+  // validation later, so maybe there is no need for this here.
+  unsigned RegNo;
+
+  RegNo = MatchRegisterName(Tok.getString());
+  if (RegNo == 0)
+    return Error(Tok.getLoc(), "invalid register name");
+
+  Op = X86Operand::CreateReg(RegNo);
+  getLexer().Lex(); // Eat identifier token.
+
+  return false;
+}
+
+bool X86ATTAsmParser::ParseOperand(X86Operand &Op) {
+  switch (getLexer().getKind()) {
+  default:
+    return ParseMemOperand(Op);
+  case AsmToken::Percent:
+    // FIXME: if a segment register, this could either be just the seg reg, or
+    // the start of a memory operand.
+    return ParseRegister(Op);
+  case AsmToken::Dollar: {
+    // $42 -> immediate.
+    getLexer().Lex();
+    const MCExpr *Val;
+    if (getParser().ParseExpression(Val))
+      return true;
+    Op = X86Operand::CreateImm(Val);
+    return false;
+  }
+  }
+}
+
+/// ParseMemOperand: segment: disp(basereg, indexreg, scale)
+bool X86ATTAsmParser::ParseMemOperand(X86Operand &Op) {
+  // FIXME: If SegReg ':' (e.g. %gs:), eat and remember.
+  unsigned SegReg = 0;
+
+  // We have to disambiguate a parenthesized expression "(4+5)" from the start
+  // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
+  // only way to do this without lookahead is to eat the ( and see what is
+  // after it.
+  const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
+  if (getLexer().isNot(AsmToken::LParen)) {
+    if (getParser().ParseExpression(Disp)) return true;
+
+    // After parsing the base expression we could either have a parenthesized
+    // memory address or not. If not, return now. If so, eat the (.
+    if (getLexer().isNot(AsmToken::LParen)) {
+      // Unless we have a segment register, treat this as an immediate.
+      if (SegReg)
+        Op = X86Operand::CreateMem(SegReg, Disp, 0, 0, 1);
+      else
+        Op = X86Operand::CreateImm(Disp);
+      return false;
+    }
+
+    // Eat the '('.
+    getLexer().Lex();
+  } else {
+    // Okay, we have a '('. We don't know if this is an expression or not, so
+    // we have to eat the ( to see beyond it.
+    getLexer().Lex(); // Eat the '('.
+
+    if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
+      // Nothing to do here, fall into the code below with the '(' part of the
+      // memory operand consumed.
+    } else {
+      // It must be a parenthesized expression, parse it now.
+      if (getParser().ParseParenExpression(Disp))
+        return true;
+
+      // After parsing the base expression we could either have a parenthesized
+      // memory address or not. If not, return now. If so, eat the (.
+      if (getLexer().isNot(AsmToken::LParen)) {
+        // Unless we have a segment register, treat this as an immediate.
+        if (SegReg)
+          Op = X86Operand::CreateMem(SegReg, Disp, 0, 0, 1);
+        else
+          Op = X86Operand::CreateImm(Disp);
+        return false;
+      }
+
+      // Eat the '('.
+      getLexer().Lex();
+    }
+  }
+
+  // If we reached here, then we just ate the ( of the memory operand. Process
+  // the rest of the memory operand.
+  unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
+
+  if (getLexer().is(AsmToken::Percent)) {
+    if (ParseRegister(Op))
+      return true;
+    BaseReg = Op.getReg();
+  }
+
+  if (getLexer().is(AsmToken::Comma)) {
+    getLexer().Lex(); // Eat the comma.
+
+    // Following the comma we should have either an index register, or a scale
+    // value. We don't support the latter form, but we want to parse it
+    // correctly.
+    //
+    // Note that even though it would be completely consistent to support
+    // syntax like "1(%eax,,1)", the assembler doesn't.
+    if (getLexer().is(AsmToken::Percent)) {
+      if (ParseRegister(Op))
+        return true;
+      IndexReg = Op.getReg();
+
+      if (getLexer().isNot(AsmToken::RParen)) {
+        // Parse the scale amount:
+        //  ::= ',' [scale-expression]
+        if (getLexer().isNot(AsmToken::Comma))
+          return true;
+        getLexer().Lex(); // Eat the comma.
+
+        if (getLexer().isNot(AsmToken::RParen)) {
+          SMLoc Loc = getLexer().getTok().getLoc();
+
+          int64_t ScaleVal;
+          if (getParser().ParseAbsoluteExpression(ScaleVal))
+            return true;
+
+          // Validate the scale amount.
+          if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8)
+            return Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
+          Scale = (unsigned)ScaleVal;
+        }
+      }
+    } else if (getLexer().isNot(AsmToken::RParen)) {
+      // Otherwise we have the unsupported form of a scale amount without an
+      // index.
+      SMLoc Loc = getLexer().getTok().getLoc();
+
+      int64_t Value;
+      if (getParser().ParseAbsoluteExpression(Value))
+        return true;
+
+      return Error(Loc, "cannot have scale factor without index register");
+    }
+  }
+
+  // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
+  if (getLexer().isNot(AsmToken::RParen))
+    return Error(getLexer().getTok().getLoc(),
+                 "unexpected token in memory operand");
+  getLexer().Lex(); // Eat the ')'.
+
+  Op = X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale);
+  return false;
+}
+
+bool X86ATTAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) {
+  SmallVector<X86Operand, 8> Operands;
+
+  Operands.push_back(X86Operand::CreateToken(Name));
+
+  SMLoc Loc = getLexer().getTok().getLoc();
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+
+    // Parse '*' modifier.
+    if (getLexer().is(AsmToken::Star)) {
+      getLexer().Lex(); // Eat the star.
+      Operands.push_back(X86Operand::CreateToken("*"));
+    }
+
+    // Read the first operand.
+    Operands.push_back(X86Operand());
+    if (ParseOperand(Operands.back()))
+      return true;
+
+    while (getLexer().is(AsmToken::Comma)) {
+      getLexer().Lex(); // Eat the comma.
+
+      // Parse and remember the operand.
+      Operands.push_back(X86Operand());
+      if (ParseOperand(Operands.back()))
+        return true;
+    }
+  }
+
+  if (!MatchInstruction(Operands, Inst))
+    return false;
+
+  // FIXME: We should give nicer diagnostics about the exact failure.
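+  // (Illustrative note: for "movl %eax, %ebx" the Operands vector holds
+  // { Token("movl"), Register(EAX), Register(EBX) }, and the tablegen'erated
+  // matcher selects an opcode from the mnemonic token plus operand kinds.)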
+ + Error(Loc, "unrecognized instruction"); + return true; +} + +bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) { + StringRef IDVal = DirectiveID.getIdentifier(); + if (IDVal == ".word") + return ParseDirectiveWord(2, DirectiveID.getLoc()); + return true; +} + +/// ParseDirectiveWord +/// ::= .word [ expression (, expression)* ] +bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { + for (;;) { + const MCExpr *Value; + if (getParser().ParseExpression(Value)) + return true; + + getParser().getStreamer().EmitValue(Value, Size); + + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + // FIXME: Improve diagnostic. + if (getLexer().isNot(AsmToken::Comma)) + return Error(L, "unexpected token in directive"); + getLexer().Lex(); + } + } + + getLexer().Lex(); + return false; +} + +// Force static initialization. +extern "C" void LLVMInitializeX86AsmParser() { + RegisterAsmParser<X86ATTAsmParser> X(TheX86_32Target); + RegisterAsmParser<X86ATTAsmParser> Y(TheX86_64Target); +} + +#include "X86GenAsmMatcher.inc" diff --git a/lib/Target/X86/AsmPrinter/CMakeLists.txt b/lib/Target/X86/AsmPrinter/CMakeLists.txt index a28c826..b70a587 100644 --- a/lib/Target/X86/AsmPrinter/CMakeLists.txt +++ b/lib/Target/X86/AsmPrinter/CMakeLists.txt @@ -1,9 +1,9 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) add_llvm_library(LLVMX86AsmPrinter - X86ATTAsmPrinter.cpp X86ATTInstPrinter.cpp X86AsmPrinter.cpp - X86IntelAsmPrinter.cpp + X86IntelInstPrinter.cpp + X86MCInstLower.cpp ) -add_dependencies(LLVMX86AsmPrinter X86CodeGenTable_gen)
\ No newline at end of file +add_dependencies(LLVMX86AsmPrinter X86CodeGenTable_gen) diff --git a/lib/Target/X86/AsmPrinter/Makefile b/lib/Target/X86/AsmPrinter/Makefile index ba89ac6..2368761 100644 --- a/lib/Target/X86/AsmPrinter/Makefile +++ b/lib/Target/X86/AsmPrinter/Makefile @@ -1,4 +1,4 @@ -##===- lib/Target/X86/Makefile -----------------------------*- Makefile -*-===## +##===- lib/Target/X86/AsmPrinter/Makefile ------------------*- Makefile -*-===## # # The LLVM Compiler Infrastructure # diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp index fa0ee75..bc70ffe 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp @@ -13,10 +13,13 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "asm-printer" +#include "X86ATTInstPrinter.h" #include "llvm/MC/MCInst.h" -#include "X86ATTAsmPrinter.h" -#include "llvm/Target/TargetAsmInfo.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "X86GenInstrNames.inc" using namespace llvm; // Include the auto-generated portion of the assembly writer. @@ -25,9 +28,11 @@ using namespace llvm; #include "X86GenAsmWriter.inc" #undef MachineInstr -void X86ATTAsmPrinter::printSSECC(const MCInst *MI, unsigned Op) { +void X86ATTInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); } + +void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op) { switch (MI->getOperand(Op).getImm()) { - default: assert(0 && "Invalid ssecc argument!"); + default: llvm_unreachable("Invalid ssecc argument!"); case 0: O << "eq"; break; case 1: O << "lt"; break; case 2: O << "le"; break; @@ -39,61 +44,36 @@ void X86ATTAsmPrinter::printSSECC(const MCInst *MI, unsigned Op) { } } - -void X86ATTAsmPrinter::printPICLabel(const MCInst *MI, unsigned Op) { - assert(0 && - "This is only used for MOVPC32r, should lower before asm printing!"); -} - - /// print_pcrel_imm - This is used to print an immediate value that ends up /// being encoded as a pc-relative value. These print slightly differently, for /// example, a $ is not emitted. -void X86ATTAsmPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo) { +void X86ATTInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo) { const MCOperand &Op = MI->getOperand(OpNo); - if (Op.isImm()) O << Op.getImm(); - else if (Op.isMBBLabel()) - // FIXME: Keep in sync with printBasicBlockLabel. printBasicBlockLabel - // should eventually call into this code, not the other way around. - O << TAI->getPrivateGlobalPrefix() << "BB" << Op.getMBBLabelFunction() - << '_' << Op.getMBBLabelBlock(); - else - assert(0 && "Unknown pcrel immediate operand"); + else { + assert(Op.isExpr() && "unknown pcrel immediate operand"); + Op.getExpr()->print(O, &MAI); + } } - -void X86ATTAsmPrinter::printOperand(const MCInst *MI, unsigned OpNo, - const char *Modifier) { +void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + const char *Modifier) { assert(Modifier == 0 && "Modifiers should not be used"); const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) { - O << '%'; - unsigned Reg = Op.getReg(); -#if 0 - if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) { - MVT VT = (strcmp(Modifier+6,"64") == 0) ? - MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 : - ((strcmp(Modifier+6,"16") == 0) ? 
MVT::i16 : MVT::i8)); - Reg = getX86SubSuperRegister(Reg, VT); - } -#endif - O << TRI->getAsmName(Reg); - return; + O << '%' << getRegisterName(Op.getReg()); } else if (Op.isImm()) { - //if (!Modifier || (strcmp(Modifier, "debug") && strcmp(Modifier, "mem"))) + O << '$' << Op.getImm(); + } else { + assert(Op.isExpr() && "unknown operand kind in printOperand"); O << '$'; - O << Op.getImm(); - return; + Op.getExpr()->print(O, &MAI); } - - O << "<<UNKNOWN OPERAND KIND>>"; } -void X86ATTAsmPrinter::printLeaMemReference(const MCInst *MI, unsigned Op) { - +void X86ATTInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op) { const MCOperand &BaseReg = MI->getOperand(Op); const MCOperand &IndexReg = MI->getOperand(Op+2); const MCOperand &DispSpec = MI->getOperand(Op+3); @@ -103,19 +83,11 @@ void X86ATTAsmPrinter::printLeaMemReference(const MCInst *MI, unsigned Op) { if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) O << DispVal; } else { - abort(); - //assert(DispSpec.isGlobal() || DispSpec.isCPI() || - // DispSpec.isJTI() || DispSpec.isSymbol()); - //printOperand(MI, Op+3, "mem"); + assert(DispSpec.isExpr() && "non-immediate displacement for LEA?"); + DispSpec.getExpr()->print(O, &MAI); } if (IndexReg.getReg() || BaseReg.getReg()) { - // There are cases where we can end up with ESP/RSP in the indexreg slot. - // If this happens, swap the base/index register to support assemblers that - // don't work when the index is *SP. - // FIXME: REMOVE THIS. - assert(IndexReg.getReg() != X86::ESP && IndexReg.getReg() != X86::RSP); - O << '('; if (BaseReg.getReg()) printOperand(MI, Op); @@ -131,9 +103,9 @@ void X86ATTAsmPrinter::printLeaMemReference(const MCInst *MI, unsigned Op) { } } -void X86ATTAsmPrinter::printMemReference(const MCInst *MI, unsigned Op) { - const MCOperand &Segment = MI->getOperand(Op+4); - if (Segment.getReg()) { +void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op) { + // If this has a segment register, print it. + if (MI->getOperand(Op+4).getReg()) { printOperand(MI, Op+4); O << ':'; } diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h new file mode 100644 index 0000000..5f28fa4 --- /dev/null +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h @@ -0,0 +1,86 @@ +//===-- X86ATTInstPrinter.h - Convert X86 MCInst to assembly syntax -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an X86 MCInst to AT&T style .s file syntax. +// +//===----------------------------------------------------------------------===// + +#ifndef X86_ATT_INST_PRINTER_H +#define X86_ATT_INST_PRINTER_H + +#include "llvm/MC/MCInstPrinter.h" + +namespace llvm { + class MCOperand; + +class X86ATTInstPrinter : public MCInstPrinter { +public: + X86ATTInstPrinter(raw_ostream &O, const MCAsmInfo &MAI) + : MCInstPrinter(O, MAI) {} + + + virtual void printInst(const MCInst *MI); + + // Autogenerated by tblgen. 
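+  // (printInstruction and getRegisterName are emitted into
+  // X86GenAsmWriter.inc from the AsmString fields of the .td files; see the
+  // #include at the top of X86ATTInstPrinter.cpp.)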
+ void printInstruction(const MCInst *MI); + static const char *getRegisterName(unsigned RegNo); + + + void printOperand(const MCInst *MI, unsigned OpNo, + const char *Modifier = 0); + void printMemReference(const MCInst *MI, unsigned Op); + void printLeaMemReference(const MCInst *MI, unsigned Op); + void printSSECC(const MCInst *MI, unsigned Op); + void print_pcrel_imm(const MCInst *MI, unsigned OpNo); + + void printopaquemem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + + void printi8mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printi16mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printi32mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printi64mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printi128mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printf32mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printf64mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printf80mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printf128mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printlea32mem(const MCInst *MI, unsigned OpNo) { + printLeaMemReference(MI, OpNo); + } + void printlea64mem(const MCInst *MI, unsigned OpNo) { + printLeaMemReference(MI, OpNo); + } + void printlea64_32mem(const MCInst *MI, unsigned OpNo) { + printLeaMemReference(MI, OpNo); + } +}; + +} + +#endif diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index e5d80a4..2a0290d 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -1,4 +1,4 @@ -//===-- X86AsmPrinter.cpp - Convert X86 LLVM IR to X86 assembly -----------===// +//===-- X86AsmPrinter.cpp - Convert X86 LLVM code to AT&T assembly --------===// // // The LLVM Compiler Infrastructure // @@ -7,42 +7,937 @@ // //===----------------------------------------------------------------------===// // -// This file the shared super class printer that converts from our internal -// representation of machine-dependent LLVM code to Intel and AT&T format -// assembly language. -// This printer is the output mechanism used by `llc'. +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to AT&T format assembly +// language. This printer is the output mechanism used by `llc'. 
// //===----------------------------------------------------------------------===// -#include "X86ATTAsmPrinter.h" -#include "X86IntelAsmPrinter.h" -#include "X86Subtarget.h" +#define DEBUG_TYPE "asm-printer" +#include "X86AsmPrinter.h" +#include "X86ATTInstPrinter.h" +#include "X86IntelInstPrinter.h" +#include "X86MCInstLower.h" +#include "X86.h" +#include "X86COFF.h" +#include "X86COFFMachineModuleInfo.h" +#include "X86MachineFunctionInfo.h" +#include "X86TargetMachine.h" +#include "llvm/CallingConv.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/Type.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/Mangler.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Statistic.h" using namespace llvm; -/// createX86CodePrinterPass - Returns a pass that prints the X86 assembly code -/// for a MachineFunction to the given output stream, using the given target -/// machine description. +STATISTIC(EmittedInsts, "Number of machine instrs printed"); + +//===----------------------------------------------------------------------===// +// Primitive Helper Functions. +//===----------------------------------------------------------------------===// + +void X86AsmPrinter::printMCInst(const MCInst *MI) { + if (MAI->getAssemblerDialect() == 0) + X86ATTInstPrinter(O, *MAI).printInstruction(MI); + else + X86IntelInstPrinter(O, *MAI).printInstruction(MI); +} + +void X86AsmPrinter::PrintPICBaseSymbol() const { + // FIXME: Gross const cast hack. + X86AsmPrinter *AP = const_cast<X86AsmPrinter*>(this); + X86MCInstLower(OutContext, 0, *AP).GetPICBaseSymbol()->print(O, MAI); +} + +void X86AsmPrinter::emitFunctionHeader(const MachineFunction &MF) { + unsigned FnAlign = MF.getAlignment(); + const Function *F = MF.getFunction(); + + if (Subtarget->isTargetCygMing()) { + X86COFFMachineModuleInfo &COFFMMI = + MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); + COFFMMI.DecorateCygMingName(CurrentFnName, F, *TM.getTargetData()); + } + + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); + EmitAlignment(FnAlign, F); + + switch (F->getLinkage()) { + default: llvm_unreachable("Unknown linkage type!"); + case Function::InternalLinkage: // Symbols default to internal. 
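+  // (Internal and private functions fall through to the break below: no
+  // .globl or .weak directive is emitted, only the label itself.)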
+ case Function::PrivateLinkage: + break; + case Function::DLLExportLinkage: + case Function::ExternalLinkage: + O << "\t.globl\t" << CurrentFnName << '\n'; + break; + case Function::LinkerPrivateLinkage: + case Function::LinkOnceAnyLinkage: + case Function::LinkOnceODRLinkage: + case Function::WeakAnyLinkage: + case Function::WeakODRLinkage: + if (Subtarget->isTargetDarwin()) { + O << "\t.globl\t" << CurrentFnName << '\n'; + O << MAI->getWeakDefDirective() << CurrentFnName << '\n'; + } else if (Subtarget->isTargetCygMing()) { + O << "\t.globl\t" << CurrentFnName << "\n" + "\t.linkonce discard\n"; + } else { + O << "\t.weak\t" << CurrentFnName << '\n'; + } + break; + } + + printVisibility(CurrentFnName, F->getVisibility()); + + if (Subtarget->isTargetELF()) + O << "\t.type\t" << CurrentFnName << ",@function\n"; + else if (Subtarget->isTargetCygMing()) { + O << "\t.def\t " << CurrentFnName + << ";\t.scl\t" << + (F->hasInternalLinkage() ? COFF::C_STAT : COFF::C_EXT) + << ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT) + << ";\t.endef\n"; + } + + O << CurrentFnName << ':'; + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << ' '; + WriteAsOperand(O, F, /*PrintType=*/false, F->getParent()); + } + O << '\n'; + + // Add some workaround for linkonce linkage on Cygwin\MinGW + if (Subtarget->isTargetCygMing() && + (F->hasLinkOnceLinkage() || F->hasWeakLinkage())) + O << "Lllvm$workaround$fake$stub$" << CurrentFnName << ":\n"; +} + +/// runOnMachineFunction - This uses the printMachineInstruction() +/// method to print assembly for each instruction. +/// +bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { + const Function *F = MF.getFunction(); + this->MF = &MF; + CallingConv::ID CC = F->getCallingConv(); + + SetupMachineFunction(MF); + O << "\n\n"; + + if (Subtarget->isTargetCOFF()) { + X86COFFMachineModuleInfo &COFFMMI = + MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); + + // Populate function information map. Don't want to populate + // non-stdcall or non-fastcall functions' information right now. + if (CC == CallingConv::X86_StdCall || CC == CallingConv::X86_FastCall) + COFFMMI.AddFunctionInfo(F, *MF.getInfo<X86MachineFunctionInfo>()); + } + + // Print out constants referenced by the function + EmitConstantPool(MF.getConstantPool()); + + // Print the 'header' of function + emitFunctionHeader(MF); + + // Emit pre-function debug and/or EH information. + if (MAI->doesSupportDebugInformation() || MAI->doesSupportExceptionHandling()) + DW->BeginFunction(&MF); + + // Print out code for the function. + bool hasAnyRealCode = false; + for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + // Print a label for the basic block. + EmitBasicBlockStart(I); + for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); + II != IE; ++II) { + // Print the assembly for the instruction. + if (!II->isLabel()) + hasAnyRealCode = true; + printMachineInstruction(II); + } + } + + if (Subtarget->isTargetDarwin() && !hasAnyRealCode) { + // If the function is empty, then we need to emit *something*. Otherwise, + // the function's label might be associated with something that it wasn't + // meant to be associated with. We emit a noop in this situation. + // We are assuming inline asms are code. + O << "\tnop\n"; + } + + if (MAI->hasDotTypeDotSizeDirective()) + O << "\t.size\t" << CurrentFnName << ", .-" << CurrentFnName << '\n'; + + // Emit post-function debug information. 
+ if (MAI->doesSupportDebugInformation() || MAI->doesSupportExceptionHandling()) + DW->EndFunction(&MF); + + // Print out jump tables referenced by the function. + EmitJumpTableInfo(MF.getJumpTableInfo(), MF); + + // We didn't modify anything. + return false; +} + +/// printSymbolOperand - Print a raw symbol reference operand. This handles +/// jump tables, constant pools, global address and external symbols, all of +/// which print to a label with various suffixes for relocation types etc. +void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) { + switch (MO.getType()) { + default: llvm_unreachable("unknown symbol type!"); + case MachineOperand::MO_JumpTableIndex: + O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' + << MO.getIndex(); + break; + case MachineOperand::MO_ConstantPoolIndex: + O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' + << MO.getIndex(); + printOffset(MO.getOffset()); + break; + case MachineOperand::MO_GlobalAddress: { + const GlobalValue *GV = MO.getGlobal(); + + const char *Suffix = ""; + if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) + Suffix = "$stub"; + else if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY || + MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE || + MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE) + Suffix = "$non_lazy_ptr"; + + std::string Name = Mang->getMangledName(GV, Suffix, Suffix[0] != '\0'); + if (Subtarget->isTargetCygMing()) { + X86COFFMachineModuleInfo &COFFMMI = + MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); + COFFMMI.DecorateCygMingName(Name, GV, *TM.getTargetData()); + } + + // Handle dllimport linkage. + if (MO.getTargetFlags() == X86II::MO_DLLIMPORT) + Name = "__imp_" + Name; + + if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY || + MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) { + SmallString<128> NameStr; + Mang->getNameWithPrefix(NameStr, GV, true); + NameStr += "$non_lazy_ptr"; + MCSymbol *Sym = OutContext.GetOrCreateSymbol(NameStr.str()); + + const MCSymbol *&StubSym = + MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(Sym); + if (StubSym == 0) { + NameStr.clear(); + Mang->getNameWithPrefix(NameStr, GV, false); + StubSym = OutContext.GetOrCreateSymbol(NameStr.str()); + } + } else if (MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE){ + SmallString<128> NameStr; + Mang->getNameWithPrefix(NameStr, GV, true); + NameStr += "$non_lazy_ptr"; + MCSymbol *Sym = OutContext.GetOrCreateSymbol(NameStr.str()); + const MCSymbol *&StubSym = + MMI->getObjFileInfo<MachineModuleInfoMachO>().getHiddenGVStubEntry(Sym); + if (StubSym == 0) { + NameStr.clear(); + Mang->getNameWithPrefix(NameStr, GV, false); + StubSym = OutContext.GetOrCreateSymbol(NameStr.str()); + } + } else if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) { + SmallString<128> NameStr; + Mang->getNameWithPrefix(NameStr, GV, true); + NameStr += "$stub"; + MCSymbol *Sym = OutContext.GetOrCreateSymbol(NameStr.str()); + const MCSymbol *&StubSym = + MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym); + if (StubSym == 0) { + NameStr.clear(); + Mang->getNameWithPrefix(NameStr, GV, false); + StubSym = OutContext.GetOrCreateSymbol(NameStr.str()); + } + } + + // If the name begins with a dollar-sign, enclose it in parens. We do this + // to avoid having it look like an integer immediate to the assembler. 
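+    // (For example, a symbol named "$foo" is printed as "($foo)"; a bare
+    // "$foo" would read as an immediate in AT&T syntax.)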
+ if (Name[0] == '$') + O << '(' << Name << ')'; + else + O << Name; + + printOffset(MO.getOffset()); + break; + } + case MachineOperand::MO_ExternalSymbol: { + std::string Name = Mang->makeNameProper(MO.getSymbolName()); + if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) { + Name += "$stub"; + MCSymbol *Sym = OutContext.GetOrCreateSymbol(Name); + const MCSymbol *&StubSym = + MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym); + if (StubSym == 0) { + Name.erase(Name.end()-5, Name.end()); + StubSym = OutContext.GetOrCreateSymbol(Name); + } + } + + // If the name begins with a dollar-sign, enclose it in parens. We do this + // to avoid having it look like an integer immediate to the assembler. + if (Name[0] == '$') + O << '(' << Name << ')'; + else + O << Name; + break; + } + } + + switch (MO.getTargetFlags()) { + default: + llvm_unreachable("Unknown target flag on GV operand"); + case X86II::MO_NO_FLAG: // No flag. + break; + case X86II::MO_DARWIN_NONLAZY: + case X86II::MO_DLLIMPORT: + case X86II::MO_DARWIN_STUB: + // These affect the name of the symbol, not any suffix. + break; + case X86II::MO_GOT_ABSOLUTE_ADDRESS: + O << " + [.-"; + PrintPICBaseSymbol(); + O << ']'; + break; + case X86II::MO_PIC_BASE_OFFSET: + case X86II::MO_DARWIN_NONLAZY_PIC_BASE: + case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: + O << '-'; + PrintPICBaseSymbol(); + break; + case X86II::MO_TLSGD: O << "@TLSGD"; break; + case X86II::MO_GOTTPOFF: O << "@GOTTPOFF"; break; + case X86II::MO_INDNTPOFF: O << "@INDNTPOFF"; break; + case X86II::MO_TPOFF: O << "@TPOFF"; break; + case X86II::MO_NTPOFF: O << "@NTPOFF"; break; + case X86II::MO_GOTPCREL: O << "@GOTPCREL"; break; + case X86II::MO_GOT: O << "@GOT"; break; + case X86II::MO_GOTOFF: O << "@GOTOFF"; break; + case X86II::MO_PLT: O << "@PLT"; break; + } +} + +/// print_pcrel_imm - This is used to print an immediate value that ends up +/// being encoded as a pc-relative value. These print slightly differently, for +/// example, a $ is not emitted. +void X86AsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo) { + const MachineOperand &MO = MI->getOperand(OpNo); + switch (MO.getType()) { + default: llvm_unreachable("Unknown pcrel immediate operand"); + case MachineOperand::MO_Immediate: + O << MO.getImm(); + return; + case MachineOperand::MO_MachineBasicBlock: + GetMBBSymbol(MO.getMBB()->getNumber())->print(O, MAI); + return; + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + printSymbolOperand(MO); + return; + } +} + + +void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, + const char *Modifier) { + const MachineOperand &MO = MI->getOperand(OpNo); + switch (MO.getType()) { + default: llvm_unreachable("unknown operand type!"); + case MachineOperand::MO_Register: { + O << '%'; + unsigned Reg = MO.getReg(); + if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) { + EVT VT = (strcmp(Modifier+6,"64") == 0) ? + MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 : + ((strcmp(Modifier+6,"16") == 0) ? 
MVT::i16 : MVT::i8)); + Reg = getX86SubSuperRegister(Reg, VT); + } + O << X86ATTInstPrinter::getRegisterName(Reg); + return; + } + + case MachineOperand::MO_Immediate: + O << '$' << MO.getImm(); + return; + + case MachineOperand::MO_JumpTableIndex: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: { + O << '$'; + printSymbolOperand(MO); + break; + } + } +} + +void X86AsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op) { + unsigned char value = MI->getOperand(Op).getImm(); + assert(value <= 7 && "Invalid ssecc argument!"); + switch (value) { + case 0: O << "eq"; break; + case 1: O << "lt"; break; + case 2: O << "le"; break; + case 3: O << "unord"; break; + case 4: O << "neq"; break; + case 5: O << "nlt"; break; + case 6: O << "nle"; break; + case 7: O << "ord"; break; + } +} + +void X86AsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op, + const char *Modifier) { + const MachineOperand &BaseReg = MI->getOperand(Op); + const MachineOperand &IndexReg = MI->getOperand(Op+2); + const MachineOperand &DispSpec = MI->getOperand(Op+3); + + // If we really don't want to print out (rip), don't. + bool HasBaseReg = BaseReg.getReg() != 0; + if (HasBaseReg && Modifier && !strcmp(Modifier, "no-rip") && + BaseReg.getReg() == X86::RIP) + HasBaseReg = false; + + // HasParenPart - True if we will print out the () part of the mem ref. + bool HasParenPart = IndexReg.getReg() || HasBaseReg; + + if (DispSpec.isImm()) { + int DispVal = DispSpec.getImm(); + if (DispVal || !HasParenPart) + O << DispVal; + } else { + assert(DispSpec.isGlobal() || DispSpec.isCPI() || + DispSpec.isJTI() || DispSpec.isSymbol()); + printSymbolOperand(MI->getOperand(Op+3)); + } + + if (HasParenPart) { + assert(IndexReg.getReg() != X86::ESP && + "X86 doesn't allow scaling by ESP"); + + O << '('; + if (HasBaseReg) + printOperand(MI, Op, Modifier); + + if (IndexReg.getReg()) { + O << ','; + printOperand(MI, Op+2, Modifier); + unsigned ScaleVal = MI->getOperand(Op+1).getImm(); + if (ScaleVal != 1) + O << ',' << ScaleVal; + } + O << ')'; + } +} + +void X86AsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op, + const char *Modifier) { + assert(isMem(MI, Op) && "Invalid memory reference!"); + const MachineOperand &Segment = MI->getOperand(Op+4); + if (Segment.getReg()) { + printOperand(MI, Op+4, Modifier); + O << ':'; + } + printLeaMemReference(MI, Op, Modifier); +} + +void X86AsmPrinter::printPICJumpTableSetLabel(unsigned uid, + const MachineBasicBlock *MBB) const { + if (!MAI->getSetDirective()) + return; + + // We don't need .set machinery if we have GOT-style relocations + if (Subtarget->isPICStyleGOT()) + return; + + O << MAI->getSetDirective() << ' ' << MAI->getPrivateGlobalPrefix() + << getFunctionNumber() << '_' << uid << "_set_" << MBB->getNumber() << ','; + + GetMBBSymbol(MBB->getNumber())->print(O, MAI); + + if (Subtarget->isPICStyleRIPRel()) + O << '-' << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << uid << '\n'; + else { + O << '-'; + PrintPICBaseSymbol(); + O << '\n'; + } +} + + +void X86AsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op) { + PrintPICBaseSymbol(); + O << '\n'; + PrintPICBaseSymbol(); + O << ':'; +} + +void X86AsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI, + const MachineBasicBlock *MBB, + unsigned uid) const { + const char *JTEntryDirective = MJTI->getEntrySize() == 4 ? 
+ MAI->getData32bitsDirective() : MAI->getData64bitsDirective(); + + O << JTEntryDirective << ' '; + + if (Subtarget->isPICStyleRIPRel() || Subtarget->isPICStyleStubPIC()) { + O << MAI->getPrivateGlobalPrefix() << getFunctionNumber() + << '_' << uid << "_set_" << MBB->getNumber(); + } else if (Subtarget->isPICStyleGOT()) { + GetMBBSymbol(MBB->getNumber())->print(O, MAI); + O << "@GOTOFF"; + } else + GetMBBSymbol(MBB->getNumber())->print(O, MAI); +} + +bool X86AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode) { + unsigned Reg = MO.getReg(); + switch (Mode) { + default: return true; // Unknown mode. + case 'b': // Print QImode register + Reg = getX86SubSuperRegister(Reg, MVT::i8); + break; + case 'h': // Print QImode high register + Reg = getX86SubSuperRegister(Reg, MVT::i8, true); + break; + case 'w': // Print HImode register + Reg = getX86SubSuperRegister(Reg, MVT::i16); + break; + case 'k': // Print SImode register + Reg = getX86SubSuperRegister(Reg, MVT::i32); + break; + case 'q': // Print DImode register + Reg = getX86SubSuperRegister(Reg, MVT::i64); + break; + } + + O << '%' << X86ATTInstPrinter::getRegisterName(Reg); + return false; +} + +/// PrintAsmOperand - Print out an operand for an inline asm expression. /// -FunctionPass *llvm::createX86CodePrinterPass(raw_ostream &o, - X86TargetMachine &tm, - bool verbose) { - const X86Subtarget *Subtarget = &tm.getSubtarget<X86Subtarget>(); +bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode) { + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + const MachineOperand &MO = MI->getOperand(OpNo); + + switch (ExtraCode[0]) { + default: return true; // Unknown modifier. + case 'a': // This is an address. Currently only 'i' and 'r' are expected. + if (MO.isImm()) { + O << MO.getImm(); + return false; + } + if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol()) { + printSymbolOperand(MO); + return false; + } + if (MO.isReg()) { + O << '('; + printOperand(MI, OpNo); + O << ')'; + return false; + } + return true; + + case 'c': // Don't print "$" before a global var name or constant. + if (MO.isImm()) + O << MO.getImm(); + else if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol()) + printSymbolOperand(MO); + else + printOperand(MI, OpNo); + return false; + + case 'A': // Print '*' before a register (it must be a register) + if (MO.isReg()) { + O << '*'; + printOperand(MI, OpNo); + return false; + } + return true; + + case 'b': // Print QImode register + case 'h': // Print QImode high register + case 'w': // Print HImode register + case 'k': // Print SImode register + case 'q': // Print DImode register + if (MO.isReg()) + return printAsmMRegister(MO, ExtraCode[0]); + printOperand(MI, OpNo); + return false; + + case 'P': // This is the operand of a call, treat specially. + print_pcrel_imm(MI, OpNo); + return false; + + case 'n': // Negate the immediate or print a '-' before the operand. + // Note: this is a temporary solution. It should be handled target + // independently as part of the 'MC' work. 
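+      // (Illustrative: with an "i"(4) constraint, a "%n0" reference in the
+      // inline-asm string renders as -4 here.)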
+ if (MO.isImm()) { + O << -MO.getImm(); + return false; + } + O << '-'; + } + } - if (Subtarget->isFlavorIntel()) - return new X86IntelAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); - return new X86ATTAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); + printOperand(MI, OpNo); + return false; } -namespace { - static struct Register { - Register() { - X86TargetMachine::registerAsmPrinter(createX86CodePrinterPass); +bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, unsigned AsmVariant, + const char *ExtraCode) { + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: return true; // Unknown modifier. + case 'b': // Print QImode register + case 'h': // Print QImode high register + case 'w': // Print HImode register + case 'k': // Print SImode register + case 'q': // Print SImode register + // These only apply to registers, ignore on mem. + break; + case 'P': // Don't print @PLT, but do print as memory. + printMemReference(MI, OpNo, "no-rip"); + return false; } - } Registrator; + } + printMemReference(MI, OpNo); + return false; +} + + + +/// printMachineInstruction -- Print out a single X86 LLVM instruction MI in +/// AT&T syntax to the current output stream. +/// +void X86AsmPrinter::printMachineInstruction(const MachineInstr *MI) { + ++EmittedInsts; + + processDebugLoc(MI, true); + + printInstructionThroughMCStreamer(MI); + + if (VerboseAsm && !MI->getDebugLoc().isUnknown()) + EmitComments(*MI); + O << '\n'; + + processDebugLoc(MI, false); } -extern "C" int X86AsmPrinterForceLink; -int X86AsmPrinterForceLink = 0; +void X86AsmPrinter::PrintGlobalVariable(const GlobalVariable* GVar) { + if (!GVar->hasInitializer()) + return; // External global require no code + + // Check to see if this is a special global used by LLVM, if so, emit it. + if (EmitSpecialLLVMGlobal(GVar)) { + if (Subtarget->isTargetDarwin() && + TM.getRelocationModel() == Reloc::Static) { + if (GVar->getName() == "llvm.global_ctors") + O << ".reference .constructors_used\n"; + else if (GVar->getName() == "llvm.global_dtors") + O << ".reference .destructors_used\n"; + } + return; + } + + const TargetData *TD = TM.getTargetData(); + + std::string name = Mang->getMangledName(GVar); + Constant *C = GVar->getInitializer(); + const Type *Type = C->getType(); + unsigned Size = TD->getTypeAllocSize(Type); + unsigned Align = TD->getPreferredAlignmentLog(GVar); + + printVisibility(name, GVar->getVisibility()); + + if (Subtarget->isTargetELF()) + O << "\t.type\t" << name << ",@object\n"; + + + SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GVar, TM); + const MCSection *TheSection = + getObjFileLowering().SectionForGlobal(GVar, GVKind, Mang, TM); + OutStreamer.SwitchSection(TheSection); + + // FIXME: get this stuff from section kind flags. + if (C->isNullValue() && !GVar->hasSection() && + // Don't put things that should go in the cstring section into "comm". + !TheSection->getKind().isMergeableCString()) { + if (GVar->hasExternalLinkage()) { + if (const char *Directive = MAI->getZeroFillDirective()) { + O << "\t.globl " << name << '\n'; + O << Directive << "__DATA, __common, " << name << ", " + << Size << ", " << Align << '\n'; + return; + } + } + + if (!GVar->isThreadLocal() && + (GVar->hasLocalLinkage() || GVar->isWeakForLinker())) { + if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. 
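+      // (Background: ".comm sym,size[,align]" asks the linker to allocate
+      // common storage; ".lcomm" is the file-local variant. Hence the
+      // Size == 0 guard above: ".comm foo,0" has no portable meaning.)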
+ + if (MAI->getLCOMMDirective() != NULL) { + if (GVar->hasLocalLinkage()) { + O << MAI->getLCOMMDirective() << name << ',' << Size; + if (Subtarget->isTargetDarwin()) + O << ',' << Align; + } else if (Subtarget->isTargetDarwin() && !GVar->hasCommonLinkage()) { + O << "\t.globl " << name << '\n' + << MAI->getWeakDefDirective() << name << '\n'; + EmitAlignment(Align, GVar); + O << name << ":"; + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << ' '; + WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent()); + } + O << '\n'; + EmitGlobalConstant(C); + return; + } else { + O << MAI->getCOMMDirective() << name << ',' << Size; + if (MAI->getCOMMDirectiveTakesAlignment()) + O << ',' << (MAI->getAlignmentIsInBytes() ? (1 << Align) : Align); + } + } else { + if (!Subtarget->isTargetCygMing()) { + if (GVar->hasLocalLinkage()) + O << "\t.local\t" << name << '\n'; + } + O << MAI->getCOMMDirective() << name << ',' << Size; + if (MAI->getCOMMDirectiveTakesAlignment()) + O << ',' << (MAI->getAlignmentIsInBytes() ? (1 << Align) : Align); + } + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << ' '; + WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent()); + } + O << '\n'; + return; + } + } + + switch (GVar->getLinkage()) { + case GlobalValue::CommonLinkage: + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + case GlobalValue::LinkerPrivateLinkage: + if (Subtarget->isTargetDarwin()) { + O << "\t.globl " << name << '\n' + << MAI->getWeakDefDirective() << name << '\n'; + } else if (Subtarget->isTargetCygMing()) { + O << "\t.globl\t" << name << "\n" + "\t.linkonce same_size\n"; + } else { + O << "\t.weak\t" << name << '\n'; + } + break; + case GlobalValue::DLLExportLinkage: + case GlobalValue::AppendingLinkage: + // FIXME: appending linkage variables should go into a section of + // their name or something. For now, just emit them as external. + case GlobalValue::ExternalLinkage: + // If external or appending, declare as a global symbol + O << "\t.globl " << name << '\n'; + // FALL THROUGH + case GlobalValue::PrivateLinkage: + case GlobalValue::InternalLinkage: + break; + default: + llvm_unreachable("Unknown linkage type!"); + } + + EmitAlignment(Align, GVar); + O << name << ":"; + if (VerboseAsm){ + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << ' '; + WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent()); + } + O << '\n'; + + EmitGlobalConstant(C); + + if (MAI->hasDotTypeDotSizeDirective()) + O << "\t.size\t" << name << ", " << Size << '\n'; +} + +void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { + if (Subtarget->isTargetDarwin()) { + // All darwin targets use mach-o. + TargetLoweringObjectFileMachO &TLOFMacho = + static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering()); + + MachineModuleInfoMachO &MMIMacho = + MMI->getObjFileInfo<MachineModuleInfoMachO>(); + + // Output stubs for dynamically-linked functions. 
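+    // (Each stub printed below has roughly the shape:
+    //   _foo$stub:
+    //           .indirect_symbol _foo
+    //           hlt ; hlt ; hlt ; hlt ; hlt
+    // where the five hlt bytes are the to-be-bound stub body; illustrative
+    // layout inferred from the emission code that follows.)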
+ MachineModuleInfoMachO::SymbolListTy Stubs; + + Stubs = MMIMacho.GetFnStubList(); + if (!Stubs.empty()) { + const MCSection *TheSection = + TLOFMacho.getMachOSection("__IMPORT", "__jump_table", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_SELF_MODIFYING_CODE | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + 5, SectionKind::getMetadata()); + OutStreamer.SwitchSection(TheSection); + + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + Stubs[i].first->print(O, MAI); + O << ":\n" << "\t.indirect_symbol "; + // Get the MCSymbol without the $stub suffix. + Stubs[i].second->print(O, MAI); + O << "\n\thlt ; hlt ; hlt ; hlt ; hlt\n"; + } + O << '\n'; + + Stubs.clear(); + } + + // Output stubs for external and common global variables. + Stubs = MMIMacho.GetGVStubList(); + if (!Stubs.empty()) { + const MCSection *TheSection = + TLOFMacho.getMachOSection("__IMPORT", "__pointers", + MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, + SectionKind::getMetadata()); + OutStreamer.SwitchSection(TheSection); + + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + Stubs[i].first->print(O, MAI); + O << ":\n\t.indirect_symbol "; + Stubs[i].second->print(O, MAI); + O << "\n\t.long\t0\n"; + } + Stubs.clear(); + } + + Stubs = MMIMacho.GetHiddenGVStubList(); + if (!Stubs.empty()) { + OutStreamer.SwitchSection(getObjFileLowering().getDataSection()); + EmitAlignment(2); + + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + Stubs[i].first->print(O, MAI); + O << ":\n" << MAI->getData32bitsDirective(); + Stubs[i].second->print(O, MAI); + O << '\n'; + } + Stubs.clear(); + } + + // Funny Darwin hack: This flag tells the linker that no global symbols + // contain code that falls through to other global symbols (e.g. the obvious + // implementation of multiple entry points). If this doesn't occur, the + // linker can safely perform dead code stripping. Since LLVM never + // generates code that does this, it is always safe to set. + O << "\t.subsections_via_symbols\n"; + } + + if (Subtarget->isTargetCOFF()) { + // Necessary for dllexport support + std::vector<std::string> DLLExportedFns, DLLExportedGlobals; + + X86COFFMachineModuleInfo &COFFMMI = + MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); + TargetLoweringObjectFileCOFF &TLOFCOFF = + static_cast<TargetLoweringObjectFileCOFF&>(getObjFileLowering()); + + for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) + if (I->hasDLLExportLinkage()) + DLLExportedFns.push_back(Mang->getMangledName(I)); + + for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + if (I->hasDLLExportLinkage()) + DLLExportedGlobals.push_back(Mang->getMangledName(I)); + + if (Subtarget->isTargetCygMing()) { + // Emit type information for external functions + for (X86COFFMachineModuleInfo::stub_iterator I = COFFMMI.stub_begin(), + E = COFFMMI.stub_end(); I != E; ++I) { + O << "\t.def\t " << I->getKeyData() + << ";\t.scl\t" << COFF::C_EXT + << ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT) + << ";\t.endef\n"; + } + } + + // Output linker support code for dllexported globals on windows. 
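+    // (For example, a dllexported global G produces
+    //   .ascii " -export:G,data"
+    // in the .drectve section, which the COFF linker treats as an embedded
+    // command-line option.)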
+ if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) { + OutStreamer.SwitchSection(TLOFCOFF.getCOFFSection(".section .drectve", + true, + SectionKind::getMetadata())); + + for (unsigned i = 0, e = DLLExportedGlobals.size(); i != e; ++i) + O << "\t.ascii \" -export:" << DLLExportedGlobals[i] << ",data\"\n"; + + for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i) + O << "\t.ascii \" -export:" << DLLExportedFns[i] << "\"\n"; + } + } +} + + +//===----------------------------------------------------------------------===// +// Target Registry Stuff +//===----------------------------------------------------------------------===// + +static MCInstPrinter *createX86MCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + raw_ostream &O) { + if (SyntaxVariant == 0) + return new X86ATTInstPrinter(O, MAI); + if (SyntaxVariant == 1) + return new X86IntelInstPrinter(O, MAI); + return 0; +} // Force static initialization. -extern "C" void LLVMInitializeX86AsmPrinter() { } +extern "C" void LLVMInitializeX86AsmPrinter() { + RegisterAsmPrinter<X86AsmPrinter> X(TheX86_32Target); + RegisterAsmPrinter<X86AsmPrinter> Y(TheX86_64Target); + + TargetRegistry::RegisterMCInstPrinter(TheX86_32Target,createX86MCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(TheX86_64Target,createX86MCInstPrinter); +} diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h new file mode 100644 index 0000000..0351829 --- /dev/null +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h @@ -0,0 +1,150 @@ +//===-- X86AsmPrinter.h - Convert X86 LLVM code to assembly -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// AT&T assembly code printer class. +// +//===----------------------------------------------------------------------===// + +#ifndef X86ASMPRINTER_H +#define X86ASMPRINTER_H + +#include "../X86.h" +#include "../X86MachineFunctionInfo.h" +#include "../X86TargetMachine.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { + +class MachineJumpTableInfo; +class MCContext; +class MCInst; +class MCStreamer; +class MCSymbol; + +class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter { + const X86Subtarget *Subtarget; + public: + explicit X86AsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, + const MCAsmInfo *T, bool V) + : AsmPrinter(O, TM, T, V) { + Subtarget = &TM.getSubtarget<X86Subtarget>(); + } + + virtual const char *getPassName() const { + return "X86 AT&T-Style Assembly Printer"; + } + + const X86Subtarget &getSubtarget() const { return *Subtarget; } + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<MachineModuleInfo>(); + AU.addRequired<DwarfWriter>(); + AsmPrinter::getAnalysisUsage(AU); + } + + + virtual void EmitEndOfAsmFile(Module &M); + + void printInstructionThroughMCStreamer(const MachineInstr *MI); + + + void printMCInst(const MCInst *MI); + + void printSymbolOperand(const MachineOperand &MO); + + + + // These methods are used by the tablegen'erated instruction printer. 
+ void printOperand(const MachineInstr *MI, unsigned OpNo, + const char *Modifier = 0); + void print_pcrel_imm(const MachineInstr *MI, unsigned OpNo); + + void printopaquemem(const MachineInstr *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + + void printi8mem(const MachineInstr *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printi16mem(const MachineInstr *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printi32mem(const MachineInstr *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printi64mem(const MachineInstr *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printi128mem(const MachineInstr *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printf32mem(const MachineInstr *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printf64mem(const MachineInstr *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printf80mem(const MachineInstr *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printf128mem(const MachineInstr *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printlea32mem(const MachineInstr *MI, unsigned OpNo) { + printLeaMemReference(MI, OpNo); + } + void printlea64mem(const MachineInstr *MI, unsigned OpNo) { + printLeaMemReference(MI, OpNo); + } + void printlea64_32mem(const MachineInstr *MI, unsigned OpNo) { + printLeaMemReference(MI, OpNo, "subreg64"); + } + + bool printAsmMRegister(const MachineOperand &MO, char Mode); + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode); + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode); + + void printMachineInstruction(const MachineInstr *MI); + void printSSECC(const MachineInstr *MI, unsigned Op); + void printMemReference(const MachineInstr *MI, unsigned Op, + const char *Modifier=NULL); + void printLeaMemReference(const MachineInstr *MI, unsigned Op, + const char *Modifier=NULL); + void printPICJumpTableSetLabel(unsigned uid, + const MachineBasicBlock *MBB) const; + void printPICJumpTableSetLabel(unsigned uid, unsigned uid2, + const MachineBasicBlock *MBB) const { + AsmPrinter::printPICJumpTableSetLabel(uid, uid2, MBB); + } + void printPICJumpTableEntry(const MachineJumpTableInfo *MJTI, + const MachineBasicBlock *MBB, + unsigned uid) const; + + void printPICLabel(const MachineInstr *MI, unsigned Op); + void PrintGlobalVariable(const GlobalVariable* GVar); + + void PrintPICBaseSymbol() const; + + bool runOnMachineFunction(MachineFunction &F); + + void emitFunctionHeader(const MachineFunction &MF); + +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp new file mode 100644 index 0000000..fde5902 --- /dev/null +++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp @@ -0,0 +1,131 @@ +//===-- X86IntelInstPrinter.cpp - Intel assembly instruction printing -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file includes code for rendering MCInst instances as Intel-style +// assembly.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "X86IntelInstPrinter.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "X86GenInstrNames.inc" +using namespace llvm; + +// Include the auto-generated portion of the assembly writer. +#define MachineInstr MCInst +#define NO_ASM_WRITER_BOILERPLATE +#include "X86GenAsmWriter1.inc" +#undef MachineInstr + +void X86IntelInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); } + +void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op) { + switch (MI->getOperand(Op).getImm()) { + default: llvm_unreachable("Invalid ssecc argument!"); + case 0: O << "eq"; break; + case 1: O << "lt"; break; + case 2: O << "le"; break; + case 3: O << "unord"; break; + case 4: O << "neq"; break; + case 5: O << "nlt"; break; + case 6: O << "nle"; break; + case 7: O << "ord"; break; + } +} + +/// print_pcrel_imm - This is used to print an immediate value that ends up +/// being encoded as a pc-relative value. +void X86IntelInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isImm()) + O << Op.getImm(); + else { + assert(Op.isExpr() && "unknown pcrel immediate operand"); + Op.getExpr()->print(O, &MAI); + } +} + +static void PrintRegName(raw_ostream &O, StringRef RegName) { + for (unsigned i = 0, e = RegName.size(); i != e; ++i) + O << (char)toupper(RegName[i]); +} + +void X86IntelInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + const char *Modifier) { + assert(Modifier == 0 && "Modifiers should not be used"); + + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + PrintRegName(O, getRegisterName(Op.getReg())); + } else if (Op.isImm()) { + O << Op.getImm(); + } else { + assert(Op.isExpr() && "unknown operand kind in printOperand"); + Op.getExpr()->print(O, &MAI); + } +} + +void X86IntelInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op) { + const MCOperand &BaseReg = MI->getOperand(Op); + unsigned ScaleVal = MI->getOperand(Op+1).getImm(); + const MCOperand &IndexReg = MI->getOperand(Op+2); + const MCOperand &DispSpec = MI->getOperand(Op+3); + + O << '['; + + bool NeedPlus = false; + if (BaseReg.getReg()) { + printOperand(MI, Op); + NeedPlus = true; + } + + if (IndexReg.getReg()) { + if (NeedPlus) O << " + "; + if (ScaleVal != 1) + O << ScaleVal << '*'; + printOperand(MI, Op+2); + NeedPlus = true; + } + + + if (!DispSpec.isImm()) { + if (NeedPlus) O << " + "; + assert(DispSpec.isExpr() && "non-immediate displacement for LEA?"); + DispSpec.getExpr()->print(O, &MAI); + } else { + int64_t DispVal = DispSpec.getImm(); + if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) { + if (NeedPlus) { + if (DispVal > 0) + O << " + "; + else { + O << " - "; + DispVal = -DispVal; + } + } + O << DispVal; + } + } + + O << ']'; +} + +void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op) { + // If this has a segment register, print it. 
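+ // An X86 memory operand is five consecutive MCOperands: base register, + // scale immediate, index register, displacement, and segment register + // (Op+0 .. Op+4). With an illustrative operand, AT&T "%fs:4(%eax,%ebx,2)" + // renders here as "FS:[EAX + 2*EBX + 4]".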
+ if (MI->getOperand(Op+4).getReg()) { + printOperand(MI, Op+4); + O << ':'; + } + printLeaMemReference(MI, Op); +} diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h new file mode 100644 index 0000000..1976177 --- /dev/null +++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h @@ -0,0 +1,99 @@ +//===-- X86IntelInstPrinter.h - Convert X86 MCInst to assembly syntax -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an X86 MCInst to intel style .s file syntax. +// +//===----------------------------------------------------------------------===// + +#ifndef X86_INTEL_INST_PRINTER_H +#define X86_INTEL_INST_PRINTER_H + +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + class MCOperand; + +class X86IntelInstPrinter : public MCInstPrinter { +public: + X86IntelInstPrinter(raw_ostream &O, const MCAsmInfo &MAI) + : MCInstPrinter(O, MAI) {} + + virtual void printInst(const MCInst *MI); + + // Autogenerated by tblgen. + void printInstruction(const MCInst *MI); + static const char *getRegisterName(unsigned RegNo); + + + void printOperand(const MCInst *MI, unsigned OpNo, + const char *Modifier = 0); + void printMemReference(const MCInst *MI, unsigned Op); + void printLeaMemReference(const MCInst *MI, unsigned Op); + void printSSECC(const MCInst *MI, unsigned Op); + void print_pcrel_imm(const MCInst *MI, unsigned OpNo); + + void printopaquemem(const MCInst *MI, unsigned OpNo) { + O << "OPAQUE PTR "; + printMemReference(MI, OpNo); + } + + void printi8mem(const MCInst *MI, unsigned OpNo) { + O << "BYTE PTR "; + printMemReference(MI, OpNo); + } + void printi16mem(const MCInst *MI, unsigned OpNo) { + O << "WORD PTR "; + printMemReference(MI, OpNo); + } + void printi32mem(const MCInst *MI, unsigned OpNo) { + O << "DWORD PTR "; + printMemReference(MI, OpNo); + } + void printi64mem(const MCInst *MI, unsigned OpNo) { + O << "QWORD PTR "; + printMemReference(MI, OpNo); + } + void printi128mem(const MCInst *MI, unsigned OpNo) { + O << "XMMWORD PTR "; + printMemReference(MI, OpNo); + } + void printf32mem(const MCInst *MI, unsigned OpNo) { + O << "DWORD PTR "; + printMemReference(MI, OpNo); + } + void printf64mem(const MCInst *MI, unsigned OpNo) { + O << "QWORD PTR "; + printMemReference(MI, OpNo); + } + void printf80mem(const MCInst *MI, unsigned OpNo) { + O << "XWORD PTR "; + printMemReference(MI, OpNo); + } + void printf128mem(const MCInst *MI, unsigned OpNo) { + O << "XMMWORD PTR "; + printMemReference(MI, OpNo); + } + void printlea32mem(const MCInst *MI, unsigned OpNo) { + O << "DWORD PTR "; + printLeaMemReference(MI, OpNo); + } + void printlea64mem(const MCInst *MI, unsigned OpNo) { + O << "QWORD PTR "; + printLeaMemReference(MI, OpNo); + } + void printlea64_32mem(const MCInst *MI, unsigned OpNo) { + O << "QWORD PTR "; + printLeaMemReference(MI, OpNo); + } +}; + +} + +#endif diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp new file mode 100644 index 0000000..5ccddf5 --- /dev/null +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -0,0 +1,485 @@ +//===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of 
Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains code to lower X86 MachineInstrs to their corresponding +// MCInst records. +// +//===----------------------------------------------------------------------===// + +#include "X86MCInstLower.h" +#include "X86AsmPrinter.h" +#include "X86MCAsmInfo.h" +#include "X86COFFMachineModuleInfo.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/Mangler.h" +#include "llvm/ADT/SmallString.h" +using namespace llvm; + + +const X86Subtarget &X86MCInstLower::getSubtarget() const { + return AsmPrinter.getSubtarget(); +} + +MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const { + assert(getSubtarget().isTargetDarwin() &&"Can only get MachO info on darwin"); + return AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoMachO>(); +} + + +MCSymbol *X86MCInstLower::GetPICBaseSymbol() const { + SmallString<60> Name; + raw_svector_ostream(Name) << AsmPrinter.MAI->getPrivateGlobalPrefix() + << AsmPrinter.getFunctionNumber() << "$pb"; + return Ctx.GetOrCreateSymbol(Name.str()); +} + + +/// GetGlobalAddressSymbol - Lower an MO_GlobalAddress operand to the MCSymbol +/// it refers to, applying any target-flag name decorations. +MCSymbol *X86MCInstLower:: +GetGlobalAddressSymbol(const MachineOperand &MO) const { + const GlobalValue *GV = MO.getGlobal(); + + bool isImplicitlyPrivate = false; + if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB || + MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY || + MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE || + MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE) + isImplicitlyPrivate = true; + + SmallString<128> Name; + Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate); + + if (getSubtarget().isTargetCygMing()) { + X86COFFMachineModuleInfo &COFFMMI = + AsmPrinter.MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); + COFFMMI.DecorateCygMingName(Name, GV, *AsmPrinter.TM.getTargetData()); + } + + switch (MO.getTargetFlags()) { + default: llvm_unreachable("Unknown target flag on GV operand"); + case X86II::MO_NO_FLAG: // No flag. + case X86II::MO_PIC_BASE_OFFSET: // Doesn't modify symbol name. + break; + case X86II::MO_DLLIMPORT: { + // Handle dllimport linkage.
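+ // e.g. an illustrative dllimported global that mangled to "_foo" is + // rewritten to "__imp__foo", the import-table pointer slot the linker + // provides for it.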
+ const char *Prefix = "__imp_"; + Name.insert(Name.begin(), Prefix, Prefix+strlen(Prefix)); + break; + } + case X86II::MO_DARWIN_NONLAZY: + case X86II::MO_DARWIN_NONLAZY_PIC_BASE: { + Name += "$non_lazy_ptr"; + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); + + const MCSymbol *&StubSym = getMachOMMI().getGVStubEntry(Sym); + if (StubSym == 0) { + Name.clear(); + Mang->getNameWithPrefix(Name, GV, false); + StubSym = Ctx.GetOrCreateSymbol(Name.str()); + } + return Sym; + } + case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: { + Name += "$non_lazy_ptr"; + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); + const MCSymbol *&StubSym = getMachOMMI().getHiddenGVStubEntry(Sym); + if (StubSym == 0) { + Name.clear(); + Mang->getNameWithPrefix(Name, GV, false); + StubSym = Ctx.GetOrCreateSymbol(Name.str()); + } + return Sym; + } + case X86II::MO_DARWIN_STUB: { + Name += "$stub"; + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); + const MCSymbol *&StubSym = getMachOMMI().getFnStubEntry(Sym); + if (StubSym == 0) { + Name.clear(); + Mang->getNameWithPrefix(Name, GV, false); + StubSym = Ctx.GetOrCreateSymbol(Name.str()); + } + return Sym; + } + // FIXME: These probably should be a modifier on the symbol or something?? + case X86II::MO_TLSGD: Name += "@TLSGD"; break; + case X86II::MO_GOTTPOFF: Name += "@GOTTPOFF"; break; + case X86II::MO_INDNTPOFF: Name += "@INDNTPOFF"; break; + case X86II::MO_TPOFF: Name += "@TPOFF"; break; + case X86II::MO_NTPOFF: Name += "@NTPOFF"; break; + case X86II::MO_GOTPCREL: Name += "@GOTPCREL"; break; + case X86II::MO_GOT: Name += "@GOT"; break; + case X86II::MO_GOTOFF: Name += "@GOTOFF"; break; + case X86II::MO_PLT: Name += "@PLT"; break; + } + + return Ctx.GetOrCreateSymbol(Name.str()); +} + +MCSymbol *X86MCInstLower:: +GetExternalSymbolSymbol(const MachineOperand &MO) const { + SmallString<128> Name; + Name += AsmPrinter.MAI->getGlobalPrefix(); + Name += MO.getSymbolName(); + + switch (MO.getTargetFlags()) { + default: llvm_unreachable("Unknown target flag on GV operand"); + case X86II::MO_NO_FLAG: // No flag. + case X86II::MO_GOT_ABSOLUTE_ADDRESS: // Doesn't modify symbol name. + case X86II::MO_PIC_BASE_OFFSET: // Doesn't modify symbol name. + break; + case X86II::MO_DLLIMPORT: { + // Handle dllimport linkage. + const char *Prefix = "__imp_"; + Name.insert(Name.begin(), Prefix, Prefix+strlen(Prefix)); + break; + } + case X86II::MO_DARWIN_STUB: { + Name += "$stub"; + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); + const MCSymbol *&StubSym = getMachOMMI().getFnStubEntry(Sym); + + if (StubSym == 0) { + Name.erase(Name.end()-5, Name.end()); + StubSym = Ctx.GetOrCreateSymbol(Name.str()); + } + return Sym; + } + // FIXME: These probably should be a modifier on the symbol or something?? 
+ case X86II::MO_TLSGD: Name += "@TLSGD"; break; + case X86II::MO_GOTTPOFF: Name += "@GOTTPOFF"; break; + case X86II::MO_INDNTPOFF: Name += "@INDNTPOFF"; break; + case X86II::MO_TPOFF: Name += "@TPOFF"; break; + case X86II::MO_NTPOFF: Name += "@NTPOFF"; break; + case X86II::MO_GOTPCREL: Name += "@GOTPCREL"; break; + case X86II::MO_GOT: Name += "@GOT"; break; + case X86II::MO_GOTOFF: Name += "@GOTOFF"; break; + case X86II::MO_PLT: Name += "@PLT"; break; + } + + return Ctx.GetOrCreateSymbol(Name.str()); +} + +MCSymbol *X86MCInstLower::GetJumpTableSymbol(const MachineOperand &MO) const { + SmallString<256> Name; + raw_svector_ostream(Name) << AsmPrinter.MAI->getPrivateGlobalPrefix() << "JTI" + << AsmPrinter.getFunctionNumber() << '_' << MO.getIndex(); + + switch (MO.getTargetFlags()) { + default: + llvm_unreachable("Unknown target flag on GV operand"); + case X86II::MO_NO_FLAG: // No flag. + case X86II::MO_PIC_BASE_OFFSET: + case X86II::MO_DARWIN_NONLAZY_PIC_BASE: + case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: + break; + // FIXME: These probably should be a modifier on the symbol or something?? + case X86II::MO_TLSGD: Name += "@TLSGD"; break; + case X86II::MO_GOTTPOFF: Name += "@GOTTPOFF"; break; + case X86II::MO_INDNTPOFF: Name += "@INDNTPOFF"; break; + case X86II::MO_TPOFF: Name += "@TPOFF"; break; + case X86II::MO_NTPOFF: Name += "@NTPOFF"; break; + case X86II::MO_GOTPCREL: Name += "@GOTPCREL"; break; + case X86II::MO_GOT: Name += "@GOT"; break; + case X86II::MO_GOTOFF: Name += "@GOTOFF"; break; + case X86II::MO_PLT: Name += "@PLT"; break; + } + + // Create a symbol for the name. + return Ctx.GetOrCreateSymbol(Name.str()); +} + + +MCSymbol *X86MCInstLower:: +GetConstantPoolIndexSymbol(const MachineOperand &MO) const { + SmallString<256> Name; + raw_svector_ostream(Name) << AsmPrinter.MAI->getPrivateGlobalPrefix() << "CPI" + << AsmPrinter.getFunctionNumber() << '_' << MO.getIndex(); + + switch (MO.getTargetFlags()) { + default: + llvm_unreachable("Unknown target flag on GV operand"); + case X86II::MO_NO_FLAG: // No flag. + case X86II::MO_PIC_BASE_OFFSET: + case X86II::MO_DARWIN_NONLAZY_PIC_BASE: + case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: + break; + // FIXME: These probably should be a modifier on the symbol or something?? + case X86II::MO_TLSGD: Name += "@TLSGD"; break; + case X86II::MO_GOTTPOFF: Name += "@GOTTPOFF"; break; + case X86II::MO_INDNTPOFF: Name += "@INDNTPOFF"; break; + case X86II::MO_TPOFF: Name += "@TPOFF"; break; + case X86II::MO_NTPOFF: Name += "@NTPOFF"; break; + case X86II::MO_GOTPCREL: Name += "@GOTPCREL"; break; + case X86II::MO_GOT: Name += "@GOT"; break; + case X86II::MO_GOTOFF: Name += "@GOTOFF"; break; + case X86II::MO_PLT: Name += "@PLT"; break; + } + + // Create a symbol for the name. + return Ctx.GetOrCreateSymbol(Name.str()); +} + +MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, + MCSymbol *Sym) const { + // FIXME: We would like an efficient form for this, so we don't have to do a + // lot of extra uniquing. + const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx); + + switch (MO.getTargetFlags()) { + default: llvm_unreachable("Unknown target flag on GV operand"); + case X86II::MO_NO_FLAG: // No flag. + + // These affect the name of the symbol, not any suffix. 
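+ // (For the flags listed below, the Get*Symbol helpers above already folded + // the decoration - "$non_lazy_ptr", "$stub", "__imp_", "@GOTPCREL", etc. - + // into Sym itself, so no expression wrapping is needed here.)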
+ case X86II::MO_DARWIN_NONLAZY: + case X86II::MO_DLLIMPORT: + case X86II::MO_DARWIN_STUB: + case X86II::MO_TLSGD: + case X86II::MO_GOTTPOFF: + case X86II::MO_INDNTPOFF: + case X86II::MO_TPOFF: + case X86II::MO_NTPOFF: + case X86II::MO_GOTPCREL: + case X86II::MO_GOT: + case X86II::MO_GOTOFF: + case X86II::MO_PLT: + break; + case X86II::MO_PIC_BASE_OFFSET: + case X86II::MO_DARWIN_NONLAZY_PIC_BASE: + case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: + // Subtract the pic base. + Expr = MCBinaryExpr::CreateSub(Expr, + MCSymbolRefExpr::Create(GetPICBaseSymbol(), Ctx), + Ctx); + break; + } + + if (!MO.isJTI() && MO.getOffset()) + Expr = MCBinaryExpr::CreateAdd(Expr, + MCConstantExpr::Create(MO.getOffset(), Ctx), + Ctx); + return MCOperand::CreateExpr(Expr); +} + + + +static void lower_subreg32(MCInst *MI, unsigned OpNo) { + // Convert registers in the addr mode according to subreg32. + unsigned Reg = MI->getOperand(OpNo).getReg(); + if (Reg != 0) + MI->getOperand(OpNo).setReg(getX86SubSuperRegister(Reg, MVT::i32)); +} + +static void lower_lea64_32mem(MCInst *MI, unsigned OpNo) { + // Convert registers in the addr mode according to subreg64. + for (unsigned i = 0; i != 4; ++i) { + if (!MI->getOperand(OpNo+i).isReg()) continue; + + unsigned Reg = MI->getOperand(OpNo+i).getReg(); + if (Reg == 0) continue; + + MI->getOperand(OpNo+i).setReg(getX86SubSuperRegister(Reg, MVT::i64)); + } +} + + + +void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { + OutMI.setOpcode(MI->getOpcode()); + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + + MCOperand MCOp; + switch (MO.getType()) { + default: + MI->dump(); + llvm_unreachable("unknown operand type"); + case MachineOperand::MO_Register: + MCOp = MCOperand::CreateReg(MO.getReg()); + break; + case MachineOperand::MO_Immediate: + MCOp = MCOperand::CreateImm(MO.getImm()); + break; + case MachineOperand::MO_MachineBasicBlock: + MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( + AsmPrinter.GetMBBSymbol(MO.getMBB()->getNumber()), Ctx)); + break; + case MachineOperand::MO_GlobalAddress: + MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO)); + break; + case MachineOperand::MO_ExternalSymbol: + MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO)); + break; + case MachineOperand::MO_JumpTableIndex: + MCOp = LowerSymbolOperand(MO, GetJumpTableSymbol(MO)); + break; + case MachineOperand::MO_ConstantPoolIndex: + MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO)); + break; + } + + OutMI.addOperand(MCOp); + } + + // Handle a few special cases to eliminate operand modifiers. + switch (OutMI.getOpcode()) { + case X86::LEA64_32r: // Handle 'subreg rewriting' for the lea64_32mem operand. 
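+ // e.g. an illustrative "leal 4(%esi,%ecx,8), %eax": the 32-bit result + // register stays, but the address is encoded with %rsi/%rcx, so the four + // memory operands beginning at index 1 have their registers rewritten to + // the 64-bit super-registers.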
+ lower_lea64_32mem(&OutMI, 1); + break; + case X86::MOV16r0: + OutMI.setOpcode(X86::MOV32r0); + lower_subreg32(&OutMI, 0); + break; + case X86::MOVZX16rr8: + OutMI.setOpcode(X86::MOVZX32rr8); + lower_subreg32(&OutMI, 0); + break; + case X86::MOVZX16rm8: + OutMI.setOpcode(X86::MOVZX32rm8); + lower_subreg32(&OutMI, 0); + break; + case X86::MOVSX16rr8: + OutMI.setOpcode(X86::MOVSX32rr8); + lower_subreg32(&OutMI, 0); + break; + case X86::MOVSX16rm8: + OutMI.setOpcode(X86::MOVSX32rm8); + lower_subreg32(&OutMI, 0); + break; + case X86::MOVZX64rr32: + OutMI.setOpcode(X86::MOV32rr); + lower_subreg32(&OutMI, 0); + break; + case X86::MOVZX64rm32: + OutMI.setOpcode(X86::MOV32rm); + lower_subreg32(&OutMI, 0); + break; + case X86::MOV64ri64i32: + OutMI.setOpcode(X86::MOV32ri); + lower_subreg32(&OutMI, 0); + break; + case X86::MOVZX64rr8: + OutMI.setOpcode(X86::MOVZX32rr8); + lower_subreg32(&OutMI, 0); + break; + case X86::MOVZX64rm8: + OutMI.setOpcode(X86::MOVZX32rm8); + lower_subreg32(&OutMI, 0); + break; + case X86::MOVZX64rr16: + OutMI.setOpcode(X86::MOVZX32rr16); + lower_subreg32(&OutMI, 0); + break; + case X86::MOVZX64rm16: + OutMI.setOpcode(X86::MOVZX32rm16); + lower_subreg32(&OutMI, 0); + break; + } +} + + + +void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { + X86MCInstLower MCInstLowering(OutContext, Mang, *this); + switch (MI->getOpcode()) { + case TargetInstrInfo::DBG_LABEL: + case TargetInstrInfo::EH_LABEL: + case TargetInstrInfo::GC_LABEL: + printLabel(MI); + return; + case TargetInstrInfo::INLINEASM: + O << '\t'; + printInlineAsm(MI); + return; + case TargetInstrInfo::IMPLICIT_DEF: + printImplicitDef(MI); + return; + case TargetInstrInfo::KILL: + return; + case X86::MOVPC32r: { + MCInst TmpInst; + // This is a pseudo op for a two instruction sequence with a label, which + // looks like: + // call "L1$pb" + // "L1$pb": + // popl %esi + + // Emit the call. + MCSymbol *PICBase = MCInstLowering.GetPICBaseSymbol(); + TmpInst.setOpcode(X86::CALLpcrel32); + // FIXME: We would like an efficient form for this, so we don't have to do a + // lot of extra uniquing. + TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::Create(PICBase, + OutContext))); + printMCInst(&TmpInst); + O << '\n'; + + // Emit the label. + OutStreamer.EmitLabel(PICBase); + + // popl $reg + TmpInst.setOpcode(X86::POP32r); + TmpInst.getOperand(0) = MCOperand::CreateReg(MI->getOperand(0).getReg()); + printMCInst(&TmpInst); + return; + } + + case X86::ADD32ri: { + // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri. + if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS) + break; + + // Okay, we have something like: + // EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL) + + // For this, we want to print something like: + // MYGLOBAL + (. - PICBASE) + // However, we can't generate a ".", so just emit a new label here and refer + // to it. We know that this operand flag occurs at most once per function. + SmallString<64> Name; + raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() + << "picbaseref" << getFunctionNumber(); + MCSymbol *DotSym = OutContext.GetOrCreateSymbol(Name.str()); + OutStreamer.EmitLabel(DotSym); + + // Now that we have emitted the label, lower the complex operand expression. 
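+ // The MCExpr is assembled inside-out: first (DotSym - PICBase), then OpSym + // is added, yielding exactly the "MYGLOBAL + (. - PICBASE)" form sketched + // above.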
+ MCSymbol *OpSym = MCInstLowering.GetExternalSymbolSymbol(MI->getOperand(2)); + + const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext); + const MCExpr *PICBase = + MCSymbolRefExpr::Create(MCInstLowering.GetPICBaseSymbol(), OutContext); + DotExpr = MCBinaryExpr::CreateSub(DotExpr, PICBase, OutContext); + + DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext), + DotExpr, OutContext); + + MCInst TmpInst; + TmpInst.setOpcode(X86::ADD32ri); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); + TmpInst.addOperand(MCOperand::CreateExpr(DotExpr)); + printMCInst(&TmpInst); + return; + } + } + + MCInst TmpInst; + MCInstLowering.Lower(MI, TmpInst); + + + printMCInst(&TmpInst); +} + diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.h b/lib/Target/X86/AsmPrinter/X86MCInstLower.h new file mode 100644 index 0000000..fa25b90 --- /dev/null +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.h @@ -0,0 +1,54 @@ +//===-- X86MCInstLower.h - Lower MachineInstr to MCInst -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef X86_MCINSTLOWER_H +#define X86_MCINSTLOWER_H + +#include "llvm/Support/Compiler.h" + +namespace llvm { + class MCContext; + class MCInst; + class MCOperand; + class MCSymbol; + class MachineInstr; + class MachineModuleInfoMachO; + class MachineOperand; + class Mangler; + class X86AsmPrinter; + class X86Subtarget; + +/// X86MCInstLower - This class is used to lower a MachineInstr into an MCInst. +class VISIBILITY_HIDDEN X86MCInstLower { + MCContext &Ctx; + Mangler *Mang; + X86AsmPrinter &AsmPrinter; + + const X86Subtarget &getSubtarget() const; +public: + X86MCInstLower(MCContext &ctx, Mangler *mang, X86AsmPrinter &asmprinter) + : Ctx(ctx), Mang(mang), AsmPrinter(asmprinter) {} + + void Lower(const MachineInstr *MI, MCInst &OutMI) const; + + MCSymbol *GetPICBaseSymbol() const; + + MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const; + MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const; + MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const; + MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const; + MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; + +private: + MachineModuleInfoMachO &getMachOMMI() const; +}; + +} + +#endif diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 7ea0e51..3ad65fb 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -7,13 +7,15 @@ tablegen(X86GenInstrNames.inc -gen-instr-enums) tablegen(X86GenInstrInfo.inc -gen-instr-desc) tablegen(X86GenAsmWriter.inc -gen-asm-writer) tablegen(X86GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1) +tablegen(X86GenAsmMatcher.inc -gen-asm-matcher) tablegen(X86GenDAGISel.inc -gen-dag-isel) tablegen(X86GenFastISel.inc -gen-fast-isel) tablegen(X86GenCallingConv.inc -gen-callingconv) tablegen(X86GenSubtarget.inc -gen-subtarget) -add_llvm_target(X86CodeGen +set(sources X86CodeEmitter.cpp + X86COFFMachineModuleInfo.cpp X86ELFWriterInfo.cpp X86FloatingPoint.cpp X86FloatingPointRegKill.cpp @@ -21,11 +23,19 @@ add_llvm_target(X86CodeGen X86ISelLowering.cpp X86InstrInfo.cpp X86JITInfo.cpp + X86MCAsmInfo.cpp X86RegisterInfo.cpp X86Subtarget.cpp -
X86TargetAsmInfo.cpp X86TargetMachine.cpp + X86TargetObjectFile.cpp X86FastISel.cpp ) +if( CMAKE_CL_64 ) + enable_language(ASM_MASM) + set(sources ${sources} X86CompilationCallback_Win64.asm) +endif() + +add_llvm_target(X86CodeGen ${sources}) + target_link_libraries (LLVMX86CodeGen LLVMSelectionDAG) diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile index 44f1c5d..220831d 100644 --- a/lib/Target/X86/Makefile +++ b/lib/Target/X86/Makefile @@ -13,11 +13,11 @@ TARGET = X86 # Make sure that tblgen is run, first thing. BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \ X86GenRegisterInfo.inc X86GenInstrNames.inc \ - X86GenInstrInfo.inc X86GenAsmWriter.inc \ + X86GenInstrInfo.inc X86GenAsmWriter.inc X86GenAsmMatcher.inc \ X86GenAsmWriter1.inc X86GenDAGISel.inc \ X86GenFastISel.inc \ X86GenCallingConv.inc X86GenSubtarget.inc -DIRS = AsmPrinter +DIRS = AsmPrinter AsmParser TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt index ad12137..e8f7c5d 100644 --- a/lib/Target/X86/README-X86-64.txt +++ b/lib/Target/X86/README-X86-64.txt @@ -249,3 +249,52 @@ lowered return value, and it would free non-C frontends from a complication only required by a C-based ABI. //===---------------------------------------------------------------------===// + +We get a redundant zero extension for code like this: + +int mask[1000]; +int foo(unsigned x) { + if (x < 10) + x = x * 45; + else + x = x * 78; + return mask[x]; +} + +_foo: +LBB1_0: ## entry + cmpl $9, %edi + jbe LBB1_3 ## bb +LBB1_1: ## bb1 + imull $78, %edi, %eax +LBB1_2: ## bb2 + movl %eax, %eax <---- + movq _mask@GOTPCREL(%rip), %rcx + movl (%rcx,%rax,4), %eax + ret +LBB1_3: ## bb + imull $45, %edi, %eax + jmp LBB1_2 ## bb2 + +Before regalloc, we have: + + %reg1025<def> = IMUL32rri8 %reg1024, 45, %EFLAGS<imp-def> + JMP mbb<bb2,0x203afb0> + Successors according to CFG: 0x203afb0 (#3) + +bb1: 0x203af60, LLVM BB @0x1e02310, ID#2: + Predecessors according to CFG: 0x203aec0 (#0) + %reg1026<def> = IMUL32rri8 %reg1024, 78, %EFLAGS<imp-def> + Successors according to CFG: 0x203afb0 (#3) + +bb2: 0x203afb0, LLVM BB @0x1e02340, ID#3: + Predecessors according to CFG: 0x203af10 (#1) 0x203af60 (#2) + %reg1027<def> = PHI %reg1025, mbb<bb,0x203af10>, + %reg1026, mbb<bb1,0x203af60> + %reg1029<def> = MOVZX64rr32 %reg1027 + +so we'd have to know that IMUL32rri8 leaves the upper 32 bits zeroed (i.e. the +result is implicitly zero extended) and be able to recognize the zero extend. +This could also presumably be implemented if we have whole-function +selectiondags. + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index 4464878..046d35c 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -1932,3 +1932,23 @@ Replacing an icmp+select with a shift should always be considered profitable in instcombine. //===---------------------------------------------------------------------===// + +Re-implement atomic builtins __sync_add_and_fetch() and __sync_sub_and_fetch +properly. + +When the return value is not used (i.e. we only care about the value in +memory), x86 does not have to use an instruction that returns the old value, +such as xadd. Instead, it can use plain add, sub, inc, and dec instructions +with the "lock" prefix. + +This is currently implemented using a bit of an instruction selection trick. +The issue is that the target-independent pattern produces one output and a +chain, and we want to map it into one that just outputs a chain. The current trick is to select
The current trick is to select +it into a MERGE_VALUES with the first definition being an implicit_def. The +proper solution is to add new ISD opcodes for the no-output variant. DAG +combiner can then transform the node before it gets to target node selection. + +Problem #2 is we are adding a whole bunch of x86 atomic instructions when in +fact these instructions are identical to the non-lock versions. We need a way to +add target specific information to target nodes and have this information +carried over to machine instructions. Asm printer (or JIT) can use this +information to add the "lock" prefix. diff --git a/lib/Target/X86/TargetInfo/CMakeLists.txt b/lib/Target/X86/TargetInfo/CMakeLists.txt new file mode 100644 index 0000000..90be9f5 --- /dev/null +++ b/lib/Target/X86/TargetInfo/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMX86Info + X86TargetInfo.cpp + ) + +add_dependencies(LLVMX86Info X86CodeGenTable_gen) diff --git a/lib/Target/X86/TargetInfo/Makefile b/lib/Target/X86/TargetInfo/Makefile new file mode 100644 index 0000000..6677d4b --- /dev/null +++ b/lib/Target/X86/TargetInfo/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/X86/TargetInfo/Makefile ------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMX86Info + +# Hack: we need to include 'main' target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/X86/TargetInfo/X86TargetInfo.cpp b/lib/Target/X86/TargetInfo/X86TargetInfo.cpp new file mode 100644 index 0000000..08d4d84 --- /dev/null +++ b/lib/Target/X86/TargetInfo/X86TargetInfo.cpp @@ -0,0 +1,23 @@ +//===-- X86TargetInfo.cpp - X86 Target Implementation ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "llvm/Module.h" +#include "llvm/Target/TargetRegistry.h" +using namespace llvm; + +Target llvm::TheX86_32Target, llvm::TheX86_64Target; + +extern "C" void LLVMInitializeX86TargetInfo() { + RegisterTarget<Triple::x86, /*HasJIT=*/true> + X(TheX86_32Target, "x86", "32-bit X86: Pentium-Pro and above"); + + RegisterTarget<Triple::x86_64, /*HasJIT=*/true> + Y(TheX86_64Target, "x86-64", "64-bit X86: EM64T and AMD64"); +} diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 22de3f6..a167118 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -22,8 +22,10 @@ namespace llvm { class X86TargetMachine; class FunctionPass; class MachineCodeEmitter; +class MCCodeEmitter; class JITCodeEmitter; -class raw_ostream; +class Target; +class formatted_raw_ostream; /// createX86ISelDag - This pass converts a legalized DAG into a /// X86-specific DAG, ready for instruction scheduling. @@ -42,13 +44,6 @@ FunctionPass *createX86FloatingPointStackifierPass(); /// FunctionPass *createX87FPRegKillInserterPass(); -/// createX86CodePrinterPass - Returns a pass that prints the X86 -/// assembly code for a MachineFunction to the given output stream, -/// using the given target machine description. 
-/// -FunctionPass *createX86CodePrinterPass(raw_ostream &o, X86TargetMachine &tm, - bool Verbose); - /// createX86CodeEmitterPass - Return a pass that emits the collected X86 code /// to the specified MCE object. @@ -56,6 +51,10 @@ FunctionPass *createX86CodeEmitterPass(X86TargetMachine &TM, MachineCodeEmitter &MCE); FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM, JITCodeEmitter &JCE); +FunctionPass *createX86ObjectCodeEmitterPass(X86TargetMachine &TM, + ObjectCodeEmitter &OCE); + +MCCodeEmitter *createX86MCCodeEmitter(const Target &, TargetMachine &TM); /// createX86EmitCodeToMemory - Returns a pass that converts a register /// allocated function into raw machine code in a dynamically @@ -68,6 +67,8 @@ FunctionPass *createEmitX86CodeToMemory(); /// FunctionPass *createX86MaxStackAlignmentCalculatorPass(); +extern Target TheX86_32Target, TheX86_64Target; + } // End llvm namespace // Defines symbolic names for X86 registers. This defines a mapping from diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 47861d5..da467fe 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -19,12 +19,17 @@ include "llvm/Target/Target.td" //===----------------------------------------------------------------------===// // X86 Subtarget features. //===----------------------------------------------------------------------===// - + +def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true", + "Enable conditional move instructions">; + def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX", "Enable MMX instructions">; def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1", "Enable SSE instructions", - [FeatureMMX]>; + // SSE codegen depends on cmovs, and all + // SSE1+ processors support them. + [FeatureMMX, FeatureCMOV]>; def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2", "Enable SSE2 instructions", [FeatureSSE1]>; @@ -76,8 +81,8 @@ def : Proc<"i586", []>; def : Proc<"pentium", []>; def : Proc<"pentium-mmx", [FeatureMMX]>; def : Proc<"i686", []>; -def : Proc<"pentiumpro", []>; -def : Proc<"pentium2", [FeatureMMX]>; +def : Proc<"pentiumpro", [FeatureCMOV]>; +def : Proc<"pentium2", [FeatureMMX, FeatureCMOV]>; def : Proc<"pentium3", [FeatureSSE1]>; def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>; def : Proc<"pentium4", [FeatureSSE2]>; @@ -178,21 +183,34 @@ include "X86CallingConv.td" // Assembly Printers //===----------------------------------------------------------------------===// +// Currently the X86 assembly parser only supports ATT syntax. +def ATTAsmParser : AsmParser { + string AsmParserClassName = "ATTAsmParser"; + int Variant = 0; + + // Discard comments in assembly strings. + string CommentDelimiter = "#"; + + // Recognize hard coded registers. + string RegisterPrefix = "%"; +} + // The X86 target supports two different syntaxes for emitting machine code. // This is controlled by the -x86-asm-syntax={att|intel} def ATTAsmWriter : AsmWriter { - string AsmWriterClassName = "ATTAsmPrinter"; + string AsmWriterClassName = "ATTInstPrinter"; int Variant = 0; } def IntelAsmWriter : AsmWriter { - string AsmWriterClassName = "IntelAsmPrinter"; + string AsmWriterClassName = "IntelInstPrinter"; int Variant = 1; } - def X86 : Target { // Information about the instructions... 
let InstructionSet = X86InstrInfo; + let AssemblyParsers = [ATTAsmParser]; + let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter]; } diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.cpp b/lib/Target/X86/X86COFFMachineModuleInfo.cpp new file mode 100644 index 0000000..01c4fcf --- /dev/null +++ b/lib/Target/X86/X86COFFMachineModuleInfo.cpp @@ -0,0 +1,123 @@ +//===-- llvm/CodeGen/X86COFFMachineModuleInfo.cpp -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is an MMI implementation for X86 COFF (Windows) targets. +// +//===----------------------------------------------------------------------===// + +#include "X86COFFMachineModuleInfo.h" +#include "X86MachineFunctionInfo.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Target/TargetData.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +X86COFFMachineModuleInfo::X86COFFMachineModuleInfo(const MachineModuleInfo &) { +} +X86COFFMachineModuleInfo::~X86COFFMachineModuleInfo() { + +} + +void X86COFFMachineModuleInfo::AddFunctionInfo(const Function *F, + const X86MachineFunctionInfo &Val) { + FunctionInfoMap[F] = Val; +} + + + +static X86MachineFunctionInfo calculateFunctionInfo(const Function *F, + const TargetData &TD) { + X86MachineFunctionInfo Info; + uint64_t Size = 0; + + switch (F->getCallingConv()) { + case CallingConv::X86_StdCall: + Info.setDecorationStyle(StdCall); + break; + case CallingConv::X86_FastCall: + Info.setDecorationStyle(FastCall); + break; + default: + return Info; + } + + unsigned argNum = 1; + for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); + AI != AE; ++AI, ++argNum) { + const Type* Ty = AI->getType(); + + // 'Dereference' type in case of byval parameter attribute + if (F->paramHasAttr(argNum, Attribute::ByVal)) + Ty = cast<PointerType>(Ty)->getElementType(); + + // Size should be aligned to DWORD boundary + Size += ((TD.getTypeAllocSize(Ty) + 3)/4)*4; + } + + // We're not supporting tooooo huge arguments :) + Info.setBytesToPopOnReturn((unsigned int)Size); + return Info; +} + + +/// DecorateCygMingName - Query FunctionInfoMap and use this information for +/// various name decorations for Cygwin and MingW. +void X86COFFMachineModuleInfo::DecorateCygMingName(SmallVectorImpl<char> &Name, + const GlobalValue *GV, + const TargetData &TD) { + const Function *F = dyn_cast<Function>(GV); + if (!F) return; + + // Save function name for later type emission. + if (F->isDeclaration()) + CygMingStubs.insert(StringRef(Name.data(), Name.size())); + + // We don't want to decorate non-stdcall or non-fastcall functions right now + CallingConv::ID CC = F->getCallingConv(); + if (CC != CallingConv::X86_StdCall && CC != CallingConv::X86_FastCall) + return; + + const X86MachineFunctionInfo *Info; + + FMFInfoMap::const_iterator info_item = FunctionInfoMap.find(F); + if (info_item == FunctionInfoMap.end()) { + // Calculate appropriate function info and populate map + FunctionInfoMap[F] = calculateFunctionInfo(F, TD); + Info = &FunctionInfoMap[F]; + } else { + Info = &info_item->second; + } + + if (Info->getDecorationStyle() == None) return; + const FunctionType *FT = F->getFunctionType(); + + // "Pure" variadic functions do not receive @0 suffix.
+ if (!FT->isVarArg() || FT->getNumParams() == 0 || + (FT->getNumParams() == 1 && F->hasStructRetAttr())) + raw_svector_ostream(Name) << '@' << Info->getBytesToPopOnReturn(); + + if (Info->getDecorationStyle() == FastCall) { + if (Name[0] == '_') + Name[0] = '@'; + else + Name.insert(Name.begin(), '@'); + } +} + +/// DecorateCygMingName - Query FunctionInfoMap and use this information for +/// various name decorations for Cygwin and MingW. +void X86COFFMachineModuleInfo::DecorateCygMingName(std::string &Name, + const GlobalValue *GV, + const TargetData &TD) { + SmallString<128> NameStr(Name.begin(), Name.end()); + DecorateCygMingName(NameStr, GV, TD); + Name.assign(NameStr.begin(), NameStr.end()); +} diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h new file mode 100644 index 0000000..afd5525 --- /dev/null +++ b/lib/Target/X86/X86COFFMachineModuleInfo.h @@ -0,0 +1,67 @@ +//===-- llvm/CodeGen/X86COFFMachineModuleInfo.h -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is an MMI implementation for X86 COFF (Windows) targets. +// +//===----------------------------------------------------------------------===// + +#ifndef X86COFF_MACHINEMODULEINFO_H +#define X86COFF_MACHINEMODULEINFO_H + +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/ADT/StringSet.h" + +namespace llvm { + class X86MachineFunctionInfo; + class TargetData; + +/// X86COFFMachineModuleInfo - This is a MachineModuleInfoImpl implementation +/// for X86 COFF targets. +class X86COFFMachineModuleInfo : public MachineModuleInfoImpl { + StringSet<> CygMingStubs; + + // We have to propagate some information about MachineFunction to the + // AsmPrinter. That works while we are printing the function itself, since + // we have access to the MachineFunction and can get the appropriate + // MachineFunctionInfo. Unfortunately, that is not possible when we are only + // printing a reference to a Function (e.g. a call to it); worse, there is + // no way to get the corresponding MachineFunction - it may not have been + // created at all. That is why we collect everything we need in this + // additional structure. + // + // This structure is used e.g. for name decoration of stdcall and + // fastcall'ed functions, since the decoration depends on the arguments' + // size.
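+ // For example (hypothetical function): a stdcall "int f(int, int)" pops 8 + // bytes of arguments, so its decorated name is "_f@8"; with fastcall the + // leading '_' becomes '@', giving "@f@8".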
+ typedef std::map<const Function*, X86MachineFunctionInfo> FMFInfoMap; + FMFInfoMap FunctionInfoMap; + +public: + X86COFFMachineModuleInfo(const MachineModuleInfo &); + ~X86COFFMachineModuleInfo(); + + + void DecorateCygMingName(std::string &Name, const GlobalValue *GV, + const TargetData &TD); + void DecorateCygMingName(SmallVectorImpl<char> &Name, const GlobalValue *GV, + const TargetData &TD); + + void AddFunctionInfo(const Function *F, const X86MachineFunctionInfo &Val); + + + typedef StringSet<>::const_iterator stub_iterator; + stub_iterator stub_begin() const { return CygMingStubs.begin(); } + stub_iterator stub_end() const { return CygMingStubs.end(); } + + +}; + + + +} // end namespace llvm + +#endif diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index e9fcbd5..d77f039 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -89,7 +89,7 @@ def RetCC_X86_64_C : CallingConv<[ // X86-Win64 C return-value convention. def RetCC_X86_Win64_C : CallingConv<[ // The X86-Win64 calling convention always returns __m64 values in RAX. - CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToReg<[RAX]>>, + CCIfType<[v8i8, v4i16, v2i32, v1i64], CCBitConvertToType<i64>>, // And FP in XMM0 only. CCIfType<[f32], CCAssignToReg<[XMM0]>>, @@ -137,26 +137,26 @@ def CC_X86_64_C : CallingConv<[ // The 'nest' parameter, if any, is passed in R10. CCIfNest<CCAssignToReg<[R10]>>, + // The first 6 v1i64 vector arguments are passed in GPRs on Darwin. + CCIfType<[v1i64], + CCIfSubtarget<"isTargetDarwin()", + CCBitConvertToType<i64>>>, + // The first 6 integer arguments are passed in integer registers. CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D]>>, CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>, - - // The first 8 FP/Vector arguments are passed in XMM registers. - CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - CCIfSubtarget<"hasSSE1()", - CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>, // The first 8 MMX (except for v1i64) vector arguments are passed in XMM // registers on Darwin. CCIfType<[v8i8, v4i16, v2i32, v2f32], CCIfSubtarget<"isTargetDarwin()", CCIfSubtarget<"hasSSE2()", - CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>>, + CCPromoteToType<v2i64>>>>, - // The first 8 v1i64 vector arguments are passed in GPRs on Darwin. - CCIfType<[v1i64], - CCIfSubtarget<"isTargetDarwin()", - CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>>, + // The first 8 FP/Vector arguments are passed in XMM registers. + CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCIfSubtarget<"hasSSE1()", + CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>, // Integer/FP values get stored in stack slots that are 8 bytes in size and // 8-byte aligned if there are no more registers to hold them. @@ -184,6 +184,13 @@ def CC_X86_Win64_C : CallingConv<[ // The 'nest' parameter, if any, is passed in R10. CCIfNest<CCAssignToReg<[R10]>>, + // 128 bit vectors are passed by pointer + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>, + + // The first 4 MMX vector arguments are passed in GPRs. + CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32], + CCBitConvertToType<i64>>, + // The first 4 integer arguments are passed in integer registers. 
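+ // e.g. for an illustrative f(int, double, __m64): the int lands in ECX, + // the double in XMM1 (shadowing RDX), and the __m64 is bitconverted to i64 + // and lands in R8.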
CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ], [XMM0, XMM1, XMM2, XMM3]>>, @@ -195,24 +202,16 @@ def CC_X86_Win64_C : CallingConv<[ CCAssignToRegWithShadow<[XMM0, XMM1, XMM2, XMM3], [RCX , RDX , R8 , R9 ]>>, - // The first 4 MMX vector arguments are passed in GPRs. - CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32], - CCAssignToRegWithShadow<[RCX , RDX , R8 , R9 ], - [XMM0, XMM1, XMM2, XMM3]>>, - // Integer/FP values get stored in stack slots that are 8 bytes in size and - // 16-byte aligned if there are no more registers to hold them. - CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 16>>, + // 8-byte aligned if there are no more registers to hold them. + CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>, // Long doubles get stack slots whose size and alignment depends on the // subtarget. CCIfType<[f80], CCAssignToStack<0, 0>>, - // Vectors get 16-byte stack slots that are 16-byte aligned. - CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>, - - // __m64 vectors get 8-byte stack slots that are 16-byte aligned. - CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 16>> + // __m64 vectors get 8-byte stack slots that are 8-byte aligned. + CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>> ]>; //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index d5846a0..f942f3f 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -22,21 +22,27 @@ #include "llvm/PassManager.h" #include "llvm/CodeGen/MachineCodeEmitter.h" #include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/CodeGen/ObjectCodeEmitter.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Function.h" #include "llvm/ADT/Statistic.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; STATISTIC(NumEmitted, "Number of machine instructions emitted"); namespace { -template<class CodeEmitter> + template<class CodeEmitter> class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass { const X86InstrInfo *II; const TargetData *TD; @@ -67,6 +73,7 @@ template<class CodeEmitter> const TargetInstrDesc *Desc); void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); AU.addRequired<MachineModuleInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -83,7 +90,7 @@ template<class CodeEmitter> intptr_t PCAdj = 0); void emitDisplacementField(const MachineOperand *RelocOp, int DispVal, - intptr_t PCAdj = 0); + intptr_t Adj = 0, bool IsPCRel = true); void emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeField); void emitRegModRMByte(unsigned RegOpcodeField); @@ -95,29 +102,27 @@ template<class CodeEmitter> intptr_t PCAdj = 0); unsigned getX86RegNum(unsigned RegNo) const; - - bool gvNeedsNonLazyPtr(const GlobalValue *GV); }; template<class CodeEmitter> char Emitter<CodeEmitter>::ID = 0; -} +} // end anonymous namespace. /// createX86CodeEmitterPass - Return a pass that emits the collected X86 code /// to the specified templated MachineCodeEmitter object. 
-namespace llvm { - -FunctionPass *createX86CodeEmitterPass(X86TargetMachine &TM, - MachineCodeEmitter &MCE) { +FunctionPass *llvm::createX86CodeEmitterPass(X86TargetMachine &TM, + MachineCodeEmitter &MCE) { return new Emitter<MachineCodeEmitter>(TM, MCE); } -FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM, - JITCodeEmitter &JCE) { +FunctionPass *llvm::createX86JITCodeEmitterPass(X86TargetMachine &TM, + JITCodeEmitter &JCE) { return new Emitter<JITCodeEmitter>(TM, JCE); } - -} // end namespace llvm +FunctionPass *llvm::createX86ObjectCodeEmitterPass(X86TargetMachine &TM, + ObjectCodeEmitter &OCE) { + return new Emitter<ObjectCodeEmitter>(TM, OCE); +} template<class CodeEmitter> bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { @@ -130,7 +135,8 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { IsPIC = TM.getRelocationModel() == Reloc::PIC_; do { - DOUT << "JITTing function '" << MF.getFunction()->getName() << "'\n"; + DEBUG(errs() << "JITTing function '" + << MF.getFunction()->getName() << "'\n"); MCE.startFunction(MF); for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); MBB != E; ++MBB) { @@ -172,7 +178,7 @@ void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, intptr_t PCAdj /* = 0 */, bool NeedStub /* = false */, bool Indirect /* = false */) { - intptr_t RelocCST = 0; + intptr_t RelocCST = Disp; if (Reloc == X86::reloc_picrel_word) RelocCST = PICBaseOffset; else if (Reloc == X86::reloc_pcrel_word) @@ -291,53 +297,61 @@ static bool isDisp8(int Value) { return Value == (signed char)Value; } -template<class CodeEmitter> -bool Emitter<CodeEmitter>::gvNeedsNonLazyPtr(const GlobalValue *GV) { - // For Darwin, simulate the linktime GOT by using the same non-lazy-pointer +static bool gvNeedsNonLazyPtr(const MachineOperand &GVOp, + const TargetMachine &TM) { + // For Darwin-64, simulate the linktime GOT by using the same non-lazy-pointer // mechanism as 32-bit mode. - return (!Is64BitMode || TM.getSubtarget<X86Subtarget>().isTargetDarwin()) && - TM.getSubtarget<X86Subtarget>().GVRequiresExtraLoad(GV, TM, false); + if (TM.getSubtarget<X86Subtarget>().is64Bit() && + !TM.getSubtarget<X86Subtarget>().isTargetDarwin()) + return false; + + // Return true if this is a reference to a stub containing the address of the + // global, not the global itself. + return isGlobalStubReference(GVOp.getTargetFlags()); } template<class CodeEmitter> void Emitter<CodeEmitter>::emitDisplacementField(const MachineOperand *RelocOp, - int DispVal, intptr_t PCAdj) { + int DispVal, + intptr_t Adj /* = 0 */, + bool IsPCRel /* = true */) { // If this is a simple integer displacement that doesn't require a relocation, // emit it now. if (!RelocOp) { emitConstant(DispVal, 4); return; } - + // Otherwise, this is something that requires a relocation. Emit it as such // now. + unsigned RelocType = Is64BitMode ? + (IsPCRel ? X86::reloc_pcrel_word : X86::reloc_absolute_word_sext) + : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); if (RelocOp->isGlobal()) { // In 64-bit static small code model, we could potentially emit absolute. - // But it's probably not beneficial. + // But it's probably not beneficial. If the MCE supports using RIP + // directly, do it; otherwise fall back to absolute (this is determined by + // IsPCRel). // 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative // 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute - unsigned rt = Is64BitMode ? X86::reloc_pcrel_word - : (IsPIC ?
X86::reloc_picrel_word : X86::reloc_absolute_word); bool NeedStub = isa<Function>(RelocOp->getGlobal()); - bool Indirect = gvNeedsNonLazyPtr(RelocOp->getGlobal()); - emitGlobalAddress(RelocOp->getGlobal(), rt, RelocOp->getOffset(), - PCAdj, NeedStub, Indirect); + bool Indirect = gvNeedsNonLazyPtr(*RelocOp, TM); + emitGlobalAddress(RelocOp->getGlobal(), RelocType, RelocOp->getOffset(), + Adj, NeedStub, Indirect); + } else if (RelocOp->isSymbol()) { + emitExternalSymbolAddress(RelocOp->getSymbolName(), RelocType); } else if (RelocOp->isCPI()) { - unsigned rt = Is64BitMode ? X86::reloc_pcrel_word : X86::reloc_picrel_word; - emitConstPoolAddress(RelocOp->getIndex(), rt, - RelocOp->getOffset(), PCAdj); - } else if (RelocOp->isJTI()) { - unsigned rt = Is64BitMode ? X86::reloc_pcrel_word : X86::reloc_picrel_word; - emitJumpTableAddress(RelocOp->getIndex(), rt, PCAdj); + emitConstPoolAddress(RelocOp->getIndex(), RelocType, + RelocOp->getOffset(), Adj); } else { - assert(0 && "Unknown value to relocate!"); + assert(RelocOp->isJTI() && "Unexpected machine operand!"); + emitJumpTableAddress(RelocOp->getIndex(), RelocType, Adj); } } template<class CodeEmitter> void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, - unsigned Op, unsigned RegOpcodeField, - intptr_t PCAdj) { + unsigned Op, unsigned RegOpcodeField, + intptr_t PCAdj) { const MachineOperand &Op3 = MI.getOperand(Op+3); int DispVal = 0; const MachineOperand *DispForReloc = 0; @@ -345,15 +359,17 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, // Figure out what sort of displacement we have to handle here. if (Op3.isGlobal()) { DispForReloc = &Op3; + } else if (Op3.isSymbol()) { + DispForReloc = &Op3; } else if (Op3.isCPI()) { - if (Is64BitMode || IsPIC) { + if (!MCE.earlyResolveAddresses() || Is64BitMode || IsPIC) { DispForReloc = &Op3; } else { DispVal += MCE.getConstantPoolEntryAddress(Op3.getIndex()); DispVal += Op3.getOffset(); } } else if (Op3.isJTI()) { - if (Is64BitMode || IsPIC) { + if (!MCE.earlyResolveAddresses() || Is64BitMode || IsPIC) { DispForReloc = &Op3; } else { DispVal += MCE.getJumpTableEntryAddress(Op3.getIndex()); @@ -368,17 +384,23 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, unsigned BaseReg = Base.getReg(); + // Indicate whether the displacement will use a pcrel or absolute reference + // by default. MCEs able to resolve addresses on the fly use pcrel by + // default, while others, unless explicitly asked to use RIP, use absolute + // references. + bool IsPCRel = MCE.earlyResolveAddresses() ? true : false; + // Is a SIB byte needed? + // If no BaseReg, issue a RIP-relative instruction only if the MCE can + // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table + // 2-7) and absolute references. if ((!Is64BitMode || DispForReloc || BaseReg != 0) && - IndexReg.getReg() == 0 && - (BaseReg == 0 || BaseReg == X86::RIP || - getX86RegNum(BaseReg) != N86::ESP)) { - if (BaseReg == 0 || - BaseReg == X86::RIP) { // Just a displacement? + IndexReg.getReg() == 0 && + ((BaseReg == 0 && MCE.earlyResolveAddresses()) || BaseReg == X86::RIP || + (BaseReg != 0 && getX86RegNum(BaseReg) != N86::ESP))) { + if (BaseReg == 0 || BaseReg == X86::RIP) { // Just a displacement?
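+ // ModRM mod=00 with r/m=101 selects a bare disp32 in 32-bit mode; in + // 64-bit mode the same encoding means RIP+disp32, which is why the + // displacement below is emitted pcrel in that case.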
// Emit special case [disp32] encoding MCE.emitByte(ModRMByte(0, RegOpcodeField, 5)); - - emitDisplacementField(DispForReloc, DispVal, PCAdj); + emitDisplacementField(DispForReloc, DispVal, PCAdj, true); } else { unsigned BaseRegNo = getX86RegNum(BaseReg); if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) { @@ -391,7 +413,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, } else { // Emit the most general non-SIB encoding: [REG+disp32] MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo)); - emitDisplacementField(DispForReloc, DispVal, PCAdj); + emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel); } } @@ -427,13 +449,13 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, unsigned SS = SSTable[Scale.getImm()]; if (BaseReg == 0) { - // Handle the SIB byte for the case where there is no base. The - // displacement has already been output. + // Handle the SIB byte for the case where there is no base, see Intel + // Manual 2A, table 2-7. The displacement has already been output. unsigned IndexRegNo; if (IndexReg.getReg()) IndexRegNo = getX86RegNum(IndexReg.getReg()); - else - IndexRegNo = 4; // For example [ESP+1*<noreg>+4] + else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5) + IndexRegNo = 4; emitSIBByte(SS, IndexRegNo, 5); } else { unsigned BaseRegNo = getX86RegNum(BaseReg); @@ -449,21 +471,23 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, if (ForceDisp8) { emitConstant(DispVal, 1); } else if (DispVal != 0 || ForceDisp32) { - emitDisplacementField(DispForReloc, DispVal, PCAdj); + emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel); } } } template<class CodeEmitter> -void Emitter<CodeEmitter>::emitInstruction( - const MachineInstr &MI, - const TargetInstrDesc *Desc) { - DOUT << MI; +void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, + const TargetInstrDesc *Desc) { + DEBUG(errs() << MI); + + MCE.processDebugLoc(MI.getDebugLoc(), true); unsigned Opcode = Desc->Opcode; // Emit the lock opcode prefix as needed. - if (Desc->TSFlags & X86II::LOCK) MCE.emitByte(0xF0); + if (Desc->TSFlags & X86II::LOCK) + MCE.emitByte(0xF0); // Emit segment override opcode prefix as needed. switch (Desc->TSFlags & X86II::SegOvrMask) { @@ -473,18 +497,21 @@ void Emitter<CodeEmitter>::emitInstruction( case X86II::GS: MCE.emitByte(0x65); break; - default: assert(0 && "Invalid segment!"); + default: llvm_unreachable("Invalid segment!"); case 0: break; // No segment override! } // Emit the repeat opcode prefix as needed. - if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP) MCE.emitByte(0xF3); + if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP) + MCE.emitByte(0xF3); // Emit the operand size opcode prefix as needed. - if (Desc->TSFlags & X86II::OpSize) MCE.emitByte(0x66); + if (Desc->TSFlags & X86II::OpSize) + MCE.emitByte(0x66); // Emit the address size opcode prefix as needed. - if (Desc->TSFlags & X86II::AdSize) MCE.emitByte(0x67); + if (Desc->TSFlags & X86II::AdSize) + MCE.emitByte(0x67); bool Need0FPrefix = false; switch (Desc->TSFlags & X86II::Op0Mask) { @@ -493,6 +520,10 @@ void Emitter<CodeEmitter>::emitInstruction( case X86II::TA: // 0F 3A Need0FPrefix = true; break; + case X86II::TF: // F2 0F 38 + MCE.emitByte(0xF2); + Need0FPrefix = true; + break; case X86II::REP: break; // already handled. 
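// The SIB byte emitted above shares the ModRM 2-3-3 layout; a sketch, with
// sibByte standing in for emitSIBByte:
static unsigned char sibByte(unsigned SS, unsigned Index, unsigned Base) {
  return (SS << 6) | ((Index & 7) << 3) | (Base & 7); // scale, index, base
}
// SS encodes scale {1,2,4,8} as {0,1,2,3} (the SSTable above); index=4 means
// "no index register", and base=5 with mod=0 means "no base, disp32 follows",
// the two special rows of Intel Manual 2A, table 2-7.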
case X86II::XS: // F3 0F MCE.emitByte(0xF3); @@ -508,14 +539,13 @@ void Emitter<CodeEmitter>::emitInstruction( (((Desc->TSFlags & X86II::Op0Mask)-X86II::D8) >> X86II::Op0Shift)); break; // Two-byte opcode prefix - default: assert(0 && "Invalid prefix!"); + default: llvm_unreachable("Invalid prefix!"); case 0: break; // No prefix! } + // Handle REX prefix. if (Is64BitMode) { - // REX prefix - unsigned REX = X86InstrInfo::determineREX(MI); - if (REX) + if (unsigned REX = X86InstrInfo::determineREX(MI)) MCE.emitByte(0x40 | REX); } @@ -524,7 +554,8 @@ void Emitter<CodeEmitter>::emitInstruction( MCE.emitByte(0x0F); switch (Desc->TSFlags & X86II::Op0Mask) { - case X86II::T8: // 0F 38 + case X86II::TF: // F2 0F 38 + case X86II::T8: // 0F 38 MCE.emitByte(0x38); break; case X86II::TA: // 0F 3A @@ -543,29 +574,29 @@ void Emitter<CodeEmitter>::emitInstruction( unsigned char BaseOpcode = II->getBaseOpcodeFor(Desc); switch (Desc->TSFlags & X86II::FormMask) { - default: assert(0 && "Unknown FormMask value in X86 MachineCodeEmitter!"); + default: + llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!"); case X86II::Pseudo: // Remember the current PC offset, this is the PIC relocation // base address. switch (Opcode) { default: - assert(0 && "psuedo instructions should be removed before code emission"); + llvm_unreachable("pseudo instructions should be removed before code" + " emission"); break; - case TargetInstrInfo::INLINEASM: { + case TargetInstrInfo::INLINEASM: // We allow inline assembler nodes with empty bodies - they can // implicitly define registers, which is ok for JIT. - if (MI.getOperand(0).getSymbolName()[0]) { - assert(0 && "JIT does not support inline asm!\n"); - abort(); - } + if (MI.getOperand(0).getSymbolName()[0]) + llvm_report_error("JIT does not support inline asm!"); break; - } case TargetInstrInfo::DBG_LABEL: case TargetInstrInfo::EH_LABEL: + case TargetInstrInfo::GC_LABEL: MCE.emitLabel(MI.getOperand(0).getImm()); break; case TargetInstrInfo::IMPLICIT_DEF: - case TargetInstrInfo::DECLARE: + case TargetInstrInfo::KILL: case X86::DWARF_LOC: case X86::FP_REG_KILL: break;
- intptr_t Imm = (intptr_t)MO.getImm(); - Imm = Imm - MCE.getCurrentPCValue() - 4; - emitConstant(Imm, X86InstrInfo::sizeOfImm(Desc)); - } else - emitConstant(MO.getImm(), X86InstrInfo::sizeOfImm(Desc)); - } else { - assert(0 && "Unknown RawFrm operand!"); - } + if (CurOp == NumOps) + break; + + const MachineOperand &MO = MI.getOperand(CurOp++); + + DEBUG(errs() << "RawFrm CurOp " << CurOp << "\n"); + DEBUG(errs() << "isMBB " << MO.isMBB() << "\n"); + DEBUG(errs() << "isGlobal " << MO.isGlobal() << "\n"); + DEBUG(errs() << "isSymbol " << MO.isSymbol() << "\n"); + DEBUG(errs() << "isImm " << MO.isImm() << "\n"); + + if (MO.isMBB()) { + emitPCRelativeBlockAddress(MO.getMBB()); + break; } + + if (MO.isGlobal()) { + // Assume undefined functions may be outside the Small codespace. + bool NeedStub = + (Is64BitMode && + (TM.getCodeModel() == CodeModel::Large || + TM.getSubtarget<X86Subtarget>().isTargetDarwin())) || + Opcode == X86::TAILJMPd; + emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word, + MO.getOffset(), 0, NeedStub); + break; + } + + if (MO.isSymbol()) { + emitExternalSymbolAddress(MO.getSymbolName(), X86::reloc_pcrel_word); + break; + } + + assert(MO.isImm() && "Unknown RawFrm operand!"); + if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) { + // Fix up immediate operand for pc relative calls. + intptr_t Imm = (intptr_t)MO.getImm(); + Imm = Imm - MCE.getCurrentPCValue() - 4; + emitConstant(Imm, X86InstrInfo::sizeOfImm(Desc)); + } else + emitConstant(MO.getImm(), X86InstrInfo::sizeOfImm(Desc)); break; - - case X86II::AddRegFrm: + } + + case X86II::AddRegFrm: { MCE.emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++).getReg())); - if (CurOp != NumOps) { - const MachineOperand &MO1 = MI.getOperand(CurOp++); - unsigned Size = X86InstrInfo::sizeOfImm(Desc); - if (MO1.isImm()) - emitConstant(MO1.getImm(), Size); - else { - unsigned rt = Is64BitMode ? X86::reloc_pcrel_word - : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); - // This should not occur on Darwin for relocatable objects. - if (Opcode == X86::MOV64ri) - rt = X86::reloc_absolute_dword; // FIXME: add X86II flag? - if (MO1.isGlobal()) { - bool NeedStub = isa<Function>(MO1.getGlobal()); - bool Indirect = gvNeedsNonLazyPtr(MO1.getGlobal()); - emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, - NeedStub, Indirect); - } else if (MO1.isSymbol()) - emitExternalSymbolAddress(MO1.getSymbolName(), rt); - else if (MO1.isCPI()) - emitConstPoolAddress(MO1.getIndex(), rt); - else if (MO1.isJTI()) - emitJumpTableAddress(MO1.getIndex(), rt); - } + if (CurOp == NumOps) + break; + + const MachineOperand &MO1 = MI.getOperand(CurOp++); + unsigned Size = X86InstrInfo::sizeOfImm(Desc); + if (MO1.isImm()) { + emitConstant(MO1.getImm(), Size); + break; } + + unsigned rt = Is64BitMode ? X86::reloc_pcrel_word + : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); + if (Opcode == X86::MOV64ri64i32) + rt = X86::reloc_absolute_word; // FIXME: add X86II flag? + // This should not occur on Darwin for relocatable objects. + if (Opcode == X86::MOV64ri) + rt = X86::reloc_absolute_dword; // FIXME: add X86II flag? 
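// The CALLpcrel32 fixup above works because rel32 is measured from the end
// of the 4-byte immediate, i.e. from the next instruction. A sketch with an
// assumed layout: if the immediate is emitted at PC = 0x1000 and the target
// is 0x1234, the encoded field is 0x1234 - 0x1000 - 4 = 0x230.
static int32_t pcRelImm32(intptr_t Target, intptr_t ImmStart) {
  return (int32_t)(Target - ImmStart - 4); // 4 == width of the immediate
}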
+ if (MO1.isGlobal()) { + bool NeedStub = isa<Function>(MO1.getGlobal()); + bool Indirect = gvNeedsNonLazyPtr(MO1, TM); + emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, + NeedStub, Indirect); + } else if (MO1.isSymbol()) + emitExternalSymbolAddress(MO1.getSymbolName(), rt); + else if (MO1.isCPI()) + emitConstPoolAddress(MO1.getIndex(), rt); + else if (MO1.isJTI()) + emitJumpTableAddress(MO1.getIndex(), rt); break; + } case X86II::MRMDestReg: { MCE.emitByte(BaseOpcode); @@ -656,7 +700,8 @@ void Emitter<CodeEmitter>::emitInstruction( getX86RegNum(MI.getOperand(CurOp+1).getReg())); CurOp += 2; if (CurOp != NumOps) - emitConstant(MI.getOperand(CurOp++).getImm(), X86InstrInfo::sizeOfImm(Desc)); + emitConstant(MI.getOperand(CurOp++).getImm(), + X86InstrInfo::sizeOfImm(Desc)); break; } case X86II::MRMDestMem: { @@ -666,7 +711,8 @@ void Emitter<CodeEmitter>::emitInstruction( .getReg())); CurOp += X86AddrNumOperands + 1; if (CurOp != NumOps) - emitConstant(MI.getOperand(CurOp++).getImm(), X86InstrInfo::sizeOfImm(Desc)); + emitConstant(MI.getOperand(CurOp++).getImm(), + X86InstrInfo::sizeOfImm(Desc)); break; } @@ -729,29 +775,31 @@ void Emitter<CodeEmitter>::emitInstruction( (Desc->TSFlags & X86II::FormMask)-X86II::MRM0r); } - if (CurOp != NumOps) { - const MachineOperand &MO1 = MI.getOperand(CurOp++); - unsigned Size = X86InstrInfo::sizeOfImm(Desc); - if (MO1.isImm()) - emitConstant(MO1.getImm(), Size); - else { - unsigned rt = Is64BitMode ? X86::reloc_pcrel_word - : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); - if (Opcode == X86::MOV64ri32) - rt = X86::reloc_absolute_word; // FIXME: add X86II flag? - if (MO1.isGlobal()) { - bool NeedStub = isa<Function>(MO1.getGlobal()); - bool Indirect = gvNeedsNonLazyPtr(MO1.getGlobal()); - emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, - NeedStub, Indirect); - } else if (MO1.isSymbol()) - emitExternalSymbolAddress(MO1.getSymbolName(), rt); - else if (MO1.isCPI()) - emitConstPoolAddress(MO1.getIndex(), rt); - else if (MO1.isJTI()) - emitJumpTableAddress(MO1.getIndex(), rt); - } + if (CurOp == NumOps) + break; + + const MachineOperand &MO1 = MI.getOperand(CurOp++); + unsigned Size = X86InstrInfo::sizeOfImm(Desc); + if (MO1.isImm()) { + emitConstant(MO1.getImm(), Size); + break; } + + unsigned rt = Is64BitMode ? X86::reloc_pcrel_word + : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); + if (Opcode == X86::MOV64ri32) + rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag? + if (MO1.isGlobal()) { + bool NeedStub = isa<Function>(MO1.getGlobal()); + bool Indirect = gvNeedsNonLazyPtr(MO1, TM); + emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, + NeedStub, Indirect); + } else if (MO1.isSymbol()) + emitExternalSymbolAddress(MO1.getSymbolName(), rt); + else if (MO1.isCPI()) + emitConstPoolAddress(MO1.getIndex(), rt); + else if (MO1.isJTI()) + emitJumpTableAddress(MO1.getIndex(), rt); break; } @@ -768,29 +816,31 @@ void Emitter<CodeEmitter>::emitInstruction( PCAdj); CurOp += X86AddrNumOperands; - if (CurOp != NumOps) { - const MachineOperand &MO = MI.getOperand(CurOp++); - unsigned Size = X86InstrInfo::sizeOfImm(Desc); - if (MO.isImm()) - emitConstant(MO.getImm(), Size); - else { - unsigned rt = Is64BitMode ? X86::reloc_pcrel_word - : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); - if (Opcode == X86::MOV64mi32) - rt = X86::reloc_absolute_word; // FIXME: add X86II flag? 
- if (MO.isGlobal()) { - bool NeedStub = isa<Function>(MO.getGlobal()); - bool Indirect = gvNeedsNonLazyPtr(MO.getGlobal()); - emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0, - NeedStub, Indirect); - } else if (MO.isSymbol()) - emitExternalSymbolAddress(MO.getSymbolName(), rt); - else if (MO.isCPI()) - emitConstPoolAddress(MO.getIndex(), rt); - else if (MO.isJTI()) - emitJumpTableAddress(MO.getIndex(), rt); - } + if (CurOp == NumOps) + break; + + const MachineOperand &MO = MI.getOperand(CurOp++); + unsigned Size = X86InstrInfo::sizeOfImm(Desc); + if (MO.isImm()) { + emitConstant(MO.getImm(), Size); + break; } + + unsigned rt = Is64BitMode ? X86::reloc_pcrel_word + : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); + if (Opcode == X86::MOV64mi32) + rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag? + if (MO.isGlobal()) { + bool NeedStub = isa<Function>(MO.getGlobal()); + bool Indirect = gvNeedsNonLazyPtr(MO, TM); + emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0, + NeedStub, Indirect); + } else if (MO.isSymbol()) + emitExternalSymbolAddress(MO.getSymbolName(), rt); + else if (MO.isCPI()) + emitConstPoolAddress(MO.getIndex(), rt); + else if (MO.isJTI()) + emitJumpTableAddress(MO.getIndex(), rt); break; } @@ -804,10 +854,264 @@ void Emitter<CodeEmitter>::emitInstruction( } if (!Desc->isVariadic() && CurOp != NumOps) { - cerr << "Cannot encode: "; - MI.dump(); - cerr << '\n'; - abort(); +#ifndef NDEBUG + errs() << "Cannot encode all operands of: " << MI << "\n"; +#endif + llvm_unreachable(0); + } + + MCE.processDebugLoc(MI.getDebugLoc(), false); +} + +// Adapt the Emitter / CodeEmitter interfaces to MCCodeEmitter. +// +// FIXME: This is a total hack designed to allow work on llvm-mc to proceed +// without being blocked on various cleanups needed to support a clean interface +// to instruction encoding. +// +// Look away! 
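// One more encoding note before the adapter below: the REX byte emitted
// earlier as 0x40 | REX carries the canonical W/R/X/B bits. A sketch of the
// packing (the helper is illustrative; determineREX computes the real value):
static unsigned char rexByte(bool W, bool R, bool X, bool B) {
  return 0x40 | (W << 3) | (R << 2) | (X << 1) | (B << 0);
}
// e.g. rexByte(1, 0, 0, 0) == 0x48, the REX.W prefix that widens most ALU
// ops to 64 bits; R, X and B extend the reg, index and base/rm fields so all
// sixteen registers are reachable.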
+ +#include "llvm/DerivedTypes.h" + +namespace { +class MCSingleInstructionCodeEmitter : public MachineCodeEmitter { + uint8_t Data[256]; + +public: + MCSingleInstructionCodeEmitter() { reset(); } + + void reset() { + BufferBegin = Data; + BufferEnd = array_endof(Data); + CurBufferPtr = Data; + } + + StringRef str() { + return StringRef(reinterpret_cast<char*>(BufferBegin), + CurBufferPtr - BufferBegin); + } + + virtual void startFunction(MachineFunction &F) {} + virtual bool finishFunction(MachineFunction &F) { return false; } + virtual void emitLabel(uint64_t LabelID) {} + virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {} + virtual bool earlyResolveAddresses() const { return false; } + virtual void addRelocation(const MachineRelocation &MR) { } + virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const { + return 0; + } + virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const { + return 0; + } + virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const { + return 0; + } + virtual uintptr_t getLabelAddress(uint64_t LabelID) const { + return 0; + } + virtual void setModuleInfo(MachineModuleInfo* Info) {} +}; + +class X86MCCodeEmitter : public MCCodeEmitter { + X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT + void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT + +private: + X86TargetMachine &TM; + llvm::Function *DummyF; + TargetData *DummyTD; + mutable llvm::MachineFunction *DummyMF; + llvm::MachineBasicBlock *DummyMBB; + + MCSingleInstructionCodeEmitter *InstrEmitter; + Emitter<MachineCodeEmitter> *Emit; + +public: + X86MCCodeEmitter(X86TargetMachine &_TM) : TM(_TM) { + // Verily, thou shouldst avert thine eyes. + const llvm::FunctionType *FTy = + FunctionType::get(llvm::Type::getVoidTy(getGlobalContext()), false); + DummyF = Function::Create(FTy, GlobalValue::InternalLinkage); + DummyTD = new TargetData(""); + DummyMF = new MachineFunction(DummyF, TM); + DummyMBB = DummyMF->CreateMachineBasicBlock(); + + InstrEmitter = new MCSingleInstructionCodeEmitter(); + Emit = new Emitter<MachineCodeEmitter>(TM, *InstrEmitter, + *TM.getInstrInfo(), + *DummyTD, false); + } + ~X86MCCodeEmitter() { + delete Emit; + delete InstrEmitter; + delete DummyMF; + delete DummyF; + } + + bool AddRegToInstr(const MCInst &MI, MachineInstr *Instr, + unsigned Start) const { + if (Start + 1 > MI.getNumOperands()) + return false; + + const MCOperand &Op = MI.getOperand(Start); + if (!Op.isReg()) return false; + + Instr->addOperand(MachineOperand::CreateReg(Op.getReg(), false)); + return true; + } + + bool AddImmToInstr(const MCInst &MI, MachineInstr *Instr, + unsigned Start) const { + if (Start + 1 > MI.getNumOperands()) + return false; + + const MCOperand &Op = MI.getOperand(Start); + if (Op.isImm()) { + Instr->addOperand(MachineOperand::CreateImm(Op.getImm())); + return true; + } + if (!Op.isExpr()) + return false; + + const MCExpr *Expr = Op.getExpr(); + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) { + Instr->addOperand(MachineOperand::CreateImm(CE->getValue())); + return true; + } + + // FIXME: Relocation / fixup. 
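// Concretely: until fixups are wired through, a symbolic MCExpr operand is
// lowered to a zero immediate of the right width, so the bytes produced by
// this adapter are only correct once a later relocation step patches them.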
+ Instr->addOperand(MachineOperand::CreateImm(0)); + return true; + } + + bool AddLMemToInstr(const MCInst &MI, MachineInstr *Instr, + unsigned Start) const { + return (AddRegToInstr(MI, Instr, Start + 0) && + AddImmToInstr(MI, Instr, Start + 1) && + AddRegToInstr(MI, Instr, Start + 2) && + AddImmToInstr(MI, Instr, Start + 3)); + } + + bool AddMemToInstr(const MCInst &MI, MachineInstr *Instr, + unsigned Start) const { + return (AddRegToInstr(MI, Instr, Start + 0) && + AddImmToInstr(MI, Instr, Start + 1) && + AddRegToInstr(MI, Instr, Start + 2) && + AddImmToInstr(MI, Instr, Start + 3) && + AddRegToInstr(MI, Instr, Start + 4)); + } + + void EncodeInstruction(const MCInst &MI, raw_ostream &OS) const { + // Don't look yet! + + // Convert the MCInst to a MachineInstr so we can (ab)use the regular + // emitter. + const X86InstrInfo &II = *TM.getInstrInfo(); + const TargetInstrDesc &Desc = II.get(MI.getOpcode()); + MachineInstr *Instr = DummyMF->CreateMachineInstr(Desc, DebugLoc()); + DummyMBB->push_back(Instr); + + unsigned Opcode = MI.getOpcode(); + unsigned NumOps = MI.getNumOperands(); + unsigned CurOp = 0; + if (NumOps > 1 && Desc.getOperandConstraint(1, TOI::TIED_TO) != -1) { + Instr->addOperand(MachineOperand::CreateReg(0, false)); + ++CurOp; + } else if (NumOps > 2 && + Desc.getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0) + // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32 + --NumOps; + + bool OK = true; + switch (Desc.TSFlags & X86II::FormMask) { + case X86II::MRMDestReg: + case X86II::MRMSrcReg: + // Matching doesn't fill this in completely, we have to choose operand 0 + // for a tied register. + OK &= AddRegToInstr(MI, Instr, 0); CurOp++; + OK &= AddRegToInstr(MI, Instr, CurOp++); + if (CurOp < NumOps) + OK &= AddImmToInstr(MI, Instr, CurOp); + break; + + case X86II::RawFrm: + if (CurOp < NumOps) { + // Hack to make branches work. + if (!(Desc.TSFlags & X86II::ImmMask) && + MI.getOperand(0).isExpr() && + isa<MCSymbolRefExpr>(MI.getOperand(0).getExpr())) + Instr->addOperand(MachineOperand::CreateMBB(DummyMBB)); + else + OK &= AddImmToInstr(MI, Instr, CurOp); + } + break; + + case X86II::AddRegFrm: + OK &= AddRegToInstr(MI, Instr, CurOp++); + if (CurOp < NumOps) + OK &= AddImmToInstr(MI, Instr, CurOp); + break; + + case X86II::MRM0r: case X86II::MRM1r: + case X86II::MRM2r: case X86II::MRM3r: + case X86II::MRM4r: case X86II::MRM5r: + case X86II::MRM6r: case X86II::MRM7r: + // Matching doesn't fill this in completely, we have to choose operand 0 + // for a tied register. 
+ OK &= AddRegToInstr(MI, Instr, 0); CurOp++; + if (CurOp < NumOps) + OK &= AddImmToInstr(MI, Instr, CurOp); + break; + + case X86II::MRM0m: case X86II::MRM1m: + case X86II::MRM2m: case X86II::MRM3m: + case X86II::MRM4m: case X86II::MRM5m: + case X86II::MRM6m: case X86II::MRM7m: + OK &= AddMemToInstr(MI, Instr, CurOp); CurOp += 5; + if (CurOp < NumOps) + OK &= AddImmToInstr(MI, Instr, CurOp); + break; + + case X86II::MRMSrcMem: + OK &= AddRegToInstr(MI, Instr, CurOp++); + if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || + Opcode == X86::LEA16r || Opcode == X86::LEA32r) + OK &= AddLMemToInstr(MI, Instr, CurOp); + else + OK &= AddMemToInstr(MI, Instr, CurOp); + break; + + case X86II::MRMDestMem: + OK &= AddMemToInstr(MI, Instr, CurOp); CurOp += 5; + OK &= AddRegToInstr(MI, Instr, CurOp); + break; + + default: + case X86II::MRMInitReg: + case X86II::Pseudo: + OK = false; + break; + } + + if (!OK) { + errs() << "couldn't convert inst '"; + MI.dump(); + errs() << "' to machine instr:\n"; + Instr->dump(); + } + + InstrEmitter->reset(); + if (OK) + Emit->emitInstruction(*Instr, &Desc); + OS << InstrEmitter->str(); + + Instr->eraseFromParent(); } +}; } +// Ok, now you can look. +MCCodeEmitter *llvm::createX86MCCodeEmitter(const Target &, + TargetMachine &TM) { + return new X86MCCodeEmitter(static_cast<X86TargetMachine&>(TM)); +} diff --git a/lib/Target/X86/X86CompilationCallback_Win64.asm b/lib/Target/X86/X86CompilationCallback_Win64.asm index 8002f98..f321778 100644 --- a/lib/Target/X86/X86CompilationCallback_Win64.asm +++ b/lib/Target/X86/X86CompilationCallback_Win64.asm @@ -17,10 +17,11 @@ extrn X86CompilationCallback2: PROC X86CompilationCallback proc push rbp - ; Save RSP + ; Save RSP. mov rbp, rsp ; Save all int arg registers + ; WARNING: We cannot use register spill area - we're generating stubs by hand! push rcx push rdx push r8 @@ -29,27 +30,27 @@ X86CompilationCallback proc ; Align stack on 16-byte boundary. and rsp, -16 - ; Save all XMM arg registers - sub rsp, 64 - movaps [rsp], xmm0 - movaps [rsp+16], xmm1 - movaps [rsp+32], xmm2 - movaps [rsp+48], xmm3 + ; Save all XMM arg registers. Also allocate reg spill area. + sub rsp, 96 + movaps [rsp +32], xmm0 + movaps [rsp+16+32], xmm1 + movaps [rsp+32+32], xmm2 + movaps [rsp+48+32], xmm3 ; JIT callee - ; Pass prev frame and return address + ; Pass prev frame and return address. mov rcx, rbp mov rdx, qword ptr [rbp+8] call X86CompilationCallback2 - ; Restore all XMM arg registers - movaps xmm3, [rsp+48] - movaps xmm2, [rsp+32] - movaps xmm1, [rsp+16] - movaps xmm0, [rsp] + ; Restore all XMM arg registers. + movaps xmm3, [rsp+48+32] + movaps xmm2, [rsp+32+32] + movaps xmm1, [rsp+16+32] + movaps xmm0, [rsp +32] - ; Restore RSP + ; Restore RSP. mov rsp, rbp ; Restore all int arg registers @@ -59,7 +60,7 @@ X86CompilationCallback proc pop rdx pop rcx - ; Restore RBP + ; Restore RBP.
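; Note on the arithmetic above: the 96-byte area is 64 bytes of XMM saves
; sitting on top of the 32-byte register parameter ("shadow") area that the
; Win64 ABI obliges the caller to reserve for X86CompilationCallback2, which
; is why every XMM offset carries the +32 bias.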
pop rbp ret X86CompilationCallback endp diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp index 912ab0e..1597d2b3 100644 --- a/lib/Target/X86/X86ELFWriterInfo.cpp +++ b/lib/Target/X86/X86ELFWriterInfo.cpp @@ -14,6 +14,7 @@ #include "X86ELFWriterInfo.h" #include "X86Relocations.h" #include "llvm/Function.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" @@ -38,11 +39,13 @@ unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const { return R_X86_64_PC32; case X86::reloc_absolute_word: return R_X86_64_32; + case X86::reloc_absolute_word_sext: + return R_X86_64_32S; case X86::reloc_absolute_dword: return R_X86_64_64; case X86::reloc_picrel_word: default: - assert(0 && "unknown relocation type"); + llvm_unreachable("unknown x86_64 machine relocation type"); } } else { switch(MachineRelTy) { @@ -50,23 +53,101 @@ unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const { return R_386_PC32; case X86::reloc_absolute_word: return R_386_32; + case X86::reloc_absolute_word_sext: case X86::reloc_absolute_dword: case X86::reloc_picrel_word: default: - assert(0 && "unknown relocation type"); + llvm_unreachable("unknown x86 machine relocation type"); } } return 0; } -long int X86ELFWriterInfo::getAddendForRelTy(unsigned RelTy) const { +long int X86ELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy, + long int Modifier) const { if (is64Bit) { switch(RelTy) { - case R_X86_64_PC32: return -4; - break; + case R_X86_64_PC32: return Modifier - 4; + case R_X86_64_32: + case R_X86_64_32S: + case R_X86_64_64: + return Modifier; default: - assert(0 && "unknown x86 relocation type"); + llvm_unreachable("unknown x86_64 relocation type"); + } + } else { + switch(RelTy) { + case R_386_PC32: return Modifier - 4; + case R_386_32: return Modifier; + default: + llvm_unreachable("unknown x86 relocation type"); + } + } + return 0; +} + +unsigned X86ELFWriterInfo::getRelocationTySize(unsigned RelTy) const { + if (is64Bit) { + switch(RelTy) { + case R_X86_64_PC32: + case R_X86_64_32: + case R_X86_64_32S: + return 32; + case R_X86_64_64: + return 64; + default: + llvm_unreachable("unknown x86_64 relocation type"); + } + } else { + switch(RelTy) { + case R_386_PC32: + case R_386_32: + return 32; + default: + llvm_unreachable("unknown x86 relocation type"); } } return 0; } + +bool X86ELFWriterInfo::isPCRelativeRel(unsigned RelTy) const { + if (is64Bit) { + switch(RelTy) { + case R_X86_64_PC32: + return true; + case R_X86_64_32: + case R_X86_64_32S: + case R_X86_64_64: + return false; + default: + llvm_unreachable("unknown x86_64 relocation type"); + } + } else { + switch(RelTy) { + case R_386_PC32: + return true; + case R_386_32: + return false; + default: + llvm_unreachable("unknown x86 relocation type"); + } + } + return 0; +} + +unsigned X86ELFWriterInfo::getAbsoluteLabelMachineRelTy() const { + return is64Bit ? 
+ X86::reloc_absolute_dword : X86::reloc_absolute_word; +} + +long int X86ELFWriterInfo::computeRelocation(unsigned SymOffset, + unsigned RelOffset, + unsigned RelTy) const { + + if (RelTy == R_X86_64_PC32 || RelTy == R_386_PC32) + return SymOffset - (RelOffset + 4); + else + llvm_unreachable("computeRelocation unknown for this relocation type"); + + return 0; +} diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h index 2ba1a0b..342e6e6 100644 --- a/lib/Target/X86/X86ELFWriterInfo.h +++ b/lib/Target/X86/X86ELFWriterInfo.h @@ -49,9 +49,26 @@ namespace llvm { /// ELF relocation entry. virtual bool hasRelocationAddend() const { return is64Bit ? true : false; } - /// getAddendForRelTy - Gets the addend value for an ELF relocation entry - /// based on the target relocation type - virtual long int getAddendForRelTy(unsigned RelTy) const; + /// getDefaultAddendForRelTy - Gets the default addend value for a + /// relocation entry based on the target ELF relocation type. + virtual long int getDefaultAddendForRelTy(unsigned RelTy, + long int Modifier = 0) const; + + /// getRelocationTySize - Returns the size of the relocatable field in bits + virtual unsigned getRelocationTySize(unsigned RelTy) const; + + /// isPCRelativeRel - True if the relocation type is pc relative + virtual bool isPCRelativeRel(unsigned RelTy) const; + + /// getAbsoluteLabelMachineRelTy - Returns the machine relocation type used + /// to reference a jumptable. + virtual unsigned getAbsoluteLabelMachineRelTy() const; + + /// computeRelocation - Some relocatable fields could be relocated + /// directly, avoiding the relocation symbol emission; compute the + /// final relocation value for this symbol. + virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset, + unsigned RelTy) const; }; } // end llvm namespace diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index b336d78..3401df0 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/CallSite.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; @@ -78,19 +79,20 @@ public: #include "X86GenFastISel.inc" private: - bool X86FastEmitCompare(Value *LHS, Value *RHS, MVT VT); + bool X86FastEmitCompare(Value *LHS, Value *RHS, EVT VT); - bool X86FastEmitLoad(MVT VT, const X86AddressMode &AM, unsigned &RR); + bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR); - bool X86FastEmitStore(MVT VT, Value *Val, + bool X86FastEmitStore(EVT VT, Value *Val, const X86AddressMode &AM); - bool X86FastEmitStore(MVT VT, unsigned Val, + bool X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM); - bool X86FastEmitExtend(ISD::NodeType Opc, MVT DstVT, unsigned Src, MVT SrcVT, + bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT, unsigned &ResultReg); - bool X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall); + bool X86SelectAddress(Value *V, X86AddressMode &AM); + bool X86SelectCallAddress(Value *V, X86AddressMode &AM); bool X86SelectLoad(Instruction *I); @@ -116,7 +118,7 @@ private: bool X86VisitIntrinsicCall(IntrinsicInst &I); bool X86SelectCall(Instruction *I); - CCAssignFn *CCAssignFnForCall(unsigned CC, bool isTailCall = false); + CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isTailCall = false); const X86InstrInfo
*getInstrInfo() const { return getTargetMachine()->getInstrInfo(); @@ -131,17 +133,17 @@ private: /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is /// computed in an SSE register, not on the X87 floating point stack. - bool isScalarFPTypeInSSEReg(MVT VT) const { + bool isScalarFPTypeInSSEReg(EVT VT) const { return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2 (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1 } - bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false); + bool isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1 = false); }; } // end anonymous namespace. -bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) { +bool X86FastISel::isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1) { VT = TLI.getValueType(Ty, /*HandleUnknown=*/true); if (VT == MVT::Other || !VT.isSimple()) // Unhandled type. Halt "fast" selection and bail. @@ -167,7 +169,8 @@ bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) { /// CCAssignFnForCall - Selects the correct CCAssignFn for a given calling /// convention. -CCAssignFn *X86FastISel::CCAssignFnForCall(unsigned CC, bool isTaillCall) { +CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC, + bool isTailCall) { if (Subtarget->is64Bit()) { if (Subtarget->isTargetWin64()) return CC_X86_Win64_C; @@ -186,13 +189,14 @@ CCAssignFn *X86FastISel::CCAssignFnForCall(unsigned CC, bool isTaillCall) { /// X86FastEmitLoad - Emit a machine instruction to load a value of type VT. /// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV. /// Return true and the result register by reference if it is possible. -bool X86FastISel::X86FastEmitLoad(MVT VT, const X86AddressMode &AM, +bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &ResultReg) { // Get opcode and regclass of the output for the given load instruction. unsigned Opc = 0; const TargetRegisterClass *RC = NULL; - switch (VT.getSimpleVT()) { + switch (VT.getSimpleVT().SimpleTy) { default: return false; + case MVT::i1: case MVT::i8: Opc = X86::MOV8rm; RC = X86::GR8RegisterClass; @@ -243,13 +247,21 @@ bool X86FastISel::X86FastEmitLoad(MVT VT, const X86AddressMode &AM, /// and a displacement offset, or a GlobalAddress, /// i.e. V. Return true if it is possible. bool -X86FastISel::X86FastEmitStore(MVT VT, unsigned Val, +X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) { // Get opcode and regclass of the output for the given store instruction. unsigned Opc = 0; - switch (VT.getSimpleVT()) { + switch (VT.getSimpleVT().SimpleTy) { case MVT::f80: // No f80 support yet. default: return false; + case MVT::i1: { + // Mask out all but lowest bit. + unsigned AndResult = createResultReg(X86::GR8RegisterClass); + BuildMI(MBB, DL, + TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1); + Val = AndResult; + } + // FALLTHROUGH, handling i1 as i8. case MVT::i8: Opc = X86::MOV8mr; break; case MVT::i16: Opc = X86::MOV16mr; break; case MVT::i32: Opc = X86::MOV32mr; break; @@ -266,17 +278,19 @@ X86FastISel::X86FastEmitStore(MVT VT, unsigned Val, return true; } -bool X86FastISel::X86FastEmitStore(MVT VT, Value *Val, +bool X86FastISel::X86FastEmitStore(EVT VT, Value *Val, const X86AddressMode &AM) { // Handle 'null' like i32/i64 0. if (isa<ConstantPointerNull>(Val)) - Val = Constant::getNullValue(TD.getIntPtrType()); + Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext())); // If this is a store of a simple constant, fold the constant into the store.
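// i1 has no memory form of its own: the register path above first masks the
// value to its low bit with AND8ri, and the constant path below falls through
// to the i8 case with the zero-extended value, so memory always sees exactly
// 0 or 1.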
if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { unsigned Opc = 0; - switch (VT.getSimpleVT()) { + bool Signed = true; + switch (VT.getSimpleVT().SimpleTy) { default: break; + case MVT::i1: Signed = false; // FALLTHROUGH to handle as i8. case MVT::i8: Opc = X86::MOV8mi; break; case MVT::i16: Opc = X86::MOV16mi; break; case MVT::i32: Opc = X86::MOV32mi; break; @@ -289,7 +303,8 @@ bool X86FastISel::X86FastEmitStore(MVT VT, Value *Val, if (Opc) { addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM) - .addImm(CI->getSExtValue()); + .addImm(Signed ? CI->getSExtValue() : + CI->getZExtValue()); return true; } } @@ -304,8 +319,8 @@ bool X86FastISel::X86FastEmitStore(MVT VT, Value *Val, /// X86FastEmitExtend - Emit a machine instruction to extend a value Src of /// type SrcVT to type DstVT using the specified extension opcode Opc (e.g. /// ISD::SIGN_EXTEND). -bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, MVT DstVT, - unsigned Src, MVT SrcVT, +bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, + unsigned Src, EVT SrcVT, unsigned &ResultReg) { unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src); @@ -318,7 +333,7 @@ bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, MVT DstVT, /// X86SelectAddress - Attempt to fill in an address from the given value. /// -bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) { +bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { User *U = NULL; unsigned Opcode = Instruction::UserOp1; if (Instruction *I = dyn_cast<Instruction>(V)) { @@ -333,22 +348,21 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) { default: break; case Instruction::BitCast: // Look past bitcasts. - return X86SelectAddress(U->getOperand(0), AM, isCall); + return X86SelectAddress(U->getOperand(0), AM); case Instruction::IntToPtr: // Look past no-op inttoptrs. if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) - return X86SelectAddress(U->getOperand(0), AM, isCall); + return X86SelectAddress(U->getOperand(0), AM); break; case Instruction::PtrToInt: // Look past no-op ptrtoints. if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) - return X86SelectAddress(U->getOperand(0), AM, isCall); + return X86SelectAddress(U->getOperand(0), AM); break; case Instruction::Alloca: { - if (isCall) break; // Do static allocas. const AllocaInst *A = cast<AllocaInst>(V); DenseMap<const AllocaInst*, int>::iterator SI = StaticAllocaMap.find(A); @@ -361,21 +375,19 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) { } case Instruction::Add: { - if (isCall) break; // Adds of constants are common and easy enough. if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) { uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue(); // They have to fit in the 32-bit signed displacement field though. if (isInt32(Disp)) { AM.Disp = (uint32_t)Disp; - return X86SelectAddress(U->getOperand(0), AM, isCall); + return X86SelectAddress(U->getOperand(0), AM); } } break; } case Instruction::GetElementPtr: { - if (isCall) break; // Pattern-match simple GEPs. 
uint64_t Disp = (int32_t)AM.Disp; unsigned IndexReg = AM.IndexReg; @@ -416,7 +428,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) { AM.IndexReg = IndexReg; AM.Scale = Scale; AM.Disp = (uint32_t)Disp; - return X86SelectAddress(U->getOperand(0), AM, isCall); + return X86SelectAddress(U->getOperand(0), AM); unsupported_gep: // Ok, the GEP indices weren't all covered. break; @@ -426,8 +438,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) { // Handle constant address. if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { // Can't handle alternate code models yet. - if (TM.getCodeModel() != CodeModel::Default && - TM.getCodeModel() != CodeModel::Small) + if (TM.getCodeModel() != CodeModel::Small) return false; // RIP-relative addresses can't have additional register operands. @@ -440,63 +451,149 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) { if (GVar->isThreadLocal()) return false; - // Set up the basic address. + // Okay, we've committed to selecting this global. Set up the basic address. AM.GV = GV; - if (!isCall && - TM.getRelocationModel() == Reloc::PIC_ && - !Subtarget->is64Bit()) - AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(&MF); + // Allow the subtarget to classify the global. + unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM); - // Emit an extra load if the ABI requires it. - if (Subtarget->GVRequiresExtraLoad(GV, TM, isCall)) { - // Check to see if we've already materialized this - // value in a register in this block. - DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V); - if (I != LocalValueMap.end() && I->second != 0) { - AM.Base.Reg = I->second; - AM.GV = 0; - return true; + // If this reference is relative to the pic base, set it now. + if (isGlobalRelativeToPICBase(GVFlags)) { + // FIXME: How do we know Base.Reg is free?? + AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(&MF); + } + + // Unless the ABI requires an extra load, return a direct reference to + // the global. + if (!isGlobalStubReference(GVFlags)) { + if (Subtarget->isPICStyleRIPRel()) { + // Use rip-relative addressing if we can. Above we verified that the + // base and index registers are unused. + assert(AM.Base.Reg == 0 && AM.IndexReg == 0); + AM.Base.Reg = X86::RIP; } - + AM.GVOpFlags = GVFlags; + return true; + } + + // Ok, we need to do a load from a stub. If we've already loaded from this + // stub, reuse the loaded pointer, otherwise emit the load now. + DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V); + unsigned LoadReg; + if (I != LocalValueMap.end() && I->second != 0) { + LoadReg = I->second; + } else { // Issue load from stub. 
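// Stub loads are memoized per block through LocalValueMap: the first
// reference emits the load below and records the result register, and later
// references to the same global in this MBB reuse it, so each GOT entry or
// non-lazy pointer is dereferenced at most once per block.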
unsigned Opc = 0; const TargetRegisterClass *RC = NULL; X86AddressMode StubAM; StubAM.Base.Reg = AM.Base.Reg; - StubAM.GV = AM.GV; - - if (TLI.getPointerTy() == MVT::i32) { - Opc = X86::MOV32rm; - RC = X86::GR32RegisterClass; - - if (Subtarget->isPICStyleGOT() && - TM.getRelocationModel() == Reloc::PIC_) - StubAM.GVOpFlags = X86II::MO_GOT; - - } else { + StubAM.GV = GV; + StubAM.GVOpFlags = GVFlags; + + if (TLI.getPointerTy() == MVT::i64) { Opc = X86::MOV64rm; RC = X86::GR64RegisterClass; - if (TM.getRelocationModel() != Reloc::Static) { - StubAM.GVOpFlags = X86II::MO_GOTPCREL; + if (Subtarget->isPICStyleRIPRel()) StubAM.Base.Reg = X86::RIP; - } + } else { + Opc = X86::MOV32rm; + RC = X86::GR32RegisterClass; } + + LoadReg = createResultReg(RC); + addFullAddress(BuildMI(MBB, DL, TII.get(Opc), LoadReg), StubAM); + + // Prevent loading GV stub multiple times in same MBB. + LocalValueMap[V] = LoadReg; + } + + // Now construct the final address. Note that the Disp, Scale, + // and Index values may already be set here. + AM.Base.Reg = LoadReg; + AM.GV = 0; + return true; + } - unsigned ResultReg = createResultReg(RC); - addFullAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), StubAM); + // If all else fails, try to materialize the value in a register. + if (!AM.GV || !Subtarget->isPICStyleRIPRel()) { + if (AM.Base.Reg == 0) { + AM.Base.Reg = getRegForValue(V); + return AM.Base.Reg != 0; + } + if (AM.IndexReg == 0) { + assert(AM.Scale == 1 && "Scale with no index!"); + AM.IndexReg = getRegForValue(V); + return AM.IndexReg != 0; + } + } - // Now construct the final address. Note that the Disp, Scale, - // and Index values may already be set here. - AM.Base.Reg = ResultReg; - AM.GV = 0; + return false; +} - // Prevent loading GV stub multiple times in same MBB. - LocalValueMap[V] = AM.Base.Reg; - } else if (Subtarget->isPICStyleRIPRel()) { - // Use rip-relative addressing if we can. +/// X86SelectCallAddress - Attempt to fill in an address from the given value. +/// +bool X86FastISel::X86SelectCallAddress(Value *V, X86AddressMode &AM) { + User *U = NULL; + unsigned Opcode = Instruction::UserOp1; + if (Instruction *I = dyn_cast<Instruction>(V)) { + Opcode = I->getOpcode(); + U = I; + } else if (ConstantExpr *C = dyn_cast<ConstantExpr>(V)) { + Opcode = C->getOpcode(); + U = C; + } + + switch (Opcode) { + default: break; + case Instruction::BitCast: + // Look past bitcasts. + return X86SelectCallAddress(U->getOperand(0), AM); + + case Instruction::IntToPtr: + // Look past no-op inttoptrs. + if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) + return X86SelectCallAddress(U->getOperand(0), AM); + break; + + case Instruction::PtrToInt: + // Look past no-op ptrtoints. + if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) + return X86SelectCallAddress(U->getOperand(0), AM); + break; + } + + // Handle constant address. + if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { + // Can't handle alternate code models yet. + if (TM.getCodeModel() != CodeModel::Small) + return false; + + // RIP-relative addresses can't have additional register operands. + if (Subtarget->isPICStyleRIPRel() && + (AM.Base.Reg != 0 || AM.IndexReg != 0)) + return false; + + // Can't handle TLS or DLLImport. + if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) + if (GVar->isThreadLocal() || GVar->hasDLLImportLinkage()) + return false; + + // Okay, we've committed to selecting this global. Set up the basic address. 
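// This near-duplicate of X86SelectAddress exists because call targets are
// simpler than data references: a data reference may have to load the real
// address from a GOT slot or Darwin non-lazy pointer, while a direct call can
// always target the global itself (DLLImport, the one exception, was rejected
// above), so this path never issues a stub load.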
+ AM.GV = GV; + + // No ABI requires an extra load for anything other than DLLImport, which + // we rejected above. Return a direct reference to the global. + if (Subtarget->isPICStyleRIPRel()) { + // Use rip-relative addressing if we can. Above we verified that the + // base and index registers are unused. + assert(AM.Base.Reg == 0 && AM.IndexReg == 0); AM.Base.Reg = X86::RIP; + } else if (Subtarget->isPICStyleStubPIC()) { + AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET; + } else if (Subtarget->isPICStyleGOT()) { + AM.GVOpFlags = X86II::MO_GOTOFF; } return true; @@ -518,14 +615,15 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) { return false; } + /// X86SelectStore - Select and emit code to implement store instructions. bool X86FastISel::X86SelectStore(Instruction* I) { - MVT VT; - if (!isTypeLegal(I->getOperand(0)->getType(), VT)) + EVT VT; + if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true)) return false; X86AddressMode AM; - if (!X86SelectAddress(I->getOperand(1), AM, false)) + if (!X86SelectAddress(I->getOperand(1), AM)) return false; return X86FastEmitStore(VT, I->getOperand(0), AM); @@ -534,12 +632,12 @@ bool X86FastISel::X86SelectStore(Instruction* I) { /// X86SelectLoad - Select and emit code to implement load instructions. /// bool X86FastISel::X86SelectLoad(Instruction *I) { - MVT VT; - if (!isTypeLegal(I->getType(), VT)) + EVT VT; + if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true)) return false; X86AddressMode AM; - if (!X86SelectAddress(I->getOperand(0), AM, false)) + if (!X86SelectAddress(I->getOperand(0), AM)) return false; unsigned ResultReg = 0; @@ -550,8 +648,8 @@ bool X86FastISel::X86SelectLoad(Instruction *I) { return false; } -static unsigned X86ChooseCmpOpcode(MVT VT) { - switch (VT.getSimpleVT()) { +static unsigned X86ChooseCmpOpcode(EVT VT) { + switch (VT.getSimpleVT().SimpleTy) { default: return 0; case MVT::i8: return X86::CMP8rr; case MVT::i16: return X86::CMP16rr; @@ -565,8 +663,8 @@ static unsigned X86ChooseCmpOpcode(MVT VT) { /// X86ChooseCmpImmediateOpcode - If we have a comparison with RHS as the RHS /// of the comparison, return an opcode that works for the compare (e.g. /// CMP32ri) otherwise return 0. -static unsigned X86ChooseCmpImmediateOpcode(MVT VT, ConstantInt *RHSC) { - switch (VT.getSimpleVT()) { +static unsigned X86ChooseCmpImmediateOpcode(EVT VT, ConstantInt *RHSC) { + switch (VT.getSimpleVT().SimpleTy) { // Otherwise, we can't fold the immediate into this comparison. default: return 0; case MVT::i8: return X86::CMP8ri; @@ -581,13 +679,13 @@ static unsigned X86ChooseCmpImmediateOpcode(MVT VT, ConstantInt *RHSC) { } } -bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, MVT VT) { +bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, EVT VT) { unsigned Op0Reg = getRegForValue(Op0); if (Op0Reg == 0) return false; // Handle 'null' like i32/i64 0. if (isa<ConstantPointerNull>(Op1)) - Op1 = Constant::getNullValue(TD.getIntPtrType()); + Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getContext())); // We have two options: compare with register or immediate. 
If the RHS of // the compare is an immediate that we can fold into this compare, use @@ -613,7 +711,7 @@ bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, MVT VT) { bool X86FastISel::X86SelectCmp(Instruction *I) { CmpInst *CI = cast<CmpInst>(I); - MVT VT; + EVT VT; if (!isTypeLegal(I->getOperand(0)->getType(), VT)) return false; @@ -688,8 +786,8 @@ bool X86FastISel::X86SelectCmp(Instruction *I) { bool X86FastISel::X86SelectZExt(Instruction *I) { // Handle zero-extension from i1 to i8, which is common. - if (I->getType() == Type::Int8Ty && - I->getOperand(0)->getType() == Type::Int1Ty) { + if (I->getType() == Type::getInt8Ty(I->getContext()) && + I->getOperand(0)->getType() == Type::getInt1Ty(I->getContext())) { unsigned ResultReg = getRegForValue(I->getOperand(0)); if (ResultReg == 0) return false; // Set the high bits to zero. @@ -713,7 +811,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { // Fold the common case of a conditional branch with a comparison. if (CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { if (CI->hasOneUse()) { - MVT VT = TLI.getValueType(CI->getOperand(0)->getType()); + EVT VT = TLI.getValueType(CI->getOperand(0)->getType()); // Try to take advantage of fallthrough opportunities. CmpInst::Predicate Predicate = CI->getPredicate(); @@ -850,7 +948,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { bool X86FastISel::X86SelectShift(Instruction *I) { unsigned CReg = 0, OpReg = 0, OpImm = 0; const TargetRegisterClass *RC = NULL; - if (I->getType() == Type::Int8Ty) { + if (I->getType() == Type::getInt8Ty(I->getContext())) { CReg = X86::CL; RC = &X86::GR8RegClass; switch (I->getOpcode()) { @@ -859,7 +957,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break; default: return false; } - } else if (I->getType() == Type::Int16Ty) { + } else if (I->getType() == Type::getInt16Ty(I->getContext())) { CReg = X86::CX; RC = &X86::GR16RegClass; switch (I->getOpcode()) { @@ -868,7 +966,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break; default: return false; } - } else if (I->getType() == Type::Int32Ty) { + } else if (I->getType() == Type::getInt32Ty(I->getContext())) { CReg = X86::ECX; RC = &X86::GR32RegClass; switch (I->getOpcode()) { @@ -877,7 +975,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break; default: return false; } - } else if (I->getType() == Type::Int64Ty) { + } else if (I->getType() == Type::getInt64Ty(I->getContext())) { CReg = X86::RCX; RC = &X86::GR64RegClass; switch (I->getOpcode()) { @@ -890,7 +988,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { return false; } - MVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true); + EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true); if (VT == MVT::Other || !isTypeLegal(I->getType(), VT)) return false; @@ -924,7 +1022,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { } bool X86FastISel::X86SelectSelect(Instruction *I) { - MVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true); + EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true); if (VT == MVT::Other || !isTypeLegal(I->getType(), VT)) return false; @@ -959,9 +1057,10 @@ bool X86FastISel::X86SelectSelect(Instruction *I) { bool X86FastISel::X86SelectFPExt(Instruction *I) { // fpext from float to double. 
- if (Subtarget->hasSSE2() && I->getType() == Type::DoubleTy) { + if (Subtarget->hasSSE2() && + I->getType()->isDoubleTy()) { Value *V = I->getOperand(0); - if (V->getType() == Type::FloatTy) { + if (V->getType()->isFloatTy()) { unsigned OpReg = getRegForValue(V); if (OpReg == 0) return false; unsigned ResultReg = createResultReg(X86::FR64RegisterClass); @@ -976,9 +1075,9 @@ bool X86FastISel::X86SelectFPExt(Instruction *I) { bool X86FastISel::X86SelectFPTrunc(Instruction *I) { if (Subtarget->hasSSE2()) { - if (I->getType() == Type::FloatTy) { + if (I->getType()->isFloatTy()) { Value *V = I->getOperand(0); - if (V->getType() == Type::DoubleTy) { + if (V->getType()->isDoubleTy()) { unsigned OpReg = getRegForValue(V); if (OpReg == 0) return false; unsigned ResultReg = createResultReg(X86::FR32RegisterClass); @@ -996,8 +1095,8 @@ bool X86FastISel::X86SelectTrunc(Instruction *I) { if (Subtarget->is64Bit()) // All other cases should be handled by the tblgen generated code. return false; - MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); - MVT DstVT = TLI.getValueType(I->getType()); + EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(I->getType()); // This code only handles truncation to byte right now. if (DstVT != MVT::i8 && DstVT != MVT::i1) @@ -1065,7 +1164,7 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) { const Type *RetTy = cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0)); - MVT VT; + EVT VT; if (!isTypeLegal(RetTy, VT)) return false; @@ -1125,7 +1224,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { // Handle only C and fastcc calling conventions for now. CallSite CS(CI); - unsigned CC = CS.getCallingConv(); + CallingConv::ID CC = CS.getCallingConv(); if (CC != CallingConv::C && CC != CallingConv::Fast && CC != CallingConv::X86_FastCall) @@ -1144,8 +1243,8 @@ bool X86FastISel::X86SelectCall(Instruction *I) { // Handle *simple* calls for now. const Type *RetTy = CS.getType(); - MVT RetVT; - if (RetTy == Type::VoidTy) + EVT RetVT; + if (RetTy->isVoidTy()) RetVT = MVT::isVoid; else if (!isTypeLegal(RetTy, RetVT, true)) return false; @@ -1153,7 +1252,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { // Materialize callee address in a register. FIXME: GV address can be // handled with a CALLpcrel32 instead. X86AddressMode CalleeAM; - if (!X86SelectAddress(Callee, CalleeAM, true)) + if (!X86SelectCallAddress(Callee, CalleeAM)) return false; unsigned CalleeOp = 0; GlobalValue *GV = 0; @@ -1174,7 +1273,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { // Deal with call operands first. SmallVector<Value*, 8> ArgVals; SmallVector<unsigned, 8> Args; - SmallVector<MVT, 8> ArgVTs; + SmallVector<EVT, 8> ArgVTs; SmallVector<ISD::ArgFlagsTy, 8> ArgFlags; Args.reserve(CS.arg_size()); ArgVals.reserve(CS.arg_size()); @@ -1200,7 +1299,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { return false; const Type *ArgTy = (*i)->getType(); - MVT ArgVT; + EVT ArgVT; if (!isTypeLegal(ArgTy, ArgVT)) return false; unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy); @@ -1214,7 +1313,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CC, false, TM, ArgLocs); + CCState CCInfo(CC, false, TM, ArgLocs, I->getParent()->getContext()); CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC)); // Get a count of how many bytes are to be pushed on the stack. 
@@ -1230,11 +1329,11 @@ bool X86FastISel::X86SelectCall(Instruction *I) { for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; unsigned Arg = Args[VA.getValNo()]; - MVT ArgVT = ArgVTs[VA.getValNo()]; + EVT ArgVT = ArgVTs[VA.getValNo()]; // Promote the value if needed. switch (VA.getLocInfo()) { - default: assert(0 && "Unknown loc info!"); + default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::SExt: { bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), @@ -1266,6 +1365,14 @@ bool X86FastISel::X86SelectCall(Instruction *I) { ArgVT = VA.getLocVT(); break; } + case CCValAssign::BCvt: { + unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT().getSimpleVT(), + ISD::BIT_CONVERT, Arg); + assert(BC != 0 && "Failed to emit a bitcast!"); + Arg = BC; + ArgVT = VA.getLocVT(); + break; + } } if (VA.isRegLoc()) { @@ -1294,28 +1401,53 @@ bool X86FastISel::X86SelectCall(Instruction *I) { // ELF / PIC requires GOT in the EBX register before function calls via PLT // GOT pointer. - if (!Subtarget->is64Bit() && - TM.getRelocationModel() == Reloc::PIC_ && - Subtarget->isPICStyleGOT()) { + if (Subtarget->isPICStyleGOT()) { TargetRegisterClass *RC = X86::GR32RegisterClass; unsigned Base = getInstrInfo()->getGlobalBaseReg(&MF); bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC); assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted; Emitted = true; } - + // Issue the call. - unsigned CallOpc = CalleeOp - ? (Subtarget->is64Bit() ? X86::CALL64r : X86::CALL32r) - : (Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32); - MachineInstrBuilder MIB = CalleeOp - ? BuildMI(MBB, DL, TII.get(CallOpc)).addReg(CalleeOp) - : BuildMI(MBB, DL, TII.get(CallOpc)).addGlobalAddress(GV); + MachineInstrBuilder MIB; + if (CalleeOp) { + // Register-indirect call. + unsigned CallOpc = Subtarget->is64Bit() ? X86::CALL64r : X86::CALL32r; + MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addReg(CalleeOp); + + } else { + // Direct call. + assert(GV && "Not a direct call"); + unsigned CallOpc = + Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32; + + // See if we need any target-specific flags on the GV operand. + unsigned char OpFlags = 0; + + // On ELF targets, in both X86-64 and X86-32 mode, direct calls to + // external symbols must go through the PLT in PIC mode. If the symbol + // has hidden or protected visibility, or if it is static or local, then + // we don't need to use the PLT - we can directly call it. + if (Subtarget->isTargetELF() && + TM.getRelocationModel() == Reloc::PIC_ && + GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) { + OpFlags = X86II::MO_PLT; + } else if (Subtarget->isPICStyleStubAny() && + (GV->isDeclaration() || GV->isWeakForLinker()) && + Subtarget->getDarwinVers() < 9) { + // PC-relative references to external symbols should go through $stub, + // unless we're building with the leopard linker or later, which + // automatically synthesizes these stubs. + OpFlags = X86II::MO_DARWIN_STUB; + } + + + MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addGlobalAddress(GV, 0, OpFlags); + } // Add an implicit use GOT pointer in EBX. - if (!Subtarget->is64Bit() && - TM.getRelocationModel() == Reloc::PIC_ && - Subtarget->isPICStyleGOT()) + if (Subtarget->isPICStyleGOT()) MIB.addReg(X86::EBX); // Add implicit physical register uses to the call.
@@ -1327,14 +1459,14 @@ bool X86FastISel::X86SelectCall(Instruction *I) { BuildMI(MBB, DL, TII.get(AdjStackUp)).addImm(NumBytes).addImm(0); // Now handle call return value (if any). - if (RetVT.getSimpleVT() != MVT::isVoid) { + if (RetVT.getSimpleVT().SimpleTy != MVT::isVoid) { SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CC, false, TM, RVLocs); + CCState CCInfo(CC, false, TM, RVLocs, I->getParent()->getContext()); CCInfo.AnalyzeCallResult(RetVT, RetCC_X86); // Copy all of the result registers out of their specified physreg. assert(RVLocs.size() == 1 && "Can't handle multi-value calls!"); - MVT CopyVT = RVLocs[0].getValVT(); + EVT CopyVT = RVLocs[0].getValVT(); TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT); TargetRegisterClass *SrcRC = DstRC; @@ -1358,7 +1490,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { // Round the F80 the right size, which also moves to the appropriate xmm // register. This is accomplished by storing the F80 value in memory and // then loading it back. Ewww... - MVT ResVT = RVLocs[0].getValVT(); + EVT ResVT = RVLocs[0].getValVT(); unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64; unsigned MemSize = ResVT.getSizeInBits()/8; int FI = MFI.CreateStackObject(MemSize, MemSize); @@ -1418,8 +1550,8 @@ X86FastISel::TargetSelectInstruction(Instruction *I) { return X86SelectExtractValue(I); case Instruction::IntToPtr: // Deliberate fall-through. case Instruction::PtrToInt: { - MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); - MVT DstVT = TLI.getValueType(I->getType()); + EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(I->getType()); if (DstVT.bitsGT(SrcVT)) return X86SelectZExt(I); if (DstVT.bitsLT(SrcVT)) @@ -1435,14 +1567,14 @@ X86FastISel::TargetSelectInstruction(Instruction *I) { } unsigned X86FastISel::TargetMaterializeConstant(Constant *C) { - MVT VT; + EVT VT; if (!isTypeLegal(C->getType(), VT)) return false; // Get opcode and regclass of the output for the given load instruction. unsigned Opc = 0; const TargetRegisterClass *RC = NULL; - switch (VT.getSimpleVT()) { + switch (VT.getSimpleVT().SimpleTy) { default: return false; case MVT::i8: Opc = X86::MOV8rm; @@ -1487,7 +1619,7 @@ unsigned X86FastISel::TargetMaterializeConstant(Constant *C) { // Materialize addresses with LEA instructions. if (isa<GlobalValue>(C)) { X86AddressMode AM; - if (X86SelectAddress(C, AM, false)) { + if (X86SelectAddress(C, AM)) { if (TLI.getPointerTy() == MVT::i32) Opc = X86::LEA32r; else @@ -1509,16 +1641,15 @@ unsigned X86FastISel::TargetMaterializeConstant(Constant *C) { // x86-32 PIC requires a PIC base register for constant pools. 
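// Which (OpFlag, PICBase) pair is chosen below is purely a function of the
// PIC style: Darwin stub PIC uses MO_PIC_BASE_OFFSET with the function's
// global base register, ELF GOT style uses MO_GOTOFF with the same base
// register, and x86-64 small-code-model code needs no flag at all, just RIP
// as the base.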
unsigned PICBase = 0; unsigned char OpFlag = 0; - if (TM.getRelocationModel() == Reloc::PIC_) { - if (Subtarget->isPICStyleStub()) { - OpFlag = X86II::MO_PIC_BASE_OFFSET; - PICBase = getInstrInfo()->getGlobalBaseReg(&MF); - } else if (Subtarget->isPICStyleGOT()) { - OpFlag = X86II::MO_GOTOFF; - PICBase = getInstrInfo()->getGlobalBaseReg(&MF); - } else if (Subtarget->isPICStyleRIPRel() && - TM.getCodeModel() == CodeModel::Small) - PICBase = X86::RIP; + if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic + OpFlag = X86II::MO_PIC_BASE_OFFSET; + PICBase = getInstrInfo()->getGlobalBaseReg(&MF); + } else if (Subtarget->isPICStyleGOT()) { + OpFlag = X86II::MO_GOTOFF; + PICBase = getInstrInfo()->getGlobalBaseReg(&MF); + } else if (Subtarget->isPICStyleRIPRel() && + TM.getCodeModel() == CodeModel::Small) { + PICBase = X86::RIP; } // Create the load from the constant pool. @@ -1542,7 +1673,7 @@ unsigned X86FastISel::TargetMaterializeAlloca(AllocaInst *C) { return 0; X86AddressMode AM; - if (!X86SelectAddress(C, AM, false)) + if (!X86SelectAddress(C, AM)) return 0; unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r; TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy()); diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 37027ee..d9a05a8 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -31,19 +31,21 @@ #define DEBUG_TYPE "x86-codegen" #include "X86.h" #include "X86InstrInfo.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Compiler.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include <algorithm> using namespace llvm; @@ -56,6 +58,7 @@ namespace { FPS() : MachineFunctionPass(&ID) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); @@ -73,12 +76,12 @@ namespace { unsigned StackTop; // The current top of the FP stack. void dumpStack() const { - cerr << "Stack contents:"; + errs() << "Stack contents:"; for (unsigned i = 0; i != StackTop; ++i) { - cerr << " FP" << Stack[i]; + errs() << " FP" << Stack[i]; assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!"); } - cerr << "\n"; + errs() << "\n"; } private: /// isStackEmpty - Return true if the FP stack is empty. @@ -210,6 +213,14 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) { I != E; ++I) Changed |= processBasicBlock(MF, **I); + // Process any unreachable blocks in arbitrary order now. 
+ if (MF.size() == Processed.size()) + return Changed; + + for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) + if (Processed.insert(BB)) + Changed |= processBasicBlock(MF, *BB); + return Changed; } @@ -236,7 +247,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { PrevMI = prior(I); ++NumFP; // Keep track of # of pseudo instrs - DOUT << "\nFPInst:\t" << *MI; + DEBUG(errs() << "\nFPInst:\t" << *MI); // Get dead variables list now because the MI pointer may be deleted as part // of processing! @@ -255,7 +266,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { case X86II::CompareFP: handleCompareFP(I); break; case X86II::CondMovFP: handleCondMovFP(I); break; case X86II::SpecialFP: handleSpecialFP(I); break; - default: assert(0 && "Unknown FP Type!"); + default: llvm_unreachable("Unknown FP Type!"); } // Check to see if any of the values defined by this instruction are dead @@ -263,7 +274,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) { unsigned Reg = DeadRegs[i]; if (Reg >= X86::FP0 && Reg <= X86::FP6) { - DOUT << "Register FP#" << Reg-X86::FP0 << " is dead!\n"; + DEBUG(errs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n"); freeStackSlotAfter(I, Reg-X86::FP0); } } @@ -272,13 +283,13 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { DEBUG( MachineBasicBlock::iterator PrevI(PrevMI); if (I == PrevI) { - cerr << "Just deleted pseudo instruction\n"; + errs() << "Just deleted pseudo instruction\n"; } else { MachineBasicBlock::iterator Start = I; // Rewind to first instruction newly inserted. while (Start != BB.begin() && prior(Start) != PrevI) --Start; - cerr << "Inserted instructions:\n\t"; - Start->print(*cerr.stream(), &MF.getTarget()); + errs() << "Inserted instructions:\n\t"; + Start->print(errs(), &MF.getTarget()); while (++Start != next(I)) {} } dumpStack(); @@ -945,7 +956,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { MachineInstr *MI = I; DebugLoc dl = MI->getDebugLoc(); switch (MI->getOpcode()) { - default: assert(0 && "Unknown SpecialFP instruction!"); + default: llvm_unreachable("Unknown SpecialFP instruction!"); case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type! case X86::FpGET_ST0_64:// Appears immediately after a call returning FP type! case X86::FpGET_ST0_80:// Appears immediately after a call returning FP type! 
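The runOnMachineFunction change that closes above generalizes the pass from blocks reachable from the entry (walked depth-first) to all blocks, with an early exit when the reachable walk already covered everything. A self-contained model of the two-phase visit over a toy CFG, with a std::set playing the role of the pass's Processed set:

    #include <cstdio>
    #include <set>
    #include <vector>

    using CFG = std::vector<std::vector<int>>; // successor lists; entry is 0

    static void dfs(int BB, const CFG &G, std::set<int> &Processed,
                    std::vector<int> &Order) {
      if (!Processed.insert(BB).second)
        return;                      // already visited
      Order.push_back(BB);           // "processBasicBlock(MF, *BB)"
      for (int S : G[BB])
        dfs(S, G, Processed, Order);
    }

    int main() {
      CFG G = {{1}, {}, {1}};        // block 2 is unreachable from the entry
      std::set<int> Processed;
      std::vector<int> Order;
      dfs(0, G, Processed, Order);   // phase 1: reachable, depth-first
      if (Processed.size() != G.size())      // early exit mirrors the patch
        for (int BB = 0; BB != (int)G.size(); ++BB)
          if (Processed.insert(BB).second)   // phase 2: leftovers, any order
            Order.push_back(BB);
      for (int BB : Order)
        std::printf("%d ", BB);      // prints: 0 1 2
      return 0;
    }

Unreachable blocks can still contain FP pseudo instructions, so they must be rewritten even though they can never execute.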
diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp index 009846e..3e0385c 100644 --- a/lib/Target/X86/X86FloatingPointRegKill.cpp +++ b/lib/Target/X86/X86FloatingPointRegKill.cpp @@ -35,6 +35,7 @@ namespace { FPRegKiller() : MachineFunctionPass(&ID) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); @@ -117,9 +118,10 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) { !ContainsFPCode && SI != E; ++SI) { for (BasicBlock::const_iterator II = SI->begin(); (PN = dyn_cast<PHINode>(II)); ++II) { - if (PN->getType()==Type::X86_FP80Ty || + if (PN->getType()==Type::getX86_FP80Ty(LLVMBB->getContext()) || (!Subtarget.hasSSE1() && PN->getType()->isFloatingPoint()) || - (!Subtarget.hasSSE2() && PN->getType()==Type::DoubleTy)) { + (!Subtarget.hasSSE2() && + PN->getType()==Type::getDoubleTy(LLVMBB->getContext()))) { ContainsFPCode = true; break; } diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 1336177..5b678fb 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -35,8 +35,9 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -78,7 +79,8 @@ namespace { X86ISelAddressMode() : BaseType(RegBase), Scale(1), IndexReg(), Disp(0), - Segment(), GV(0), CP(0), ES(0), JT(-1), Align(0), SymbolFlags(0) { + Segment(), GV(0), CP(0), ES(0), JT(-1), Align(0), + SymbolFlags(X86II::MO_NO_FLAG) { } bool hasSymbolicDisplacement() const { @@ -105,23 +107,37 @@ namespace { } void dump() { - cerr << "X86ISelAddressMode " << this << "\n"; - cerr << "Base.Reg "; - if (Base.Reg.getNode() != 0) Base.Reg.getNode()->dump(); - else cerr << "nul"; - cerr << " Base.FrameIndex " << Base.FrameIndex << "\n"; - cerr << " Scale" << Scale << "\n"; - cerr << "IndexReg "; - if (IndexReg.getNode() != 0) IndexReg.getNode()->dump(); - else cerr << "nul"; - cerr << " Disp " << Disp << "\n"; - cerr << "GV "; if (GV) GV->dump(); - else cerr << "nul"; - cerr << " CP "; if (CP) CP->dump(); - else cerr << "nul"; - cerr << "\n"; - cerr << "ES "; if (ES) cerr << ES; else cerr << "nul"; - cerr << " JT" << JT << " Align" << Align << "\n"; + errs() << "X86ISelAddressMode " << this << '\n'; + errs() << "Base.Reg "; + if (Base.Reg.getNode() != 0) + Base.Reg.getNode()->dump(); + else + errs() << "nul"; + errs() << " Base.FrameIndex " << Base.FrameIndex << '\n' + << " Scale" << Scale << '\n' + << "IndexReg "; + if (IndexReg.getNode() != 0) + IndexReg.getNode()->dump(); + else + errs() << "nul"; + errs() << " Disp " << Disp << '\n' + << "GV "; + if (GV) + GV->dump(); + else + errs() << "nul"; + errs() << " CP "; + if (CP) + CP->dump(); + else + errs() << "nul"; + errs() << '\n' + << "ES "; + if (ES) + errs() << ES; + else + errs() << "nul"; + errs() << " JT" << JT << " Align" << Align << '\n'; } }; } @@ -140,10 +156,6 @@ namespace { /// make the right decision when generating code for different targets. const X86Subtarget *Subtarget; - /// CurBB - Current BB being isel'd. 
- /// - MachineBasicBlock *CurBB; - /// OptForSize - If true, selector should try to optimize for code size /// instead of performance. bool OptForSize; @@ -174,12 +186,14 @@ namespace { private: SDNode *Select(SDValue N); SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); + SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT); bool MatchSegmentBaseAddress(SDValue N, X86ISelAddressMode &AM); bool MatchLoad(SDValue N, X86ISelAddressMode &AM); bool MatchWrapper(SDValue N, X86ISelAddressMode &AM); - bool MatchAddress(SDValue N, X86ISelAddressMode &AM, - unsigned Depth = 0); + bool MatchAddress(SDValue N, X86ISelAddressMode &AM); + bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, + unsigned Depth); bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM); bool SelectAddr(SDValue Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, @@ -342,13 +356,17 @@ static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load, Ops.push_back(Load.getOperand(0)); else Ops.push_back(TF.getOperand(i)); - CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size()); - CurDAG->UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2)); - CurDAG->UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1), + SDValue NewTF = CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size()); + SDValue NewLoad = CurDAG->UpdateNodeOperands(Load, NewTF, + Load.getOperand(1), + Load.getOperand(2)); + CurDAG->UpdateNodeOperands(Store, NewLoad.getValue(1), Store.getOperand(1), Store.getOperand(2), Store.getOperand(3)); } -/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG. +/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG. The +/// chain produced by the load must only be used by the store's chain operand, +/// otherwise this may produce a cycle in the DAG. /// static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address, SDValue &Load) { @@ -366,8 +384,9 @@ static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address, return false; if (N.hasOneUse() && + LD->hasNUsesOfValue(1, 1) && N.getOperand(1) == Address && - N.getNode()->isOperandOf(Chain.getNode())) { + LD->isOperandOf(Chain.getNode())) { Load = N; return true; } @@ -431,7 +450,8 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain) { if (Chain.getOperand(0).getNode() == Callee.getNode()) return true; if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor && - Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode())) + Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) && + Callee.getValue(1).hasOneUse()) return true; return false; } @@ -583,8 +603,8 @@ void X86DAGToDAGISel::PreprocessForFPConvert() { // If the source and destination are SSE registers, then this is a legal // conversion that should not be lowered. - MVT SrcVT = N->getOperand(0).getValueType(); - MVT DstVT = N->getValueType(0); + EVT SrcVT = N->getOperand(0).getValueType(); + EVT DstVT = N->getValueType(0); bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT); bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT); if (SrcIsSSE && DstIsSSE) @@ -602,7 +622,7 @@ void X86DAGToDAGISel::PreprocessForFPConvert() { // Here we could have an FP stack truncation or an FPStack <-> SSE convert. // FPStack has extload and truncstore. SSE can fold direct loads into other // operations. Based on this, decide what we want to do. - MVT MemVT; + EVT MemVT; if (N->getOpcode() == ISD::FP_ROUND) MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'. 
else @@ -635,8 +655,7 @@ void X86DAGToDAGISel::PreprocessForFPConvert() { /// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel /// when it has created a SelectionDAG for us to codegen. void X86DAGToDAGISel::InstructionSelect() { - CurBB = BB; // BB can change as result of isel. - const Function *F = CurDAG->getMachineFunction().getFunction(); + const Function *F = MF->getFunction(); OptForSize = F->hasFnAttr(Attribute::OptimizeForSize); DEBUG(BB->dump()); @@ -648,12 +667,12 @@ void X86DAGToDAGISel::InstructionSelect() { // Codegen the basic block. #ifndef NDEBUG - DOUT << "===== Instruction selection begins:\n"; + DEBUG(errs() << "===== Instruction selection begins:\n"); Indent = 0; #endif SelectRoot(*CurDAG); #ifndef NDEBUG - DOUT << "===== Instruction selection ends:\n"; + DEBUG(errs() << "===== Instruction selection ends:\n"); #endif CurDAG->RemoveDeadNodes(); @@ -706,7 +725,7 @@ bool X86DAGToDAGISel::MatchLoad(SDValue N, X86ISelAddressMode &AM) { /// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes /// into an addressing mode. These wrap things that will resolve down into a /// symbol reference. If no match is possible, this returns true, otherwise it -/// returns false. +/// returns false. bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { // If the addressing mode already has a symbol as the displacement, we can // never match another symbol. @@ -714,28 +733,27 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { return true; SDValue N0 = N.getOperand(0); - + CodeModel::Model M = TM.getCodeModel(); + // Handle X86-64 rip-relative addresses. We check this before checking direct // folding because RIP is preferable to non-RIP accesses. if (Subtarget->is64Bit() && // Under X86-64 non-small code model, GV (and friends) are 64-bits, so // they cannot be folded into immediate fields. // FIXME: This can be improved for kernel and other models? - TM.getCodeModel() == CodeModel::Small && - + (M == CodeModel::Small || M == CodeModel::Kernel) && // Base and index reg must be 0 in order to use %rip as base and lowering // must allow RIP. !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) { - if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { int64_t Offset = AM.Disp + G->getOffset(); - if (!isInt32(Offset)) return true; + if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true; AM.GV = G->getGlobal(); AM.Disp = Offset; AM.SymbolFlags = G->getTargetFlags(); } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) { int64_t Offset = AM.Disp + CP->getOffset(); - if (!isInt32(Offset)) return true; + if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true; AM.CP = CP->getConstVal(); AM.Align = CP->getAlignment(); AM.Disp = Offset; @@ -748,7 +766,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { AM.JT = J->getIndex(); AM.SymbolFlags = J->getTargetFlags(); } - + if (N.getOpcode() == X86ISD::WrapperRIP) AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64)); return false; @@ -758,7 +776,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { // X86-32 always and X86-64 when in -static -mcmodel=small mode. In 64-bit // mode, this results in a non-RIP-relative computation. 
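From here on, bare isInt32 checks give way to X86::isOffsetSuitableForCodeModel, which also weighs the code model and whether a symbolic displacement is involved. A simplified sketch of such a predicate, assuming only that the small and kernel models promise symbol+offset stays within a signed 32-bit displacement (the real function is more careful than this):

    #include <cassert>
    #include <cstdint>

    enum CodeModel { Small, Kernel, Medium, Large };

    bool offsetFitsCodeModel(int64_t Offset, CodeModel M,
                             bool HasSymbolicDisp) {
      // The x86 displacement field is a signed 32-bit immediate, always.
      if (Offset != static_cast<int64_t>(static_cast<int32_t>(Offset)))
        return false;
      // A bare constant is fine; folding symbol+offset additionally needs a
      // code model whose symbols are known to sit within +/-2GiB of the code.
      if (!HasSymbolicDisp)
        return true;
      return M == Small || M == Kernel;
    }

    int main() {
      assert(offsetFitsCodeModel(0x7fffffff, Small, true));
      assert(!offsetFitsCodeModel(INT64_C(1) << 32, Small, false));
      return 0;
    }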
if (!Subtarget->is64Bit() || - (TM.getCodeModel() == CodeModel::Small && + ((M == CodeModel::Small || M == CodeModel::Kernel) && TM.getRelocationModel() == Reloc::Static)) { if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { AM.GV = G->getGlobal(); @@ -786,15 +804,49 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { /// MatchAddress - Add the specified node to the specified addressing mode, /// returning true if it cannot be done. This just pattern matches for the /// addressing mode. -bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, - unsigned Depth) { +bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { + if (MatchAddressRecursively(N, AM, 0)) + return true; + + // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has + // a smaller encoding and avoids a scaled-index. + if (AM.Scale == 2 && + AM.BaseType == X86ISelAddressMode::RegBase && + AM.Base.Reg.getNode() == 0) { + AM.Base.Reg = AM.IndexReg; + AM.Scale = 1; + } + + // Post-processing: Convert foo to foo(%rip), even in non-PIC mode, + // because it has a smaller encoding. + // TODO: Which other code models can use this? + if (TM.getCodeModel() == CodeModel::Small && + Subtarget->is64Bit() && + AM.Scale == 1 && + AM.BaseType == X86ISelAddressMode::RegBase && + AM.Base.Reg.getNode() == 0 && + AM.IndexReg.getNode() == 0 && + AM.SymbolFlags == X86II::MO_NO_FLAG && + AM.hasSymbolicDisplacement()) + AM.Base.Reg = CurDAG->getRegister(X86::RIP, MVT::i64); + + return false; +} + +bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, + unsigned Depth) { bool is64Bit = Subtarget->is64Bit(); DebugLoc dl = N.getDebugLoc(); - DOUT << "MatchAddress: "; DEBUG(AM.dump()); + DEBUG({ + errs() << "MatchAddress: "; + AM.dump(); + }); // Limit recursion. if (Depth > 5) return MatchAddressBase(N, AM); - + + CodeModel::Model M = TM.getCodeModel(); + // If this is already a %rip relative address, we can only merge immediates // into it. Instead of handling this in every case, we handle it here. // RIP relative addressing: %rip + 32-bit displacement! @@ -803,10 +855,11 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, // displacements. It isn't very important, but this should be fixed for // consistency. if (!AM.ES && AM.JT != -1) return true; - + if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N)) { int64_t Val = AM.Disp + Cst->getSExtValue(); - if (isInt32(Val)) { + if (X86::isOffsetSuitableForCodeModel(Val, M, + AM.hasSymbolicDisplacement())) { AM.Disp = Val; return false; } @@ -818,7 +871,9 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, default: break; case ISD::Constant: { uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue(); - if (!is64Bit || isInt32(AM.Disp + Val)) { + if (!is64Bit || + X86::isOffsetSuitableForCodeModel(AM.Disp + Val, M, + AM.hasSymbolicDisplacement())) { AM.Disp += Val; return false; } @@ -857,6 +912,10 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) { unsigned Val = CN->getZExtValue(); + // Note that we handle x<<1 as (,x,2) rather than (x,x) here so + // that the base operand remains free for further matching. If + // the base doesn't end up getting used, a post-processing step + // in MatchAddress turns (,x,2) into (x,x), which is cheaper. 
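The first post-processing rule above, concretely: with the base slot empty, leal (,%eax,2), %ecx must encode a SIB byte plus a four-byte zero displacement, while the equivalent leal (%eax,%eax), %ecx omits the displacement entirely. A toy version of the rewrite on a simplified address mode (the shift-matching code the second comment refers to resumes just below):

    #include <cstdio>

    struct AddrMode {
      int Scale = 1;
      int BaseReg = 0;   // 0 means "no register"
      int IndexReg = 0;
    };

    // (,x,2) with a free base becomes (x,x): same address, smaller encoding.
    void simplifyScale2(AddrMode &AM) {
      if (AM.Scale == 2 && AM.BaseReg == 0 && AM.IndexReg != 0) {
        AM.BaseReg = AM.IndexReg;
        AM.Scale = 1;
      }
    }

    int main() {
      AddrMode AM;
      AM.Scale = 2;
      AM.IndexReg = 1;   // say, %eax
      simplifyScale2(AM);
      std::printf("scale=%d base=%d index=%d\n",
                  AM.Scale, AM.BaseReg, AM.IndexReg); // scale=1 base=1 index=1
      return 0;
    }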
if (Val == 1 || Val == 2 || Val == 3) { AM.Scale = 1 << Val; SDValue ShVal = N.getNode()->getOperand(0); @@ -870,7 +929,9 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getNode()->getOperand(1)); uint64_t Disp = AM.Disp + (AddVal->getSExtValue() << Val); - if (!is64Bit || isInt32(Disp)) + if (!is64Bit || + X86::isOffsetSuitableForCodeModel(Disp, M, + AM.hasSymbolicDisplacement())) AM.Disp = Disp; else AM.IndexReg = ShVal; @@ -912,7 +973,9 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, cast<ConstantSDNode>(MulVal.getNode()->getOperand(1)); uint64_t Disp = AM.Disp + AddVal->getSExtValue() * CN->getZExtValue(); - if (!is64Bit || isInt32(Disp)) + if (!is64Bit || + X86::isOffsetSuitableForCodeModel(Disp, M, + AM.hasSymbolicDisplacement())) AM.Disp = Disp; else Reg = N.getNode()->getOperand(0); @@ -936,7 +999,7 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, // Test if the LHS of the sub can be folded. X86ISelAddressMode Backup = AM; - if (MatchAddress(N.getNode()->getOperand(0), AM, Depth+1)) { + if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) { AM = Backup; break; } @@ -998,12 +1061,12 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, case ISD::ADD: { X86ISelAddressMode Backup = AM; - if (!MatchAddress(N.getNode()->getOperand(0), AM, Depth+1) && - !MatchAddress(N.getNode()->getOperand(1), AM, Depth+1)) + if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1) && + !MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1)) return false; AM = Backup; - if (!MatchAddress(N.getNode()->getOperand(1), AM, Depth+1) && - !MatchAddress(N.getNode()->getOperand(0), AM, Depth+1)) + if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1) && + !MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) return false; AM = Backup; @@ -1027,11 +1090,13 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, X86ISelAddressMode Backup = AM; uint64_t Offset = CN->getSExtValue(); // Start with the LHS as an addr mode. - if (!MatchAddress(N.getOperand(0), AM, Depth+1) && + if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) && // Address could not have picked a GV address for the displacement. AM.GV == NULL && // On x86-64, the resultant disp must fit in 32-bits. - (!is64Bit || isInt32(AM.Disp + Offset)) && + (!is64Bit || + X86::isOffsetSuitableForCodeModel(AM.Disp + Offset, M, + AM.hasSymbolicDisplacement())) && // Check to see if the LHS & C is zero. 
CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) { AM.Disp += Offset; @@ -1219,7 +1284,7 @@ bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base, if (!Done && MatchAddress(N, AM)) return false; - MVT VT = N.getValueType(); + EVT VT = N.getValueType(); if (AM.BaseType == X86ISelAddressMode::RegBase) { if (!AM.Base.Reg.getNode()) AM.Base.Reg = CurDAG->getRegister(0, VT); @@ -1292,7 +1357,7 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N, assert (T == AM.Segment); AM.Segment = Copy; - MVT VT = N.getValueType(); + EVT VT = N.getValueType(); unsigned Complexity = 0; if (AM.BaseType == X86ISelAddressMode::RegBase) if (AM.Base.Reg.getNode()) @@ -1329,12 +1394,13 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N, if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode())) Complexity++; - if (Complexity > 2) { - SDValue Segment; - getAddressOperands(AM, Base, Scale, Index, Disp, Segment); - return true; - } - return false; + // If it isn't worth using an LEA, reject it. + if (Complexity <= 2) + return false; + + SDValue Segment; + getAddressOperands(AM, Base, Scale, Index, Disp, Segment); + return true; } /// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes. @@ -1380,7 +1446,6 @@ bool X86DAGToDAGISel::TryFoldLoad(SDValue P, SDValue N, /// initialize the global base register, if necessary. /// SDNode *X86DAGToDAGISel::getGlobalBaseReg() { - MachineFunction *MF = CurBB->getParent(); unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF); return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); } @@ -1400,367 +1465,686 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; if (!SelectAddr(In1, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) return NULL; - SDValue LSI = Node->getOperand(4); // MemOperand - const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, LSI, Chain}; - return CurDAG->getTargetNode(Opc, Node->getDebugLoc(), - MVT::i32, MVT::i32, MVT::Other, Ops, - array_lengthof(Ops)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); + const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain}; + SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), + MVT::i32, MVT::i32, MVT::Other, Ops, + array_lengthof(Ops)); + cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1); + return ResNode; +} + +SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { + if (Node->hasAnyUseOfValue(0)) + return 0; + + // Optimize common patterns for __sync_add_and_fetch and + // __sync_sub_and_fetch where the result is not used. This allows us + // to use "lock" version of add, sub, inc, dec instructions. + // FIXME: Do not use special instructions but instead add the "lock" + // prefix to the target node somehow. The extra information will then be + // transferred to machine instruction and it denotes the prefix. 
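The classification this comment introduces maps the operand of a result-unused atomic add onto the cheapest lock'd form: +1 becomes lock inc, -1 becomes lock dec, other negative constants are negated into lock sub, and the (0 - x) register pattern also becomes lock sub. A standalone sketch of the constant cases; the per-width opcode choice made by the switch below is omitted:

    #include <cstdint>
    #include <cstdio>

    enum AtomicOp { LockInc, LockDec, LockAdd, LockSub };

    struct Classified {
      AtomicOp Op;
      int64_t Operand; // meaningless for inc/dec
    };

    // Source pattern being targeted: __sync_fetch_and_add(&v, K) with the
    // return value ignored.
    Classified classifyAtomicAdd(int64_t K) {
      if (K == 1)  return {LockInc, 0};
      if (K == -1) return {LockDec, 0};
      if (K < 0)   return {LockSub, -K};  // lock addl $-8 -> lock subl $8
      return {LockAdd, K};
    }

    int main() {
      Classified C = classifyAtomicAdd(-8);
      std::printf("op=%d operand=%lld\n", C.Op, (long long)C.Operand);
      return 0;
    }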
+ SDValue Chain = Node->getOperand(0); + SDValue Ptr = Node->getOperand(1); + SDValue Val = Node->getOperand(2); + SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; + if (!SelectAddr(Ptr, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) + return 0; + + bool isInc = false, isDec = false, isSub = false, isCN = false; + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val); + if (CN) { + isCN = true; + int64_t CNVal = CN->getSExtValue(); + if (CNVal == 1) + isInc = true; + else if (CNVal == -1) + isDec = true; + else if (CNVal >= 0) + Val = CurDAG->getTargetConstant(CNVal, NVT); + else { + isSub = true; + Val = CurDAG->getTargetConstant(-CNVal, NVT); + } + } else if (Val.hasOneUse() && + Val.getOpcode() == ISD::SUB && + X86::isZeroNode(Val.getOperand(0))) { + isSub = true; + Val = Val.getOperand(1); + } + + unsigned Opc = 0; + switch (NVT.getSimpleVT().SimpleTy) { + default: return 0; + case MVT::i8: + if (isInc) + Opc = X86::LOCK_INC8m; + else if (isDec) + Opc = X86::LOCK_DEC8m; + else if (isSub) { + if (isCN) + Opc = X86::LOCK_SUB8mi; + else + Opc = X86::LOCK_SUB8mr; + } else { + if (isCN) + Opc = X86::LOCK_ADD8mi; + else + Opc = X86::LOCK_ADD8mr; + } + break; + case MVT::i16: + if (isInc) + Opc = X86::LOCK_INC16m; + else if (isDec) + Opc = X86::LOCK_DEC16m; + else if (isSub) { + if (isCN) { + if (Predicate_i16immSExt8(Val.getNode())) + Opc = X86::LOCK_SUB16mi8; + else + Opc = X86::LOCK_SUB16mi; + } else + Opc = X86::LOCK_SUB16mr; + } else { + if (isCN) { + if (Predicate_i16immSExt8(Val.getNode())) + Opc = X86::LOCK_ADD16mi8; + else + Opc = X86::LOCK_ADD16mi; + } else + Opc = X86::LOCK_ADD16mr; + } + break; + case MVT::i32: + if (isInc) + Opc = X86::LOCK_INC32m; + else if (isDec) + Opc = X86::LOCK_DEC32m; + else if (isSub) { + if (isCN) { + if (Predicate_i32immSExt8(Val.getNode())) + Opc = X86::LOCK_SUB32mi8; + else + Opc = X86::LOCK_SUB32mi; + } else + Opc = X86::LOCK_SUB32mr; + } else { + if (isCN) { + if (Predicate_i32immSExt8(Val.getNode())) + Opc = X86::LOCK_ADD32mi8; + else + Opc = X86::LOCK_ADD32mi; + } else + Opc = X86::LOCK_ADD32mr; + } + break; + case MVT::i64: + if (isInc) + Opc = X86::LOCK_INC64m; + else if (isDec) + Opc = X86::LOCK_DEC64m; + else if (isSub) { + Opc = X86::LOCK_SUB64mr; + if (isCN) { + if (Predicate_i64immSExt8(Val.getNode())) + Opc = X86::LOCK_SUB64mi8; + else if (Predicate_i64immSExt32(Val.getNode())) + Opc = X86::LOCK_SUB64mi32; + } + } else { + Opc = X86::LOCK_ADD64mr; + if (isCN) { + if (Predicate_i64immSExt8(Val.getNode())) + Opc = X86::LOCK_ADD64mi8; + else if (Predicate_i64immSExt32(Val.getNode())) + Opc = X86::LOCK_ADD64mi32; + } + } + break; + } + + DebugLoc dl = Node->getDebugLoc(); + SDValue Undef = SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF, + dl, NVT), 0); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); + if (isInc || isDec) { + SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain }; + SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6), 0); + cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1); + SDValue RetVals[] = { Undef, Ret }; + return CurDAG->getMergeValues(RetVals, 2, dl).getNode(); + } else { + SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain }; + SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0); + cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1); + SDValue RetVals[] = { Undef, Ret }; + return CurDAG->getMergeValues(RetVals, 2, dl).getNode(); + } +} + +/// HasNoSignedComparisonUses - Test whether the given 
X86ISD::CMP node has +/// any uses which require the SF or OF bits to be accurate. +static bool HasNoSignedComparisonUses(SDNode *N) { + // Examine each user of the node. + for (SDNode::use_iterator UI = N->use_begin(), + UE = N->use_end(); UI != UE; ++UI) { + // Only examine CopyToReg uses. + if (UI->getOpcode() != ISD::CopyToReg) + return false; + // Only examine CopyToReg uses that copy to EFLAGS. + if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != + X86::EFLAGS) + return false; + // Examine each user of the CopyToReg use. + for (SDNode::use_iterator FlagUI = UI->use_begin(), + FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) { + // Only examine the Flag result. + if (FlagUI.getUse().getResNo() != 1) continue; + // Anything unusual: assume conservatively. + if (!FlagUI->isMachineOpcode()) return false; + // Examine the opcode of the user. + switch (FlagUI->getMachineOpcode()) { + // These comparisons don't treat the most significant bit specially. + case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr: + case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr: + case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm: + case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm: + case X86::JA: case X86::JAE: case X86::JB: case X86::JBE: + case X86::JE: case X86::JNE: case X86::JP: case X86::JNP: + case X86::CMOVA16rr: case X86::CMOVA16rm: + case X86::CMOVA32rr: case X86::CMOVA32rm: + case X86::CMOVA64rr: case X86::CMOVA64rm: + case X86::CMOVAE16rr: case X86::CMOVAE16rm: + case X86::CMOVAE32rr: case X86::CMOVAE32rm: + case X86::CMOVAE64rr: case X86::CMOVAE64rm: + case X86::CMOVB16rr: case X86::CMOVB16rm: + case X86::CMOVB32rr: case X86::CMOVB32rm: + case X86::CMOVB64rr: case X86::CMOVB64rm: + case X86::CMOVBE16rr: case X86::CMOVBE16rm: + case X86::CMOVBE32rr: case X86::CMOVBE32rm: + case X86::CMOVBE64rr: case X86::CMOVBE64rm: + case X86::CMOVE16rr: case X86::CMOVE16rm: + case X86::CMOVE32rr: case X86::CMOVE32rm: + case X86::CMOVE64rr: case X86::CMOVE64rm: + case X86::CMOVNE16rr: case X86::CMOVNE16rm: + case X86::CMOVNE32rr: case X86::CMOVNE32rm: + case X86::CMOVNE64rr: case X86::CMOVNE64rm: + case X86::CMOVNP16rr: case X86::CMOVNP16rm: + case X86::CMOVNP32rr: case X86::CMOVNP32rm: + case X86::CMOVNP64rr: case X86::CMOVNP64rm: + case X86::CMOVP16rr: case X86::CMOVP16rm: + case X86::CMOVP32rr: case X86::CMOVP32rm: + case X86::CMOVP64rr: case X86::CMOVP64rm: + continue; + // Anything else: assume conservatively. + default: return false; + } + } + } + return true; } SDNode *X86DAGToDAGISel::Select(SDValue N) { SDNode *Node = N.getNode(); - MVT NVT = Node->getValueType(0); + EVT NVT = Node->getValueType(0); unsigned Opc, MOpc; unsigned Opcode = Node->getOpcode(); DebugLoc dl = Node->getDebugLoc(); #ifndef NDEBUG - DOUT << std::string(Indent, ' ') << "Selecting: "; - DEBUG(Node->dump(CurDAG)); - DOUT << "\n"; + DEBUG({ + errs() << std::string(Indent, ' ') << "Selecting: "; + Node->dump(CurDAG); + errs() << '\n'; + }); Indent += 2; #endif if (Node->isMachineOpcode()) { #ifndef NDEBUG - DOUT << std::string(Indent-2, ' ') << "== "; - DEBUG(Node->dump(CurDAG)); - DOUT << "\n"; + DEBUG({ + errs() << std::string(Indent-2, ' ') << "== "; + Node->dump(CurDAG); + errs() << '\n'; + }); Indent -= 2; #endif return NULL; // Already selected. 
} switch (Opcode) { - default: break; - case X86ISD::GlobalBaseReg: - return getGlobalBaseReg(); - - case X86ISD::ATOMOR64_DAG: - return SelectAtomic64(Node, X86::ATOMOR6432); - case X86ISD::ATOMXOR64_DAG: - return SelectAtomic64(Node, X86::ATOMXOR6432); - case X86ISD::ATOMADD64_DAG: - return SelectAtomic64(Node, X86::ATOMADD6432); - case X86ISD::ATOMSUB64_DAG: - return SelectAtomic64(Node, X86::ATOMSUB6432); - case X86ISD::ATOMNAND64_DAG: - return SelectAtomic64(Node, X86::ATOMNAND6432); - case X86ISD::ATOMAND64_DAG: - return SelectAtomic64(Node, X86::ATOMAND6432); - case X86ISD::ATOMSWAP64_DAG: - return SelectAtomic64(Node, X86::ATOMSWAP6432); - - case ISD::SMUL_LOHI: - case ISD::UMUL_LOHI: { - SDValue N0 = Node->getOperand(0); - SDValue N1 = Node->getOperand(1); - - bool isSigned = Opcode == ISD::SMUL_LOHI; - if (!isSigned) - switch (NVT.getSimpleVT()) { - default: assert(0 && "Unsupported VT!"); - case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break; - case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break; - case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break; - case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break; - } - else - switch (NVT.getSimpleVT()) { - default: assert(0 && "Unsupported VT!"); - case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break; - case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break; - case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break; - case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break; - } + default: break; + case X86ISD::GlobalBaseReg: + return getGlobalBaseReg(); + + case X86ISD::ATOMOR64_DAG: + return SelectAtomic64(Node, X86::ATOMOR6432); + case X86ISD::ATOMXOR64_DAG: + return SelectAtomic64(Node, X86::ATOMXOR6432); + case X86ISD::ATOMADD64_DAG: + return SelectAtomic64(Node, X86::ATOMADD6432); + case X86ISD::ATOMSUB64_DAG: + return SelectAtomic64(Node, X86::ATOMSUB6432); + case X86ISD::ATOMNAND64_DAG: + return SelectAtomic64(Node, X86::ATOMNAND6432); + case X86ISD::ATOMAND64_DAG: + return SelectAtomic64(Node, X86::ATOMAND6432); + case X86ISD::ATOMSWAP64_DAG: + return SelectAtomic64(Node, X86::ATOMSWAP6432); + + case ISD::ATOMIC_LOAD_ADD: { + SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT); + if (RetVal) + return RetVal; + break; + } - unsigned LoReg, HiReg; - switch (NVT.getSimpleVT()) { - default: assert(0 && "Unsupported VT!"); - case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break; - case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break; - case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break; - case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break; + case ISD::SMUL_LOHI: + case ISD::UMUL_LOHI: { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + + bool isSigned = Opcode == ISD::SMUL_LOHI; + if (!isSigned) { + switch (NVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unsupported VT!"); + case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break; + case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break; + case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break; + case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break; } - - SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; - bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); - // multiplty is commmutative - if (!foldedLoad) { - foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); - if (foldedLoad) - std::swap(N0, N1); + } else { + switch (NVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unsupported VT!"); + case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; 
break; + case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break; + case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break; + case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break; } + } - SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, - N0, SDValue()).getValue(1); - - if (foldedLoad) { - SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), - InFlag }; - SDNode *CNode = - CurDAG->getTargetNode(MOpc, dl, MVT::Other, MVT::Flag, Ops, - array_lengthof(Ops)); - InFlag = SDValue(CNode, 1); - // Update the chain. - ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); - } else { - InFlag = - SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Flag, N1, InFlag), 0); - } + unsigned LoReg, HiReg; + switch (NVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unsupported VT!"); + case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break; + case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break; + case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break; + case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break; + } - // Copy the low half of the result, if it is needed. - if (!N.getValue(0).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - LoReg, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(N.getValue(0), Result); + SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; + bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); + // Multiply is commmutative. + if (!foldedLoad) { + foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); + if (foldedLoad) + std::swap(N0, N1); + } + + SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, + N0, SDValue()).getValue(1); + + if (foldedLoad) { + SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), + InFlag }; + SDNode *CNode = + CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops, + array_lengthof(Ops)); + InFlag = SDValue(CNode, 1); + // Update the chain. + ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); + } else { + InFlag = + SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0); + } + + // Copy the low half of the result, if it is needed. + if (!N.getValue(0).use_empty()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + LoReg, NVT, InFlag); + InFlag = Result.getValue(2); + ReplaceUses(N.getValue(0), Result); #ifndef NDEBUG - DOUT << std::string(Indent-2, ' ') << "=> "; - DEBUG(Result.getNode()->dump(CurDAG)); - DOUT << "\n"; + DEBUG({ + errs() << std::string(Indent-2, ' ') << "=> "; + Result.getNode()->dump(CurDAG); + errs() << '\n'; + }); #endif + } + // Copy the high half of the result, if it is needed. + if (!N.getValue(1).use_empty()) { + SDValue Result; + if (HiReg == X86::AH && Subtarget->is64Bit()) { + // Prevent use of AH in a REX instruction by referencing AX instead. + // Shift it down 8 bits. + Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + X86::AX, MVT::i16, InFlag); + InFlag = Result.getValue(2); + Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16, + Result, + CurDAG->getTargetConstant(8, MVT::i8)), 0); + // Then truncate it down to i8. + Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl, + MVT::i8, Result); + } else { + Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + HiReg, NVT, InFlag); + InFlag = Result.getValue(2); } - // Copy the high half of the result, if it is needed. 
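The AH workaround above (whose older form is deleted in the lines that follow) exists because an instruction carrying a REX prefix cannot encode the high-byte registers AH, BH, CH, and DH; copying AX, shifting right by eight, and extracting the low byte recovers the same bits without ever naming AH. The bit manipulation, modeled in plain C++:

    #include <cstdint>
    #include <cstdio>

    // "Copy AX, SHR16ri by 8, take the 8-bit subreg": the old AH value ends
    // up in an ordinary low-byte register that REX instructions can encode.
    uint8_t highByteViaShift(uint16_t AX) {
      uint16_t Tmp = AX;           // CopyFromReg of X86::AX
      Tmp = (uint16_t)(Tmp >> 8);  // SHR16ri, immediate 8
      return (uint8_t)Tmp;         // EXTRACT_SUBREG to the 8-bit subreg
    }

    int main() {
      std::printf("0x%x\n", highByteViaShift(0xBEEF)); // prints 0xbe
      return 0;
    }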
- if (!N.getValue(1).use_empty()) { - SDValue Result; - if (HiReg == X86::AH && Subtarget->is64Bit()) { - // Prevent use of AH in a REX instruction by referencing AX instead. - // Shift it down 8 bits. - Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::AX, MVT::i16, InFlag); - InFlag = Result.getValue(2); - Result = SDValue(CurDAG->getTargetNode(X86::SHR16ri, dl, MVT::i16, - Result, - CurDAG->getTargetConstant(8, MVT::i8)), 0); - // Then truncate it down to i8. - SDValue SRIdx = CurDAG->getTargetConstant(X86::SUBREG_8BIT, MVT::i32); - Result = SDValue(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, dl, - MVT::i8, Result, SRIdx), 0); - } else { - Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - HiReg, NVT, InFlag); - InFlag = Result.getValue(2); - } - ReplaceUses(N.getValue(1), Result); + ReplaceUses(N.getValue(1), Result); #ifndef NDEBUG - DOUT << std::string(Indent-2, ' ') << "=> "; - DEBUG(Result.getNode()->dump(CurDAG)); - DOUT << "\n"; + DEBUG({ + errs() << std::string(Indent-2, ' ') << "=> "; + Result.getNode()->dump(CurDAG); + errs() << '\n'; + }); #endif - } + } #ifndef NDEBUG - Indent -= 2; + Indent -= 2; #endif - return NULL; - } - - case ISD::SDIVREM: - case ISD::UDIVREM: { - SDValue N0 = Node->getOperand(0); - SDValue N1 = Node->getOperand(1); - - bool isSigned = Opcode == ISD::SDIVREM; - if (!isSigned) - switch (NVT.getSimpleVT()) { - default: assert(0 && "Unsupported VT!"); - case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break; - case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break; - case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break; - case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break; - } - else - switch (NVT.getSimpleVT()) { - default: assert(0 && "Unsupported VT!"); - case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break; - case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break; - case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break; - case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break; - } + return NULL; + } - unsigned LoReg, HiReg; - unsigned ClrOpcode, SExtOpcode; - switch (NVT.getSimpleVT()) { - default: assert(0 && "Unsupported VT!"); - case MVT::i8: - LoReg = X86::AL; HiReg = X86::AH; - ClrOpcode = 0; - SExtOpcode = X86::CBW; - break; - case MVT::i16: - LoReg = X86::AX; HiReg = X86::DX; - ClrOpcode = X86::MOV16r0; - SExtOpcode = X86::CWD; - break; - case MVT::i32: - LoReg = X86::EAX; HiReg = X86::EDX; - ClrOpcode = X86::MOV32r0; - SExtOpcode = X86::CDQ; - break; - case MVT::i64: - LoReg = X86::RAX; HiReg = X86::RDX; - ClrOpcode = X86::MOV64r0; - SExtOpcode = X86::CQO; - break; + case ISD::SDIVREM: + case ISD::UDIVREM: { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + + bool isSigned = Opcode == ISD::SDIVREM; + if (!isSigned) { + switch (NVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unsupported VT!"); + case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break; + case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break; + case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break; + case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break; } + } else { + switch (NVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unsupported VT!"); + case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break; + case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break; + case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break; + case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break; + } + } - SDValue Tmp0, Tmp1, Tmp2, Tmp3, 
Tmp4; - bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); - bool signBitIsZero = CurDAG->SignBitIsZero(N0); - - SDValue InFlag; - if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) { - // Special case for div8, just use a move with zero extension to AX to - // clear the upper 8 bits (AH). - SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain; - if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { - SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; - Move = - SDValue(CurDAG->getTargetNode(X86::MOVZX16rm8, dl, MVT::i16, - MVT::Other, Ops, - array_lengthof(Ops)), 0); - Chain = Move.getValue(1); - ReplaceUses(N0.getValue(1), Chain); - } else { - Move = - SDValue(CurDAG->getTargetNode(X86::MOVZX16rr8, dl, MVT::i16, N0),0); - Chain = CurDAG->getEntryNode(); - } - Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, Move, SDValue()); - InFlag = Chain.getValue(1); + unsigned LoReg, HiReg; + unsigned ClrOpcode, SExtOpcode; + switch (NVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unsupported VT!"); + case MVT::i8: + LoReg = X86::AL; HiReg = X86::AH; + ClrOpcode = 0; + SExtOpcode = X86::CBW; + break; + case MVT::i16: + LoReg = X86::AX; HiReg = X86::DX; + ClrOpcode = X86::MOV16r0; + SExtOpcode = X86::CWD; + break; + case MVT::i32: + LoReg = X86::EAX; HiReg = X86::EDX; + ClrOpcode = X86::MOV32r0; + SExtOpcode = X86::CDQ; + break; + case MVT::i64: + LoReg = X86::RAX; HiReg = X86::RDX; + ClrOpcode = ~0U; // NOT USED. + SExtOpcode = X86::CQO; + break; + } + + SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; + bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); + bool signBitIsZero = CurDAG->SignBitIsZero(N0); + + SDValue InFlag; + if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) { + // Special case for div8, just use a move with zero extension to AX to + // clear the upper 8 bits (AH). + SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain; + if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { + SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; + Move = + SDValue(CurDAG->getMachineNode(X86::MOVZX16rm8, dl, MVT::i16, + MVT::Other, Ops, + array_lengthof(Ops)), 0); + Chain = Move.getValue(1); + ReplaceUses(N0.getValue(1), Chain); } else { + Move = + SDValue(CurDAG->getMachineNode(X86::MOVZX16rr8, dl, MVT::i16, N0),0); + Chain = CurDAG->getEntryNode(); + } + Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, Move, SDValue()); + InFlag = Chain.getValue(1); + } else { + InFlag = + CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, + LoReg, N0, SDValue()).getValue(1); + if (isSigned && !signBitIsZero) { + // Sign extend the low part into the high part. InFlag = - CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, - LoReg, N0, SDValue()).getValue(1); - if (isSigned && !signBitIsZero) { - // Sign extend the low part into the high part. - InFlag = - SDValue(CurDAG->getTargetNode(SExtOpcode, dl, MVT::Flag, InFlag),0); + SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Flag, InFlag),0); + } else { + // Zero out the high part, effectively zero extending the input. + SDValue ClrNode; + + if (NVT.getSimpleVT() == MVT::i64) { + ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, MVT::i32), + 0); + // We just did a 32-bit clear, insert it into a 64-bit register to + // clear the whole 64-bit reg. 
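The zeroing sequence that follows this comment leans on the x86-64 rule that a 32-bit register write clears the upper 32 bits, so MOV32r0 (typically emitted as an xorl) is enough to clear the whole 64-bit register; the IMPLICIT_DEF plus INSERT_SUBREG pair merely states that fact in a form the register allocator understands. The architectural rule itself, modeled in C++:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t RDX = UINT64_C(0xFFFFFFFFFFFFFFFF);
      // A 32-bit write zero-extends into the full register, which is why a
      // 32-bit clear suffices before a 64-bit unsigned divide.
      RDX = (uint32_t)0; // models: xorl %edx, %edx
      std::printf("%llu\n", (unsigned long long)RDX); // prints 0
      return 0;
    }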
+ SDValue Undef = + SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF, + dl, MVT::i64), 0); + SDValue SubRegNo = + CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32); + ClrNode = + SDValue(CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl, + MVT::i64, Undef, ClrNode, SubRegNo), + 0); } else { - // Zero out the high part, effectively zero extending the input. - SDValue ClrNode = SDValue(CurDAG->getTargetNode(ClrOpcode, dl, NVT), - 0); - InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, HiReg, - ClrNode, InFlag).getValue(1); + ClrNode = SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0); } - } - if (foldedLoad) { - SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), - InFlag }; - SDNode *CNode = - CurDAG->getTargetNode(MOpc, dl, MVT::Other, MVT::Flag, Ops, - array_lengthof(Ops)); - InFlag = SDValue(CNode, 1); - // Update the chain. - ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); - } else { - InFlag = - SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Flag, N1, InFlag), 0); + InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, HiReg, + ClrNode, InFlag).getValue(1); } + } - // Copy the division (low) result, if it is needed. - if (!N.getValue(0).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - LoReg, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(N.getValue(0), Result); + if (foldedLoad) { + SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), + InFlag }; + SDNode *CNode = + CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops, + array_lengthof(Ops)); + InFlag = SDValue(CNode, 1); + // Update the chain. + ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); + } else { + InFlag = + SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0); + } + + // Copy the division (low) result, if it is needed. + if (!N.getValue(0).use_empty()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + LoReg, NVT, InFlag); + InFlag = Result.getValue(2); + ReplaceUses(N.getValue(0), Result); #ifndef NDEBUG - DOUT << std::string(Indent-2, ' ') << "=> "; - DEBUG(Result.getNode()->dump(CurDAG)); - DOUT << "\n"; + DEBUG({ + errs() << std::string(Indent-2, ' ') << "=> "; + Result.getNode()->dump(CurDAG); + errs() << '\n'; + }); #endif + } + // Copy the remainder (high) result, if it is needed. + if (!N.getValue(1).use_empty()) { + SDValue Result; + if (HiReg == X86::AH && Subtarget->is64Bit()) { + // Prevent use of AH in a REX instruction by referencing AX instead. + // Shift it down 8 bits. + Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + X86::AX, MVT::i16, InFlag); + InFlag = Result.getValue(2); + Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16, + Result, + CurDAG->getTargetConstant(8, MVT::i8)), + 0); + // Then truncate it down to i8. + Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl, + MVT::i8, Result); + } else { + Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + HiReg, NVT, InFlag); + InFlag = Result.getValue(2); } - // Copy the remainder (high) result, if it is needed. - if (!N.getValue(1).use_empty()) { - SDValue Result; - if (HiReg == X86::AH && Subtarget->is64Bit()) { - // Prevent use of AH in a REX instruction by referencing AX instead. - // Shift it down 8 bits. 
- Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::AX, MVT::i16, InFlag); - InFlag = Result.getValue(2); - Result = SDValue(CurDAG->getTargetNode(X86::SHR16ri, dl, MVT::i16, - Result, - CurDAG->getTargetConstant(8, MVT::i8)), - 0); - // Then truncate it down to i8. - SDValue SRIdx = CurDAG->getTargetConstant(X86::SUBREG_8BIT, MVT::i32); - Result = SDValue(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, dl, - MVT::i8, Result, SRIdx), 0); - } else { - Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - HiReg, NVT, InFlag); - InFlag = Result.getValue(2); - } - ReplaceUses(N.getValue(1), Result); + ReplaceUses(N.getValue(1), Result); #ifndef NDEBUG - DOUT << std::string(Indent-2, ' ') << "=> "; - DEBUG(Result.getNode()->dump(CurDAG)); - DOUT << "\n"; + DEBUG({ + errs() << std::string(Indent-2, ' ') << "=> "; + Result.getNode()->dump(CurDAG); + errs() << '\n'; + }); #endif - } + } #ifndef NDEBUG - Indent -= 2; + Indent -= 2; #endif - return NULL; - } + return NULL; + } - case ISD::DECLARE: { - // Handle DECLARE nodes here because the second operand may have been - // wrapped in X86ISD::Wrapper. - SDValue Chain = Node->getOperand(0); - SDValue N1 = Node->getOperand(1); - SDValue N2 = Node->getOperand(2); - FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N1); - - // FIXME: We need to handle this for VLAs. - if (!FINode) { - ReplaceUses(N.getValue(0), Chain); - return NULL; + case X86ISD::CMP: { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + + // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to + // use a smaller encoding. + if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && + N0.getValueType() != MVT::i8 && + X86::isZeroNode(N1)) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getNode()->getOperand(1)); + if (!C) break; + + // For example, convert "testl %eax, $8" to "testb %al, $8" + if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 && + (!(C->getZExtValue() & 0x80) || + HasNoSignedComparisonUses(Node))) { + SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i8); + SDValue Reg = N0.getNode()->getOperand(0); + + // On x86-32, only the ABCD registers have 8-bit subregisters. + if (!Subtarget->is64Bit()) { + TargetRegisterClass *TRC = 0; + switch (N0.getValueType().getSimpleVT().SimpleTy) { + case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break; + case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break; + default: llvm_unreachable("Unsupported TEST operand type!"); + } + SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32); + Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl, + Reg.getValueType(), Reg, RC), 0); + } + + // Extract the l-register. + SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl, + MVT::i8, Reg); + + // Emit a testb. + return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, Subreg, Imm); } - - if (N2.getOpcode() == ISD::ADD && - N2.getOperand(0).getOpcode() == X86ISD::GlobalBaseReg) - N2 = N2.getOperand(1); - - // If N2 is not Wrapper(decriptor) then the llvm.declare is mangled - // somehow, just ignore it. - if (N2.getOpcode() != X86ISD::Wrapper && - N2.getOpcode() != X86ISD::WrapperRIP) { - ReplaceUses(N.getValue(0), Chain); - return NULL; + + // For example, "testl %eax, $2048" to "testb %ah, $8". + if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 && + (!(C->getZExtValue() & 0x8000) || + HasNoSignedComparisonUses(Node))) { + // Shift the immediate right by 8 bits. 
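Stepping back from this rewrite before its remaining cases below: a testl or testq against an immediate whose significant bits fit in a narrower field becomes a test on a subregister, and each narrowing is guarded by HasNoSignedComparisonUses whenever it would change which bit lands in SF. A sketch of the width choice, where SignBitsMatter stands in for !HasNoSignedComparisonUses and the ABCD register-class constraint is omitted:

    #include <cstdint>
    #include <cstdio>

    // Pick the narrowest TEST encoding for (reg & imm), mirroring the masks
    // used in the hunks above and below.
    const char *narrowTest(uint64_t Imm, bool SignBitsMatter) {
      if ((Imm & ~UINT64_C(0xff)) == 0 &&
          !(SignBitsMatter && (Imm & 0x80)))
        return "testb on the low byte";
      if ((Imm & ~UINT64_C(0xff00)) == 0 &&
          !(SignBitsMatter && (Imm & 0x8000)))
        return "testb on the high byte, imm >> 8";
      if ((Imm & ~UINT64_C(0xffff)) == 0 &&
          !(SignBitsMatter && (Imm & 0x8000)))
        return "testw";
      if ((Imm & ~UINT64_C(0xffffffff)) == 0 &&
          !(SignBitsMatter && (Imm & 0x80000000)))
        return "testl";
      return "no narrowing";
    }

    int main() {
      // "testl %eax, $2048" -> "testb %ah, $8", as in the comment above.
      std::printf("%s\n", narrowTest(0x800, false));
      return 0;
    }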
+ SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8, + MVT::i8); + SDValue Reg = N0.getNode()->getOperand(0); + + // Put the value in an ABCD register. + TargetRegisterClass *TRC = 0; + switch (N0.getValueType().getSimpleVT().SimpleTy) { + case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break; + case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break; + case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break; + default: llvm_unreachable("Unsupported TEST operand type!"); + } + SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32); + Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl, + Reg.getValueType(), Reg, RC), 0); + + // Extract the h-register. + SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT_HI, dl, + MVT::i8, Reg); + + // Emit a testb. No special NOREX tricks are needed since there's + // only one GPR operand! + return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, + Subreg, ShiftedImm); } - GlobalAddressSDNode *GVNode = - dyn_cast<GlobalAddressSDNode>(N2.getOperand(0)); - if (GVNode == 0) { - ReplaceUses(N.getValue(0), Chain); - return NULL; + + // For example, "testl %eax, $32776" to "testw %ax, $32776". + if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 && + N0.getValueType() != MVT::i16 && + (!(C->getZExtValue() & 0x8000) || + HasNoSignedComparisonUses(Node))) { + SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i16); + SDValue Reg = N0.getNode()->getOperand(0); + + // Extract the 16-bit subregister. + SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_16BIT, dl, + MVT::i16, Reg); + + // Emit a testw. + return CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, Subreg, Imm); + } + + // For example, "testq %rax, $268468232" to "testl %eax, $268468232". + if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 && + N0.getValueType() == MVT::i64 && + (!(C->getZExtValue() & 0x80000000) || + HasNoSignedComparisonUses(Node))) { + SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32); + SDValue Reg = N0.getNode()->getOperand(0); + + // Extract the 32-bit subregister. + SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_32BIT, dl, + MVT::i32, Reg); + + // Emit a testl. 
+ return CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, Subreg, Imm); } - SDValue Tmp1 = CurDAG->getTargetFrameIndex(FINode->getIndex(), - TLI.getPointerTy()); - SDValue Tmp2 = CurDAG->getTargetGlobalAddress(GVNode->getGlobal(), - TLI.getPointerTy()); - SDValue Ops[] = { Tmp1, Tmp2, Chain }; - return CurDAG->getTargetNode(TargetInstrInfo::DECLARE, dl, - MVT::Other, Ops, - array_lengthof(Ops)); } + break; + } } SDNode *ResNode = SelectCode(N); #ifndef NDEBUG - DOUT << std::string(Indent-2, ' ') << "=> "; - if (ResNode == NULL || ResNode == N.getNode()) - DEBUG(N.getNode()->dump(CurDAG)); - else - DEBUG(ResNode->dump(CurDAG)); - DOUT << "\n"; + DEBUG({ + errs() << std::string(Indent-2, ' ') << "=> "; + if (ResNode == NULL || ResNode == N.getNode()) + N.getNode()->dump(CurDAG); + else + ResNode->dump(CurDAG); + errs() << '\n'; + }); Indent -= 2; #endif diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5a6294a..fadc818 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16,13 +16,16 @@ #include "X86InstrBuilder.h" #include "X86ISelLowering.h" #include "X86TargetMachine.h" +#include "X86TargetObjectFile.h" #include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalAlias.h" #include "llvm/GlobalVariable.h" #include "llvm/Function.h" +#include "llvm/Instructions.h" #include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/VectorExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -33,21 +36,48 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; static cl::opt<bool> DisableMMX("disable-mmx", cl::Hidden, cl::desc("Disable use of MMX")); +// Disable16Bit - 16-bit operations typically have a larger encoding than +// corresponding 32-bit instructions, and 16-bit code is slow on some +// processors. This is an experimental flag to disable 16-bit operations +// (which forces them to be Legalized to 32-bit operations). +static cl::opt<bool> +Disable16Bit("disable-16bit", cl::Hidden, + cl::desc("Disable use of 16-bit instructions")); + // Forward declarations. 
-static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, +static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, SDValue V2); +static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { + switch (TM.getSubtarget<X86Subtarget>().TargetType) { + default: llvm_unreachable("unknown subtarget type"); + case X86Subtarget::isDarwin: + if (TM.getSubtarget<X86Subtarget>().is64Bit()) + return new X8664_MachoTargetObjectFile(); + return new X8632_MachoTargetObjectFile(); + case X86Subtarget::isELF: + return new TargetLoweringObjectFileELF(); + case X86Subtarget::isMingw: + case X86Subtarget::isCygwin: + case X86Subtarget::isWindows: + return new TargetLoweringObjectFileCOFF(); + } + +} + X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) - : TargetLowering(TM) { + : TargetLowering(TM, createTLOF(TM)) { Subtarget = &TM.getSubtarget<X86Subtarget>(); X86ScalarSSEf64 = Subtarget->hasSSE2(); X86ScalarSSEf32 = Subtarget->hasSSE1(); @@ -62,7 +92,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setShiftAmountType(MVT::i8); setBooleanContents(ZeroOrOneBooleanContent); setSchedulingPreference(SchedulingForRegPressure); - setShiftAmountFlavor(Mask); // shl X, 32 == shl X, 0 setStackPointerRegisterToSaveRestore(X86StackPtr); if (Subtarget->isTargetDarwin()) { @@ -80,7 +109,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Set up the register classes. addRegisterClass(MVT::i8, X86::GR8RegisterClass); - addRegisterClass(MVT::i16, X86::GR16RegisterClass); + if (!Disable16Bit) + addRegisterClass(MVT::i16, X86::GR16RegisterClass); addRegisterClass(MVT::i32, X86::GR32RegisterClass); if (Subtarget->is64Bit()) addRegisterClass(MVT::i64, X86::GR64RegisterClass); @@ -89,9 +119,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // We don't accept any truncstore of integer registers. setTruncStoreAction(MVT::i64, MVT::i32, Expand); - setTruncStoreAction(MVT::i64, MVT::i16, Expand); + if (!Disable16Bit) + setTruncStoreAction(MVT::i64, MVT::i16, Expand); setTruncStoreAction(MVT::i64, MVT::i8 , Expand); - setTruncStoreAction(MVT::i32, MVT::i16, Expand); + if (!Disable16Bit) + setTruncStoreAction(MVT::i32, MVT::i16, Expand); setTruncStoreAction(MVT::i32, MVT::i8 , Expand); setTruncStoreAction(MVT::i16, MVT::i8, Expand); @@ -242,8 +274,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::CTTZ , MVT::i8 , Custom); setOperationAction(ISD::CTLZ , MVT::i8 , Custom); setOperationAction(ISD::CTPOP , MVT::i16 , Expand); - setOperationAction(ISD::CTTZ , MVT::i16 , Custom); - setOperationAction(ISD::CTLZ , MVT::i16 , Custom); + if (Disable16Bit) { + setOperationAction(ISD::CTTZ , MVT::i16 , Expand); + setOperationAction(ISD::CTLZ , MVT::i16 , Expand); + } else { + setOperationAction(ISD::CTTZ , MVT::i16 , Custom); + setOperationAction(ISD::CTLZ , MVT::i16 , Custom); + } setOperationAction(ISD::CTPOP , MVT::i32 , Expand); setOperationAction(ISD::CTTZ , MVT::i32 , Custom); setOperationAction(ISD::CTLZ , MVT::i32 , Custom); @@ -257,16 +294,22 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::BSWAP , MVT::i16 , Expand); // These should be promoted to a larger select which is supported. - setOperationAction(ISD::SELECT , MVT::i1 , Promote); - setOperationAction(ISD::SELECT , MVT::i8 , Promote); + setOperationAction(ISD::SELECT , MVT::i1 , Promote); // X86 wants to expand cmov itself. 
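The Disable16Bit flag threads through the constructor below: with the flag set, i16 is never registered as a legal type, and each i16 operation that would otherwise be custom-lowered is marked Expand so the legalizer rewrites it in terms of i32. A rough model of that decision, with illustrative names (LegalizeAction stands in for LLVM's enum; chooseI16Action is not a real hook):

    enum LegalizeAction { Legal, Promote, Expand, Custom };

    // Under -disable-16bit, i16 SELECT/SETCC/CTTZ/CTLZ fall back to Expand, so
    // the legalizer widens them to i32 operations; otherwise the target
    // custom-lowers them (SELECT to CMOV, CTTZ/CTLZ to BSF/BSR patterns).
    static LegalizeAction chooseI16Action(bool Disable16Bit) {
      return Disable16Bit ? Expand : Custom;
    }

Since the flag is an ordinary hidden cl::opt, it can be exercised from the command line as llc -disable-16bit on any input.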
- setOperationAction(ISD::SELECT , MVT::i16 , Custom); + setOperationAction(ISD::SELECT , MVT::i8 , Custom); + if (Disable16Bit) + setOperationAction(ISD::SELECT , MVT::i16 , Expand); + else + setOperationAction(ISD::SELECT , MVT::i16 , Custom); setOperationAction(ISD::SELECT , MVT::i32 , Custom); setOperationAction(ISD::SELECT , MVT::f32 , Custom); setOperationAction(ISD::SELECT , MVT::f64 , Custom); setOperationAction(ISD::SELECT , MVT::f80 , Custom); setOperationAction(ISD::SETCC , MVT::i8 , Custom); - setOperationAction(ISD::SETCC , MVT::i16 , Custom); + if (Disable16Bit) + setOperationAction(ISD::SETCC , MVT::i16 , Expand); + else + setOperationAction(ISD::SETCC , MVT::i16 , Custom); setOperationAction(ISD::SETCC , MVT::i32 , Custom); setOperationAction(ISD::SETCC , MVT::f32 , Custom); setOperationAction(ISD::SETCC , MVT::f64 , Custom); @@ -275,8 +318,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SELECT , MVT::i64 , Custom); setOperationAction(ISD::SETCC , MVT::i64 , Custom); } - // X86 ret instruction may pop stack. - setOperationAction(ISD::RET , MVT::Other, Custom); setOperationAction(ISD::EH_RETURN , MVT::Other, Custom); // Darwin ABI issue. @@ -330,7 +371,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom); } - // Use the default ISD::DBG_STOPPOINT, ISD::DECLARE expansion. + // Use the default ISD::DBG_STOPPOINT. setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); // FIXME - use subtarget debug flags if (!Subtarget->isTargetDarwin() && @@ -637,6 +678,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SELECT, MVT::v4i16, Promote); setOperationAction(ISD::SELECT, MVT::v2i32, Promote); setOperationAction(ISD::SELECT, MVT::v1i64, Custom); + setOperationAction(ISD::VSETCC, MVT::v8i8, Custom); + setOperationAction(ISD::VSETCC, MVT::v4i16, Custom); + setOperationAction(ISD::VSETCC, MVT::v2i32, Custom); } if (!UseSoftFloat && Subtarget->hasSSE1()) { @@ -696,16 +740,19 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Custom lower build_vector, vector_shuffle, and extract_vector_elt. for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; ++i) { - MVT VT = (MVT::SimpleValueType)i; + EVT VT = (MVT::SimpleValueType)i; // Do not attempt to custom lower non-power-of-2 vectors if (!isPowerOf2_32(VT.getVectorNumElements())) continue; // Do not attempt to custom lower non-128-bit vectors if (!VT.is128BitVector()) continue; - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::BUILD_VECTOR, + VT.getSimpleVT().SimpleTy, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, + VT.getSimpleVT().SimpleTy, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, + VT.getSimpleVT().SimpleTy, Custom); } setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); @@ -722,22 +769,23 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64. 
for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; i++) { - MVT VT = (MVT::SimpleValueType)i; + MVT::SimpleValueType SVT = (MVT::SimpleValueType)i; + EVT VT = SVT; // Do not attempt to promote non-128-bit vectors if (!VT.is128BitVector()) { continue; } - setOperationAction(ISD::AND, VT, Promote); - AddPromotedToType (ISD::AND, VT, MVT::v2i64); - setOperationAction(ISD::OR, VT, Promote); - AddPromotedToType (ISD::OR, VT, MVT::v2i64); - setOperationAction(ISD::XOR, VT, Promote); - AddPromotedToType (ISD::XOR, VT, MVT::v2i64); - setOperationAction(ISD::LOAD, VT, Promote); - AddPromotedToType (ISD::LOAD, VT, MVT::v2i64); - setOperationAction(ISD::SELECT, VT, Promote); - AddPromotedToType (ISD::SELECT, VT, MVT::v2i64); + setOperationAction(ISD::AND, SVT, Promote); + AddPromotedToType (ISD::AND, SVT, MVT::v2i64); + setOperationAction(ISD::OR, SVT, Promote); + AddPromotedToType (ISD::OR, SVT, MVT::v2i64); + setOperationAction(ISD::XOR, SVT, Promote); + AddPromotedToType (ISD::XOR, SVT, MVT::v2i64); + setOperationAction(ISD::LOAD, SVT, Promote); + AddPromotedToType (ISD::LOAD, SVT, MVT::v2i64); + setOperationAction(ISD::SELECT, SVT, Promote); + AddPromotedToType (ISD::SELECT, SVT, MVT::v2i64); } setTruncStoreAction(MVT::f64, MVT::f32, Expand); @@ -847,7 +895,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Custom lower build_vector, vector_shuffle, and extract_vector_elt. // This includes 256-bit vectors for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; ++i) { - MVT VT = (MVT::SimpleValueType)i; + EVT VT = (MVT::SimpleValueType)i; // Do not attempt to custom lower non-power-of-2 vectors if (!isPowerOf2_32(VT.getVectorNumElements())) @@ -861,7 +909,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) if (Subtarget->is64Bit()) { setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i64, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i64, Custom); - } + } #endif #if 0 @@ -871,7 +919,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Promote v32i8, v16i16, v8i32 load, select, and, or, xor to v4i64. // Including 256-bit vectors for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; i++) { - MVT VT = (MVT::SimpleValueType)i; + EVT VT = (MVT::SimpleValueType)i; if (!VT.is256BitVector()) { continue; @@ -933,13 +981,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores maxStoresPerMemcpy = 16; // For @llvm.memcpy -> sequence of stores maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores - allowUnalignedMemoryAccesses = true; // x86 supports it! setPrefLoopAlignment(16); benefitFromCodePlacementOpt = true; } -MVT X86TargetLowering::getSetCCResultType(MVT VT) const { +MVT::SimpleValueType X86TargetLowering::getSetCCResultType(EVT VT) const { return MVT::i8; } @@ -993,7 +1040,7 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const { /// and store operations as a result of memset, memcpy, and memmove /// lowering. It returns MVT::iAny if SelectionDAG should be responsible for /// determining it. 
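The Promote/AddPromotedToType pairs above are sound because bitwise AND/OR/XOR, whole-register loads, and bit-selects are lane-width agnostic: the same 128 bits combined as two i64 lanes or as sixteen i8 lanes produce identical bytes, so all of these types can share the v2i64 patterns. A scalar demonstration of the invariant:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      uint32_t A[4] = {0xDEADBEEF, 0x12345678, 0, 0xFFFFFFFF};
      uint32_t B[4] = {0x0F0F0F0F, 0xFF00FF00, 7, 0x80000001};

      // AND the same 128 bits as two 64-bit lanes...
      uint64_t A64[2], B64[2], R64[2];
      std::memcpy(A64, A, 16);
      std::memcpy(B64, B, 16);
      R64[0] = A64[0] & B64[0];
      R64[1] = A64[1] & B64[1];

      // ...and confirm the result matches four independent 32-bit lane ANDs.
      uint32_t R32[4];
      std::memcpy(R32, R64, 16);
      for (int i = 0; i < 4; ++i)
        assert(R32[i] == (A[i] & B[i]));
      return 0;
    }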
-MVT +EVT X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align, bool isSrcConst, bool isSrcStr, SelectionDAG &DAG) const { @@ -1019,7 +1066,7 @@ SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const { if (usesGlobalOffsetTable()) return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy()); - if (!Subtarget->isPICStyleRIPRel()) + if (!Subtarget->is64Bit()) // This doesn't have DebugLoc associated with it, but is not really the // same as a Register. return DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc::getUnknownLoc(), @@ -1029,7 +1076,7 @@ SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table, /// getFunctionAlignment - Return the Log2 alignment of this function. unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const { - return F->hasFnAttr(Attribute::OptimizeForSize) ? 1 : 4; + return F->hasFnAttr(Attribute::OptimizeForSize) ? 0 : 4; } //===----------------------------------------------------------------------===// @@ -1038,16 +1085,16 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const { #include "X86GenCallingConv.inc" -/// LowerRET - Lower an ISD::RET node. -SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) { - DebugLoc dl = Op.getDebugLoc(); - assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args"); +SDValue +X86TargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + DebugLoc dl, SelectionDAG &DAG) { SmallVector<CCValAssign, 16> RVLocs; - unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); - bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); - CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs); - CCInfo.AnalyzeReturn(Op.getNode(), RetCC_X86); + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + RVLocs, *DAG.getContext()); + CCInfo.AnalyzeReturn(Outs, RetCC_X86); // If this is the first return lowered for this function, add the regs to the // liveout set for the function. @@ -1056,49 +1103,19 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) { if (RVLocs[i].isRegLoc()) DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); } - SDValue Chain = Op.getOperand(0); - - // Handle tail call return. - Chain = GetPossiblePreceedingTailCall(Chain, X86ISD::TAILCALL); - if (Chain.getOpcode() == X86ISD::TAILCALL) { - SDValue TailCall = Chain; - SDValue TargetAddress = TailCall.getOperand(1); - SDValue StackAdjustment = TailCall.getOperand(2); - assert(((TargetAddress.getOpcode() == ISD::Register && - (cast<RegisterSDNode>(TargetAddress)->getReg() == X86::EAX || - cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R11)) || - TargetAddress.getOpcode() == ISD::TargetExternalSymbol || - TargetAddress.getOpcode() == ISD::TargetGlobalAddress) && - "Expecting an global address, external symbol, or register"); - assert(StackAdjustment.getOpcode() == ISD::Constant && - "Expecting a const value"); - - SmallVector<SDValue,8> Operands; - Operands.push_back(Chain.getOperand(0)); - Operands.push_back(TargetAddress); - Operands.push_back(StackAdjustment); - // Copy registers used by the call. Last operand is a flag so it is not - // copied. - for (unsigned i=3; i < TailCall.getNumOperands()-1; i++) { - Operands.push_back(Chain.getOperand(i)); - } - return DAG.getNode(X86ISD::TC_RETURN, dl, MVT::Other, &Operands[0], - Operands.size()); - } - // Regular return. 
SDValue Flag; SmallVector<SDValue, 6> RetOps; RetOps.push_back(Chain); // Operand #0 = Chain (updated below) // Operand #1 = Bytes To Pop - RetOps.push_back(DAG.getConstant(getBytesToPopOnReturn(), MVT::i16)); + RetOps.push_back(DAG.getTargetConstant(getBytesToPopOnReturn(), MVT::i16)); // Copy the result values into the output registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - SDValue ValToCopy = Op.getOperand(i*2+1); + SDValue ValToCopy = Outs[i].Val; // Returns in ST0/ST1 are handled specially: these are pushed as operands to // the RET instruction and handled by the FP Stackifier. @@ -1116,7 +1133,7 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) { // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64 // which is returned in RAX / RDX. if (Subtarget->is64Bit()) { - MVT ValVT = ValToCopy.getValueType(); + EVT ValVT = ValToCopy.getValueType(); if (ValVT.isVector() && ValVT.getSizeInBits() == 64) { ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy); if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) @@ -1145,6 +1162,9 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) { Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag); Flag = Chain.getValue(1); + + // RAX now acts like a return value. + MF.getRegInfo().addLiveOut(X86::RAX); } RetOps[0] = Chain; // Update chain. @@ -1157,36 +1177,32 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) { MVT::Other, &RetOps[0], RetOps.size()); } +/// LowerCallResult - Lower the result values of a call into the +/// appropriate copies out of appropriate physical registers. +/// +SDValue +X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) { -/// LowerCallResult - Lower the result values of an ISD::CALL into the -/// appropriate copies out of appropriate physical registers. This assumes that -/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call -/// being lowered. The returns a SDNode with the same number of values as the -/// ISD::CALL. -SDNode *X86TargetLowering:: -LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall, - unsigned CallingConv, SelectionDAG &DAG) { - - DebugLoc dl = TheCall->getDebugLoc(); // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; - bool isVarArg = TheCall->isVarArg(); bool Is64Bit = Subtarget->is64Bit(); - CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs); - CCInfo.AnalyzeCallResult(TheCall, RetCC_X86); - - SmallVector<SDValue, 8> ResultVals; + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + RVLocs, *DAG.getContext()); + CCInfo.AnalyzeCallResult(Ins, RetCC_X86); // Copy all of the result registers out of their specified physreg. 
   for (unsigned i = 0; i != RVLocs.size(); ++i) {
     CCValAssign &VA = RVLocs[i];
-    MVT CopyVT = VA.getValVT();
+    EVT CopyVT = VA.getValVT();
 
     // If this is x86-64, and we disabled SSE, we can't return FP values
     if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
-        ((Is64Bit || TheCall->isInreg()) && !Subtarget->hasSSE1())) {
-      cerr << "SSE register return with SSE disabled\n";
-      exit(1);
+        ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
+      llvm_report_error("SSE register return with SSE disabled");
     }
 
     // If this is a call to a function that returns an fp value on the floating
@@ -1206,7 +1222,7 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
                                    MVT::v2i64, InFlag).getValue(1);
         Val = Chain.getValue(0);
         Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
-                          Val, DAG.getConstant(0, MVT::i64));
+                          Val, DAG.getConstant(0, MVT::i64));
       } else {
         Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
                                    MVT::i64, InFlag).getValue(1);
@@ -1228,13 +1244,10 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
                         DAG.getIntPtrConstant(1));
     }
 
-    ResultVals.push_back(Val);
+    InVals.push_back(Val);
   }
 
-  // Merge everything together with a MERGE_VALUES node.
-  ResultVals.push_back(Chain);
-  return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
-                     &ResultVals[0], ResultVals.size()).getNode();
+  return Chain;
 }
 
 
@@ -1248,30 +1261,28 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
 //  For info on fast calling convention see Fast Calling Convention (tail call)
 //  implementation LowerX86_32FastCCCallTo.
 
-/// CallIsStructReturn - Determines whether a CALL node uses struct return
+/// CallIsStructReturn - Determines whether a call uses struct return
 /// semantics.
-static bool CallIsStructReturn(CallSDNode *TheCall) {
-  unsigned NumOps = TheCall->getNumArgs();
-  if (!NumOps)
+static bool CallIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
+  if (Outs.empty())
     return false;
-  return TheCall->getArgFlags(0).isSRet();
+  return Outs[0].Flags.isSRet();
 }
 
-/// ArgsAreStructReturn - Determines whether a FORMAL_ARGUMENTS node uses struct
+/// ArgsAreStructReturn - Determines whether a function uses struct
 /// return semantics.
-static bool ArgsAreStructReturn(SDValue Op) {
-  unsigned NumArgs = Op.getNode()->getNumValues() - 1;
-  if (!NumArgs)
+static bool
+ArgsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
+  if (Ins.empty())
     return false;
-  return cast<ARG_FLAGSSDNode>(Op.getOperand(3))->getArgFlags().isSRet();
+  return Ins[0].Flags.isSRet();
 }
 
-/// IsCalleePop - Determines whether a CALL or FORMAL_ARGUMENTS node requires
-/// the callee to pop its own arguments. Callee pop is necessary to support tail
-/// calls.
-bool X86TargetLowering::IsCalleePop(bool IsVarArg, unsigned CallingConv) {
+/// IsCalleePop - Determines whether the callee is required to pop its
+/// own arguments. Callee pop is necessary to support tail calls.
+bool X86TargetLowering::IsCalleePop(bool IsVarArg, CallingConv::ID CallingConv){
   if (IsVarArg)
     return false;
 
@@ -1289,7 +1300,7 @@ bool X86TargetLowering::IsCalleePop(bool IsVarArg, unsigned CallingConv) {
 
 /// CCAssignFnForNode - Selects the correct CCAssignFn for the
 /// given CallingConvention value.
-CCAssignFn *X86TargetLowering::CCAssignFnForNode(unsigned CC) const { +CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const { if (Subtarget->is64Bit()) { if (Subtarget->isTargetWin64()) return CC_X86_Win64_C; @@ -1305,36 +1316,18 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(unsigned CC) const { return CC_X86_32_C; } -/// NameDecorationForFORMAL_ARGUMENTS - Selects the appropriate decoration to -/// apply to a MachineFunction containing a given FORMAL_ARGUMENTS node. +/// NameDecorationForCallConv - Selects the appropriate decoration to +/// apply to a MachineFunction containing a given calling convention. NameDecorationStyle -X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDValue Op) { - unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); - if (CC == CallingConv::X86_FastCall) +X86TargetLowering::NameDecorationForCallConv(CallingConv::ID CallConv) { + if (CallConv == CallingConv::X86_FastCall) return FastCall; - else if (CC == CallingConv::X86_StdCall) + else if (CallConv == CallingConv::X86_StdCall) return StdCall; return None; } -/// CallRequiresGOTInRegister - Check whether the call requires the GOT pointer -/// in a register before calling. -bool X86TargetLowering::CallRequiresGOTPtrInReg(bool Is64Bit, bool IsTailCall) { - return !IsTailCall && !Is64Bit && - getTargetMachine().getRelocationModel() == Reloc::PIC_ && - Subtarget->isPICStyleGOT(); -} - -/// CallRequiresFnAddressInReg - Check whether the call requires the function -/// address to be loaded in a register. -bool -X86TargetLowering::CallRequiresFnAddressInReg(bool Is64Bit, bool IsTailCall) { - return !Is64Bit && IsTailCall && - getTargetMachine().getRelocationModel() == Reloc::PIC_ && - Subtarget->isPICStyleGOT(); -} - /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified /// by "Src" to address "Dst" with size and alignment information specified by /// the specific parameter attribute. The copy will be passed as a byval @@ -1348,35 +1341,52 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, /*AlwaysInline=*/true, NULL, 0, NULL, 0); } -SDValue X86TargetLowering::LowerMemArgument(SDValue Op, SelectionDAG &DAG, - const CCValAssign &VA, - MachineFrameInfo *MFI, - unsigned CC, - SDValue Root, unsigned i) { +SDValue +X86TargetLowering::LowerMemArgument(SDValue Chain, + CallingConv::ID CallConv, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + const CCValAssign &VA, + MachineFrameInfo *MFI, + unsigned i) { + // Create the nodes corresponding to a load from this parameter slot. - ISD::ArgFlagsTy Flags = - cast<ARG_FLAGSSDNode>(Op.getOperand(3 + i))->getArgFlags(); - bool AlwaysUseMutable = (CC==CallingConv::Fast) && PerformTailCallOpt; + ISD::ArgFlagsTy Flags = Ins[i].Flags; + bool AlwaysUseMutable = (CallConv==CallingConv::Fast) && PerformTailCallOpt; bool isImmutable = !AlwaysUseMutable && !Flags.isByVal(); + EVT ValVT; + + // If value is passed by pointer we have address passed instead of the value + // itself. + if (VA.getLocInfo() == CCValAssign::Indirect) + ValVT = VA.getLocVT(); + else + ValVT = VA.getValVT(); // FIXME: For now, all byval parameter objects are marked mutable. This can be // changed with more analysis. // In case of tail call optimization mark all arguments mutable. Since they // could be overwritten by lowering of arguments in case of a tail call. 
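As an aside on IsCalleePop above: the rule it encodes is that on 32-bit x86, stdcall and fastcall callees clean up their own stack arguments (via RET imm16), and fastcc does so when tail-call optimization is enabled so that a tail caller and its callee agree on who owns the stack; varargs calls are always caller-cleanup because only the caller knows how many arguments it pushed. A minimal restatement with stand-in names (the CC enum mirrors llvm::CallingConv; the function itself is illustrative):

    enum CC { CC_C, CC_Fast, CC_X86_StdCall, CC_X86_FastCall };

    static bool calleePopsArguments(bool IsVarArg, CC Conv, bool TailCallOpt) {
      if (IsVarArg)
        return false;                 // caller alone knows the argument count
      switch (Conv) {
      case CC_X86_StdCall:
      case CC_X86_FastCall:
        return true;                  // RET imm16 pops the arguments
      case CC_Fast:
        return TailCallOpt;           // needed so tail-call frames balance
      default:
        return false;
      }
    }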
- int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8, + int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, VA.getLocMemOffset(), isImmutable); SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); if (Flags.isByVal()) return FIN; - return DAG.getLoad(VA.getValVT(), Op.getDebugLoc(), Root, FIN, + return DAG.getLoad(ValVT, dl, Chain, FIN, PseudoSourceValue::getFixedStack(FI), 0); } SDValue -X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, + SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) { + MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); - DebugLoc dl = Op.getDebugLoc(); const Function* Fn = MF.getFunction(); if (Fn->hasExternalLinkage() && @@ -1385,25 +1395,23 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) { FuncInfo->setForceFramePointer(true); // Decorate the function name. - FuncInfo->setDecorationStyle(NameDecorationForFORMAL_ARGUMENTS(Op)); + FuncInfo->setDecorationStyle(NameDecorationForCallConv(CallConv)); MachineFrameInfo *MFI = MF.getFrameInfo(); - SDValue Root = Op.getOperand(0); - bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0; - unsigned CC = MF.getFunction()->getCallingConv(); bool Is64Bit = Subtarget->is64Bit(); bool IsWin64 = Subtarget->isTargetWin64(); - assert(!(isVarArg && CC == CallingConv::Fast) && + assert(!(isVarArg && CallConv == CallingConv::Fast) && "Var args not supported with calling convention fastcc"); // Assign locations to all of the incoming arguments. SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); - CCInfo.AnalyzeFormalArguments(Op.getNode(), CCAssignFnForNode(CC)); + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + ArgLocs, *DAG.getContext()); + CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv)); - SmallVector<SDValue, 8> ArgValues; unsigned LastVal = ~0U; + SDValue ArgValue; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; // TODO: If an arg is passed in two places (e.g. reg and stack), skip later @@ -1413,7 +1421,7 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) { LastVal = VA.getValNo(); if (VA.isRegLoc()) { - MVT RegVT = VA.getLocVT(); + EVT RegVT = VA.getLocVT(); TargetRegisterClass *RC = NULL; if (RegVT == MVT::i32) RC = X86::GR32RegisterClass; @@ -1425,27 +1433,13 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) { RC = X86::FR64RegisterClass; else if (RegVT.isVector() && RegVT.getSizeInBits() == 128) RC = X86::VR128RegisterClass; - else if (RegVT.isVector()) { - assert(RegVT.getSizeInBits() == 64); - if (!Is64Bit) - RC = X86::VR64RegisterClass; // MMX values are passed in MMXs. - else { - // Darwin calling convention passes MMX values in either GPRs or - // XMMs in x86-64. Other targets pass them in memory. - if (RegVT != MVT::v1i64 && Subtarget->hasSSE2()) { - RC = X86::VR128RegisterClass; // MMX values are passed in XMMs. - RegVT = MVT::v2i64; - } else { - RC = X86::GR64RegisterClass; // v1i64 values are passed in GPRs. 
- RegVT = MVT::i64; - } - } - } else { - assert(0 && "Unknown argument type!"); - } + else if (RegVT.isVector() && RegVT.getSizeInBits() == 64) + RC = X86::VR64RegisterClass; + else + llvm_unreachable("Unknown argument type!"); - unsigned Reg = DAG.getMachineFunction().addLiveIn(VA.getLocReg(), RC); - SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT); + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); + ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); // If this is an 8 or 16-bit value, it is really passed promoted to 32 // bits. Insert an assert[sz]ext to capture this, then truncate to the @@ -1456,52 +1450,53 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) { else if (VA.getLocInfo() == CCValAssign::ZExt) ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, DAG.getValueType(VA.getValVT())); + else if (VA.getLocInfo() == CCValAssign::BCvt) + ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue); - if (VA.getLocInfo() != CCValAssign::Full) - ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); - - // Handle MMX values passed in GPRs. - if (Is64Bit && RegVT != VA.getLocVT()) { - if (RegVT.getSizeInBits() == 64 && RC == X86::GR64RegisterClass) - ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), ArgValue); - else if (RC == X86::VR128RegisterClass) { + if (VA.isExtInLoc()) { + // Handle MMX values passed in XMM regs. + if (RegVT.isVector()) { ArgValue = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, ArgValue, DAG.getConstant(0, MVT::i64)); - ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), ArgValue); - } + ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue); + } else + ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); } - - ArgValues.push_back(ArgValue); } else { assert(VA.isMemLoc()); - ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, CC, Root, i)); + ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i); } + + // If value is passed via pointer - do a load. + if (VA.getLocInfo() == CCValAssign::Indirect) + ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, NULL, 0); + + InVals.push_back(ArgValue); } // The x86-64 ABI for returning structs by value requires that we copy // the sret argument into %rax for the return. Save the argument into // a virtual register so that we can access it from the return points. - if (Is64Bit && DAG.getMachineFunction().getFunction()->hasStructRetAttr()) { - MachineFunction &MF = DAG.getMachineFunction(); + if (Is64Bit && MF.getFunction()->hasStructRetAttr()) { X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); unsigned Reg = FuncInfo->getSRetReturnReg(); if (!Reg) { Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64)); FuncInfo->setSRetReturnReg(Reg); } - SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, ArgValues[0]); - Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Root); + SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain); } unsigned StackSize = CCInfo.getNextStackOffset(); // align stack specially for tail calls - if (PerformTailCallOpt && CC == CallingConv::Fast) + if (PerformTailCallOpt && CallConv == CallingConv::Fast) StackSize = GetAlignedArgumentStackSize(StackSize, DAG); // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. 
if (isVarArg) { - if (Is64Bit || CC != CallingConv::X86_FastCall) { + if (Is64Bit || CallConv != CallingConv::X86_FastCall) { VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize); } if (Is64Bit) { @@ -1558,75 +1553,81 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) { // Store the integer parameter registers. SmallVector<SDValue, 8> MemOps; SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); - SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN, - DAG.getIntPtrConstant(VarArgsGPOffset)); + unsigned Offset = VarArgsGPOffset; for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) { + SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN, + DAG.getIntPtrConstant(Offset)); unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs], X86::GR64RegisterClass); - SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::i64); + SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0); + PseudoSourceValue::getFixedStack(RegSaveFrameIndex), + Offset); MemOps.push_back(Store); - FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, - DAG.getIntPtrConstant(8)); + Offset += 8; } - // Now store the XMM (fp + vector) parameter registers. - FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN, - DAG.getIntPtrConstant(VarArgsFPOffset)); - for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) { - unsigned VReg = MF.addLiveIn(XMMArgRegs[NumXMMRegs], - X86::VR128RegisterClass); - SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::v4f32); - SDValue Store = - DAG.getStore(Val.getValue(1), dl, Val, FIN, - PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0); - MemOps.push_back(Store); - FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, - DAG.getIntPtrConstant(16)); + if (TotalNumXMMRegs != 0 && NumXMMRegs != TotalNumXMMRegs) { + // Now store the XMM (fp + vector) parameter registers. + SmallVector<SDValue, 11> SaveXMMOps; + SaveXMMOps.push_back(Chain); + + unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass); + SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8); + SaveXMMOps.push_back(ALVal); + + SaveXMMOps.push_back(DAG.getIntPtrConstant(RegSaveFrameIndex)); + SaveXMMOps.push_back(DAG.getIntPtrConstant(VarArgsFPOffset)); + + for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) { + unsigned VReg = MF.addLiveIn(XMMArgRegs[NumXMMRegs], + X86::VR128RegisterClass); + SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32); + SaveXMMOps.push_back(Val); + } + MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl, + MVT::Other, + &SaveXMMOps[0], SaveXMMOps.size())); } + if (!MemOps.empty()) - Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOps[0], MemOps.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOps[0], MemOps.size()); } } - ArgValues.push_back(Root); - // Some CCs need callee pop. - if (IsCalleePop(isVarArg, CC)) { + if (IsCalleePop(isVarArg, CallConv)) { BytesToPopOnReturn = StackSize; // Callee pops everything. BytesCallerReserves = 0; } else { BytesToPopOnReturn = 0; // Callee pops nothing. // If this is an sret function, the return should pop the hidden pointer. - if (!Is64Bit && CC != CallingConv::Fast && ArgsAreStructReturn(Op)) + if (!Is64Bit && CallConv != CallingConv::Fast && ArgsAreStructReturn(Ins)) BytesToPopOnReturn = 4; BytesCallerReserves = StackSize; } if (!Is64Bit) { RegSaveFrameIndex = 0xAAAAAAA; // RegSaveFrameIndex is X86-64 only. 
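The va_start lowering in this hunk materializes the SysV x86-64 register save area: six integer registers stored at 8-byte strides, then eight XMM registers at 16-byte strides, with AL carrying the number of vector registers the caller actually used so a callee can skip the XMM stores entirely; funneling those stores through the new VASTART_SAVE_XMM_REGS node is what makes that skip possible. A sketch of the layout, with sizes per the SysV ABI:

    #include <cstdint>

    struct RegSaveArea {
      uint64_t gp[6];        // rdi, rsi, rdx, rcx, r8, r9  (offsets 0..40)
      uint8_t  xmm[8][16];   // xmm0..xmm7                  (offsets 48..160)
    };
    static_assert(sizeof(RegSaveArea) == 176,
                  "48 bytes of GPR slots plus 128 bytes of XMM slots");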
- if (CC == CallingConv::X86_FastCall) + if (CallConv == CallingConv::X86_FastCall) VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs. } FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn); - // Return the new list of results. - return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(), - &ArgValues[0], ArgValues.size()).getValue(Op.getResNo()); + return Chain; } SDValue -X86TargetLowering::LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG, - const SDValue &StackPtr, +X86TargetLowering::LowerMemOpCallTo(SDValue Chain, + SDValue StackPtr, SDValue Arg, + DebugLoc dl, SelectionDAG &DAG, const CCValAssign &VA, - SDValue Chain, - SDValue Arg, ISD::ArgFlagsTy Flags) { - DebugLoc dl = TheCall->getDebugLoc(); - unsigned LocMemOffset = VA.getLocMemOffset(); + ISD::ArgFlagsTy Flags) { + const unsigned FirstStackArgOffset = (Subtarget->isTargetWin64() ? 32 : 0); + unsigned LocMemOffset = FirstStackArgOffset + VA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); if (Flags.isByVal()) { @@ -1649,7 +1650,7 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG, if (!IsTailCall || FPDiff==0) return Chain; // Adjust the Return address stack slot. - MVT VT = getPointerTy(); + EVT VT = getPointerTy(); OutRetAddr = getReturnAddressFrameIndex(DAG); // Load the "old" Return address. @@ -1669,41 +1670,45 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF, int SlotSize = Is64Bit ? 8 : 4; int NewReturnAddrFI = MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize); - MVT VT = Is64Bit ? MVT::i64 : MVT::i32; + EVT VT = Is64Bit ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT); Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0); return Chain; } -SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { +SDValue +X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) { + MachineFunction &MF = DAG.getMachineFunction(); - CallSDNode *TheCall = cast<CallSDNode>(Op.getNode()); - SDValue Chain = TheCall->getChain(); - unsigned CC = TheCall->getCallingConv(); - bool isVarArg = TheCall->isVarArg(); - bool IsTailCall = TheCall->isTailCall() && - CC == CallingConv::Fast && PerformTailCallOpt; - SDValue Callee = TheCall->getCallee(); bool Is64Bit = Subtarget->is64Bit(); - bool IsStructRet = CallIsStructReturn(TheCall); - DebugLoc dl = TheCall->getDebugLoc(); + bool IsStructRet = CallIsStructReturn(Outs); - assert(!(isVarArg && CC == CallingConv::Fast) && + assert((!isTailCall || + (CallConv == CallingConv::Fast && PerformTailCallOpt)) && + "IsEligibleForTailCallOptimization missed a case!"); + assert(!(isVarArg && CallConv == CallingConv::Fast) && "Var args not supported with calling convention fastcc"); // Analyze operands of the call, assigning locations to each operand. 
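One detail worth calling out in LowerMemOpCallTo above: on Win64 every stack-argument offset is biased by 32 bytes because the Microsoft x64 ABI makes the caller reserve a four-slot "home" area for RCX, RDX, R8 and R9 at the bottom of the outgoing frame, so the first argument passed in memory (the fifth one) lives at [rsp + 32] rather than [rsp]. As a quick model:

    // Mirrors the FirstStackArgOffset computation above; illustrative only.
    static unsigned stackArgOffset(bool IsWin64, unsigned LocMemOffset) {
      const unsigned ShadowSpace = IsWin64 ? 32 : 0;  // 4 * 8-byte home slots
      return ShadowSpace + LocMemOffset;
    }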
   SmallVector<CCValAssign, 16> ArgLocs;
-  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
-  CCInfo.AnalyzeCallOperands(TheCall, CCAssignFnForNode(CC));
+  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+                 ArgLocs, *DAG.getContext());
+  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
 
   // Get a count of how many bytes are to be pushed on the stack.
   unsigned NumBytes = CCInfo.getNextStackOffset();
-  if (PerformTailCallOpt && CC == CallingConv::Fast)
+  if (PerformTailCallOpt && CallConv == CallingConv::Fast)
     NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
 
   int FPDiff = 0;
-  if (IsTailCall) {
+  if (isTailCall) {
     // Lower arguments at fp - stackoffset + fpdiff.
     unsigned NumBytesCallerPushed =
       MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
@@ -1719,7 +1724,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
 
   SDValue RetAddrFrIdx;
   // Load return address for tail calls.
-  Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, IsTailCall, Is64Bit,
+  Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall, Is64Bit,
                                   FPDiff, dl);
 
   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
@@ -1730,57 +1735,54 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
   // of tail call optimization arguments are handled later.
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
-    SDValue Arg = TheCall->getArg(i);
-    ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
+    EVT RegVT = VA.getLocVT();
+    SDValue Arg = Outs[i].Val;
+    ISD::ArgFlagsTy Flags = Outs[i].Flags;
     bool isByVal = Flags.isByVal();
 
     // Promote the value if needed.
     switch (VA.getLocInfo()) {
-    default: assert(0 && "Unknown loc info!");
+    default: llvm_unreachable("Unknown loc info!");
     case CCValAssign::Full: break;
     case CCValAssign::SExt:
-      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
       break;
     case CCValAssign::ZExt:
-      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
       break;
     case CCValAssign::AExt:
-      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+      if (RegVT.isVector() && RegVT.getSizeInBits() == 128) {
+        // Special case: passing MMX values in XMM registers.
+        Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
+        Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
+        Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
+      } else
+        Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
       break;
+    case CCValAssign::BCvt:
+      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, RegVT, Arg);
+      break;
+    case CCValAssign::Indirect: {
+      // Store the argument.
+      SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
+      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+      Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
+                           PseudoSourceValue::getFixedStack(FI), 0);
+      Arg = SpillSlot;
+      break;
+    }
     }
 
     if (VA.isRegLoc()) {
-      if (Is64Bit) {
-        MVT RegVT = VA.getLocVT();
-        if (RegVT.isVector() && RegVT.getSizeInBits() == 64)
-          switch (VA.getLocReg()) {
-          default:
-            break;
-          case X86::RDI: case X86::RSI: case X86::RDX: case X86::RCX:
-          case X86::R8: {
-            // Special case: passing MMX values in GPR registers.
-            Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
-            break;
-          }
-          case X86::XMM0: case X86::XMM1: case X86::XMM2: case X86::XMM3:
-          case X86::XMM4: case X86::XMM5: case X86::XMM6: case X86::XMM7: {
-            // Special case: passing MMX values in XMM registers.
- Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg); - Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg); - Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg); - break; - } - } - } RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } else { - if (!IsTailCall || (IsTailCall && isByVal)) { + if (!isTailCall || (isTailCall && isByVal)) { assert(VA.isMemLoc()); if (StackPtr.getNode() == 0) StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy()); - MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA, - Chain, Arg, Flags)); + MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, + dl, DAG, VA, Flags)); } } } @@ -1794,37 +1796,41 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { SDValue InFlag; // Tail call byval lowering might overwrite argument registers so in case of // tail call optimization the copies to registers are lowered later. - if (!IsTailCall) + if (!isTailCall) for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } - // ELF / PIC requires GOT in the EBX register before function calls via PLT - // GOT pointer. - if (CallRequiresGOTPtrInReg(Is64Bit, IsTailCall)) { - Chain = DAG.getCopyToReg(Chain, dl, X86::EBX, - DAG.getNode(X86ISD::GlobalBaseReg, - DebugLoc::getUnknownLoc(), - getPointerTy()), - InFlag); - InFlag = Chain.getValue(1); - } - // If we are tail calling and generating PIC/GOT style code load the address - // of the callee into ecx. The value in ecx is used as target of the tail - // jump. This is done to circumvent the ebx/callee-saved problem for tail - // calls on PIC/GOT architectures. Normally we would just put the address of - // GOT into ebx and then call target@PLT. But for tail callss ebx would be - // restored (since ebx is callee saved) before jumping to the target@PLT. - if (CallRequiresFnAddressInReg(Is64Bit, IsTailCall)) { - // Note: The actual moving to ecx is done further down. - GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee); - if (G && !G->getGlobal()->hasHiddenVisibility() && - !G->getGlobal()->hasProtectedVisibility()) - Callee = LowerGlobalAddress(Callee, DAG); - else if (isa<ExternalSymbolSDNode>(Callee)) - Callee = LowerExternalSymbol(Callee,DAG); + + if (Subtarget->isPICStyleGOT()) { + // ELF / PIC requires GOT in the EBX register before function calls via PLT + // GOT pointer. + if (!isTailCall) { + Chain = DAG.getCopyToReg(Chain, dl, X86::EBX, + DAG.getNode(X86ISD::GlobalBaseReg, + DebugLoc::getUnknownLoc(), + getPointerTy()), + InFlag); + InFlag = Chain.getValue(1); + } else { + // If we are tail calling and generating PIC/GOT style code load the + // address of the callee into ECX. The value in ecx is used as target of + // the tail jump. This is done to circumvent the ebx/callee-saved problem + // for tail calls on PIC/GOT architectures. Normally we would just put the + // address of GOT into ebx and then call target@PLT. But for tail calls + // ebx would be restored (since ebx is callee saved) before jumping to the + // target@PLT. + + // Note: The actual moving to ECX is done further down. 
+      GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+      if (G && !G->getGlobal()->hasHiddenVisibility() &&
+          !G->getGlobal()->hasProtectedVisibility())
+        Callee = LowerGlobalAddress(Callee, DAG);
+      else if (isa<ExternalSymbolSDNode>(Callee))
+        Callee = LowerExternalSymbol(Callee, DAG);
+    }
   }
 
   if (Is64Bit && isVarArg) {
@@ -1853,7 +1859,15 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
 
   // For tail calls lower the arguments to the 'real' stack slot.
-  if (IsTailCall) {
+  if (isTailCall) {
+    // Force all the incoming stack arguments to be loaded from the stack
+    // before any new outgoing arguments are stored to the stack, because the
+    // outgoing stack slots may alias the incoming argument stack slots, and
+    // the alias isn't otherwise explicit. This is slightly more conservative
+    // than necessary, because it means that each store effectively depends
+    // on every argument instead of just those arguments it would clobber.
+    SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
+
     SmallVector<SDValue, 8> MemOpChains2;
     SDValue FIN;
     int FI = 0;
@@ -1863,8 +1877,8 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
       CCValAssign &VA = ArgLocs[i];
       if (!VA.isRegLoc()) {
         assert(VA.isMemLoc());
-        SDValue Arg = TheCall->getArg(i);
-        ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
+        SDValue Arg = Outs[i].Val;
+        ISD::ArgFlagsTy Flags = Outs[i].Flags;
         // Create frame index.
         int32_t Offset = VA.getLocMemOffset()+FPDiff;
         uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
@@ -1879,12 +1893,13 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
                                     getPointerTy());
           Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
-          MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, Chain,
+          MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
+                                                           ArgChain,
                                                            Flags, DAG, dl));
         } else {
           // Store relative to framepointer.
           MemOpChains2.push_back(
-            DAG.getStore(Chain, dl, Arg, FIN,
+            DAG.getStore(ArgChain, dl, Arg, FIN,
                          PseudoSourceValue::getFixedStack(FI), 0));
         }
       }
@@ -1912,13 +1927,49 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
     // We should use extra load for direct calls to dllimported functions in
     // non-JIT mode.
-    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
-                                        getTargetMachine(), true))
-      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy(),
-                                          G->getOffset());
+    GlobalValue *GV = G->getGlobal();
+    if (!GV->hasDLLImportLinkage()) {
+      unsigned char OpFlags = 0;
+
+      // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
+      // external symbols must go through the PLT in PIC mode. If the symbol
+      // has hidden or protected visibility, or if it is static or local, then
+      // we don't need to use the PLT - we can directly call it.
+      if (Subtarget->isTargetELF() &&
+          getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+          GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
+        OpFlags = X86II::MO_PLT;
+      } else if (Subtarget->isPICStyleStubAny() &&
+                 (GV->isDeclaration() || GV->isWeakForLinker()) &&
+                 Subtarget->getDarwinVers() < 9) {
+        // PC-relative references to external symbols should go through $stub,
+        // unless we're building with the leopard linker or later, which
+        // automatically synthesizes these stubs.
+ OpFlags = X86II::MO_DARWIN_STUB; + } + + Callee = DAG.getTargetGlobalAddress(GV, getPointerTy(), + G->getOffset(), OpFlags); + } } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { - Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); - } else if (IsTailCall) { + unsigned char OpFlags = 0; + + // On ELF targets, in either X86-64 or X86-32 mode, direct calls to external + // symbols should go through the PLT. + if (Subtarget->isTargetELF() && + getTargetMachine().getRelocationModel() == Reloc::PIC_) { + OpFlags = X86II::MO_PLT; + } else if (Subtarget->isPICStyleStubAny() && + Subtarget->getDarwinVers() < 9) { + // PC-relative references to external symbols should go through $stub, + // unless we're building with the leopard linker or later, which + // automatically synthesizes these stubs. + OpFlags = X86II::MO_DARWIN_STUB; + } + + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(), + OpFlags); + } else if (isTailCall) { unsigned Opc = Is64Bit ? X86::R11 : X86::EAX; Chain = DAG.getCopyToReg(Chain, dl, @@ -1926,27 +1977,23 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { Callee,InFlag); Callee = DAG.getRegister(Opc, getPointerTy()); // Add register as live out. - DAG.getMachineFunction().getRegInfo().addLiveOut(Opc); + MF.getRegInfo().addLiveOut(Opc); } // Returns a chain & a flag for retval copy to use. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); SmallVector<SDValue, 8> Ops; - if (IsTailCall) { + if (isTailCall) { Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), DAG.getIntPtrConstant(0, true), InFlag); InFlag = Chain.getValue(1); - - // Returns a chain & a flag for retval copy to use. - NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); - Ops.clear(); } Ops.push_back(Chain); Ops.push_back(Callee); - if (IsTailCall) + if (isTailCall) Ops.push_back(DAG.getConstant(FPDiff, MVT::i32)); // Add argument registers to the end of the list so that they are known live @@ -1956,9 +2003,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { RegsToPass[i].second.getValueType())); // Add an implicit use GOT pointer in EBX. - if (!IsTailCall && !Is64Bit && - getTargetMachine().getRelocationModel() == Reloc::PIC_ && - Subtarget->isPICStyleGOT()) + if (!isTailCall && Subtarget->isPICStyleGOT()) Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy())); // Add an implicit use of AL for x86 vararg functions. @@ -1968,13 +2013,28 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { if (InFlag.getNode()) Ops.push_back(InFlag); - if (IsTailCall) { - assert(InFlag.getNode() && - "Flag must be set. Depend on flag being set in LowerRET"); - Chain = DAG.getNode(X86ISD::TAILCALL, dl, - TheCall->getVTList(), &Ops[0], Ops.size()); + if (isTailCall) { + // If this is the first return lowered for this function, add the regs + // to the liveout set for the function. 
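The OpFlags selection in the two hunks above reads naturally as a small decision table: ELF plus PIC plus a preemptible symbol means a PLT call, pre-Leopard Darwin stub-style PIC means a compiler-synthesized $stub, and everything else is a plain pc-relative call. A condensed restatement with illustrative enums (the real code consults the Subtarget and TargetMachine):

    enum TargetFlag { TF_None, TF_PLT, TF_DarwinStub };

    static TargetFlag classifyDirectCall(bool IsELF, bool IsPIC,
                                         bool DefaultVisibility, bool IsLocal,
                                         bool DarwinStubPIC, bool PreLeopard) {
      if (IsELF && IsPIC && DefaultVisibility && !IsLocal)
        return TF_PLT;        // lazily bound through the PLT
      if (DarwinStubPIC && PreLeopard)
        return TF_DarwinStub; // $stub emitted by the compiler, not the linker
      return TF_None;         // direct pc-relative call
    }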
+    if (MF.getRegInfo().liveout_empty()) {
+      SmallVector<CCValAssign, 16> RVLocs;
+      CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
+                     *DAG.getContext());
+      CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
+      for (unsigned i = 0; i != RVLocs.size(); ++i)
+        if (RVLocs[i].isRegLoc())
+          MF.getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+    }
+
+    assert(((Callee.getOpcode() == ISD::Register &&
+             (cast<RegisterSDNode>(Callee)->getReg() == X86::EAX ||
+              cast<RegisterSDNode>(Callee)->getReg() == X86::R9)) ||
+            Callee.getOpcode() == ISD::TargetExternalSymbol ||
+            Callee.getOpcode() == ISD::TargetGlobalAddress) &&
+           "Expecting a global address, external symbol, or register");
 
-    return SDValue(Chain.getNode(), Op.getResNo());
+    return DAG.getNode(X86ISD::TC_RETURN, dl,
+                       NodeTys, &Ops[0], Ops.size());
   }
 
   Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
@@ -1982,9 +2042,9 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
 
   // Create the CALLSEQ_END node.
   unsigned NumBytesForCalleeToPush;
-  if (IsCalleePop(isVarArg, CC))
+  if (IsCalleePop(isVarArg, CallConv))
     NumBytesForCalleeToPush = NumBytes;    // Callee pops everything
-  else if (!Is64Bit && CC != CallingConv::Fast && IsStructRet)
+  else if (!Is64Bit && CallConv != CallingConv::Fast && IsStructRet)
     // If this is a call to a struct-return function, the callee
     // pops the hidden struct pointer, so we have to push it back.
     // This is common for Darwin/X86, Linux & Mingw32 targets.
@@ -2002,8 +2062,8 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
 
   // Handle result values, copying them out of physregs into vregs that we
   // return.
-  return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG),
-                 Op.getResNo());
+  return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+                         Ins, dl, DAG, InVals);
 }
 
 
@@ -2060,36 +2120,18 @@ unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
   return Offset;
 }
 
-/// IsEligibleForTailCallElimination - Check to see whether the next instruction
-/// following the call is a return. A function is eligible if caller/callee
-/// calling conventions match, currently only fastcc supports tail calls, and
-/// the function CALL is immediatly followed by a RET.
-bool X86TargetLowering::IsEligibleForTailCallOptimization(CallSDNode *TheCall,
-                                                          SDValue Ret,
-                                                          SelectionDAG& DAG) const {
-  if (!PerformTailCallOpt)
-    return false;
-
-  if (CheckTailCallReturnConstraints(TheCall, Ret)) {
-    MachineFunction &MF = DAG.getMachineFunction();
-    unsigned CallerCC = MF.getFunction()->getCallingConv();
-    unsigned CalleeCC= TheCall->getCallingConv();
-    if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
-      SDValue Callee = TheCall->getCallee();
-      // On x86/32Bit PIC/GOT tail calls are supported.
-      if (getTargetMachine().getRelocationModel() != Reloc::PIC_ ||
-          !Subtarget->isPICStyleGOT()|| !Subtarget->is64Bit())
-        return true;
-
-      // Can only do local tail calls (in same module, hidden or protected) on
-      // x86_64 PIC/GOT at the moment.
-      if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
-        return G->getGlobal()->hasHiddenVisibility()
-            || G->getGlobal()->hasProtectedVisibility();
-    }
-  }
-
-  return false;
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool
+X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
+                                                     CallingConv::ID CalleeCC,
+                                                     bool isVarArg,
+                                      const SmallVectorImpl<ISD::InputArg> &Ins,
+                                                     SelectionDAG& DAG) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
+  return CalleeCC == CallingConv::Fast && CallerCC == CalleeCC;
 }
 
 FastISel *
@@ -2133,6 +2175,36 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
 }
 
+bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
+                                       bool hasSymbolicDisplacement) {
+  // Offset should fit into a 32-bit immediate field.
+  if (!isInt32(Offset))
+    return false;
+
+  // If we don't have a symbolic displacement, we don't have any extra
+  // restrictions.
+  if (!hasSymbolicDisplacement)
+    return true;
+
+  // FIXME: Some tweaks might be needed for medium code model.
+  if (M != CodeModel::Small && M != CodeModel::Kernel)
+    return false;
+
+  // For the small code model we assume that the last object is 16MB before
+  // the end of the 31-bit boundary. We may also accept pretty large negative
+  // constants knowing that all objects are in the positive half of the
+  // address space.
+  if (M == CodeModel::Small && Offset < 16*1024*1024)
+    return true;
+
+  // For the kernel code model we know that all objects reside in the negative
+  // half of the 32-bit address space. We must not accept negative offsets,
+  // since they may be just out of range, but we may accept pretty large
+  // positive ones.
+  if (M == CodeModel::Kernel && Offset > 0)
+    return true;
+
+  return false;
+}
+
 /// TranslateX86CC - Do a one-to-one translation of an ISD::CondCode to the X86
 /// specific condition code, returning the condition code and the LHS/RHS of the
 /// comparison to make.
@@ -2155,7 +2227,7 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
   }
 
   switch (SetCCOpcode) {
-  default: assert(0 && "Invalid integer condition!");
+  default: llvm_unreachable("Invalid integer condition!");
   case ISD::SETEQ:  return X86::COND_E;
   case ISD::SETGT:  return X86::COND_G;
   case ISD::SETGE:  return X86::COND_GE;
@@ -2195,7 +2267,7 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
     //  1 | 0 | 0 | X == Y
     //  1 | 1 | 1 | unordered
     switch (SetCCOpcode) {
-    default: assert(0 && "Condcode should be pre-legalized away");
+    default: llvm_unreachable("Condcode should be pre-legalized away");
     case ISD::SETUEQ:
     case ISD::SETEQ: return X86::COND_E;
     case ISD::SETOLT:              // flipped
@@ -2253,7 +2325,7 @@ static bool isUndefOrEqual(int Val, int CmpVal) {
 /// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
 /// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference
 /// the second operand.
-static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, EVT VT) {
   if (VT == MVT::v4f32 || VT == MVT::v4i32 || VT == MVT::v4i16)
     return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
   if (VT == MVT::v2f64 || VT == MVT::v2i64)
@@ -2262,68 +2334,68 @@ static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, MVT VT) {
 }
 
 bool X86::isPSHUFDMask(ShuffleVectorSDNode *N) {
-  SmallVector<int, 8> M; 
+  SmallVector<int, 8> M;
   N->getMask(M);
   return ::isPSHUFDMask(M, N->getValueType(0));
 }
 
 /// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
 /// is suitable for input to PSHUFHW.
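isOffsetSuitableForCodeModel above is easy to sanity-check with concrete numbers. This sketch mirrors its rules with stand-in types (Model is a stand-in for CodeModel::Model):

    #include <cstdint>

    enum Model { Small, Kernel, Large };

    static bool offsetOK(int64_t Off, Model M, bool HasSymbolicDisp) {
      if (Off != (int64_t)(int32_t)Off)
        return false;                      // must fit a signed imm32
      if (!HasSymbolicDisp)
        return true;                       // bare offsets need nothing more
      if (M == Small)
        return Off < 16 * 1024 * 1024;     // stay under the 16MB cushion
      if (M == Kernel)
        return Off > 0;                    // kernel objects sit in the top 2GB
      return false;
    }

    // offsetOK(8, Small, true) and offsetOK(4096, Kernel, true) hold, while
    // offsetOK(1LL << 33, Small, false) fails the imm32 check and
    // offsetOK(-8, Kernel, true) is rejected as potentially out of range.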
-static bool isPSHUFHWMask(const SmallVectorImpl<int> &Mask, MVT VT) { +static bool isPSHUFHWMask(const SmallVectorImpl<int> &Mask, EVT VT) { if (VT != MVT::v8i16) return false; - + // Lower quadword copied in order or undef. for (int i = 0; i != 4; ++i) if (Mask[i] >= 0 && Mask[i] != i) return false; - + // Upper quadword shuffled. for (int i = 4; i != 8; ++i) if (Mask[i] >= 0 && (Mask[i] < 4 || Mask[i] > 7)) return false; - + return true; } bool X86::isPSHUFHWMask(ShuffleVectorSDNode *N) { - SmallVector<int, 8> M; + SmallVector<int, 8> M; N->getMask(M); return ::isPSHUFHWMask(M, N->getValueType(0)); } /// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that /// is suitable for input to PSHUFLW. -static bool isPSHUFLWMask(const SmallVectorImpl<int> &Mask, MVT VT) { +static bool isPSHUFLWMask(const SmallVectorImpl<int> &Mask, EVT VT) { if (VT != MVT::v8i16) return false; - + // Upper quadword copied in order. for (int i = 4; i != 8; ++i) if (Mask[i] >= 0 && Mask[i] != i) return false; - + // Lower quadword shuffled. for (int i = 0; i != 4; ++i) if (Mask[i] >= 4) return false; - + return true; } bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) { - SmallVector<int, 8> M; + SmallVector<int, 8> M; N->getMask(M); return ::isPSHUFLWMask(M, N->getValueType(0)); } /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to SHUFP*. -static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, MVT VT) { +static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) { int NumElems = VT.getVectorNumElements(); if (NumElems != 2 && NumElems != 4) return false; - + int Half = NumElems / 2; for (int i = 0; i < Half; ++i) if (!isUndefOrInRange(Mask[i], 0, NumElems)) @@ -2331,7 +2403,7 @@ static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, MVT VT) { for (int i = Half; i < NumElems; ++i) if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2)) return false; - + return true; } @@ -2345,12 +2417,12 @@ bool X86::isSHUFPMask(ShuffleVectorSDNode *N) { /// the reverse of what x86 shuffles want. x86 shuffles requires the lower /// half elements to come from vector 1 (which would equal the dest.) and /// the upper half to come from vector 2. -static bool isCommutedSHUFPMask(const SmallVectorImpl<int> &Mask, MVT VT) { +static bool isCommutedSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) { int NumElems = VT.getVectorNumElements(); - - if (NumElems != 2 && NumElems != 4) + + if (NumElems != 2 && NumElems != 4) return false; - + int Half = NumElems / 2; for (int i = 0; i < Half; ++i) if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2)) @@ -2424,24 +2496,24 @@ bool X86::isMOVHPMask(ShuffleVectorSDNode *N) { /// <2, 3, 2, 3> bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) { unsigned NumElems = N->getValueType(0).getVectorNumElements(); - + if (NumElems != 4) return false; - - return isUndefOrEqual(N->getMaskElt(0), 2) && + + return isUndefOrEqual(N->getMaskElt(0), 2) && isUndefOrEqual(N->getMaskElt(1), 3) && - isUndefOrEqual(N->getMaskElt(2), 2) && + isUndefOrEqual(N->getMaskElt(2), 2) && isUndefOrEqual(N->getMaskElt(3), 3); } /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKL. 
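For a concrete feel of these predicates: a v8i16 PSHUFHW mask must keep lanes 0-3 in place (or undef) and may only permute lanes 4-7 among themselves. A compact illustration shaped like isPSHUFHWMask above (not the LLVM code itself):

    #include <vector>

    static bool looksLikePSHUFHW(const std::vector<int> &Mask) {
      if (Mask.size() != 8)
        return false;
      for (int i = 0; i != 4; ++i)            // low quadword: identity or undef
        if (Mask[i] >= 0 && Mask[i] != i)
          return false;
      for (int i = 4; i != 8; ++i)            // high quadword: lanes 4..7 only
        if (Mask[i] >= 0 && (Mask[i] < 4 || Mask[i] > 7))
          return false;
      return true;
    }

    // {0,1,2,3,7,6,5,4} is accepted; {0,1,2,3,0,1,2,3} is rejected because
    // the high half reaches into the low quadword.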
-static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, MVT VT, +static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT, bool V2IsSplat = false) { int NumElts = VT.getVectorNumElements(); if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) return false; - + for (int i = 0, j = 0; i != NumElts; i += 2, ++j) { int BitI = Mask[i]; int BitI1 = Mask[i+1]; @@ -2466,12 +2538,12 @@ bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) { /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKH. -static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, MVT VT, +static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT, bool V2IsSplat = false) { int NumElts = VT.getVectorNumElements(); if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) return false; - + for (int i = 0, j = 0; i != NumElts; i += 2, ++j) { int BitI = Mask[i]; int BitI1 = Mask[i+1]; @@ -2497,11 +2569,11 @@ bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat) { /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, /// <0, 0, 1, 1> -static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, MVT VT) { +static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) { int NumElems = VT.getVectorNumElements(); if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) return false; - + for (int i = 0, j = 0; i != NumElems; i += 2, ++j) { int BitI = Mask[i]; int BitI1 = Mask[i+1]; @@ -2522,11 +2594,11 @@ bool X86::isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N) { /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef, /// <2, 2, 3, 3> -static bool isUNPCKH_v_undef_Mask(const SmallVectorImpl<int> &Mask, MVT VT) { +static bool isUNPCKH_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) { int NumElems = VT.getVectorNumElements(); if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) return false; - + for (int i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) { int BitI = Mask[i]; int BitI1 = Mask[i+1]; @@ -2547,19 +2619,19 @@ bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) { /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSS, /// MOVSD, and MOVD, i.e. setting the lowest element. -static bool isMOVLMask(const SmallVectorImpl<int> &Mask, MVT VT) { +static bool isMOVLMask(const SmallVectorImpl<int> &Mask, EVT VT) { if (VT.getVectorElementType().getSizeInBits() < 32) return false; int NumElts = VT.getVectorNumElements(); - + if (!isUndefOrEqual(Mask[0], NumElts)) return false; - + for (int i = 1; i < NumElts; ++i) if (!isUndefOrEqual(Mask[i], i)) return false; - + return true; } @@ -2572,21 +2644,21 @@ bool X86::isMOVLMask(ShuffleVectorSDNode *N) { /// isCommutedMOVL - Returns true if the shuffle mask is except the reverse /// of what x86 movss want. X86 movs requires the lowest element to be lowest /// element of vector 2 and the other elements to come from vector 1 in order. 
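[annotation] The UNPCKL/UNPCKH predicates check for the interleave shape those instructions produce: result element 2j pairs V1[j] with V2[j]. A standalone model of the unpack-low case (a sketch under the same conventions as above):

    bool isUndefOrEqualTo(int Val, int Cmp) { return Val < 0 || Val == Cmp; }

    bool isUNPCKLMaskModel(const int *Mask, int NumElts, bool V2IsSplat = false) {
      for (int i = 0, j = 0; i != NumElts; i += 2, ++j) {
        if (!isUndefOrEqualTo(Mask[i], j))
          return false;
        // With a splatted V2, any V2 element is really element 0.
        int Want = V2IsSplat ? NumElts : j + NumElts;
        if (!isUndefOrEqualTo(Mask[i + 1], Want))
          return false;
      }
      return true;
    }
    // For v4i32: {0,4,1,5} is the unpcklps shape, {2,6,3,7} the unpckhps one.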
-static bool isCommutedMOVLMask(const SmallVectorImpl<int> &Mask, MVT VT, +static bool isCommutedMOVLMask(const SmallVectorImpl<int> &Mask, EVT VT, bool V2IsSplat = false, bool V2IsUndef = false) { int NumOps = VT.getVectorNumElements(); if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16) return false; - + if (!isUndefOrEqual(Mask[0], 0)) return false; - + for (int i = 1; i < NumOps; ++i) if (!(isUndefOrEqual(Mask[i], i+NumOps) || (V2IsUndef && isUndefOrInRange(Mask[i], NumOps, NumOps*2)) || (V2IsSplat && isUndefOrEqual(Mask[i], NumOps)))) return false; - + return true; } @@ -2650,7 +2722,7 @@ bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N) { /// specifies a shuffle of elements that is suitable for input to MOVDDUP. bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) { int e = N->getValueType(0).getVectorNumElements() / 2; - + for (int i = 0; i < e; ++i) if (!isUndefOrEqual(N->getMaskElt(i), i)) return false; @@ -2714,14 +2786,23 @@ unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { return Mask; } +/// isZeroNode - Returns true if Elt is a constant zero or a floating point +/// constant +0.0. +bool X86::isZeroNode(SDValue Elt) { + return ((isa<ConstantSDNode>(Elt) && + cast<ConstantSDNode>(Elt)->getZExtValue() == 0) || + (isa<ConstantFPSDNode>(Elt) && + cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero())); +} + /// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in /// their permute mask. static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { - MVT VT = SVOp->getValueType(0); + EVT VT = SVOp->getValueType(0); unsigned NumElems = VT.getVectorNumElements(); SmallVector<int, 8> MaskVec; - + for (unsigned i = 0; i != NumElems; ++i) { int idx = SVOp->getMaskElt(i); if (idx < 0) @@ -2737,7 +2818,7 @@ static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp, /// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming /// the two vector operands have swapped position. -static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, MVT VT) { +static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) { unsigned NumElems = VT.getVectorNumElements(); for (unsigned i = 0; i != NumElems; ++i) { int idx = Mask[i]; @@ -2795,7 +2876,7 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, return false; unsigned NumElems = Op->getValueType(0).getVectorNumElements(); - + if (NumElems != 2 && NumElems != 4) return false; for (unsigned i = 0, e = NumElems/2; i != e; ++i) @@ -2820,17 +2901,8 @@ static bool isSplatVector(SDNode *N) { return true; } -/// isZeroNode - Returns true if Elt is a constant zero or a floating point -/// constant +0.0. -static inline bool isZeroNode(SDValue Elt) { - return ((isa<ConstantSDNode>(Elt) && - cast<ConstantSDNode>(Elt)->getZExtValue() == 0) || - (isa<ConstantFPSDNode>(Elt) && - cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero())); -} - /// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved -/// to an zero vector. +/// to an zero vector. 
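[annotation] CommuteVectorShuffle above swaps V1/V2 and rewrites every mask index across the operand boundary. The index arithmetic in isolation (standalone model):

    // Swapping the two shuffle inputs means indices below NumElems (old V1)
    // must now point past NumElems (new V2) and vice versa; undef (-1) stays.
    void commuteMask(int *Mask, int NumElems) {
      for (int i = 0; i != NumElems; ++i) {
        int idx = Mask[i];
        if (idx < 0)
          continue;
        Mask[i] = idx < NumElems ? idx + NumElems : idx - NumElems;
      }
    }
    // {0,5,2,7} on (V1,V2) becomes {4,1,6,3} on (V2,V1): same result vector.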
/// FIXME: move to dag combiner / method on ShuffleVectorSDNode static bool isZeroShuffle(ShuffleVectorSDNode *N) { SDValue V1 = N->getOperand(0); @@ -2842,13 +2914,15 @@ static bool isZeroShuffle(ShuffleVectorSDNode *N) { unsigned Opc = V2.getOpcode(); if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode())) continue; - if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V2.getOperand(Idx-NumElems))) + if (Opc != ISD::BUILD_VECTOR || + !X86::isZeroNode(V2.getOperand(Idx-NumElems))) return false; } else if (Idx >= 0) { unsigned Opc = V1.getOpcode(); if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode())) continue; - if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V1.getOperand(Idx))) + if (Opc != ISD::BUILD_VECTOR || + !X86::isZeroNode(V1.getOperand(Idx))) return false; } } @@ -2857,7 +2931,7 @@ static bool isZeroShuffle(ShuffleVectorSDNode *N) { /// getZeroVector - Returns a vector of specified type with all zero elements. /// -static SDValue getZeroVector(MVT VT, bool HasSSE2, SelectionDAG &DAG, +static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); @@ -2879,7 +2953,7 @@ static SDValue getZeroVector(MVT VT, bool HasSSE2, SelectionDAG &DAG, /// getOnesVector - Returns a vector of specified type with all bits set. /// -static SDValue getOnesVector(MVT VT, SelectionDAG &DAG, DebugLoc dl) { +static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest @@ -2897,13 +2971,13 @@ static SDValue getOnesVector(MVT VT, SelectionDAG &DAG, DebugLoc dl) { /// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements /// that point to V2 points to its first element. static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { - MVT VT = SVOp->getValueType(0); + EVT VT = SVOp->getValueType(0); unsigned NumElems = VT.getVectorNumElements(); - + bool Changed = false; SmallVector<int, 8> MaskVec; SVOp->getMask(MaskVec); - + for (unsigned i = 0; i != NumElems; ++i) { if (MaskVec[i] > (int)NumElems) { MaskVec[i] = NumElems; @@ -2918,7 +2992,7 @@ static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { /// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd /// operation of specified width. -static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, +static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, SDValue V2) { unsigned NumElems = VT.getVectorNumElements(); SmallVector<int, 8> Mask; @@ -2929,7 +3003,7 @@ static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, } /// getUnpackl - Returns a vector_shuffle node for an unpackl operation. -static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, +static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, SDValue V2) { unsigned NumElems = VT.getVectorNumElements(); SmallVector<int, 8> Mask; @@ -2941,7 +3015,7 @@ static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, } /// getUnpackhMask - Returns a vector_shuffle node for an unpackh operation. 
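[annotation] getZeroVector/getOnesVector above canonicalize on a small set of types and bitcast to the requested one, so instruction selection only has to match one zero idiom and one all-ones idiom. An intrinsics-level illustration of those two idioms (not the DAG code itself):

    #include <emmintrin.h>

    __m128i allZeros() {
      return _mm_setzero_si128();            // selected as pxor x, x
    }
    __m128i allOnes() {
      __m128i z = _mm_setzero_si128();
      return _mm_cmpeq_epi32(z, z);          // pcmpeqd x, x sets every bit
    }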
-static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, +static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, SDValue V2) { unsigned NumElems = VT.getVectorNumElements(); unsigned Half = NumElems/2; @@ -2954,13 +3028,13 @@ static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, } /// PromoteSplat - Promote a splat of v4f32, v8i16 or v16i8 to v4i32. -static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG, +static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG, bool HasSSE2) { if (SV->getValueType(0).getVectorNumElements() <= 4) return SDValue(SV, 0); - - MVT PVT = MVT::v4f32; - MVT VT = SV->getValueType(0); + + EVT PVT = MVT::v4f32; + EVT VT = SV->getValueType(0); DebugLoc dl = SV->getDebugLoc(); SDValue V1 = SV->getOperand(0); int NumElems = VT.getVectorNumElements(); @@ -2976,7 +3050,7 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG, } NumElems >>= 1; } - + // Perform the splat. int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo }; V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1); @@ -2991,7 +3065,7 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG, static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, bool isZero, bool HasSSE2, SelectionDAG &DAG) { - MVT VT = V2.getValueType(); + EVT VT = V2.getValueType(); SDValue V1 = isZero ? getZeroVector(VT, HasSSE2, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT); unsigned NumElems = VT.getVectorNumElements(); @@ -3016,7 +3090,7 @@ unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, int NumElems, continue; } SDValue Elt = DAG.getShuffleScalarElt(SVOp, Index); - if (Elt.getNode() && isZeroNode(Elt)) + if (Elt.getNode() && X86::isZeroNode(Elt)) ++NumZeros; else break; @@ -3142,11 +3216,11 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, /// getVShift - Return a vector logical shift node. /// -static SDValue getVShift(bool isLeft, MVT VT, SDValue SrcOp, +static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits, SelectionDAG &DAG, const TargetLowering &TLI, DebugLoc dl) { bool isMMX = VT.getSizeInBits() == 64; - MVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64; + EVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64; unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL; SrcOp = DAG.getNode(ISD::BIT_CONVERT, dl, ShVT, SrcOp); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, @@ -3171,9 +3245,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG, dl); } - MVT VT = Op.getValueType(); - MVT EVT = VT.getVectorElementType(); - unsigned EVTBits = EVT.getSizeInBits(); + EVT VT = Op.getValueType(); + EVT ExtVT = VT.getVectorElementType(); + unsigned EVTBits = ExtVT.getSizeInBits(); unsigned NumElems = Op.getNumOperands(); unsigned NumZero = 0; @@ -3189,7 +3263,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { if (Elt.getOpcode() != ISD::Constant && Elt.getOpcode() != ISD::ConstantFP) IsAllConstants = false; - if (isZeroNode(Elt)) + if (X86::isZeroNode(Elt)) NumZero++; else { NonZeros |= (1 << i); @@ -3212,11 +3286,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { // insertion that way. Only do this if the value is non-constant or if the // value is a constant being inserted into element 0. It is cheaper to do // a constant pool load than it is to do a movd + shuffle. 
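[annotation] The getVShift helper above models a whole-register shift: bitcast to v2i64 (v1i64 for MMX), emit X86ISD::VSHL/VSRL with the bit count, bitcast back. With an immediate count this is selected as PSLLDQ/PSRLDQ, which shift the full 128 bits by bytes. An intrinsics illustration of the intended effect:

    #include <emmintrin.h>

    // Shifting a v4i32 left by one whole element is a 4-byte register shift:
    // pslldq $4 turns {a,b,c,d} (a in the low lane) into {0,a,b,c}.
    __m128i shiftInOneZeroElement(__m128i v) {
      return _mm_slli_si128(v, 4);
    }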
- if (EVT == MVT::i64 && !Subtarget->is64Bit() && + if (ExtVT == MVT::i64 && !Subtarget->is64Bit() && (!IsAllConstants || Idx == 0)) { if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) { // Handle MMX and SSE both. - MVT VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32; + EVT VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32; unsigned VecElts = VT == MVT::v2i64 ? 4 : 2; // Truncate the value (which may itself be a constant) to i32, and @@ -3234,7 +3308,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { for (unsigned i = 1; i != VecElts; ++i) Mask.push_back(i); Item = DAG.getVectorShuffle(VecVT, dl, Item, - DAG.getUNDEF(Item.getValueType()), + DAG.getUNDEF(Item.getValueType()), &Mask[0]); } return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Item); @@ -3248,15 +3322,15 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { if (Idx == 0) { if (NumZero == 0) { return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); - } else if (EVT == MVT::i32 || EVT == MVT::f32 || EVT == MVT::f64 || - (EVT == MVT::i64 && Subtarget->is64Bit())) { + } else if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 || + (ExtVT == MVT::i64 && Subtarget->is64Bit())) { Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget->hasSSE2(), DAG); - } else if (EVT == MVT::i16 || EVT == MVT::i8) { + } else if (ExtVT == MVT::i16 || ExtVT == MVT::i8) { Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); - MVT MiddleVT = VT.getSizeInBits() == 64 ? MVT::v2i32 : MVT::v4i32; + EVT MiddleVT = VT.getSizeInBits() == 64 ? MVT::v2i32 : MVT::v4i32; Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item); Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget->hasSSE2(), DAG); @@ -3266,7 +3340,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { // Is it a vector logical left shift? if (NumElems == 2 && Idx == 1 && - isZeroNode(Op.getOperand(0)) && !isZeroNode(Op.getOperand(1))) { + X86::isZeroNode(Op.getOperand(0)) && + !X86::isZeroNode(Op.getOperand(1))) { unsigned NumBits = VT.getSizeInBits(); return getVShift(true, VT, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, @@ -3374,9 +3449,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { // If we have SSE 4.1, Expand into a number of inserts unless the number of // values to be inserted is equal to the number of elements, in which case // use the unpack code below in the hopes of matching the consecutive elts - // load merge pattern for shuffles. + // load merge pattern for shuffles. // FIXME: We could probably just check that here directly. - if (Values.size() < NumElems && VT.getSizeInBits() == 128 && + if (Values.size() < NumElems && VT.getSizeInBits() == 128 && getSubtarget()->hasSSE41()) { V[0] = DAG.getUNDEF(VT); for (unsigned i = 0; i < NumElems; ++i) @@ -3457,7 +3532,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, } // For SSSE3, If all 8 words of the result come from only 1 quadword of each - // of the two input vectors, shuffle them into one input vector so only a + // of the two input vectors, shuffle them into one input vector so only a // single pshufb instruction is necessary. If There are more than 2 input // quads, disable the next transformation since it does not help SSSE3. 
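[annotation] For the single-nonzero-element BUILD_VECTOR paths above, the i16/i8 case widens the scalar to i32 and then relies on the MOVL-to-zero shuffle, which the selector can fold into a single movd, since movd already clears the upper lanes. An intrinsics sketch of the net effect (assuming the remaining elements are zero):

    #include <emmintrin.h>
    #include <cstdint>

    // build_vector {x, 0, 0, 0, 0, 0, 0, 0} of i16: widen to i32, movd.
    __m128i buildLowI16(uint16_t x) {
      return _mm_cvtsi32_si128((int)x);   // movd zeroes lanes 1..3 for free
    }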
bool V1Used = InputQuads[0] || InputQuads[1]; @@ -3481,7 +3556,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, SmallVector<int, 8> MaskV; MaskV.push_back(BestLoQuad < 0 ? 0 : BestLoQuad); MaskV.push_back(BestHiQuad < 0 ? 1 : BestHiQuad); - NewV = DAG.getVectorShuffle(MVT::v2i64, dl, + NewV = DAG.getVectorShuffle(MVT::v2i64, dl, DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V1), DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V2), &MaskV[0]); NewV = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, NewV); @@ -3506,7 +3581,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, int idx = MaskVals[i]; if (idx < 0) continue; - idx = MaskVals[i] = (idx / 4) == BestLoQuad ? (idx & 3) : (idx & 3) + 4; + idx = MaskVals[i] = (idx / 4) == BestLoQuad ? (idx & 3) : (idx & 3) + 4; if ((idx != i) && idx < 4) pshufhw = false; if ((idx != i) && idx > 3) @@ -3521,19 +3596,19 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, // If we've eliminated the use of V2, and the new mask is a pshuflw or // pshufhw, that's as cheap as it gets. Return the new shuffle. if ((pshufhw && InOrder[0]) || (pshuflw && InOrder[1])) { - return DAG.getVectorShuffle(MVT::v8i16, dl, NewV, + return DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), &MaskVals[0]); } } - + // If we have SSSE3, and all words of the result are from 1 input vector, // case 2 is generated, otherwise case 3 is generated. If no SSSE3 // is present, fall back to case 4. if (TLI.getSubtarget()->hasSSSE3()) { SmallVector<SDValue,16> pshufbMask; - + // If we have elements from both input vectors, set the high bit of the - // shuffle mask element to zero out elements that come from V2 in the V1 + // shuffle mask element to zero out elements that come from V2 in the V1 // mask, and elements that come from V1 in the V2 mask, so that the two // results can be OR'd together. bool TwoInputs = V1Used && V2Used; @@ -3548,12 +3623,12 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, pshufbMask.push_back(DAG.getConstant(EltIdx+1, MVT::i8)); } V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V1); - V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1, + V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1, DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, &pshufbMask[0], 16)); if (!TwoInputs) return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1); - + // Calculate the shuffle mask for the second input, shuffle it, and // OR it with the first shuffled input. pshufbMask.clear(); @@ -3568,7 +3643,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, pshufbMask.push_back(DAG.getConstant(EltIdx - 15, MVT::i8)); } V2 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V2); - V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2, + V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2, DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, &pshufbMask[0], 16)); V1 = DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2); @@ -3597,7 +3672,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), &MaskV[0]); } - + // If BestHi >= 0, generate a pshufhw to put the high elements in order, // and update MaskVals with the new element order. 
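[annotation] The SSSE3 path above builds a byte-level PSHUFB control vector from the word-level mask; bit 7 of a control byte zeroes that result byte, which is what lets the two shuffled inputs be OR'd together. A standalone model of the V1-side control vector (undef lanes also get bit 7 set, matching the effect of the DAG code):

    #include <cstdint>

    // MaskVals: 8 word indices, 0..15 across both inputs, -1 for undef.
    void buildV1PshufbMask(const int MaskVals[8], uint8_t Ctl[16]) {
      for (unsigned i = 0; i != 8; ++i) {
        int EltIdx = MaskVals[i] * 2;           // word index -> byte index
        if (MaskVals[i] < 0 || EltIdx >= 16) {  // undef, or word lives in V2
          Ctl[2*i] = Ctl[2*i + 1] = 0x80;       // 0x80 => result byte = 0
          continue;
        }
        Ctl[2*i]     = (uint8_t)EltIdx;
        Ctl[2*i + 1] = (uint8_t)(EltIdx + 1);
      }
    }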
if (BestHiQuad >= 0) { @@ -3619,7 +3694,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), &MaskV[0]); } - + // In case BestHi & BestLo were both -1, which means each quadword has a word // from each of the four input quadwords, calculate the InOrder bitvector now // before falling through to the insert/extract cleanup. @@ -3629,7 +3704,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, if (MaskVals[i] < 0 || MaskVals[i] == i) InOrder.set(i); } - + // The other elements are put in the right place using pextrw and pinsrw. for (unsigned i = 0; i != 8; ++i) { if (InOrder[i]) @@ -3660,9 +3735,9 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, DebugLoc dl = SVOp->getDebugLoc(); SmallVector<int, 16> MaskVals; SVOp->getMask(MaskVals); - + // If we have SSSE3, case 1 is generated when all result bytes come from - // one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is + // one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is // present, fall back to case 3. // FIXME: kill V2Only once shuffles are canonizalized by getNode. bool V1Only = true; @@ -3676,13 +3751,13 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, else V1Only = false; } - + // If SSSE3, use 1 pshufb instruction per vector with elements in the result. if (TLI.getSubtarget()->hasSSSE3()) { SmallVector<SDValue,16> pshufbMask; - + // If all result elements are from one input vector, then only translate - // undef mask values to 0x80 (zero out result) in the pshufb mask. + // undef mask values to 0x80 (zero out result) in the pshufb mask. // // Otherwise, we have elements from both input vectors, and must zero out // elements that come from V2 in the first mask, and V1 in the second mask @@ -3705,7 +3780,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, MVT::v16i8, &pshufbMask[0], 16)); if (!TwoInputs) return V1; - + // Calculate the shuffle mask for the second input, shuffle it, and // OR it with the first shuffled input. pshufbMask.clear(); @@ -3722,7 +3797,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, MVT::v16i8, &pshufbMask[0], 16)); return DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2); } - + // No SSSE3 - Calculate in place words and then fix all out of place words // With 0-16 extracts & inserts. Worst case is 16 bytes out of order from // the 16 different words that comprise the two doublequadword input vectors. @@ -3732,17 +3807,17 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, for (int i = 0; i != 8; ++i) { int Elt0 = MaskVals[i*2]; int Elt1 = MaskVals[i*2+1]; - + // This word of the result is all undef, skip it. if (Elt0 < 0 && Elt1 < 0) continue; - + // This word of the result is already in the correct place, skip it. if (V1Only && (Elt0 == i*2) && (Elt1 == i*2+1)) continue; if (V2Only && (Elt0 == i*2+16) && (Elt1 == i*2+17)) continue; - + SDValue Elt0Src = Elt0 < 16 ? V1 : V2; SDValue Elt1Src = Elt1 < 16 ? V1 : V2; SDValue InsElt; @@ -3801,15 +3876,15 @@ static SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, TargetLowering &TLI, DebugLoc dl) { - MVT VT = SVOp->getValueType(0); + EVT VT = SVOp->getValueType(0); SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); unsigned NumElems = VT.getVectorNumElements(); unsigned NewWidth = (NumElems == 4) ? 
2 : 4; - MVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth); - MVT MaskEltVT = MaskVT.getVectorElementType(); - MVT NewVT = MaskVT; - switch (VT.getSimpleVT()) { + EVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth); + EVT MaskEltVT = MaskVT.getVectorElementType(); + EVT NewVT = MaskVT; + switch (VT.getSimpleVT().SimpleTy) { default: assert(false && "Unexpected!"); case MVT::v4f32: NewVT = MVT::v2f64; break; case MVT::v4i32: NewVT = MVT::v2i64; break; @@ -3849,7 +3924,7 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, /// getVZextMovL - Return a zero-extending vector move low node. /// -static SDValue getVZextMovL(MVT VT, MVT OpVT, +static SDValue getVZextMovL(EVT VT, EVT OpVT, SDValue SrcOp, SelectionDAG &DAG, const X86Subtarget *Subtarget, DebugLoc dl) { if (VT == MVT::v2f64 || VT == MVT::v4f32) { @@ -3859,11 +3934,11 @@ static SDValue getVZextMovL(MVT VT, MVT OpVT, if (!LD) { // movssrr and movsdrr do not clear top bits. Try to use movd, movq // instead. - MVT EVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32; - if ((EVT != MVT::i64 || Subtarget->is64Bit()) && + MVT ExtVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32; + if ((ExtVT.SimpleTy != MVT::i64 || Subtarget->is64Bit()) && SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR && SrcOp.getOperand(0).getOpcode() == ISD::BIT_CONVERT && - SrcOp.getOperand(0).getOperand(0).getValueType() == EVT) { + SrcOp.getOperand(0).getOperand(0).getValueType() == ExtVT) { // PR2108 OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32; return DAG.getNode(ISD::BIT_CONVERT, dl, VT, @@ -3889,8 +3964,8 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); DebugLoc dl = SVOp->getDebugLoc(); - MVT VT = SVOp->getValueType(0); - + EVT VT = SVOp->getValueType(0); + SmallVector<std::pair<int, int>, 8> Locs; Locs.resize(4); SmallVector<int, 8> Mask1(4U, -1); @@ -3926,7 +4001,7 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { V1 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]); SmallVector<int, 8> Mask2(4U, -1); - + for (unsigned i = 0; i != 4; ++i) { if (Locs[i].first == -1) continue; @@ -4036,7 +4111,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); unsigned NumElems = VT.getVectorNumElements(); bool isMMX = VT.getSizeInBits() == 64; @@ -4050,7 +4125,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // Promote splats to v4f32. if (SVOp->isSplat()) { - if (isMMX || NumElems < 4) + if (isMMX || NumElems < 4) return Op; return PromoteSplat(SVOp, DAG, Subtarget->hasSSE2()); } @@ -4079,10 +4154,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { DAG, Subtarget, dl); } } - + if (X86::isPSHUFDMask(SVOp)) return Op; - + // Check if this can be converted into a logical shift. bool isLeft = false; unsigned ShAmt = 0; @@ -4092,11 +4167,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (isShift && ShVal.hasOneUse()) { // If the shifted value has multiple uses, it may be cheaper to use // v_set0 + movlhps or movhlps, etc. 
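[annotation] RewriteAsNarrowerShuffle above re-expresses a 4- or 8-wide shuffle as a 2- or 4-wide one when each destination group draws on consecutive, properly aligned source elements. The type map it applies is fixed; a condensed model (enum illustrative):

    enum VT { v4f32, v4i32, v8i16, v16i8, v2f64, v2i64 };

    // Halve the element count; element width doubles (quadruples for bytes).
    VT narrowerShuffleVT(VT vt) {
      switch (vt) {
      case v4f32: return v2f64;
      case v4i32: return v2i64;
      case v8i16: return v4i32;
      case v16i8: return v4i32;   // scale 4: four bytes per new i32 element
      default:    return vt;      // other types never reach this path
      }
    }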
- MVT EVT = VT.getVectorElementType(); - ShAmt *= EVT.getSizeInBits(); + EVT EltVT = VT.getVectorElementType(); + ShAmt *= EltVT.getSizeInBits(); return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl); } - + if (X86::isMOVLMask(SVOp)) { if (V1IsUndef) return V2; @@ -4105,7 +4180,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (!isMMX) return Op; } - + // FIXME: fold these into legal mask. if (!isMMX && (X86::isMOVSHDUPMask(SVOp) || X86::isMOVSLDUPMask(SVOp) || @@ -4120,11 +4195,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (isShift) { // No better options. Use a vshl / vsrl. - MVT EVT = VT.getVectorElementType(); - ShAmt *= EVT.getSizeInBits(); + EVT EltVT = VT.getVectorElementType(); + ShAmt *= EltVT.getSizeInBits(); return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl); } - + bool Commuted = false; // FIXME: This should also accept a bitcast of a splat? Be careful, not // 1,1,1,1 -> v8i16 though. @@ -4144,7 +4219,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (isCommutedMOVL(SVOp, V2IsSplat, V2IsUndef)) { // Shuffling low element of v1 into undef, just return v1. - if (V2IsUndef) + if (V2IsUndef) return V1; // If V2 is a splat, the mask may be malformed such as <4,3,3,3>, which // the instruction selector will not match, so get a canonical MOVL with @@ -4196,7 +4271,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { SVOp->getMask(PermMask); if (isShuffleMaskLegal(PermMask, VT)) return Op; - + // Handle v8i16 specifically since SSE can do byte extraction and insertion. if (VT == MVT::v8i16) { SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(SVOp, DAG, *this); @@ -4209,7 +4284,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (NewOp.getNode()) return NewOp; } - + // Handle all 4 wide cases with a number of shuffles except for MMX. if (NumElems == 4 && !isMMX) return LowerVECTOR_SHUFFLE_4wide(SVOp, DAG); @@ -4220,7 +4295,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); if (VT.getSizeInBits() == 8) { SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32, @@ -4283,7 +4358,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { return Res; } - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); // TODO: handle v16i8. if (VT.getSizeInBits() == 16) { @@ -4296,21 +4371,21 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { MVT::v4i32, Vec), Op.getOperand(1))); // Transform it so it match pextrw which produces a 32-bit result. - MVT EVT = (MVT::SimpleValueType)(VT.getSimpleVT()+1); - SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, EVT, + EVT EltVT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy+1); + SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, EltVT, Op.getOperand(0), Op.getOperand(1)); - SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EVT, Extract, + SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EltVT, Extract, DAG.getValueType(VT)); return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert); } else if (VT.getSizeInBits() == 32) { unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); if (Idx == 0) return Op; - + // SHUFPS the element to the lowest double word, then movss. 
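[annotation] The "SHUFPS the element to the lowest double word, then movss" comment above describes the pre-SSE4.1 extract strategy. An intrinsics illustration for a fixed lane (a sketch; the lowering builds the shuffle mask from the runtime-constant index):

    #include <emmintrin.h>

    // Extract lane 2 of a v4f32 without pextrd: shuffle it to lane 0 first.
    float extractLane2(__m128 v) {
      __m128 t = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2)); // lane2 -> lane0
      return _mm_cvtss_f32(t);                                   // scalar read
    }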
int Mask[4] = { Idx, -1, -1, -1 }; - MVT VVT = Op.getOperand(0).getValueType(); - SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), + EVT VVT = Op.getOperand(0).getValueType(); + SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), DAG.getUNDEF(VVT), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, DAG.getIntPtrConstant(0)); @@ -4326,8 +4401,8 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { // Note if the lower 64 bits of the result of the UNPCKHPD is then stored // to a f64mem, the whole operation is folded into a single MOVHPDmr. int Mask[2] = { 1, -1 }; - MVT VVT = Op.getOperand(0).getValueType(); - SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), + EVT VVT = Op.getOperand(0).getValueType(); + SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), DAG.getUNDEF(VVT), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, DAG.getIntPtrConstant(0)); @@ -4338,18 +4413,18 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){ - MVT VT = Op.getValueType(); - MVT EVT = VT.getVectorElementType(); + EVT VT = Op.getValueType(); + EVT EltVT = VT.getVectorElementType(); DebugLoc dl = Op.getDebugLoc(); SDValue N0 = Op.getOperand(0); SDValue N1 = Op.getOperand(1); SDValue N2 = Op.getOperand(2); - if ((EVT.getSizeInBits() == 8 || EVT.getSizeInBits() == 16) && + if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) && isa<ConstantSDNode>(N2)) { - unsigned Opc = (EVT.getSizeInBits() == 8) ? X86ISD::PINSRB - : X86ISD::PINSRW; + unsigned Opc = (EltVT.getSizeInBits() == 8) ? X86ISD::PINSRB + : X86ISD::PINSRW; // Transform it so it match pinsr{b,w} which expects a GR32 as its second // argument. if (N1.getValueType() != MVT::i32) @@ -4357,7 +4432,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){ if (N2.getValueType() != MVT::i32) N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue()); return DAG.getNode(Opc, dl, VT, N0, N1, N2); - } else if (EVT == MVT::f32 && isa<ConstantSDNode>(N2)) { + } else if (EltVT == MVT::f32 && isa<ConstantSDNode>(N2)) { // Bits [7:6] of the constant are the source select. This will always be // zero here. The DAG Combiner may combine an extract_elt index into these // bits. For example (insert (extract, 3), 2) could be matched by putting @@ -4367,24 +4442,25 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){ // Bits [3:0] of the constant are the zero mask. The DAG Combiner may // combine either bitwise AND or insert of float 0.0 to set these bits. N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4); + // Create this as a scalar to vector.. + N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1); return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2); - } else if (EVT == MVT::i32) { - // InsertPS works with constant index. - if (isa<ConstantSDNode>(N2)) - return Op; + } else if (EltVT == MVT::i32 && isa<ConstantSDNode>(N2)) { + // PINSR* works with constant index. 
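[annotation] The INSERTPS handling above shifts the requested element index into bits [5:4] of the immediate. The instruction packs three fields: bits [7:6] select the source lane, [5:4] the destination lane, [3:0] a post-insert zero mask. A sketch of the encoding:

    #include <cstdint>

    uint8_t insertpsImm(unsigned countS, unsigned countD, unsigned zmask) {
      return (uint8_t)((countS << 6) | (countD << 4) | (zmask & 0xF));
    }
    // The lowering uses countS = 0 (the value arrives as lane 0 of a
    // scalar_to_vector), countD = the insert index, zmask = 0.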
+ return Op; } return SDValue(); } SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType(); - MVT EVT = VT.getVectorElementType(); + EVT VT = Op.getValueType(); + EVT EltVT = VT.getVectorElementType(); if (Subtarget->hasSSE41()) return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG); - if (EVT == MVT::i8) + if (EltVT == MVT::i8) return SDValue(); DebugLoc dl = Op.getDebugLoc(); @@ -4392,7 +4468,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { SDValue N1 = Op.getOperand(1); SDValue N2 = Op.getOperand(2); - if (EVT.getSizeInBits() == 16 && isa<ConstantSDNode>(N2)) { + if (EltVT.getSizeInBits() == 16 && isa<ConstantSDNode>(N2)) { // Transform it so it match pinsrw which expects a 16-bit value in a GR32 // as its second argument. if (N1.getValueType() != MVT::i32) @@ -4413,9 +4489,12 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op.getOperand(0)))); + if (Op.getValueType() == MVT::v1i64 && Op.getOperand(0).getValueType() == MVT::i64) + return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0)); + SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0)); - MVT VT = MVT::v2i32; - switch (Op.getValueType().getSimpleVT()) { + EVT VT = MVT::v2i32; + switch (Op.getValueType().getSimpleVT().SimpleTy) { default: break; case MVT::v16i8: case MVT::v8i16: @@ -4435,21 +4514,21 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) { ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); - + // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the // global base reg. unsigned char OpFlag = 0; unsigned WrapperKind = X86ISD::Wrapper; - if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { - if (Subtarget->isPICStyleStub()) - OpFlag = X86II::MO_PIC_BASE_OFFSET; - else if (Subtarget->isPICStyleGOT()) - OpFlag = X86II::MO_GOTOFF; - else if (Subtarget->isPICStyleRIPRel() && - getTargetMachine().getCodeModel() == CodeModel::Small) - WrapperKind = X86ISD::WrapperRIP; - } - + CodeModel::Model M = getTargetMachine().getCodeModel(); + + if (Subtarget->isPICStyleRIPRel() && + (M == CodeModel::Small || M == CodeModel::Kernel)) + WrapperKind = X86ISD::WrapperRIP; + else if (Subtarget->isPICStyleGOT()) + OpFlag = X86II::MO_GOTOFF; + else if (Subtarget->isPICStyleStubPIC()) + OpFlag = X86II::MO_PIC_BASE_OFFSET; + SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(), CP->getAlignment(), CP->getOffset(), OpFlag); @@ -4468,25 +4547,26 @@ X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) { JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); - + // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the // global base reg. 
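[annotation] LowerConstantPool above (and the jump-table and external-symbol lowerings that follow) now share one classification: WrapperRIP for RIP-relative small/kernel code, otherwise a relocation flavor plus the PIC base. A condensed model of the choice (enums illustrative; the MO_ names are the real flags from the patch):

    enum class PICStyle { RIPRel, GOT, StubPIC, None };
    enum class Wrapper  { Plain, RIP };
    struct SymRef { Wrapper wrap; const char *opFlag; };

    SymRef classifyLocalRef(PICStyle style, bool smallOrKernel) {
      if (style == PICStyle::RIPRel && smallOrKernel)
        return { Wrapper::RIP,   "MO_NO_FLAG" };          // sym(%rip)
      if (style == PICStyle::GOT)
        return { Wrapper::Plain, "MO_GOTOFF" };           // sym@GOTOFF + GOT reg
      if (style == PICStyle::StubPIC)
        return { Wrapper::Plain, "MO_PIC_BASE_OFFSET" };  // sym - picbase
      return { Wrapper::Plain, "MO_NO_FLAG" };            // direct absolute
    }
    // Any nonzero flag is later materialized as the "$g + Offset" add
    // visible in the code above.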
unsigned char OpFlag = 0; unsigned WrapperKind = X86ISD::Wrapper; - if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { - if (Subtarget->isPICStyleStub()) - OpFlag = X86II::MO_PIC_BASE_OFFSET; - else if (Subtarget->isPICStyleGOT()) - OpFlag = X86II::MO_GOTOFF; - else if (Subtarget->isPICStyleRIPRel()) - WrapperKind = X86ISD::WrapperRIP; - } - + CodeModel::Model M = getTargetMachine().getCodeModel(); + + if (Subtarget->isPICStyleRIPRel() && + (M == CodeModel::Small || M == CodeModel::Kernel)) + WrapperKind = X86ISD::WrapperRIP; + else if (Subtarget->isPICStyleGOT()) + OpFlag = X86II::MO_GOTOFF; + else if (Subtarget->isPICStyleStubPIC()) + OpFlag = X86II::MO_PIC_BASE_OFFSET; + SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(), OpFlag); DebugLoc DL = JT->getDebugLoc(); Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result); - + // With PIC, the address is actually $g + Offset. if (OpFlag) { Result = DAG.getNode(ISD::ADD, DL, getPointerTy(), @@ -4494,43 +4574,44 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) { DebugLoc::getUnknownLoc(), getPointerTy()), Result); } - + return Result; } SDValue X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) { const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); - + // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the // global base reg. unsigned char OpFlag = 0; unsigned WrapperKind = X86ISD::Wrapper; - if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { - if (Subtarget->isPICStyleStub()) - OpFlag = X86II::MO_PIC_BASE_OFFSET; - else if (Subtarget->isPICStyleGOT()) - OpFlag = X86II::MO_GOTOFF; - else if (Subtarget->isPICStyleRIPRel()) - WrapperKind = X86ISD::WrapperRIP; - } - + CodeModel::Model M = getTargetMachine().getCodeModel(); + + if (Subtarget->isPICStyleRIPRel() && + (M == CodeModel::Small || M == CodeModel::Kernel)) + WrapperKind = X86ISD::WrapperRIP; + else if (Subtarget->isPICStyleGOT()) + OpFlag = X86II::MO_GOTOFF; + else if (Subtarget->isPICStyleStubPIC()) + OpFlag = X86II::MO_PIC_BASE_OFFSET; + SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlag); - + DebugLoc DL = Op.getDebugLoc(); Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result); - - + + // With PIC, the address is actually $g + Offset. if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && - !Subtarget->isPICStyleRIPRel()) { + !Subtarget->is64Bit()) { Result = DAG.getNode(ISD::ADD, DL, getPointerTy(), DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc::getUnknownLoc(), getPointerTy()), Result); } - + return Result; } @@ -4538,53 +4619,37 @@ SDValue X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, int64_t Offset, SelectionDAG &DAG) const { - bool IsPic = getTargetMachine().getRelocationModel() == Reloc::PIC_; - bool ExtraLoadRequired = - Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false); - // Create the TargetGlobalAddress node, folding in the constant // offset if it is legal. + unsigned char OpFlags = + Subtarget->ClassifyGlobalReference(GV, getTargetMachine()); + CodeModel::Model M = getTargetMachine().getCodeModel(); SDValue Result; - if (!IsPic && !ExtraLoadRequired && isInt32(Offset)) { + if (OpFlags == X86II::MO_NO_FLAG && + X86::isOffsetSuitableForCodeModel(Offset, M)) { + // A direct static reference to a global. 
Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset); Offset = 0; } else { - unsigned char OpFlags = 0; - - if (Subtarget->isPICStyleRIPRel() && - getTargetMachine().getRelocationModel() != Reloc::Static) { - if (ExtraLoadRequired) - OpFlags = X86II::MO_GOTPCREL; - } else if (Subtarget->isPICStyleGOT() && - getTargetMachine().getRelocationModel() == Reloc::PIC_) { - if (ExtraLoadRequired) - OpFlags = X86II::MO_GOT; - else - OpFlags = X86II::MO_GOTOFF; - } - Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0, OpFlags); } - + if (Subtarget->isPICStyleRIPRel() && - getTargetMachine().getCodeModel() == CodeModel::Small) + (M == CodeModel::Small || M == CodeModel::Kernel)) Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result); else Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result); // With PIC, the address is actually $g + Offset. - if (IsPic && !Subtarget->isPICStyleRIPRel()) { + if (isGlobalRelativeToPICBase(OpFlags)) { Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()), Result); } - // For Darwin & Mingw32, external and weak symbols are indirect, so we want to - // load the value at address GV, not the value of GV itself. This means that - // the GlobalAddress must be in the base or index register of the address, not - // the GV offset field. Platform check is inside GVRequiresExtraLoad() call - // The same applies for external symbols during PIC codegen - if (ExtraLoadRequired) + // For globals that require a load from a stub to get the address, emit the + // load. + if (isGlobalStubReference(OpFlags)) Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result, PseudoSourceValue::getGOT(), 0); @@ -4606,7 +4671,7 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { static SDValue GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, - SDValue *InFlag, const MVT PtrVT, unsigned ReturnReg, + SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg, unsigned char OperandFlags) { SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); DebugLoc dl = GA->getDebugLoc(); @@ -4628,7 +4693,7 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit static SDValue LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG, - const MVT PtrVT) { + const EVT PtrVT) { SDValue InFlag; DebugLoc dl = GA->getDebugLoc(); // ? function entry point might be better SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX, @@ -4643,7 +4708,7 @@ LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG, // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit static SDValue LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG, - const MVT PtrVT) { + const EVT PtrVT) { return GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT, X86::RAX, X86II::MO_TLSGD); } @@ -4651,7 +4716,7 @@ LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG, // Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or // "local exec" model. 
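[annotation] The rewritten LowerGlobalAddress separates two questions the old ExtraLoadRequired flag conflated: whether the computed address is relative to the PIC base, and whether it names the global itself or a pointer slot (GOT/stub entry) that must be loaded through. A rough model of how the pieces compose for "GV + Off" (illustrative only; the pointer dereference stands in for the GOT/stub load):

    #include <cstdint>

    uintptr_t materializeGlobal(uintptr_t sym, uintptr_t picbase,
                                bool relToPicBase, bool stubRef,
                                int64_t off, bool offsetFolded) {
      uintptr_t a = sym;
      if (relToPicBase) a += picbase;          // isGlobalRelativeToPICBase
      if (stubRef) a = *(uintptr_t *)a;        // isGlobalStubReference: 1 load
      if (!offsetFolded) a += (uintptr_t)off;  // offset applied last
      return a;
    }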
static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, - const MVT PtrVT, TLSModel::Model model, + const EVT PtrVT, TLSModel::Model model, bool is64Bit) { DebugLoc dl = GA->getDebugLoc(); // Get the Thread Pointer @@ -4677,7 +4742,7 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, assert(model == TLSModel::InitialExec); OperandFlags = X86II::MO_INDNTPOFF; } - + // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial // exec) SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0), @@ -4701,29 +4766,29 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { "TLS not implemented for non-ELF targets"); GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); const GlobalValue *GV = GA->getGlobal(); - + // If GV is an alias then use the aliasee for determining // thread-localness. if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) GV = GA->resolveAliasedGlobal(false); - + TLSModel::Model model = getTLSModel(GV, getTargetMachine().getRelocationModel()); - + switch (model) { case TLSModel::GeneralDynamic: case TLSModel::LocalDynamic: // not implemented if (Subtarget->is64Bit()) return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy()); return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy()); - + case TLSModel::InitialExec: case TLSModel::LocalExec: return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, Subtarget->is64Bit()); } - - assert(0 && "Unreachable"); + + llvm_unreachable("Unreachable"); return SDValue(); } @@ -4732,17 +4797,16 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { /// take a 2 x i32 value to shift plus a shift amount. SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); DebugLoc dl = Op.getDebugLoc(); bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; SDValue ShOpLo = Op.getOperand(0); SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); - SDValue Tmp1 = isSRA ? - DAG.getNode(ISD::SRA, dl, VT, ShOpHi, - DAG.getConstant(VTBits - 1, MVT::i8)) : - DAG.getConstant(0, VT); + SDValue Tmp1 = isSRA ? 
DAG.getNode(ISD::SRA, dl, VT, ShOpHi, + DAG.getConstant(VTBits - 1, MVT::i8)) + : DAG.getConstant(0, VT); SDValue Tmp2, Tmp3; if (Op.getOpcode() == ISD::SHL_PARTS) { @@ -4754,9 +4818,9 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) { } SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt, - DAG.getConstant(VTBits, MVT::i8)); + DAG.getConstant(VTBits, MVT::i8)); SDValue Cond = DAG.getNode(X86ISD::CMP, dl, VT, - AndNode, DAG.getConstant(0, MVT::i8)); + AndNode, DAG.getConstant(0, MVT::i8)); SDValue Hi, Lo; SDValue CC = DAG.getConstant(X86::COND_NE, MVT::i8); @@ -4776,7 +4840,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) { } SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { - MVT SrcVT = Op.getOperand(0).getValueType(); + EVT SrcVT = Op.getOperand(0).getValueType(); if (SrcVT.isVector()) { if (SrcVT == MVT::v2i32 && Op.getValueType() == MVT::v2f64) { @@ -4808,7 +4872,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG); } -SDValue X86TargetLowering::BuildFILD(SDValue Op, MVT SrcVT, SDValue Chain, +SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot, SelectionDAG &DAG) { // Build the FILD @@ -4888,19 +4952,22 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) { */ DebugLoc dl = Op.getDebugLoc(); + LLVMContext *Context = DAG.getContext(); // Build some magic constants. std::vector<Constant*> CV0; - CV0.push_back(ConstantInt::get(APInt(32, 0x45300000))); - CV0.push_back(ConstantInt::get(APInt(32, 0x43300000))); - CV0.push_back(ConstantInt::get(APInt(32, 0))); - CV0.push_back(ConstantInt::get(APInt(32, 0))); + CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x45300000))); + CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x43300000))); + CV0.push_back(ConstantInt::get(*Context, APInt(32, 0))); + CV0.push_back(ConstantInt::get(*Context, APInt(32, 0))); Constant *C0 = ConstantVector::get(CV0); SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16); std::vector<Constant*> CV1; - CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4530000000000000ULL)))); - CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4330000000000000ULL)))); + CV1.push_back( + ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL)))); + CV1.push_back( + ConstantFP::get(*Context, APFloat(APInt(64, 0x4330000000000000ULL)))); Constant *C1 = ConstantVector::get(CV1); SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16); @@ -4965,7 +5032,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) { SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias); // Handle final rounding. - MVT DestVT = Op.getValueType(); + EVT DestVT = Op.getValueType(); if (DestVT.bitsLT(MVT::f64)) { return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub, @@ -4988,7 +5055,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) { if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0); - MVT SrcVT = N0.getValueType(); + EVT SrcVT = N0.getValueType(); if (SrcVT == MVT::i64) { // We only handle SSE2 f64 target here; caller can expand the rest. 
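[annotation] The magic constants 0x43300000 (exponent of 2^52) and 0x45300000 (2^84) in LowerUINT_TO_FP_i64 above implement the classic exact uint64-to-double conversion: OR each 32-bit half into the mantissa of one of those exponents, subtract the bias, add the parts. A scalar C++ equivalent of the math the punpckldq/subpd sequence computes:

    #include <cstdint>
    #include <cstring>

    static double bitsToDouble(uint64_t b) {
      double d; std::memcpy(&d, &b, 8); return d;
    }

    double u64ToF64(uint64_t x) {
      // Low 32 bits in the mantissa of 2^52: value is exactly 2^52 + lo.
      double lo = bitsToDouble(0x4330000000000000ULL | (x & 0xffffffffULL))
                - bitsToDouble(0x4330000000000000ULL);
      // High 32 bits in the mantissa of 2^84: value is 2^84 + hi * 2^32.
      double hi = bitsToDouble(0x4530000000000000ULL | (x >> 32))
                - bitsToDouble(0x4530000000000000ULL);
      return hi + lo;   // the only step that rounds
    }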
if (Op.getValueType() != MVT::f64 || !X86ScalarSSEf64) @@ -5017,7 +5084,7 @@ std::pair<SDValue,SDValue> X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) { DebugLoc dl = Op.getDebugLoc(); - MVT DstTy = Op.getValueType(); + EVT DstTy = Op.getValueType(); if (!IsSigned) { assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT"); @@ -5043,10 +5110,10 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) { unsigned MemSize = DstTy.getSizeInBits()/8; int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); - + unsigned Opc; - switch (DstTy.getSimpleVT()) { - default: assert(0 && "Invalid FP_TO_SINT to lower!"); + switch (DstTy.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Invalid FP_TO_SINT to lower!"); case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; @@ -5105,18 +5172,19 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) { } SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) { + LLVMContext *Context = DAG.getContext(); DebugLoc dl = Op.getDebugLoc(); - MVT VT = Op.getValueType(); - MVT EltVT = VT; + EVT VT = Op.getValueType(); + EVT EltVT = VT; if (VT.isVector()) EltVT = VT.getVectorElementType(); std::vector<Constant*> CV; if (EltVT == MVT::f64) { - Constant *C = ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63)))); + Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63)))); CV.push_back(C); CV.push_back(C); } else { - Constant *C = ConstantFP::get(APFloat(APInt(32, ~(1U << 31)))); + Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31)))); CV.push_back(C); CV.push_back(C); CV.push_back(C); @@ -5131,21 +5199,19 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) { } SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) { + LLVMContext *Context = DAG.getContext(); DebugLoc dl = Op.getDebugLoc(); - MVT VT = Op.getValueType(); - MVT EltVT = VT; - unsigned EltNum = 1; - if (VT.isVector()) { + EVT VT = Op.getValueType(); + EVT EltVT = VT; + if (VT.isVector()) EltVT = VT.getVectorElementType(); - EltNum = VT.getVectorNumElements(); - } std::vector<Constant*> CV; if (EltVT == MVT::f64) { - Constant *C = ConstantFP::get(APFloat(APInt(64, 1ULL << 63))); + Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63))); CV.push_back(C); CV.push_back(C); } else { - Constant *C = ConstantFP::get(APFloat(APInt(32, 1U << 31))); + Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31))); CV.push_back(C); CV.push_back(C); CV.push_back(C); @@ -5168,11 +5234,12 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) { } SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { + LLVMContext *Context = DAG.getContext(); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); DebugLoc dl = Op.getDebugLoc(); - MVT VT = Op.getValueType(); - MVT SrcVT = Op1.getValueType(); + EVT VT = Op.getValueType(); + EVT SrcVT = Op1.getValueType(); // If second operand is smaller, extend it first. if (SrcVT.bitsLT(VT)) { @@ -5191,13 +5258,13 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { // First get the sign bit of second operand. 
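[annotation] LowerFABS and LowerFNEG above splat ~(1<<63) and (1<<63) into constant-pool vectors and apply ANDPS/XORPS. The same bit manipulation at the scalar level, for reference:

    #include <cstdint>
    #include <cstring>

    double fabsBits(double x) {               // andps: clear the sign bit
      uint64_t b; std::memcpy(&b, &x, 8);
      b &= ~(1ULL << 63);
      std::memcpy(&x, &b, 8);
      return x;
    }
    double fnegBits(double x) {               // xorps: flip the sign bit
      uint64_t b; std::memcpy(&b, &x, 8);
      b ^= (1ULL << 63);
      std::memcpy(&x, &b, 8);
      return x;
    }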
std::vector<Constant*> CV; if (SrcVT == MVT::f64) { - CV.push_back(ConstantFP::get(APFloat(APInt(64, 1ULL << 63)))); - CV.push_back(ConstantFP::get(APFloat(APInt(64, 0)))); + CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63)))); + CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0)))); } else { - CV.push_back(ConstantFP::get(APFloat(APInt(32, 1U << 31)))); - CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); - CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); - CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); + CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31)))); + CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0)))); + CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0)))); + CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0)))); } Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); @@ -5220,13 +5287,13 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { // Clear first operand sign bit. CV.clear(); if (VT == MVT::f64) { - CV.push_back(ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63))))); - CV.push_back(ConstantFP::get(APFloat(APInt(64, 0)))); + CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63))))); + CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0)))); } else { - CV.push_back(ConstantFP::get(APFloat(APInt(32, ~(1U << 31))))); - CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); - CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); - CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); + CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31))))); + CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0)))); + CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0)))); + CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0)))); } C = ConstantVector::get(CV); CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); @@ -5299,21 +5366,48 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, Opcode = X86ISD::ADD; NumOperands = 2; break; + case ISD::AND: { + // If the primary and result isn't used, don't bother using X86ISD::AND, + // because a TEST instruction will be better. + bool NonFlagUse = false; + for (SDNode::use_iterator UI = Op.getNode()->use_begin(), + UE = Op.getNode()->use_end(); UI != UE; ++UI) + if (UI->getOpcode() != ISD::BRCOND && + UI->getOpcode() != ISD::SELECT && + UI->getOpcode() != ISD::SETCC) { + NonFlagUse = true; + break; + } + if (!NonFlagUse) + break; + } + // FALL THROUGH case ISD::SUB: - // Due to the ISEL shortcoming noted above, be conservative if this sub is + case ISD::OR: + case ISD::XOR: + // Due to the ISEL shortcoming noted above, be conservative if this op is // likely to be selected as part of a load-modify-store instruction. for (SDNode::use_iterator UI = Op.getNode()->use_begin(), UE = Op.getNode()->use_end(); UI != UE; ++UI) if (UI->getOpcode() == ISD::STORE) goto default_case; - // Otherwise use a regular EFLAGS-setting sub. - Opcode = X86ISD::SUB; + // Otherwise use a regular EFLAGS-setting instruction. 
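[annotation] LowerFCOPYSIGN above builds two mask vectors: one to take the sign bit of the second operand, one to clear the sign bit of the first, then ORs the pieces. Scalar model of the same computation (the real code also handles f32 and mixed widths):

    #include <cstdint>
    #include <cstring>

    double copysignBits(double x, double y) {
      uint64_t bx, by;
      std::memcpy(&bx, &x, 8);
      std::memcpy(&by, &y, 8);
      uint64_t r = (bx & ~(1ULL << 63))   // magnitude of x (andps)
                 | (by &  (1ULL << 63));  // sign of y      (andps, then orps)
      std::memcpy(&x, &r, 8);
      return x;
    }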
+ switch (Op.getNode()->getOpcode()) { + case ISD::SUB: Opcode = X86ISD::SUB; break; + case ISD::OR: Opcode = X86ISD::OR; break; + case ISD::XOR: Opcode = X86ISD::XOR; break; + case ISD::AND: Opcode = X86ISD::AND; break; + default: llvm_unreachable("unexpected operator!"); + } NumOperands = 2; break; case X86ISD::ADD: case X86ISD::SUB: case X86ISD::INC: case X86ISD::DEC: + case X86ISD::OR: + case X86ISD::XOR: + case X86ISD::AND: return SDValue(Op.getNode(), 1); default: default_case: @@ -5419,14 +5513,14 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDValue CC = Op.getOperand(2); - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); bool isFP = Op.getOperand(1).getValueType().isFloatingPoint(); DebugLoc dl = Op.getDebugLoc(); if (isFP) { unsigned SSECC = 8; - MVT VT0 = Op0.getValueType(); + EVT VT0 = Op0.getValueType(); assert(VT0 == MVT::v4f32 || VT0 == MVT::v2f64); unsigned Opc = VT0 == MVT::v4f32 ? X86ISD::CMPPS : X86ISD::CMPPD; bool Swap = false; @@ -5469,7 +5563,7 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) { NEQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(4, MVT::i8)); return DAG.getNode(ISD::AND, dl, VT, ORD, NEQ); } - assert(0 && "Illegal FP comparison"); + llvm_unreachable("Illegal FP comparison"); } // Handle all other FP comparisons here. return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8)); @@ -5481,10 +5575,13 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) { unsigned Opc = 0, EQOpc = 0, GTOpc = 0; bool Swap = false, Invert = false, FlipSigns = false; - switch (VT.getSimpleVT()) { + switch (VT.getSimpleVT().SimpleTy) { default: break; + case MVT::v8i8: case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break; + case MVT::v4i16: case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break; + case MVT::v2i32: case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break; case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break; } @@ -5508,7 +5605,7 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) { // Since SSE has no unsigned integer comparisons, we need to flip the sign // bits of the inputs before performing those operations. if (FlipSigns) { - MVT EltVT = VT.getVectorElementType(); + EVT EltVT = VT.getVectorElementType(); SDValue SignBit = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()), EltVT); std::vector<SDValue> SignBits(VT.getVectorNumElements(), SignBit); @@ -5538,7 +5635,10 @@ static bool isX86LogicalCmp(SDValue Op) { Opc == X86ISD::SMUL || Opc == X86ISD::UMUL || Opc == X86ISD::INC || - Opc == X86ISD::DEC)) + Opc == X86ISD::DEC || + Opc == X86ISD::OR || + Opc == X86ISD::XOR || + Opc == X86ISD::AND)) return true; return false; @@ -5560,7 +5660,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) { SDValue Cmp = Cond.getOperand(1); unsigned Opc = Cmp.getOpcode(); - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); bool IllegalFPCMov = false; if (VT.isFloatingPoint() && !VT.isVector() && @@ -5751,8 +5851,8 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDValue Flag; - MVT IntPtr = getPointerTy(); - MVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; + EVT IntPtr = getPointerTy(); + EVT SPTy = Subtarget->is64Bit() ? 
MVT::i64 : MVT::i32; Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true)); @@ -5802,8 +5902,8 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, if (const char *bzeroEntry = V && V->isNullValue() ? Subtarget->getBZeroEntry() : 0) { - MVT IntPtr = getPointerTy(); - const Type *IntPtrTy = TD->getIntPtrType(); + EVT IntPtr = getPointerTy(); + const Type *IntPtrTy = TD->getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; @@ -5812,8 +5912,9 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, Entry.Node = Size; Args.push_back(Entry); std::pair<SDValue,SDValue> CallResult = - LowerCallTo(Chain, Type::VoidTy, false, false, false, false, - 0, CallingConv::C, false, + LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), + false, false, false, false, + 0, CallingConv::C, false, /*isReturnValueUsed=*/false, DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl); return CallResult.second; } @@ -5824,7 +5925,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, uint64_t SizeVal = ConstantSize->getZExtValue(); SDValue InFlag(0, 0); - MVT AVT; + EVT AVT; SDValue Count; ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src); unsigned BytesLeft = 0; @@ -5893,7 +5994,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, if (TwoRepStos) { InFlag = Chain.getValue(1); Count = Size; - MVT CVT = Count.getValueType(); + EVT CVT = Count.getValueType(); SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count, DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX : @@ -5909,8 +6010,8 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, } else if (BytesLeft) { // Handle the last 1 - 7 bytes. unsigned Offset = SizeVal - BytesLeft; - MVT AddrVT = Dst.getValueType(); - MVT SizeVT = Size.getValueType(); + EVT AddrVT = Dst.getValueType(); + EVT SizeVT = Size.getValueType(); Chain = DAG.getMemset(Chain, dl, DAG.getNode(ISD::ADD, dl, AddrVT, Dst, @@ -5945,7 +6046,7 @@ X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, return SDValue(); // DWORD aligned - MVT AVT = MVT::i32; + EVT AVT = MVT::i32; if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned AVT = MVT::i64; @@ -5980,9 +6081,9 @@ X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, if (BytesLeft) { // Handle the last 1 - 7 bytes. unsigned Offset = SizeVal - BytesLeft; - MVT DstVT = Dst.getValueType(); - MVT SrcVT = Src.getValueType(); - MVT SizeVT = Size.getValueType(); + EVT DstVT = Dst.getValueType(); + EVT SrcVT = Src.getValueType(); + EVT SizeVT = Size.getValueType(); Results.push_back(DAG.getMemcpy(Chain, dl, DAG.getNode(ISD::ADD, dl, DstVT, Dst, DAG.getConstant(Offset, DstVT)), @@ -6054,8 +6155,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) { SDValue SrcPtr = Op.getOperand(1); SDValue SrcSV = Op.getOperand(2); - assert(0 && "VAArgInst is not yet implemented for x86-64!"); - abort(); + llvm_report_error("VAArgInst is not yet implemented for x86-64!"); return SDValue(); } @@ -6179,6 +6279,36 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { DAG.getConstant(X86CC, MVT::i8), Cond); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } + // ptest intrinsics. 
The intrinsic these come from are designed to return + // an integer value, not just an instruction so lower it to the ptest + // pattern and a setcc for the result. + case Intrinsic::x86_sse41_ptestz: + case Intrinsic::x86_sse41_ptestc: + case Intrinsic::x86_sse41_ptestnzc:{ + unsigned X86CC = 0; + switch (IntNo) { + default: llvm_unreachable("Bad fallthrough in Intrinsic lowering."); + case Intrinsic::x86_sse41_ptestz: + // ZF = 1 + X86CC = X86::COND_E; + break; + case Intrinsic::x86_sse41_ptestc: + // CF = 1 + X86CC = X86::COND_B; + break; + case Intrinsic::x86_sse41_ptestnzc: + // ZF and CF = 0 + X86CC = X86::COND_A; + break; + } + + SDValue LHS = Op.getOperand(1); + SDValue RHS = Op.getOperand(2); + SDValue Test = DAG.getNode(X86ISD::PTEST, dl, MVT::i32, LHS, RHS); + SDValue CC = DAG.getConstant(X86CC, MVT::i8); + SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test); + return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); + } // Fix vector shift instructions where the last operand is a non-immediate // i32 value. @@ -6203,7 +6333,7 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { return SDValue(); unsigned NewIntNo = 0; - MVT ShAmtVT = MVT::v4i32; + EVT ShAmtVT = MVT::v4i32; switch (IntNo) { case Intrinsic::x86_sse2_pslli_w: NewIntNo = Intrinsic::x86_sse2_psll_w; @@ -6256,14 +6386,28 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { case Intrinsic::x86_mmx_psrai_d: NewIntNo = Intrinsic::x86_mmx_psra_d; break; - default: abort(); // Can't reach here. + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. } break; } } - MVT VT = Op.getValueType(); - ShAmt = DAG.getNode(ISD::BIT_CONVERT, dl, VT, - DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ShAmtVT, ShAmt)); + + // The vector shift intrinsics with scalars uses 32b shift amounts but + // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits + // to be zero. + SDValue ShOps[4]; + ShOps[0] = ShAmt; + ShOps[1] = DAG.getConstant(0, MVT::i32); + if (ShAmtVT == MVT::v4i32) { + ShOps[2] = DAG.getUNDEF(MVT::i32); + ShOps[3] = DAG.getUNDEF(MVT::i32); + ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 4); + } else { + ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2); + } + + EVT VT = Op.getValueType(); + ShAmt = DAG.getNode(ISD::BIT_CONVERT, dl, VT, ShAmt); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(NewIntNo, MVT::i32), Op.getOperand(1), ShAmt); @@ -6295,7 +6439,7 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setFrameAddressIsTaken(true); - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP; @@ -6401,12 +6545,12 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, } else { const Function *Func = cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue()); - unsigned CC = Func->getCallingConv(); + CallingConv::ID CC = Func->getCallingConv(); unsigned NestReg; switch (CC) { default: - assert(0 && "Unsupported calling convention"); + llvm_unreachable("Unsupported calling convention"); case CallingConv::C: case CallingConv::X86_StdCall: { // Pass 'nest' parameter in ECX. 
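A minimal scalar sketch of the ptest flag semantics the lowering above relies on (names are illustrative; A plays the role of the first operand, B the second): ptestz observes ZF, ptestc observes CF, and ptestnzc requires both clear, hence COND_E, COND_B, and COND_A respectively.

    #include <cstdint>
    bool ptestz(uint64_t A, uint64_t B)   { return (A & B) == 0;  }  // ZF == 1
    bool ptestc(uint64_t A, uint64_t B)   { return (~A & B) == 0; }  // CF == 1
    bool ptestnzc(uint64_t A, uint64_t B) {
      return !ptestz(A, B) && !ptestc(A, B);                // ZF == 0 && CF == 0
    }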
@@ -6428,8 +6572,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32; if (InRegCount > 2) { - cerr << "Nest register in use - reduce number of inreg parameters!\n"; - abort(); + llvm_report_error("Nest register in use - reduce number of inreg parameters!"); } } break; @@ -6499,7 +6642,7 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { const TargetMachine &TM = MF.getTarget(); const TargetFrameInfo &TFI = *TM.getFrameInfo(); unsigned StackAlignment = TFI.getStackAlignment(); - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); // Save FP Control Word to stack slot @@ -6537,8 +6680,8 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { } SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType(); - MVT OpVT = VT; + EVT VT = Op.getValueType(); + EVT OpVT = VT; unsigned NumBits = VT.getSizeInBits(); DebugLoc dl = Op.getDebugLoc(); @@ -6570,8 +6713,8 @@ SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) { } SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType(); - MVT OpVT = VT; + EVT VT = Op.getValueType(); + EVT OpVT = VT; unsigned NumBits = VT.getSizeInBits(); DebugLoc dl = Op.getDebugLoc(); @@ -6599,7 +6742,7 @@ SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) { } SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply"); DebugLoc dl = Op.getDebugLoc(); @@ -6656,7 +6799,7 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); switch (Op.getOpcode()) { - default: assert(0 && "Unknown ovf instruction!"); + default: llvm_unreachable("Unknown ovf instruction!"); case ISD::SADDO: // A subtract of one will be selected as a INC. Note that INC doesn't // set CF, so we can't do this for UADDO. 
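LowerCTLZ above has to cope with a BSR quirk: the instruction leaves its destination undefined when the input is zero, so the lowered sequence patches that case in with a CMOV. A scalar model of the relationship (illustrative names, not LLVM API):

    unsigned bsr32(unsigned X) {            // index of highest set bit; X != 0
      unsigned Idx = 0;
      while (X >>= 1) ++Idx;
      return Idx;
    }
    unsigned ctlz32(unsigned X) {
      return X == 0 ? 32 : 31 - bsr32(X);   // the X == 0 arm is the CMOV path
    }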
@@ -6712,11 +6855,11 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) { } SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) { - MVT T = Op.getValueType(); + EVT T = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); unsigned Reg = 0; unsigned size = 0; - switch(T.getSimpleVT()) { + switch(T.getSimpleVT().SimpleTy) { default: assert(false && "Invalid value type!"); case MVT::i8: Reg = X86::AL; size = 1; break; @@ -6763,7 +6906,7 @@ SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op, SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) { SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); - MVT T = Node->getValueType(0); + EVT T = Node->getValueType(0); SDValue negOp = DAG.getNode(ISD::SUB, dl, T, DAG.getConstant(0, T), Node->getOperand(2)); return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl, @@ -6778,7 +6921,7 @@ SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) { /// SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { switch (Op.getOpcode()) { - default: assert(0 && "Should not custom lower this!"); + default: llvm_unreachable("Should not custom lower this!"); case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG); case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); @@ -6805,9 +6948,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); - case ISD::CALL: return LowerCALL(Op, DAG); - case ISD::RET: return LowerRET(Op, DAG); - case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::VAARG: return LowerVAARG(Op, DAG); case ISD::VACOPY: return LowerVACOPY(Op, DAG); @@ -6836,7 +6976,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { void X86TargetLowering:: ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results, SelectionDAG &DAG, unsigned NewOp) { - MVT T = Node->getValueType(0); + EVT T = Node->getValueType(0); DebugLoc dl = Node->getDebugLoc(); assert (T == MVT::i64 && "Only know how to expand i64 atomics"); @@ -6846,12 +6986,11 @@ ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results, Node->getOperand(2), DAG.getIntPtrConstant(0)); SDValue In2H = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Node->getOperand(2), DAG.getIntPtrConstant(1)); - // This is a generalized SDNode, not an AtomicSDNode, so it doesn't - // have a MemOperand. Pass the info through as a normal operand. 
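The LowerLOAD_SUB rewrite above, restated at the source level: an atomic subtract is an atomic add of the two's-complement negation, which is exactly why the node is rebuilt as ATOMIC_LOAD_ADD of (0 - val). A minimal sketch:

    #include <atomic>
    unsigned fetch_sub_via_add(std::atomic<unsigned> &A, unsigned V) {
      return A.fetch_add(0u - V);   // same old value returned, same memory effect
    }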
- SDValue LSI = DAG.getMemOperand(cast<MemSDNode>(Node)->getMemOperand()); - SDValue Ops[] = { Chain, In1, In2L, In2H, LSI }; + SDValue Ops[] = { Chain, In1, In2L, In2H }; SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); - SDValue Result = DAG.getNode(NewOp, dl, Tys, Ops, 5); + SDValue Result = + DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, 4, MVT::i64, + cast<MemSDNode>(Node)->getMemOperand()); SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)}; Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2)); Results.push_back(Result.getValue(2)); @@ -6872,7 +7011,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, FP_TO_INTHelper(SDValue(N, 0), DAG, true); SDValue FIST = Vals.first, StackSlot = Vals.second; if (FIST.getNode() != 0) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // Return a load from the stack slot. Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, NULL, 0)); } @@ -6893,7 +7032,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } case ISD::ATOMIC_CMP_SWAP: { - MVT T = N->getValueType(0); + EVT T = N->getValueType(0); assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap"); SDValue cpInL, cpInH; cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(2), @@ -6969,7 +7108,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FLD: return "X86ISD::FLD"; case X86ISD::FST: return "X86ISD::FST"; case X86ISD::CALL: return "X86ISD::CALL"; - case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; case X86ISD::BT: return "X86ISD::BT"; case X86ISD::CMP: return "X86ISD::CMP"; @@ -7027,7 +7165,12 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::UMUL: return "X86ISD::UMUL"; case X86ISD::INC: return "X86ISD::INC"; case X86ISD::DEC: return "X86ISD::DEC"; + case X86ISD::OR: return "X86ISD::OR"; + case X86ISD::XOR: return "X86ISD::XOR"; + case X86ISD::AND: return "X86ISD::AND"; case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM"; + case X86ISD::PTEST: return "X86ISD::PTEST"; + case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; } } @@ -7036,28 +7179,28 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const { // X86 supports extremely general addressing modes. + CodeModel::Model M = getTargetMachine().getCodeModel(); // X86 allows a sign-extended 32-bit immediate field as a displacement. - if (AM.BaseOffs <= -(1LL << 32) || AM.BaseOffs >= (1LL << 32)-1) + if (!X86::isOffsetSuitableForCodeModel(AM.BaseOffs, M, AM.BaseGV != NULL)) return false; if (AM.BaseGV) { - // We can only fold this if we don't need an extra load. - if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false)) + unsigned GVFlags = + Subtarget->ClassifyGlobalReference(AM.BaseGV, getTargetMachine()); + + // If a reference to this global requires an extra load, we can't fold it. + if (isGlobalStubReference(GVFlags)) return false; - // If BaseGV requires a register, we cannot also have a BaseReg. - if (Subtarget->GVRequiresRegister(AM.BaseGV, getTargetMachine(), false) && - AM.HasBaseReg) + + // If BaseGV requires a register for the PIC base, we cannot also have a + // BaseReg specified. + if (AM.HasBaseReg && isGlobalRelativeToPICBase(GVFlags)) return false; - // X86-64 only supports addr of globals in small code model. 
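The displacement rule that isOffsetSuitableForCodeModel centralizes for isLegalAddressingMode above: x86 memory operands carry a sign-extended 32-bit displacement, so an offset can fold only if it survives a round trip through i32. A sketch (the helper name is illustrative):

    #include <cstdint>
    bool fitsInSExt32(int64_t Off) {
      return Off == static_cast<int64_t>(static_cast<int32_t>(Off));
    }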
- if (Subtarget->is64Bit()) { - if (getTargetMachine().getCodeModel() != CodeModel::Small) - return false; - // If lower 4G is not available, then we must use rip-relative addressing. - if (AM.BaseOffs || AM.Scale > 1) - return false; - } + // If lower 4G is not available, then we must use rip-relative addressing. + if (Subtarget->is64Bit() && (AM.BaseOffs || AM.Scale > 1)) + return false; } switch (AM.Scale) { @@ -7094,7 +7237,7 @@ bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const { return Subtarget->is64Bit() || NumBits1 < 64; } -bool X86TargetLowering::isTruncateFree(MVT VT1, MVT VT2) const { +bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { if (!VT1.isInteger() || !VT2.isInteger()) return false; unsigned NumBits1 = VT1.getSizeInBits(); @@ -7106,15 +7249,16 @@ bool X86TargetLowering::isTruncateFree(MVT VT1, MVT VT2) const { bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const { // x86-64 implicitly zero-extends 32-bit results in 64-bit registers. - return Ty1 == Type::Int32Ty && Ty2 == Type::Int64Ty && Subtarget->is64Bit(); + return Ty1 == Type::getInt32Ty(Ty1->getContext()) && + Ty2 == Type::getInt64Ty(Ty1->getContext()) && Subtarget->is64Bit(); } -bool X86TargetLowering::isZExtFree(MVT VT1, MVT VT2) const { +bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { // x86-64 implicitly zero-extends 32-bit results in 64-bit registers. return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget->is64Bit(); } -bool X86TargetLowering::isNarrowingProfitable(MVT VT1, MVT VT2) const { +bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const { // i16 instructions are longer (0x66 prefix) and potentially slower. return !(VT1 == MVT::i32 && VT2 == MVT::i16); } @@ -7124,8 +7268,8 @@ bool X86TargetLowering::isNarrowingProfitable(MVT VT1, MVT VT2) const { /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values /// are assumed to be legal. bool -X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, - MVT VT) const { +X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, + EVT VT) const { // Only do shuffles on 128-bit vector types for now. if (VT.getSizeInBits() == 64) return false; @@ -7146,7 +7290,7 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, bool X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask, - MVT VT) const { + EVT VT) const { unsigned NumElts = VT.getVectorNumElements(); // FIXME: This collection of masks seems suspect. 
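Why isZExtFree(i32, i64) above holds on x86-64, as a one-line sketch: writing any 32-bit register implicitly zeroes bits 63:32, so the extension needs no extra instruction.

    #include <cstdint>
    uint64_t zext_i32(uint32_t X) {
      return X;   // typically a plain 32-bit mov; no explicit extend is emitted
    }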
if (NumElts == 2) @@ -7254,7 +7398,8 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, (*MIB).addOperand(*argOpers[i]); MIB.addReg(t2); assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand"); - (*MIB).addMemOperand(*F, *bInstr->memoperands_begin()); + (*MIB).setMemRefs(bInstr->memoperands_begin(), + bInstr->memoperands_end()); MIB = BuildMI(newMBB, dl, TII->get(copyOpc), destOper.getReg()); MIB.addReg(EAXreg); @@ -7406,7 +7551,8 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, (*MIB).addOperand(*argOpers[i]); assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand"); - (*MIB).addMemOperand(*F, *bInstr->memoperands_begin()); + (*MIB).setMemRefs(bInstr->memoperands_begin(), + bInstr->memoperands_end()); MIB = BuildMI(newMBB, dl, TII->get(copyOpc), t3); MIB.addReg(X86::EAX); @@ -7450,7 +7596,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, F->insert(MBBIter, newMBB); F->insert(MBBIter, nextMBB); - // Move all successors to thisMBB to nextMBB + // Move all successors of thisMBB to nextMBB nextMBB->transferSuccessors(thisMBB); // Update thisMBB to fall through to newMBB @@ -7510,7 +7656,8 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, (*MIB).addOperand(*argOpers[i]); MIB.addReg(t3); assert(mInstr->hasOneMemOperand() && "Unexpected number of memoperand"); - (*MIB).addMemOperand(*F, *mInstr->memoperands_begin()); + (*MIB).setMemRefs(mInstr->memoperands_begin(), + mInstr->memoperands_end()); MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), destOper.getReg()); MIB.addReg(X86::EAX); @@ -7522,70 +7669,190 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, return nextMBB; } - +// FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8 +// all of this code can be replaced with that in the .td file. MachineBasicBlock * -X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB) const { +X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, + unsigned numArgs, bool memArg) const { + + MachineFunction *F = BB->getParent(); DebugLoc dl = MI->getDebugLoc(); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + unsigned Opc; + if (memArg) + Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm; + else + Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr; + + MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(Opc)); + + for (unsigned i = 0; i < numArgs; ++i) { + MachineOperand &Op = MI->getOperand(i+1); + + if (!(Op.isReg() && Op.isImplicit())) + MIB.addOperand(Op); + } + + BuildMI(BB, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg()) + .addReg(X86::XMM0); + + F->DeleteMachineInstr(MI); + + return BB; +} + +MachineBasicBlock * +X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( + MachineInstr *MI, + MachineBasicBlock *MBB) const { + // Emit code to save XMM registers to the stack. The ABI says that the + // number of registers to save is given in %al, so it's theoretically + // possible to do an indirect jump trick to avoid saving all of them, + // however this code takes a simpler approach and just executes all + // of the stores if %al is non-zero. It's less code, and it's probably + // easier on the hardware branch predictor, and stores aren't all that + // expensive anyway. + + // Create the new basic blocks. 
One block contains all the XMM stores, + // and one block is the final destination regardless of whether any + // stores were performed. + const BasicBlock *LLVM_BB = MBB->getBasicBlock(); + MachineFunction *F = MBB->getParent(); + MachineFunction::iterator MBBIter = MBB; + ++MBBIter; + MachineBasicBlock *XMMSaveMBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *EndMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(MBBIter, XMMSaveMBB); + F->insert(MBBIter, EndMBB); + + // Set up the CFG. + // Move any original successors of MBB to the end block. + EndMBB->transferSuccessors(MBB); + // The original block will now fall through to the XMM save block. + MBB->addSuccessor(XMMSaveMBB); + // The XMMSaveMBB will fall through to the end block. + XMMSaveMBB->addSuccessor(EndMBB); + + // Now add the instructions. + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + + unsigned CountReg = MI->getOperand(0).getReg(); + int64_t RegSaveFrameIndex = MI->getOperand(1).getImm(); + int64_t VarArgsFPOffset = MI->getOperand(2).getImm(); + + if (!Subtarget->isTargetWin64()) { + // If %al is 0, branch around the XMM save block. + BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg); + BuildMI(MBB, DL, TII->get(X86::JE)).addMBB(EndMBB); + MBB->addSuccessor(EndMBB); + } + + // In the XMM save block, save all the XMM argument registers. + for (int i = 3, e = MI->getNumOperands(); i != e; ++i) { + int64_t Offset = (i - 3) * 16 + VarArgsFPOffset; + MachineMemOperand *MMO = + F->getMachineMemOperand( + PseudoSourceValue::getFixedStack(RegSaveFrameIndex), + MachineMemOperand::MOStore, Offset, + /*Size=*/16, /*Align=*/16); + BuildMI(XMMSaveMBB, DL, TII->get(X86::MOVAPSmr)) + .addFrameIndex(RegSaveFrameIndex) + .addImm(/*Scale=*/1) + .addReg(/*IndexReg=*/0) + .addImm(/*Disp=*/Offset) + .addReg(/*Segment=*/0) + .addReg(MI->getOperand(i).getReg()) + .addMemOperand(MMO); + } + + F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + + return EndMBB; +} + +MachineBasicBlock * +X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, + MachineBasicBlock *BB, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... + // cmpTY ccX, r1, r2 + // bCC copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + unsigned Opc = + X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm()); + BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + // Update machine-CFG edges by first adding all successors of the current + // block to the new block which will contain the Phi node for the select. + // Also inform sdisel of the edge changes. 
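The diamond described in the comments above, compressed into source form (illustrative only): the pseudo CMOV becomes a conditional branch around a false-value block, with a PHI merging the two arms in the sink block.

    int select_diamond(bool CC, int TrueV, int FalseV) {
      int Result = TrueV;   // thisMBB: true value already available
      if (!CC)              // bCC sinkMBB on the X86 condition code
        Result = FalseV;    // copy0MBB: %FalseValue, falls through
      return Result;        // sinkMBB: %Result = phi [FalseV], [TrueV]
    }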
+ for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), + E = BB->succ_end(); I != E; ++I) { + EM->insert(std::make_pair(*I, sinkMBB)); + sinkMBB->addSuccessor(*I); + } + // Next, remove all successors of the current block, and add the true + // and fallthrough blocks as its successors. + while (!BB->succ_empty()) + BB->removeSuccessor(BB->succ_begin()); + // Add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] + // ... + BB = sinkMBB; + BuildMI(BB, DL, TII->get(X86::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) + .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); + + F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + return BB; +} + + +MachineBasicBlock * +X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { switch (MI->getOpcode()) { default: assert(false && "Unexpected instr type to insert"); + case X86::CMOV_GR8: case X86::CMOV_V1I64: case X86::CMOV_FR32: case X86::CMOV_FR64: case X86::CMOV_V4F32: case X86::CMOV_V2F64: - case X86::CMOV_V2I64: { - // To "insert" a SELECT_CC instruction, we actually have to insert the - // diamond control-flow pattern. The incoming instruction knows the - // destination vreg to set, the condition code register to branch on, the - // true/false values to select between, and a branch opcode to use. - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; - - // thisMBB: - // ... - // TrueVal = ... - // cmpTY ccX, r1, r2 - // bCC copy1MBB - // fallthrough --> copy0MBB - MachineBasicBlock *thisMBB = BB; - MachineFunction *F = BB->getParent(); - MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - unsigned Opc = - X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm()); - BuildMI(BB, dl, TII->get(Opc)).addMBB(sinkMBB); - F->insert(It, copy0MBB); - F->insert(It, sinkMBB); - // Update machine-CFG edges by transferring all successors of the current - // block to the new block which will contain the Phi node for the select. - sinkMBB->transferSuccessors(BB); - - // Add the true and fallthrough blocks as its successors. - BB->addSuccessor(copy0MBB); - BB->addSuccessor(sinkMBB); - - // copy0MBB: - // %FalseValue = ... - // # fallthrough to sinkMBB - BB = copy0MBB; - - // Update machine-CFG edges - BB->addSuccessor(sinkMBB); - - // sinkMBB: - // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] - // ... - BB = sinkMBB; - BuildMI(BB, dl, TII->get(X86::PHI), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) - .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); - - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. 
- return BB; - } + case X86::CMOV_V2I64: + return EmitLoweredSelect(MI, BB, EM); case X86::FP32_TO_INT16_IN_MEM: case X86::FP32_TO_INT32_IN_MEM: @@ -7596,33 +7863,36 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::FP80_TO_INT16_IN_MEM: case X86::FP80_TO_INT32_IN_MEM: case X86::FP80_TO_INT64_IN_MEM: { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + // Change the floating point control register to use "round towards zero" // mode when truncating to an integer value. MachineFunction *F = BB->getParent(); int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2); - addFrameReference(BuildMI(BB, dl, TII->get(X86::FNSTCW16m)), CWFrameIdx); + addFrameReference(BuildMI(BB, DL, TII->get(X86::FNSTCW16m)), CWFrameIdx); // Load the old value of the high byte of the control word... unsigned OldCW = F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass); - addFrameReference(BuildMI(BB, dl, TII->get(X86::MOV16rm), OldCW), + addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16rm), OldCW), CWFrameIdx); // Set the high part to be round to zero... - addFrameReference(BuildMI(BB, dl, TII->get(X86::MOV16mi)), CWFrameIdx) + addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16mi)), CWFrameIdx) .addImm(0xC7F); // Reload the modified control word now... - addFrameReference(BuildMI(BB, dl, TII->get(X86::FLDCW16m)), CWFrameIdx); + addFrameReference(BuildMI(BB, DL, TII->get(X86::FLDCW16m)), CWFrameIdx); // Restore the memory image of control word to original value - addFrameReference(BuildMI(BB, dl, TII->get(X86::MOV16mr)), CWFrameIdx) + addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16mr)), CWFrameIdx) .addReg(OldCW); // Get the X86 opcode to use. unsigned Opc; switch (MI->getOpcode()) { - default: assert(0 && "illegal opcode!"); + default: llvm_unreachable("illegal opcode!"); case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break; case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break; case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break; @@ -7655,15 +7925,26 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, } else { AM.Disp = Op.getImm(); } - addFullAddress(BuildMI(BB, dl, TII->get(Opc)), AM) + addFullAddress(BuildMI(BB, DL, TII->get(Opc)), AM) .addReg(MI->getOperand(X86AddrNumOperands).getReg()); // Reload the original control word now. - addFrameReference(BuildMI(BB, dl, TII->get(X86::FLDCW16m)), CWFrameIdx); + addFrameReference(BuildMI(BB, DL, TII->get(X86::FLDCW16m)), CWFrameIdx); F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. return BB; } + // String/text processing lowering. + case X86::PCMPISTRM128REG: + return EmitPCMP(MI, BB, 3, false /* in-mem */); + case X86::PCMPISTRM128MEM: + return EmitPCMP(MI, BB, 3, true /* in-mem */); + case X86::PCMPESTRM128REG: + return EmitPCMP(MI, BB, 5, false /* in mem */); + case X86::PCMPESTRM128MEM: + return EmitPCMP(MI, BB, 5, true /* in mem */); + + // Atomic Lowering. 
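Broadly, each ATOM* pseudo dispatched below expands to a compare-exchange retry loop; a sketch of the shape, not the exact machine code:

    #include <atomic>
    unsigned atomic_and_old(std::atomic<unsigned> &A, unsigned Mask) {
      unsigned Old = A.load();
      while (!A.compare_exchange_weak(Old, Old & Mask)) {
        // On failure, Old is reloaded with the current value; just retry.
      }
      return Old;   // the old value the expansion copies out of EAX
    }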
case X86::ATOMAND32: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, X86::AND32ri, X86::MOV32rm, @@ -7825,6 +8106,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, X86::MOV32rr, X86::MOV32rr, X86::MOV32ri, X86::MOV32ri, false); + case X86::VASTART_SAVE_XMM_REGS: + return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB); } } @@ -7855,6 +8138,9 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, case X86ISD::UMUL: case X86ISD::INC: case X86ISD::DEC: + case X86ISD::OR: + case X86ISD::XOR: + case X86ISD::AND: // These nodes' second result is a boolean. if (Op.getResNo() == 0) break; @@ -7891,7 +8177,7 @@ static bool isBaseAlignmentOfN(unsigned N, SDNode *Base, } static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems, - MVT EVT, LoadSDNode *&LDBase, + EVT EltVT, LoadSDNode *&LDBase, unsigned &LastLoadedElt, SelectionDAG &DAG, MachineFrameInfo *MFI, const TargetLowering &TLI) { @@ -7919,7 +8205,7 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems, continue; LoadSDNode *LD = cast<LoadSDNode>(Elt); - if (!TLI.isConsecutiveLoad(LD, LDBase, EVT.getSizeInBits()/8, i, MFI)) + if (!TLI.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i, MFI)) return false; LastLoadedElt = i; } @@ -7935,8 +8221,8 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems, static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI) { DebugLoc dl = N->getDebugLoc(); - MVT VT = N->getValueType(0); - MVT EVT = VT.getVectorElementType(); + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); unsigned NumElems = VT.getVectorNumElements(); @@ -7947,7 +8233,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); LoadSDNode *LD = NULL; unsigned LastLoadedElt; - if (!EltsFromConsecutiveLoads(SVN, NumElems, EVT, LD, LastLoadedElt, DAG, + if (!EltsFromConsecutiveLoads(SVN, NumElems, EltVT, LD, LastLoadedElt, DAG, MFI, TLI)) return SDValue(); @@ -7976,57 +8262,159 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // Get the LHS/RHS of the select. SDValue LHS = N->getOperand(1); SDValue RHS = N->getOperand(2); - - // If we have SSE[12] support, try to form min/max nodes. + + // If we have SSE[12] support, try to form min/max nodes. SSE min/max + // instructions have the peculiarity that if either operand is a NaN, + // they chose what we call the RHS operand (and as such are not symmetric). + // It happens that this matches the semantics of the common C idiom + // x<y?x:y and related forms, so we can recognize these cases. if (Subtarget->hasSSE2() && (LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64) && Cond.getOpcode() == ISD::SETCC) { ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); unsigned Opcode = 0; + // Check for x CC y ? x : y. if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) { switch (CC) { default: break; - case ISD::SETOLE: // (X <= Y) ? X : Y -> min + case ISD::SETULT: + // This can be a min if we can prove that at least one of the operands + // is not a nan. + if (!FiniteOnlyFPMath()) { + if (DAG.isKnownNeverNaN(RHS)) { + // Put the potential NaN in the RHS so that SSE will preserve it. 
+ std::swap(LHS, RHS); + } else if (!DAG.isKnownNeverNaN(LHS)) + break; + } + Opcode = X86ISD::FMIN; + break; + case ISD::SETOLE: + // This can be a min if we can prove that at least one of the operands + // is not a nan. + if (!FiniteOnlyFPMath()) { + if (DAG.isKnownNeverNaN(LHS)) { + // Put the potential NaN in the RHS so that SSE will preserve it. + std::swap(LHS, RHS); + } else if (!DAG.isKnownNeverNaN(RHS)) + break; + } + Opcode = X86ISD::FMIN; + break; case ISD::SETULE: - case ISD::SETLE: - if (!UnsafeFPMath) break; - // FALL THROUGH. - case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min + // This can be a min, but if either operand is a NaN we need it to + // preserve the original LHS. + std::swap(LHS, RHS); + case ISD::SETOLT: case ISD::SETLT: + case ISD::SETLE: Opcode = X86ISD::FMIN; break; - case ISD::SETOGT: // (X > Y) ? X : Y -> max + case ISD::SETOGE: + // This can be a max if we can prove that at least one of the operands + // is not a nan. + if (!FiniteOnlyFPMath()) { + if (DAG.isKnownNeverNaN(LHS)) { + // Put the potential NaN in the RHS so that SSE will preserve it. + std::swap(LHS, RHS); + } else if (!DAG.isKnownNeverNaN(RHS)) + break; + } + Opcode = X86ISD::FMAX; + break; case ISD::SETUGT: + // This can be a max if we can prove that at least one of the operands + // is not a nan. + if (!FiniteOnlyFPMath()) { + if (DAG.isKnownNeverNaN(RHS)) { + // Put the potential NaN in the RHS so that SSE will preserve it. + std::swap(LHS, RHS); + } else if (!DAG.isKnownNeverNaN(LHS)) + break; + } + Opcode = X86ISD::FMAX; + break; + case ISD::SETUGE: + // This can be a max, but if either operand is a NaN we need it to + // preserve the original LHS. + std::swap(LHS, RHS); + case ISD::SETOGT: case ISD::SETGT: - if (!UnsafeFPMath) break; - // FALL THROUGH. - case ISD::SETUGE: // (X uge/ge Y) ? X : Y -> max case ISD::SETGE: Opcode = X86ISD::FMAX; break; } + // Check for x CC y ? y : x -- a min/max with reversed arms. } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) { switch (CC) { default: break; - case ISD::SETOGT: // (X > Y) ? Y : X -> min + case ISD::SETOGE: + // This can be a min if we can prove that at least one of the operands + // is not a nan. + if (!FiniteOnlyFPMath()) { + if (DAG.isKnownNeverNaN(RHS)) { + // Put the potential NaN in the RHS so that SSE will preserve it. + std::swap(LHS, RHS); + } else if (!DAG.isKnownNeverNaN(LHS)) + break; + } + Opcode = X86ISD::FMIN; + break; case ISD::SETUGT: + // This can be a min if we can prove that at least one of the operands + // is not a nan. + if (!FiniteOnlyFPMath()) { + if (DAG.isKnownNeverNaN(LHS)) { + // Put the potential NaN in the RHS so that SSE will preserve it. + std::swap(LHS, RHS); + } else if (!DAG.isKnownNeverNaN(RHS)) + break; + } + Opcode = X86ISD::FMIN; + break; + case ISD::SETUGE: + // This can be a min, but if either operand is a NaN we need it to + // preserve the original LHS. + std::swap(LHS, RHS); + case ISD::SETOGT: case ISD::SETGT: - if (!UnsafeFPMath) break; - // FALL THROUGH. - case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min case ISD::SETGE: Opcode = X86ISD::FMIN; break; - case ISD::SETOLE: // (X <= Y) ? Y : X -> max + case ISD::SETULT: + // This can be a max if we can prove that at least one of the operands + // is not a nan. + if (!FiniteOnlyFPMath()) { + if (DAG.isKnownNeverNaN(LHS)) { + // Put the potential NaN in the RHS so that SSE will preserve it. 
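The asymmetry these comments keep referencing, in scalar form: SSE min/max select the second operand whenever either input is a NaN, because the hardware operation is an ordered compare plus select.

    float sse_min_semantics(float LHS, float RHS) {
      return LHS < RHS ? LHS : RHS;   // a NaN makes the compare false, so RHS wins
    }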
+ std::swap(LHS, RHS); + } else if (!DAG.isKnownNeverNaN(RHS)) + break; + } + Opcode = X86ISD::FMAX; + break; + case ISD::SETOLE: + // This can be a max if we can prove that at least one of the operands + // is not a nan. + if (!FiniteOnlyFPMath()) { + if (DAG.isKnownNeverNaN(RHS)) { + // Put the potential NaN in the RHS so that SSE will preserve it. + std::swap(LHS, RHS); + } else if (!DAG.isKnownNeverNaN(LHS)) + break; + } + Opcode = X86ISD::FMAX; + break; case ISD::SETULE: - case ISD::SETLE: - if (!UnsafeFPMath) break; - // FALL THROUGH. - case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max + // This can be a max, but if either operand is a NaN we need it to + // preserve the original LHS. + std::swap(LHS, RHS); + case ISD::SETOLT: case ISD::SETLT: + case ISD::SETLE: Opcode = X86ISD::FMAX; break; } @@ -8035,7 +8423,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, if (Opcode) return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS); } - + // If this is a select between two integer constants, try to do some // optimizations. if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(LHS)) { @@ -8045,7 +8433,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // If this is efficiently invertible, canonicalize the LHSC/RHSC values // so that TrueC (the true value) is larger than FalseC. bool NeedsCondInvert = false; - + if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue()) && // Efficiently invertible. (Cond.getOpcode() == ISD::SETCC || // setcc -> invertible. @@ -8054,41 +8442,41 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, NeedsCondInvert = true; std::swap(TrueC, FalseC); } - + // Optimize C ? 8 : 0 -> zext(C) << 3. Likewise for any pow2/0. if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) { if (NeedsCondInvert) // Invert the condition if needed. Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond, DAG.getConstant(1, Cond.getValueType())); - + // Zero extend the condition if needed. Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, LHS.getValueType(), Cond); - + unsigned ShAmt = TrueC->getAPIntValue().logBase2(); return DAG.getNode(ISD::SHL, DL, LHS.getValueType(), Cond, DAG.getConstant(ShAmt, MVT::i8)); } - + // Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst. if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) { if (NeedsCondInvert) // Invert the condition if needed. Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond, DAG.getConstant(1, Cond.getValueType())); - + // Zero extend the condition if needed. Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), Cond); return DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond, SDValue(FalseC, 0)); } - + // Optimize cases that will turn into an LEA instruction. This requires // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9). if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) { uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue(); if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff; - + bool isFastMultiplier = false; if (Diff < 10) { switch ((unsigned char)Diff) { @@ -8104,13 +8492,13 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, break; } } - + if (isFastMultiplier) { APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue(); if (NeedsCondInvert) // Invert the condition if needed. Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond, DAG.getConstant(1, Cond.getValueType())); - + // Zero extend the condition if needed. 
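The "C ? 8 : 0 -> zext(C) << 3" fold above, written out (any power of two works the same way; names are illustrative):

    unsigned sel_pow2(bool C) { return C ? 8u : 0u; }
    unsigned folded(bool C)   { return static_cast<unsigned>(C) << 3; }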
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), Cond); @@ -8118,17 +8506,17 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, if (Diff != 1) Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond, DAG.getConstant(Diff, Cond.getValueType())); - + // Add the base if non-zero. if (FalseC->getAPIntValue() != 0) Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond, SDValue(FalseC, 0)); return Cond; } - } + } } } - + return SDValue(); } @@ -8136,11 +8524,11 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { DebugLoc DL = N->getDebugLoc(); - + // If the flag operand isn't dead, don't touch this CMOV. if (N->getNumValues() == 2 && !SDValue(N, 1).use_empty()) return SDValue(); - + // If this is a select between two integer constants, try to do some // optimizations. Note that the operands are ordered the opposite of SELECT // operands. @@ -8149,12 +8537,12 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, // Canonicalize the TrueC/FalseC values so that TrueC (the true value) is // larger than FalseC (the false value). X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2); - + if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) { CC = X86::GetOppositeBranchCondition(CC); std::swap(TrueC, FalseC); } - + // Optimize C ? 8 : 0 -> zext(setcc(C)) << 3. Likewise for any pow2/0. // This is efficient for any integer data type (including i8/i16) and // shift amount. @@ -8162,10 +8550,10 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, SDValue Cond = N->getOperand(3); Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8, DAG.getConstant(CC, MVT::i8), Cond); - + // Zero extend the condition if needed. Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond); - + unsigned ShAmt = TrueC->getAPIntValue().logBase2(); Cond = DAG.getNode(ISD::SHL, DL, Cond.getValueType(), Cond, DAG.getConstant(ShAmt, MVT::i8)); @@ -8173,31 +8561,31 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, return DCI.CombineTo(N, Cond, SDValue()); return Cond; } - + // Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst. This is efficient // for any integer data type, including i8/i16. if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) { SDValue Cond = N->getOperand(3); Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8, DAG.getConstant(CC, MVT::i8), Cond); - + // Zero extend the condition if needed. Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), Cond); Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond, SDValue(FalseC, 0)); - + if (N->getNumValues() == 2) // Dead flag value? return DCI.CombineTo(N, Cond, SDValue()); return Cond; } - + // Optimize cases that will turn into an LEA instruction. This requires // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9). 
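Why 1, 2, 3, 4, 5, 8, and 9 count as fast multipliers here: each is a single LEA, using the hardware scale of 1, 2, 4, or 8 plus an optional base add.

    unsigned mul3(unsigned X) { return X + 2 * X; }   // lea r, [x + 2*x]
    unsigned mul5(unsigned X) { return X + 4 * X; }   // lea r, [x + 4*x]
    unsigned mul9(unsigned X) { return X + 8 * X; }   // lea r, [x + 8*x]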
if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) { uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue(); if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff; - + bool isFastMultiplier = false; if (Diff < 10) { switch ((unsigned char)Diff) { @@ -8213,7 +8601,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, break; } } - + if (isFastMultiplier) { APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue(); SDValue Cond = N->getOperand(3); @@ -8235,7 +8623,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, return DCI.CombineTo(N, Cond, SDValue()); return Cond; } - } + } } } return SDValue(); @@ -8254,7 +8642,7 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG, if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) return SDValue(); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); if (VT != MVT::i64) return SDValue(); @@ -8289,17 +8677,17 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG, std::swap(MulAmt1, MulAmt2); SDValue NewMul; - if (isPowerOf2_64(MulAmt1)) + if (isPowerOf2_64(MulAmt1)) NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), DAG.getConstant(Log2_64(MulAmt1), MVT::i8)); else NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), DAG.getConstant(MulAmt1, VT)); - if (isPowerOf2_64(MulAmt2)) + if (isPowerOf2_64(MulAmt2)) NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul, DAG.getConstant(Log2_64(MulAmt2), MVT::i8)); - else + else NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul, DAG.getConstant(MulAmt2, VT)); @@ -8321,14 +8709,14 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, if (!Subtarget->hasSSE2()) return SDValue(); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16) return SDValue(); SDValue ShAmtOp = N->getOperand(1); - MVT EltVT = VT.getVectorElementType(); + EVT EltVT = VT.getVectorElementType(); DebugLoc DL = N->getDebugLoc(); - SDValue BaseShAmt; + SDValue BaseShAmt = SDValue(); if (ShAmtOp.getOpcode() == ISD::BUILD_VECTOR) { unsigned NumElts = VT.getVectorNumElements(); unsigned i = 0; @@ -8347,21 +8735,40 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, } } else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE && cast<ShuffleVectorSDNode>(ShAmtOp)->isSplat()) { - BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp, - DAG.getIntPtrConstant(0)); + SDValue InVec = ShAmtOp.getOperand(0); + if (InVec.getOpcode() == ISD::BUILD_VECTOR) { + unsigned NumElts = InVec.getValueType().getVectorNumElements(); + unsigned i = 0; + for (; i != NumElts; ++i) { + SDValue Arg = InVec.getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + BaseShAmt = Arg; + break; + } + } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) { + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2))) { + unsigned SplatIdx = cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex(); + if (C->getZExtValue() == SplatIdx) + BaseShAmt = InVec.getOperand(1); + } + } + if (BaseShAmt.getNode() == 0) + BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp, + DAG.getIntPtrConstant(0)); } else return SDValue(); + // The shift amount is an i32. 
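A note on the extension just below: the SSE2/MMX shift instructions these combines feed take their count from the low 64 bits of the count register, so a narrow scalar amount must be widened with zeros rather than with undefined or sign bits, which is presumably why an any-extend is not safe here. A sketch with the corresponding intrinsics:

    #include <emmintrin.h>
    __m128i shl_lanes(__m128i V, unsigned Amt) {
      __m128i Count = _mm_cvtsi32_si128((int)Amt);   // upper bits zeroed
      return _mm_sll_epi32(V, Count);                // each lane shifted by Amt
    }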
if (EltVT.bitsGT(MVT::i32)) BaseShAmt = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, BaseShAmt); else if (EltVT.bitsLT(MVT::i32)) - BaseShAmt = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, BaseShAmt); + BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, BaseShAmt); // The shift amount is identical so we can do a vector shift. SDValue ValOp = N->getOperand(0); switch (N->getOpcode()) { default: - assert(0 && "Unknown shift opcode!"); + llvm_unreachable("Unknown shift opcode!"); break; case ISD::SHL: if (VT == MVT::v2i64) @@ -8415,13 +8822,13 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, // Similarly, turn load->store of i64 into double load/stores in 32-bit mode. StoreSDNode *St = cast<StoreSDNode>(N); - MVT VT = St->getValue().getValueType(); + EVT VT = St->getValue().getValueType(); if (VT.getSizeInBits() != 64) return SDValue(); const Function *F = DAG.getMachineFunction().getFunction(); bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat); - bool F64IsLegal = !UseSoftFloat && !NoImplicitFloatOps + bool F64IsLegal = !UseSoftFloat && !NoImplicitFloatOps && Subtarget->hasSSE2(); if ((VT.isVector() || (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) && @@ -8464,7 +8871,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, // Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store // pair instead. if (Subtarget->is64Bit() || F64IsLegal) { - MVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64; + EVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64; SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(), Ld->getSrcValue(), Ld->getSrcValueOffset(), Ld->isVolatile(), @@ -8568,9 +8975,9 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) { SDValue Op = N->getOperand(0); if (Op.getOpcode() == ISD::BIT_CONVERT) Op = Op.getOperand(0); - MVT VT = N->getValueType(0), OpVT = Op.getValueType(); + EVT VT = N->getValueType(0), OpVT = Op.getValueType(); if (Op.getOpcode() == X86ISD::VZEXT_LOAD && - VT.getVectorElementType().getSizeInBits() == + VT.getVectorElementType().getSizeInBits() == OpVT.getVectorElementType().getSizeInBits()) { return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op); } @@ -8580,7 +8987,7 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) { // On X86 and X86-64, atomic operations are lowered to locked instructions. // Locked instructions, in turn, have implicit fence semantics (all memory // operations are flushed before issuing the locked instruction, and the -// are not buffered), so we can fold away the common pattern of +// are not buffered), so we can fold away the common pattern of // fence-atomic-fence. static SDValue PerformMEMBARRIERCombine(SDNode* N, SelectionDAG &DAG) { SDValue atomic = N->getOperand(0); @@ -8601,11 +9008,11 @@ static SDValue PerformMEMBARRIERCombine(SDNode* N, SelectionDAG &DAG) { default: return SDValue(); } - + SDValue fence = atomic.getOperand(0); if (fence.getOpcode() != ISD::MEMBARRIER) return SDValue(); - + switch (atomic.getOpcode()) { case ISD::ATOMIC_CMP_SWAP: return DAG.UpdateNodeOperands(atomic, fence.getOperand(0), @@ -8657,6 +9064,101 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, // X86 Inline Assembly Support //===----------------------------------------------------------------------===// +static bool LowerToBSwap(CallInst *CI) { + // FIXME: this should verify that we are targetting a 486 or better. 
If not, + // we will turn this bswap into something that will be lowered to logical ops + // instead of emitting the bswap asm. For now, we don't support 486 or lower + // so don't worry about this. + + // Verify this is a simple bswap. + if (CI->getNumOperands() != 2 || + CI->getType() != CI->getOperand(1)->getType() || + !CI->getType()->isInteger()) + return false; + + const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); + if (!Ty || Ty->getBitWidth() % 16 != 0) + return false; + + // Okay, we can do this xform, do so now. + const Type *Tys[] = { Ty }; + Module *M = CI->getParent()->getParent()->getParent(); + Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1); + + Value *Op = CI->getOperand(1); + Op = CallInst::Create(Int, Op, CI->getName(), CI); + + CI->replaceAllUsesWith(Op); + CI->eraseFromParent(); + return true; +} + +bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { + InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue()); + std::vector<InlineAsm::ConstraintInfo> Constraints = IA->ParseConstraints(); + + std::string AsmStr = IA->getAsmString(); + + // TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a" + std::vector<std::string> AsmPieces; + SplitString(AsmStr, AsmPieces, "\n"); // ; as separator? + + switch (AsmPieces.size()) { + default: return false; + case 1: + AsmStr = AsmPieces[0]; + AsmPieces.clear(); + SplitString(AsmStr, AsmPieces, " \t"); // Split with whitespace. + + // bswap $0 + if (AsmPieces.size() == 2 && + (AsmPieces[0] == "bswap" || + AsmPieces[0] == "bswapq" || + AsmPieces[0] == "bswapl") && + (AsmPieces[1] == "$0" || + AsmPieces[1] == "${0:q}")) { + // No need to check constraints, nothing other than the equivalent of + // "=r,0" would be valid here. + return LowerToBSwap(CI); + } + // rorw $$8, ${0:w} --> llvm.bswap.i16 + if (CI->getType() == Type::getInt16Ty(CI->getContext()) && + AsmPieces.size() == 3 && + AsmPieces[0] == "rorw" && + AsmPieces[1] == "$$8," && + AsmPieces[2] == "${0:w}" && + IA->getConstraintString() == "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}") { + return LowerToBSwap(CI); + } + break; + case 3: + if (CI->getType() == Type::getInt64Ty(CI->getContext()) && + Constraints.size() >= 2 && + Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" && + Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") { + // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64 + std::vector<std::string> Words; + SplitString(AsmPieces[0], Words, " \t"); + if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") { + Words.clear(); + SplitString(AsmPieces[1], Words, " \t"); + if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%edx") { + Words.clear(); + SplitString(AsmPieces[2], Words, " \t,"); + if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" && + Words[2] == "%edx") { + return LowerToBSwap(CI); + } + } + } + } + break; + } + return false; +} + + + /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. X86TargetLowering::ConstraintType @@ -8689,7 +9191,7 @@ X86TargetLowering::getConstraintType(const std::string &Constraint) const { /// with another that has more specific requirements based on the type of the /// corresponding operand. const char *X86TargetLowering:: -LowerXConstraint(MVT ConstraintVT) const { +LowerXConstraint(EVT ConstraintVT) const { // FP X constraints get lowered to SSE1/2 registers if available, otherwise // 'f' like normal targets. 
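What the matched asm templates above compute, so the rewrite to llvm.bswap is behavior-preserving: "rorw $8" on a 16-bit value swaps its two bytes, and bswap reverses all four (or eight) bytes.

    #include <cstdint>
    uint16_t bswap16(uint16_t X) {          // rorw $8, x
      return (uint16_t)((X << 8) | (X >> 8));
    }
    uint32_t bswap32(uint32_t X) {          // bswap x
      return (X >> 24) | ((X >> 8) & 0xFF00u) |
             ((X << 8) & 0xFF0000u) | (X << 24);
    }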
if (ConstraintVT.isFloatingPoint()) { @@ -8749,7 +9251,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, // 32-bit signed value if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { const ConstantInt *CI = C->getConstantIntValue(); - if (CI->isValueValidForType(Type::Int32Ty, C->getSExtValue())) { + if (CI->isValueValidForType(Type::getInt32Ty(*DAG.getContext()), + C->getSExtValue())) { // Widen to 64 bits here to get it sign extended. Result = DAG.getTargetConstant(C->getSExtValue(), MVT::i64); break; @@ -8763,7 +9266,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, // 32-bit unsigned value if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { const ConstantInt *CI = C->getConstantIntValue(); - if (CI->isValueValidForType(Type::Int32Ty, C->getZExtValue())) { + if (CI->isValueValidForType(Type::getInt32Ty(*DAG.getContext()), + C->getZExtValue())) { Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType()); break; } @@ -8803,16 +9307,22 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, continue; } } - + // Otherwise, this isn't something we can handle, reject it. return; } + GlobalValue *GV = GA->getGlobal(); + // If we require an extra load to get this address, as in PIC mode, we + // can't accept it. + if (isGlobalStubReference(Subtarget->ClassifyGlobalReference(GV, + getTargetMachine()))) + return; + if (hasMemory) - Op = LowerGlobalAddress(GA->getGlobal(), Op.getDebugLoc(), Offset, DAG); + Op = LowerGlobalAddress(GV, Op.getDebugLoc(), Offset, DAG); else - Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0), - Offset); + Op = DAG.getTargetGlobalAddress(GV, GA->getValueType(0), Offset); Result = Op; break; } @@ -8828,12 +9338,42 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::vector<unsigned> X86TargetLowering:: getRegClassForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const { + EVT VT) const { if (Constraint.size() == 1) { // FIXME: not handling fp-stack yet! switch (Constraint[0]) { // GCC X86 Constraint Letters default: break; // Unknown constraint letter - case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode) + case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode. 
+ if (Subtarget->is64Bit()) { + if (VT == MVT::i32) + return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, + X86::ESI, X86::EDI, X86::R8D, X86::R9D, + X86::R10D,X86::R11D,X86::R12D, + X86::R13D,X86::R14D,X86::R15D, + X86::EBP, X86::ESP, 0); + else if (VT == MVT::i16) + return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, + X86::SI, X86::DI, X86::R8W,X86::R9W, + X86::R10W,X86::R11W,X86::R12W, + X86::R13W,X86::R14W,X86::R15W, + X86::BP, X86::SP, 0); + else if (VT == MVT::i8) + return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, + X86::SIL, X86::DIL, X86::R8B,X86::R9B, + X86::R10B,X86::R11B,X86::R12B, + X86::R13B,X86::R14B,X86::R15B, + X86::BPL, X86::SPL, 0); + + else if (VT == MVT::i64) + return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX, + X86::RSI, X86::RDI, X86::R8, X86::R9, + X86::R10, X86::R11, X86::R12, + X86::R13, X86::R14, X86::R15, + X86::RBP, X86::RSP, 0); + + break; + } + // 32-bit fallthrough case 'Q': // Q_REGS if (VT == MVT::i32) return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0); @@ -8852,7 +9392,7 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint, std::pair<unsigned, const TargetRegisterClass*> X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const { + EVT VT) const { // First, see if this is a constraint that directly corresponds to an LLVM // register class. if (Constraint.size() == 1) { @@ -8860,7 +9400,6 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, switch (Constraint[0]) { default: break; case 'r': // GENERAL_REGS - case 'R': // LEGACY_REGS case 'l': // INDEX_REGS if (VT == MVT::i8) return std::make_pair(0U, X86::GR8RegisterClass); @@ -8869,6 +9408,14 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, if (VT == MVT::i32 || !Subtarget->is64Bit()) return std::make_pair(0U, X86::GR32RegisterClass); return std::make_pair(0U, X86::GR64RegisterClass); + case 'R': // LEGACY_REGS + if (VT == MVT::i8) + return std::make_pair(0U, X86::GR8_NOREXRegisterClass); + if (VT == MVT::i16) + return std::make_pair(0U, X86::GR16_NOREXRegisterClass); + if (VT == MVT::i32 || !Subtarget->is64Bit()) + return std::make_pair(0U, X86::GR32_NOREXRegisterClass); + return std::make_pair(0U, X86::GR64_NOREXRegisterClass); case 'f': // FP Stack registers. // If SSE is enabled for this VT, use f80 to ensure the isel moves the // value to the correct fpstack register class. @@ -8886,7 +9433,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, case 'x': // SSE_REGS if SSE1 allowed if (!Subtarget->hasSSE1()) break; - switch (VT.getSimpleVT()) { + switch (VT.getSimpleVT().SimpleTy) { default: break; // Scalar SSE types. case MVT::f32: @@ -8915,15 +9462,39 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, // Not found as a standard register? if (Res.second == 0) { - // GCC calls "st(0)" just plain "st". + // Map st(0) -> st(7) -> ST0 + if (Constraint.size() == 7 && Constraint[0] == '{' && + tolower(Constraint[1]) == 's' && + tolower(Constraint[2]) == 't' && + Constraint[3] == '(' && + (Constraint[4] >= '0' && Constraint[4] <= '7') && + Constraint[5] == ')' && + Constraint[6] == '}') { + + Res.first = X86::ST0+Constraint[4]-'0'; + Res.second = X86::RFP80RegisterClass; + return Res; + } + + // GCC allows "st(0)" to be called just plain "st". 
if (StringsEqualNoCase("{st}", Constraint)) { Res.first = X86::ST0; Res.second = X86::RFP80RegisterClass; + return Res; + } + + // flags -> EFLAGS + if (StringsEqualNoCase("{flags}", Constraint)) { + Res.first = X86::EFLAGS; + Res.second = X86::CCRRegisterClass; + return Res; } + // 'A' means EAX + EDX. if (Constraint == "A") { Res.first = X86::EAX; - Res.second = X86::GRADRegisterClass; + Res.second = X86::GR32_ADRegisterClass; + return Res; } return Res; } @@ -9015,7 +9586,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, /// When and where to widen is target dependent based on the cost of /// scalarizing vs using the wider vector type. -MVT X86TargetLowering::getWidenVectorType(MVT VT) const { +EVT X86TargetLowering::getWidenVectorType(EVT VT) const { assert(VT.isVector()); if (isTypeLegal(VT)) return VT; @@ -9024,7 +9595,7 @@ MVT X86TargetLowering::getWidenVectorType(MVT VT) const { // type based on element type. This would speed up our search (though // it may not be worth it since the size of the list is relatively // small). - MVT EltVT = VT.getVectorElementType(); + EVT EltVT = VT.getVectorElementType(); unsigned NElts = VT.getVectorNumElements(); // On X86, it make sense to widen any vector wider than 1 @@ -9033,7 +9604,7 @@ MVT X86TargetLowering::getWidenVectorType(MVT VT) const { for (unsigned nVT = MVT::FIRST_VECTOR_VALUETYPE; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { - MVT SVT = (MVT::SimpleValueType)nVT; + EVT SVT = (MVT::SimpleValueType)nVT; if (isTypeLegal(SVT) && SVT.getVectorElementType() == EltVT && diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index ffed46c..2f7b8ba 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -85,7 +85,7 @@ namespace llvm { /// as. FST, - /// CALL/TAILCALL - These operations represent an abstract X86 call + /// CALL - These operations represent an abstract X86 call /// instruction, which includes a bunch of information. In particular the /// operands of these node are: /// @@ -102,12 +102,8 @@ namespace llvm { /// #1 - The first register result value (optional) /// #2 - The second register result value (optional) /// - /// The CALL vs TAILCALL distinction boils down to whether the callee is - /// known not to modify the caller's stack frame, as is standard with - /// LLVM. CALL, - TAILCALL, - + /// RDTSC_DAG - This operation implements the lowering for /// readcyclecounter RDTSC_DAG, @@ -208,17 +204,6 @@ namespace llvm { LCMPXCHG_DAG, LCMPXCHG8_DAG, - // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG, - // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG - - // Atomic 64-bit binary operations. - ATOMADD64_DAG, - ATOMSUB64_DAG, - ATOMOR64_DAG, - ATOMXOR64_DAG, - ATOMAND64_DAG, - ATOMNAND64_DAG, - ATOMSWAP64_DAG, - // FNSTCW16m - Store FP control world into i16 memory. FNSTCW16m, @@ -241,10 +226,29 @@ namespace llvm { // ADD, SUB, SMUL, UMUL, etc. - Arithmetic operations with FLAGS results. ADD, SUB, SMUL, UMUL, - INC, DEC, + INC, DEC, OR, XOR, AND, // MUL_IMM - X86 specific multiply by immediate. - MUL_IMM + MUL_IMM, + + // PTEST - Vector bitwise comparisons + PTEST, + + // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack, + // according to %al. An operator is needed so that this can be expanded + // with control flow. + VASTART_SAVE_XMM_REGS, + + // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG, + // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG - + // Atomic 64-bit binary operations. 
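Before the atomic opcodes that the comment above introduces are listed below, a note on the enum trick they use: by pinning the first entry to ISD::FIRST_TARGET_MEMORY_OPCODE, every later entry lands at or beyond that boundary, so generic DAG code can presumably classify all of them as memory-touching nodes with a single comparison rather than a per-opcode list. A toy model of the idiom (the enum values here are invented for illustration):

#include <cassert>

enum Opcode : unsigned {
  ADD_NODE, SUB_NODE, CALL_NODE,          // ordinary target nodes
  FIRST_TARGET_MEMORY_OPCODE = 1000,      // boundary chosen by the framework
  ATOMADD64 = FIRST_TARGET_MEMORY_OPCODE, // everything from here on touches memory
  ATOMSUB64,
  ATOMSWAP64
};

static bool isTargetMemoryOpcode(unsigned Op) {
  return Op >= FIRST_TARGET_MEMORY_OPCODE;
}

int main() {
  assert(isTargetMemoryOpcode(ATOMSWAP64));
  assert(!isTargetMemoryOpcode(CALL_NODE));
  return 0;
}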
+ ATOMADD64_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE, + ATOMSUB64_DAG, + ATOMOR64_DAG, + ATOMXOR64_DAG, + ATOMAND64_DAG, + ATOMNAND64_DAG, + ATOMSWAP64_DAG }; } @@ -333,6 +337,15 @@ namespace llvm { /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW /// instructions. unsigned getShufflePSHUFLWImmediate(SDNode *N); + + /// isZeroNode - Returns true if Elt is a constant zero or a floating point + /// constant +0.0. + bool isZeroNode(SDValue Elt); + + /// isOffsetSuitableForCodeModel - Returns true of the given offset can be + /// fit into displacement field of the instruction. + bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, + bool hasSymbolicDisplacement = true); } //===--------------------------------------------------------------------===// @@ -374,12 +387,17 @@ namespace llvm { /// getOptimalMemOpType - Returns the target specific optimal type for load /// and store operations as a result of memset, memcpy, and memmove - /// lowering. It returns MVT::iAny if SelectionDAG should be responsible for + /// lowering. It returns EVT::iAny if SelectionDAG should be responsible for /// determining it. - virtual - MVT getOptimalMemOpType(uint64_t Size, unsigned Align, - bool isSrcConst, bool isSrcStr, - SelectionDAG &DAG) const; + virtual EVT getOptimalMemOpType(uint64_t Size, unsigned Align, + bool isSrcConst, bool isSrcStr, + SelectionDAG &DAG) const; + + /// allowsUnalignedMemoryAccesses - Returns true if the target allows + /// unaligned memory accesses. of the specified type. + virtual bool allowsUnalignedMemoryAccesses(EVT VT) const { + return true; + } /// LowerOperation - Provide custom lowering hooks for some operations. /// @@ -395,7 +413,8 @@ namespace llvm { virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB) const; + MachineBasicBlock *MBB, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; /// getTargetNodeName - This method returns the name of a target specific @@ -403,7 +422,7 @@ namespace llvm { virtual const char *getTargetNodeName(unsigned Opcode) const; /// getSetCCResultType - Return the ISD::SETCC ValueType - virtual MVT getSetCCResultType(MVT VT) const; + virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; /// computeMaskedBitsForTargetNode - Determine which of the bits specified /// in Mask are known to be either zero or one and return them in the @@ -420,13 +439,15 @@ namespace llvm { SDValue getReturnAddressFrameIndex(SelectionDAG &DAG); + virtual bool ExpandInlineAsm(CallInst *CI) const; + ConstraintType getConstraintType(const std::string &Constraint) const; std::vector<unsigned> getRegClassForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const; + EVT VT) const; - virtual const char *LowerXConstraint(MVT ConstraintVT) const; + virtual const char *LowerXConstraint(EVT ConstraintVT) const; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. If hasMemory is @@ -444,7 +465,7 @@ namespace llvm { /// error, this returns a register number of 0. std::pair<unsigned, const TargetRegisterClass*> getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const; + EVT VT) const; /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. @@ -454,7 +475,7 @@ namespace llvm { /// type Ty1 to type Ty2. e.g. 
On x86 it's free to truncate a i32 value in /// register EAX to i16 by referencing its sub-register AX. virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const; - virtual bool isTruncateFree(MVT VT1, MVT VT2) const; + virtual bool isTruncateFree(EVT VT1, EVT VT2) const; /// isZExtFree - Return true if any actual instruction that defines a /// value of type Ty1 implicit zero-extends the value to Ty2 in the result @@ -465,31 +486,31 @@ namespace llvm { /// all instructions that define 32-bit values implicit zero-extend the /// result out to 64 bits. virtual bool isZExtFree(const Type *Ty1, const Type *Ty2) const; - virtual bool isZExtFree(MVT VT1, MVT VT2) const; + virtual bool isZExtFree(EVT VT1, EVT VT2) const; /// isNarrowingProfitable - Return true if it's profitable to narrow /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow /// from i32 to i8 but not from i32 to i16. - virtual bool isNarrowingProfitable(MVT VT1, MVT VT2) const; + virtual bool isNarrowingProfitable(EVT VT1, EVT VT2) const; /// isShuffleMaskLegal - Targets can use this to indicate that they only /// support *some* VECTOR_SHUFFLE operations, those with specific masks. /// By default, if a target supports the VECTOR_SHUFFLE node, all mask /// values are assumed to be legal. virtual bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask, - MVT VT) const; + EVT VT) const; /// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. This is /// used by Targets can use this to indicate if there is a suitable /// VECTOR_SHUFFLE that can be used to replace a VAND with a constant /// pool entry. virtual bool isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask, - MVT VT) const; + EVT VT) const; /// ShouldShrinkFPConstant - If true, then instruction selection should /// seek to shrink the FP constant of the specified type to a smaller type /// in order to save space and / or reduce runtime. - virtual bool ShouldShrinkFPConstant(MVT VT) const { + virtual bool ShouldShrinkFPConstant(EVT VT) const { // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more // expensive than a straight movsd. On the other hand, it's important to // shrink long double fp constant since fldt is very slow. @@ -497,11 +518,14 @@ namespace llvm { } /// IsEligibleForTailCallOptimization - Check whether the call is eligible - /// for tail call optimization. Target which want to do tail call + /// for tail call optimization. Targets which want to do tail call /// optimization should implement this function. - virtual bool IsEligibleForTailCallOptimization(CallSDNode *TheCall, - SDValue Ret, - SelectionDAG &DAG) const; + virtual bool + IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, + bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + SelectionDAG& DAG) const; virtual const X86Subtarget* getSubtarget() { return Subtarget; @@ -509,17 +533,17 @@ namespace llvm { /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is /// computed in an SSE register, not on the X87 floating point stack. - bool isScalarFPTypeInSSEReg(MVT VT) const { + bool isScalarFPTypeInSSEReg(EVT VT) const { return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2 (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1 } /// getWidenVectorType: given a vector type, returns the type to widen /// to (e.g., v7i8 to v8i8). If the vector type is legal, it returns itself. 
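Before the getWidenVectorType doc comment resumes below, a sketch of the search it performs. This is a deliberate simplification: the real loop walks MVT's vector types and checks isTypeLegal and the element type, while here a made-up isLegalCount stands in for all of that. Only the shape of the scan is the point:

#include <cassert>

// Invented stand-in for "a vector of N such elements is legal on this target".
static bool isLegalCount(unsigned N) { return N == 2 || N == 4 || N == 8 || N == 16; }

// Return the smallest supported element count >= NElts, or 0 to scalarize.
static unsigned widenElementCount(unsigned NElts) {
  for (unsigned W = 2; W <= 16; W *= 2)
    if (W >= NElts && isLegalCount(W))
      return W;
  return 0;
}

int main() {
  assert(widenElementCount(7) == 8);   // the v7i8 -> v8i8 case from the comment
  assert(widenElementCount(4) == 4);   // already legal: widening is a no-op
  assert(widenElementCount(17) == 0);  // nothing wider: caller scalarizes
  return 0;
}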
- /// If there is no vector type that we want to widen to, returns MVT::Other + /// If there is no vector type that we want to widen to, returns EVT::Other /// When and were to widen is target dependent based on the cost of /// scalarizing vs using the wider vector type. - virtual MVT getWidenVectorType(MVT VT) const; + virtual EVT getWidenVectorType(EVT VT) const; /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. @@ -554,28 +578,30 @@ namespace llvm { bool X86ScalarSSEf32; bool X86ScalarSSEf64; - SDNode *LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall, - unsigned CallingConv, SelectionDAG &DAG); - - SDValue LowerMemArgument(SDValue Op, SelectionDAG &DAG, - const CCValAssign &VA, MachineFrameInfo *MFI, - unsigned CC, SDValue Root, unsigned i); - - SDValue LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG, - const SDValue &StackPtr, - const CCValAssign &VA, SDValue Chain, - SDValue Arg, ISD::ArgFlagsTy Flags); + SDValue LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals); + SDValue LowerMemArgument(SDValue Chain, + CallingConv::ID CallConv, + const SmallVectorImpl<ISD::InputArg> &ArgInfo, + DebugLoc dl, SelectionDAG &DAG, + const CCValAssign &VA, MachineFrameInfo *MFI, + unsigned i); + SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, + DebugLoc dl, SelectionDAG &DAG, + const CCValAssign &VA, + ISD::ArgFlagsTy Flags); // Call lowering helpers. - bool IsCalleePop(bool isVarArg, unsigned CallingConv); - bool CallRequiresGOTPtrInReg(bool Is64Bit, bool IsTailCall); - bool CallRequiresFnAddressInReg(bool Is64Bit, bool IsTailCall); + bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv); SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall, bool Is64Bit, int FPDiff, DebugLoc dl); - CCAssignFn *CCAssignFnForNode(unsigned CallingConv) const; - NameDecorationStyle NameDecorationForFORMAL_ARGUMENTS(SDValue Op); + CCAssignFn *CCAssignFnForNode(CallingConv::ID CallConv) const; + NameDecorationStyle NameDecorationForCallConv(CallingConv::ID CallConv); unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG &DAG); std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, @@ -595,7 +621,7 @@ namespace llvm { SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG); SDValue LowerShift(SDValue Op, SelectionDAG &DAG); - SDValue BuildFILD(SDValue Op, MVT SrcVT, SDValue Chain, SDValue StackSlot, + SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot, SelectionDAG &DAG); SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG); SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG); @@ -612,10 +638,7 @@ namespace llvm { SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG); SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG); SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); - SDValue LowerCALL(SDValue Op, SelectionDAG &DAG); - SDValue LowerRET(SDValue Op, SelectionDAG &DAG); SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG); - SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG); SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG); SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG); SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG); @@ -635,6 
+658,26 @@ namespace llvm { SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG); SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG); + virtual SDValue + LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals); + virtual SDValue + LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, bool isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals); + + virtual SDValue + LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + DebugLoc dl, SelectionDAG &DAG); + void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG, unsigned NewOp); @@ -651,9 +694,17 @@ namespace llvm { const Value *DstSV, uint64_t DstSVOff, const Value *SrcSV, uint64_t SrcSVOff); + /// Utility function to emit string processing sse4.2 instructions + /// that return in xmm0. + /// This takes the instruction to expand, the associated machine basic + /// block, the number of args, and whether or not the second arg is + /// in memory or not. + MachineBasicBlock *EmitPCMP(MachineInstr *BInstr, MachineBasicBlock *BB, + unsigned argNum, bool inMem) const; + /// Utility function to emit atomic bitwise operations (and, or, xor). - // It takes the bitwise instruction to expand, the associated machine basic - // block, and the associated X86 opcodes for reg/reg and reg/imm. + /// It takes the bitwise instruction to expand, the associated machine basic + /// block, and the associated X86 opcodes for reg/reg and reg/imm. MachineBasicBlock *EmitAtomicBitwiseWithCustomInserter( MachineInstr *BInstr, MachineBasicBlock *BB, @@ -683,6 +734,15 @@ namespace llvm { MachineBasicBlock *BB, unsigned cmovOpc) const; + /// Utility function to emit the xmm reg save portion of va_start. + MachineBasicBlock *EmitVAStartSaveXMMRegsWithCustomInserter( + MachineInstr *BInstr, + MachineBasicBlock *BB) const; + + MachineBasicBlock *EmitLoweredSelect(MachineInstr *I, + MachineBasicBlock *BB, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + /// Emit nodes that will be selected as "test Op0,Op0", or something /// equivalent, for use with the given x86 condition code. SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG); diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 472ba4c..ef19823 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -28,26 +28,29 @@ def i64i32imm_pcrel : Operand<i64> { // 64-bits but only 8 bits are significant. -def i64i8imm : Operand<i64>; +def i64i8imm : Operand<i64> { + let ParserMatchClass = ImmSExt8AsmOperand; +} def lea64mem : Operand<i64> { let PrintMethod = "printlea64mem"; - let MIOperandInfo = (ops GR64, i8imm, GR64, i32imm); + let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm); + let ParserMatchClass = X86MemAsmOperand; } def lea64_32mem : Operand<i32> { let PrintMethod = "printlea64_32mem"; let AsmOperandLowerMethod = "lower_lea64_32mem"; - let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm); + let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm); + let ParserMatchClass = X86MemAsmOperand; } //===----------------------------------------------------------------------===// // Complex Pattern Definitions. 
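VASTART_SAVE_XMM_REGS, emitted by the EmitVAStartSaveXMMRegsWithCustomInserter helper declared above, exists because of a SysV x86-64 calling-convention detail: the caller of a varargs function reports in %al how many XMM registers carry arguments, and the callee's prologue must spill XMM0..XMM7 into the register save area, skipping the spills when %al is zero, hence the need for control flow. Any variadic function exercises this path; nothing below is backend code, it is just the source shape that triggers it. The lea64addr pattern interrupted above continues next.

#include <cstdarg>
#include <cstdio>

// A varargs callee: its prologue contains the %al-guarded XMM register saves
// that VASTART_SAVE_XMM_REGS expands into on x86-64 SysV targets.
static double sum(int n, ...) {
  va_list ap;
  va_start(ap, n);
  double s = 0;
  for (int i = 0; i < n; ++i)
    s += va_arg(ap, double);
  va_end(ap);
  return s;
}

int main() {
  std::printf("%g\n", sum(3, 1.0, 2.0, 3.5)); // caller sets %al = 3 here
  return 0;
}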
// def lea64addr : ComplexPattern<i64, 4, "SelectLEAAddr", - [add, mul, X86mul_imm, shl, or, frameindex, X86Wrapper, - X86WrapperRIP], - []>; + [add, sub, mul, X86mul_imm, shl, or, frameindex, + X86WrapperRIP], []>; def tls64addr : ComplexPattern<i64, 4, "SelectTLSADDRAddr", [tglobaltlsaddr], []>; @@ -129,13 +132,40 @@ let isCall = 1 in def CALL64pcrel32 : Ii32<0xE8, RawFrm, (outs), (ins i64i32imm_pcrel:$dst, variable_ops), "call\t$dst", []>, - Requires<[In64BitMode]>; + Requires<[In64BitMode, NotWin64]>; def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops), - "call\t{*}$dst", [(X86call GR64:$dst)]>; + "call\t{*}$dst", [(X86call GR64:$dst)]>, + Requires<[NotWin64]>; def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops), - "call\t{*}$dst", [(X86call (loadi64 addr:$dst))]>; + "call\t{*}$dst", [(X86call (loadi64 addr:$dst))]>, + Requires<[NotWin64]>; + + def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst), + "lcall{q}\t{*}$dst", []>; } + // FIXME: We need to teach codegen about single list of call-clobbered registers. +let isCall = 1 in + // All calls clobber the non-callee saved registers. RSP is marked as + // a use to prevent stack-pointer assignments that appear immediately + // before calls from potentially appearing dead. Uses for argument + // registers are added manually. + let Defs = [RAX, RCX, RDX, R8, R9, R10, R11, + FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1, + MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS], + Uses = [RSP] in { + def WINCALL64pcrel32 : I<0xE8, RawFrm, + (outs), (ins i64i32imm_pcrel:$dst, variable_ops), + "call\t$dst", []>, + Requires<[IsWin64]>; + def WINCALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops), + "call\t{*}$dst", + [(X86call GR64:$dst)]>, Requires<[IsWin64]>; + def WINCALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops), + "call\t{*}$dst", + [(X86call (loadi64 addr:$dst))]>, Requires<[IsWin64]>; + } let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in @@ -162,6 +192,8 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { [(brind GR64:$dst)]>; def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst", [(brind (loadi64 addr:$dst))]>; + def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst), + "ljmp{q}\t{*}$dst", []>; } //===----------------------------------------------------------------------===// @@ -182,12 +214,18 @@ let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in def LEAVE64 : I<0xC9, RawFrm, (outs), (ins), "leave", []>; let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in { -let mayLoad = 1 in +let mayLoad = 1 in { def POP64r : I<0x58, AddRegFrm, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>; -let mayStore = 1 in +def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>; +def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", []>; +} +let mayStore = 1 in { def PUSH64r : I<0x50, AddRegFrm, (outs), (ins GR64:$reg), "push{q}\t$reg", []>; +def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>; +def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>; +} } let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in { @@ -246,6 +284,14 @@ let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI] in def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}", [(X86rep_stos i64)]>, REP; +def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scas{q}", []>; + +def CMPS64 : RI<0xA7, 
RawFrm, (outs), (ins), "cmps{q}", []>; + +// Fast system-call instructions +def SYSEXIT64 : RI<0x35, RawFrm, + (outs), (ins), "sysexit", []>, TB; + //===----------------------------------------------------------------------===// // Move Instructions... // @@ -275,6 +321,25 @@ def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src), "mov{q}\t{$src, $dst|$dst, $src}", [(store i64immSExt32:$src, addr:$dst)]>; +def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins i8imm:$src), + "mov{q}\t{$src, %rax|%rax, $src}", []>; +def MOV64o32a : RIi32<0xA1, RawFrm, (outs), (ins i32imm:$src), + "mov{q}\t{$src, %rax|%rax, $src}", []>; +def MOV64ao8 : RIi8<0xA2, RawFrm, (outs i8imm:$dst), (ins), + "mov{q}\t{%rax, $dst|$dst, %rax}", []>; +def MOV64ao32 : RIi32<0xA3, RawFrm, (outs i32imm:$dst), (ins), + "mov{q}\t{%rax, $dst|$dst, %rax}", []>; + +// Moves to and from segment registers +def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src), + "mov{w}\t{$src, $dst|$dst, $src}", []>; +def MOV64ms : RI<0x8C, MRMDestMem, (outs i64mem:$dst), (ins SEGMENT_REG:$src), + "mov{w}\t{$src, $dst|$dst, $src}", []>; +def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src), + "mov{w}\t{$src, $dst|$dst, $src}", []>; +def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src), + "mov{w}\t{$src, $dst|$dst, $src}", []>; + // Sign/Zero extenders // MOVSX64rr8 always has a REX prefix and it has an 8-bit register @@ -332,13 +397,15 @@ def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), [(set GR64:$dst, (zextloadi64i32 addr:$src))]>; // Any instruction that defines a 32-bit result leaves the high half of the -// register. Truncate can be lowered to EXTRACT_SUBREG, and CopyFromReg may -// be copying from a truncate, but any other 32-bit operation will zero-extend +// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may +// be copying from a truncate. And x86's cmov doesn't do anything if the +// condition is false. But any other 32-bit operation will zero-extend // up to 64 bits. def def32 : PatLeaf<(i32 GR32:$src), [{ return N->getOpcode() != ISD::TRUNCATE && N->getOpcode() != TargetInstrInfo::EXTRACT_SUBREG && - N->getOpcode() != ISD::CopyFromReg; + N->getOpcode() != ISD::CopyFromReg && + N->getOpcode() != X86ISD::CMOV; }]>; // In the case of a 32-bit def that is known to implicitly zero-extend, @@ -361,6 +428,10 @@ let neverHasSideEffects = 1 in { // let Defs = [EFLAGS] in { + +def ADD64i32 : RI<0x05, RawFrm, (outs), (ins i32imm:$src), + "add{q}\t{$src, %rax|%rax, $src}", []>; + let isTwoAddress = 1 in { let isConvertibleToThreeAddress = 1 in { let isCommutable = 1 in @@ -386,6 +457,12 @@ def ADD64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem: "add{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (add GR64:$src1, (load addr:$src2))), (implicit EFLAGS)]>; + +// Register-Register Addition - Equivalent to the normal rr form (ADD64rr), but +// differently encoded. 
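The def32 PatLeaf above leans on an architectural guarantee worth spelling out: every genuine 32-bit x86-64 instruction clears bits 63..32 of its destination register, so a zero- or any-extend of such a result costs nothing. The exclusions in the predicate (TRUNCATE, EXTRACT_SUBREG, CopyFromReg, and now CMOV, for the reason the rewritten comment gives) are the node kinds that may merely relabel a wider value rather than execute a real 32-bit operation. A two-line demonstration, with the expected selection noted as a comment rather than asserted:

#include <cassert>
#include <cstdint>

// Compiles to a single 32-bit mov on x86-64: the hardware has already zeroed
// the upper half, so no explicit zero-extend instruction is needed.
static uint64_t widen(uint32_t x) { return x; }

int main() {
  assert(widen(0xFFFFFFFFu) == 0xFFFFFFFFull);
  return 0;
}

The alternate-encoding register-register ADD that the trailing comment introduces continues immediately below.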
+def ADD64mrmrr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), + "add{l}\t{$src2, $dst|$dst, $src2}", []>; + } // isTwoAddress // Memory-Register Addition @@ -403,6 +480,10 @@ def ADD64mi32 : RIi32<0x81, MRM0m, (outs), (ins i64mem:$dst, i64i32imm :$src2), (implicit EFLAGS)]>; let Uses = [EFLAGS] in { + +def ADC64i32 : RI<0x15, RawFrm, (outs), (ins i32imm:$src), + "adc{q}\t{$src, %rax|%rax, $src}", []>; + let isTwoAddress = 1 in { let isCommutable = 1 in def ADC64rr : RI<0x11, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), @@ -458,6 +539,9 @@ def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst), (implicit EFLAGS)]>; } // isTwoAddress +def SUB64i32 : RI<0x2D, RawFrm, (outs), (ins i32imm:$src), + "sub{q}\t{$src, %rax|%rax, $src}", []>; + // Memory-Register Subtraction def SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), "sub{q}\t{$src2, $dst|$dst, $src2}", @@ -494,6 +578,9 @@ def SBB64ri32 : RIi32<0x81, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm: [(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>; } // isTwoAddress +def SBB64i32 : RI<0x1D, RawFrm, (outs), (ins i32imm:$src), + "sbb{q}\t{$src, %rax|%rax, $src}", []>; + def SBB64mr : RI<0x19, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), "sbb{q}\t{$src2, $dst|$dst, $src2}", [(store (sube (load addr:$dst), GR64:$src2), addr:$dst)]>; @@ -665,8 +752,10 @@ let isConvertibleToThreeAddress = 1 in // Can transform into LEA. def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), "shl{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>; -// NOTE: We don't use shifts of a register by one, because 'add reg,reg' is -// cheaper. +// NOTE: We don't include patterns for shifts of a register by one, because +// 'add reg,reg' is cheaper. 
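The NOTE above, and the SHL64r1 definition that follows it, reflect a costing decision: a left shift by one computes the same function as adding a register to itself, and add (or lea) is at least as cheap and easier on the register allocator, so no selection pattern is attached to the shift-by-one form. In source terms:

#include <cassert>
#include <cstdint>

// x << 1 and x + x are the same function; backends generally prefer the
// add/lea form, which is why SHL64r1 below carries no pattern.
static uint64_t twice(uint64_t x) { return x << 1; }

int main() {
  assert(twice(21) == 42);
  assert(twice(0x8000000000000000ull) == 0); // top bit shifts out either way
  return 0;
}

One oddity worth flagging for reviewers: the SHL64r1 definition that follows prints "shr{q}" for an encoding (0xD1 /4) that is a left shift; that looks like a copy-paste slip in the patch.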
+def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1), + "shr{q}\t$dst", []>; } // isTwoAddress let Uses = [CL] in @@ -729,6 +818,39 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst), [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; // Rotate instructions + +let isTwoAddress = 1 in { +def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src), + "rcl{q}\t{1, $dst|$dst, 1}", []>; +def RCL64m1 : RI<0xD1, MRM2m, (outs i64mem:$dst), (ins i64mem:$src), + "rcl{q}\t{1, $dst|$dst, 1}", []>; +let Uses = [CL] in { +def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src), + "rcl{q}\t{%cl, $dst|$dst, CL}", []>; +def RCL64mCL : RI<0xD3, MRM2m, (outs i64mem:$dst), (ins i64mem:$src), + "rcl{q}\t{%cl, $dst|$dst, CL}", []>; +} +def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), + "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCL64mi : RIi8<0xC1, MRM2m, (outs i64mem:$dst), (ins i64mem:$src, i8imm:$cnt), + "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; + +def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src), + "rcr{q}\t{1, $dst|$dst, 1}", []>; +def RCR64m1 : RI<0xD1, MRM3m, (outs i64mem:$dst), (ins i64mem:$src), + "rcr{q}\t{1, $dst|$dst, 1}", []>; +let Uses = [CL] in { +def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src), + "rcr{q}\t{%cl, $dst|$dst, CL}", []>; +def RCR64mCL : RI<0xD3, MRM3m, (outs i64mem:$dst), (ins i64mem:$src), + "rcr{q}\t{%cl, $dst|$dst, CL}", []>; +} +def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), + "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCR64mi : RIi8<0xC1, MRM3m, (outs i64mem:$dst), (ins i64mem:$src, i8imm:$cnt), + "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; +} + let isTwoAddress = 1 in { let Uses = [CL] in def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src), @@ -839,6 +961,9 @@ def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst", [(store (not (loadi64 addr:$dst)), addr:$dst)]>; let Defs = [EFLAGS] in { +def AND64i32 : RI<0x25, RawFrm, (outs), (ins i32imm:$src), + "and{q}\t{$src, %rax|%rax, $src}", []>; + let isTwoAddress = 1 in { let isCommutable = 1 in def AND64rr : RI<0x21, MRMDestReg, @@ -912,6 +1037,9 @@ def OR64mi32 : RIi32<0x81, MRM1m, (outs), (ins i64mem:$dst, i64i32imm:$src), [(store (or (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst), (implicit EFLAGS)]>; +def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i32imm:$src), + "or{q}\t{$src, %rax|%rax, $src}", []>; + let isTwoAddress = 1 in { let isCommutable = 1 in def XOR64rr : RI<0x31, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), @@ -945,6 +1073,10 @@ def XOR64mi32 : RIi32<0x81, MRM6m, (outs), (ins i64mem:$dst, i64i32imm:$src), "xor{q}\t{$src, $dst|$dst, $src}", [(store (xor (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst), (implicit EFLAGS)]>; + +def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i32imm:$src), + "xor{q}\t{$src, %rax|%rax, $src}", []>; + } // Defs = [EFLAGS] //===----------------------------------------------------------------------===// @@ -953,6 +1085,8 @@ def XOR64mi32 : RIi32<0x81, MRM6m, (outs), (ins i64mem:$dst, i64i32imm:$src), // Integer comparison let Defs = [EFLAGS] in { +def TEST64i32 : RI<0xa9, RawFrm, (outs), (ins i32imm:$src), + "test{q}\t{$src, %rax|%rax, $src}", []>; let isCommutable = 1 in def TEST64rr : RI<0x85, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "test{q}\t{$src2, $src1|$src1, $src2}", @@ -973,10 +1107,15 @@ def TEST64mi32 : RIi32<0xF7, MRM0m, (outs), [(X86cmp (and (loadi64 addr:$src1), i64immSExt32:$src2), 0), 
(implicit EFLAGS)]>; + +def CMP64i32 : RI<0x3D, RawFrm, (outs), (ins i32imm:$src), + "cmp{q}\t{$src, %rax|%rax, $src}", []>; def CMP64rr : RI<0x39, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "cmp{q}\t{$src2, $src1|$src1, $src2}", [(X86cmp GR64:$src1, GR64:$src2), (implicit EFLAGS)]>; +def CMP64mrmrr : RI<0x3B, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2), + "cmp{q}\t{$src2, $src1|$src1, $src2}", []>; def CMP64mr : RI<0x39, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "cmp{q}\t{$src2, $src1|$src1, $src2}", [(X86cmp (loadi64 addr:$src1), GR64:$src2), @@ -1306,14 +1445,12 @@ def Int_CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src) // Alias instructions that map movr0 to xor. Use xorl instead of xorq; it's // equivalent due to implicit zero-extending, and it sometimes has a smaller // encoding. -// FIXME: remove when we can teach regalloc that xor reg, reg is ok. -// FIXME: AddedComplexity gives MOV64r0 a higher priority than MOV64ri32. Remove +// FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove // when we have a better way to specify isel priority. -let Defs = [EFLAGS], AddedComplexity = 1, - isReMaterializable = 1, isAsCheapAsAMove = 1 in -def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), - "xor{l}\t${dst:subreg32}, ${dst:subreg32}", - [(set GR64:$dst, 0)]>; +let AddedComplexity = 1 in +def : Pat<(i64 0), + (SUBREG_TO_REG (i64 0), (MOV32r0), x86_subreg_32bit)>; + // Materialize i64 constant where top 32-bits are zero. let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in @@ -1343,12 +1480,12 @@ def TLS_addr64 : I<0, Pseudo, (outs), (ins lea64mem:$sym), [(X86tlsaddr tls64addr:$sym)]>, Requires<[In64BitMode]>; -let AddedComplexity = 5 in +let AddedComplexity = 5, isCodeGenOnly = 1 in def MOV64GSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "movq\t%gs:$src, $dst", [(set GR64:$dst, (gsload addr:$src))]>, SegGS; -let AddedComplexity = 5 in +let AddedComplexity = 5, isCodeGenOnly = 1 in def MOV64FSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "movq\t%fs:$src, $dst", [(set GR64:$dst, (fsload addr:$src))]>, SegFS; @@ -1371,11 +1508,43 @@ def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val), "xadd\t$val, $ptr", [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>, TB, LOCK; + def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val), "xchg\t$val, $ptr", [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))]>; } +// Optimized codegen when the non-memory output is not used. +// FIXME: Use normal add / sub instructions and add lock prefix dynamically. 
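The comment above introduces the LOCK_-prefixed pseudo-instructions that follow. Their reason for existing: when an atomic read-modify-write's old value goes unused, a plain lock add (or sub, inc, dec) memory form is smaller and cheaper than lock xadd into a register. The standard-library shape that should reach this path (ordinary C++, no backend assumptions beyond that):

#include <atomic>

std::atomic<long> counter{0};

// The fetch_add result is discarded, so the RMW can be selected as
// `lock add $1, counter` rather than `lock xadd %reg, counter`.
void bump() { counter.fetch_add(1, std::memory_order_relaxed); }

int main() {
  bump();
  return counter.load() == 1 ? 0 : 1;
}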
+def LOCK_ADD64mr : RI<0x03, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), + "lock\n\t" + "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_ADD64mi8 : RIi8<0x83, MRM0m, (outs), + (ins i64mem:$dst, i64i8imm :$src2), + "lock\n\t" + "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_ADD64mi32 : RIi32<0x81, MRM0m, (outs), + (ins i64mem:$dst, i64i32imm :$src2), + "lock\n\t" + "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), + "lock\n\t" + "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_SUB64mi8 : RIi8<0x83, MRM5m, (outs), + (ins i64mem:$dst, i64i8imm :$src2), + "lock\n\t" + "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_SUB64mi32 : RIi32<0x81, MRM5m, (outs), + (ins i64mem:$dst, i64i32imm:$src2), + "lock\n\t" + "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), + "lock\n\t" + "inc{q}\t$dst", []>, LOCK; +def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), + "lock\n\t" + "dec{q}\t$dst", []>, LOCK; + // Atomic exchange, and, or, xor let Constraints = "$val = $dst", Defs = [EFLAGS], usesCustomDAGSchedInserter = 1 in { @@ -1405,78 +1574,88 @@ def ATOMUMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), [(set GR64:$dst, (atomic_load_umax_64 addr:$ptr, GR64:$val))]>; } +// Segmentation support instructions + +// i16mem operand in LAR64rm and GR32 operand in LAR32rr is not a typo. +def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), + "lar{q}\t{$src, $dst|$dst, $src}", []>, TB; +def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src), + "lar{q}\t{$src, $dst|$dst, $src}", []>, TB; + +// String manipulation instructions + +def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", []>; + //===----------------------------------------------------------------------===// // Non-Instruction Patterns //===----------------------------------------------------------------------===// -// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable +// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable when not in small +// code model mode, should use 'movabs'. FIXME: This is really a hack, the +// 'movabs' predicate should handle this sort of thing. +def : Pat<(i64 (X86Wrapper tconstpool :$dst)), + (MOV64ri tconstpool :$dst)>, Requires<[FarData]>; +def : Pat<(i64 (X86Wrapper tjumptable :$dst)), + (MOV64ri tjumptable :$dst)>, Requires<[FarData]>; +def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)), + (MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>; +def : Pat<(i64 (X86Wrapper texternalsym:$dst)), + (MOV64ri texternalsym:$dst)>, Requires<[FarData]>; + +// In static codegen with small code model, we can get the address of a label +// into a register with 'movl'. FIXME: This is a hack, the 'imm' predicate of +// the MOV64ri64i32 should accept these. +def : Pat<(i64 (X86Wrapper tconstpool :$dst)), + (MOV64ri64i32 tconstpool :$dst)>, Requires<[SmallCode]>; +def : Pat<(i64 (X86Wrapper tjumptable :$dst)), + (MOV64ri64i32 tjumptable :$dst)>, Requires<[SmallCode]>; +def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)), + (MOV64ri64i32 tglobaladdr :$dst)>, Requires<[SmallCode]>; +def : Pat<(i64 (X86Wrapper texternalsym:$dst)), + (MOV64ri64i32 texternalsym:$dst)>, Requires<[SmallCode]>; + +// In kernel code model, we can get the address of a label +// into a register with 'movq'. FIXME: This is a hack, the 'imm' predicate of +// the MOV64ri32 should accept these. 
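The three comment blocks above describe one materialization strategy per code model, and the mapping is easiest to see from the instruction each pattern selects: far data uses MOV64ri (movabs, a full 64-bit immediate), small static code uses MOV64ri64i32 (a 32-bit zero-extended move), and kernel code, whose patterns follow, uses MOV64ri32 (a 32-bit sign-extended move, since kernel addresses sit in the top 2 GiB). A trivial program to inspect under the matching -mcmodel= flags; the flags are standard GCC/Clang options, but the exact asm depends on the relocation model, so treat the comment as an expectation rather than a guarantee:

#include <cstdio>

static int object;

// Compile with -S -fno-pic and one of -mcmodel=small / kernel / large and the
// address of `object` should be materialized as movl / movq (sign-extended)
// / movabsq respectively, mirroring the three pattern groups in this hunk.
int *addr() { return &object; }

int main() {
  std::printf("%p\n", static_cast<void *>(addr()));
  return 0;
}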
def : Pat<(i64 (X86Wrapper tconstpool :$dst)), - (MOV64ri tconstpool :$dst)>, Requires<[NotSmallCode]>; + (MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>; def : Pat<(i64 (X86Wrapper tjumptable :$dst)), - (MOV64ri tjumptable :$dst)>, Requires<[NotSmallCode]>; + (MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>; def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)), - (MOV64ri tglobaladdr :$dst)>, Requires<[NotSmallCode]>; + (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>; def : Pat<(i64 (X86Wrapper texternalsym:$dst)), - (MOV64ri texternalsym:$dst)>, Requires<[NotSmallCode]>; + (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>; // If we have small model and -static mode, it is safe to store global addresses // directly as immediates. FIXME: This is really a hack, the 'imm' predicate -// should handle this sort of thing. +// for MOV64mi32 should handle this sort of thing. def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst), (MOV64mi32 addr:$dst, tconstpool:$src)>, - Requires<[SmallCode, IsStatic]>; + Requires<[NearData, IsStatic]>; def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst), (MOV64mi32 addr:$dst, tjumptable:$src)>, - Requires<[SmallCode, IsStatic]>; + Requires<[NearData, IsStatic]>; def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst), (MOV64mi32 addr:$dst, tglobaladdr:$src)>, - Requires<[SmallCode, IsStatic]>; + Requires<[NearData, IsStatic]>; def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst), (MOV64mi32 addr:$dst, texternalsym:$src)>, - Requires<[SmallCode, IsStatic]>; - -// If we have small model and -static mode, it is safe to store global addresses -// directly as immediates. FIXME: This is really a hack, the 'imm' predicate -// should handle this sort of thing. -def : Pat<(store (i64 (X86WrapperRIP tconstpool:$src)), addr:$dst), - (MOV64mi32 addr:$dst, tconstpool:$src)>, - Requires<[SmallCode, IsStatic]>; -def : Pat<(store (i64 (X86WrapperRIP tjumptable:$src)), addr:$dst), - (MOV64mi32 addr:$dst, tjumptable:$src)>, - Requires<[SmallCode, IsStatic]>; -def : Pat<(store (i64 (X86WrapperRIP tglobaladdr:$src)), addr:$dst), - (MOV64mi32 addr:$dst, tglobaladdr:$src)>, - Requires<[SmallCode, IsStatic]>; -def : Pat<(store (i64 (X86WrapperRIP texternalsym:$src)), addr:$dst), - (MOV64mi32 addr:$dst, texternalsym:$src)>, - Requires<[SmallCode, IsStatic]>; - + Requires<[NearData, IsStatic]>; // Calls // Direct PC relative function call for small code model. 32-bit displacement // sign extended to 64-bit. 
def : Pat<(X86call (i64 tglobaladdr:$dst)), - (CALL64pcrel32 tglobaladdr:$dst)>; + (CALL64pcrel32 tglobaladdr:$dst)>, Requires<[NotWin64]>; def : Pat<(X86call (i64 texternalsym:$dst)), - (CALL64pcrel32 texternalsym:$dst)>; - -def : Pat<(X86tailcall (i64 tglobaladdr:$dst)), - (CALL64pcrel32 tglobaladdr:$dst)>; -def : Pat<(X86tailcall (i64 texternalsym:$dst)), - (CALL64pcrel32 texternalsym:$dst)>; - -def : Pat<(X86tailcall GR64:$dst), - (CALL64r GR64:$dst)>; + (CALL64pcrel32 texternalsym:$dst)>, Requires<[NotWin64]>; +def : Pat<(X86call (i64 tglobaladdr:$dst)), + (WINCALL64pcrel32 tglobaladdr:$dst)>, Requires<[IsWin64]>; +def : Pat<(X86call (i64 texternalsym:$dst)), + (WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>; // tailcall stuff -def : Pat<(X86tailcall GR32:$dst), - (TAILCALL)>; -def : Pat<(X86tailcall (i64 tglobaladdr:$dst)), - (TAILCALL)>; -def : Pat<(X86tailcall (i64 texternalsym:$dst)), - (TAILCALL)>; - def : Pat<(X86tcret GR64:$dst, imm:$off), (TCRETURNri64 GR64:$dst, imm:$off)>; @@ -1540,30 +1719,15 @@ def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>; // For other extloads, use subregs, since the high contents of the register are // defined after an extload. def : Pat<(extloadi64i32 addr:$src), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (MOV32rm addr:$src), + (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), x86_subreg_32bit)>; -def : Pat<(extloadi16i1 addr:$src), - (INSERT_SUBREG (i16 (IMPLICIT_DEF)), (MOV8rm addr:$src), - x86_subreg_8bit)>, - Requires<[In64BitMode]>; -def : Pat<(extloadi16i8 addr:$src), - (INSERT_SUBREG (i16 (IMPLICIT_DEF)), (MOV8rm addr:$src), - x86_subreg_8bit)>, - Requires<[In64BitMode]>; - -// anyext -def : Pat<(i64 (anyext GR8:$src)), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src, x86_subreg_8bit)>; -def : Pat<(i64 (anyext GR16:$src)), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>; -def : Pat<(i64 (anyext GR32:$src)), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, x86_subreg_32bit)>; -def : Pat<(i16 (anyext GR8:$src)), - (INSERT_SUBREG (i16 (IMPLICIT_DEF)), GR8:$src, x86_subreg_8bit)>, - Requires<[In64BitMode]>; -def : Pat<(i32 (anyext GR8:$src)), - (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, x86_subreg_8bit)>, - Requires<[In64BitMode]>; + +// anyext. Define these to do an explicit zero-extend to +// avoid partial-register updates. +def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8 GR8 :$src)>; +def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>; +def : Pat<(i64 (anyext GR32:$src)), + (SUBREG_TO_REG (i64 0), GR32:$src, x86_subreg_32bit)>; //===----------------------------------------------------------------------===// // Some peepholes @@ -1661,6 +1825,11 @@ def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))), (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD), x86_subreg_8bit_hi))>, Requires<[In64BitMode]>; +def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))), + (MOVZX32_NOREXrr8 + (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD), + x86_subreg_8bit_hi))>, + Requires<[In64BitMode]>; def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))), (SUBREG_TO_REG (i64 0), @@ -1668,6 +1837,13 @@ def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))), (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD), x86_subreg_8bit_hi)), x86_subreg_32bit)>; +def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))), + (SUBREG_TO_REG + (i64 0), + (MOVZX32_NOREXrr8 + (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD), + x86_subreg_8bit_hi)), + x86_subreg_32bit)>; // h-register extract and store. 
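The anyext patterns above are defined as explicit zero-extends to dodge partial-register update stalls, and the h-register patterns that begin below deal with a related encoding wrinkle: AH, BH, CH and DH cannot be addressed from any instruction that carries a REX prefix, so extracting bits 15..8 in 64-bit mode must go through the _NOREX register classes and MOVZX32_NOREXrr8. The source-level shape that wants an h-register:

#include <cassert>
#include <cstdint>

// (x >> 8) & 0xFF is addressable directly as the high byte register (for
// example movzbl %ah, %eax), but only in encodings without a REX prefix,
// which is what the _NOREX classes in the patterns below enforce.
static uint32_t second_byte(uint16_t x) { return (x >> 8) & 0xFF; }

int main() {
  assert(second_byte(0xBEEF) == 0xBE);
  return 0;
}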
def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst), @@ -1906,6 +2082,102 @@ def : Pat<(parallel (store (i64 (X86dec_flag (loadi64 addr:$dst))), addr:$dst), (implicit EFLAGS)), (DEC64m addr:$dst)>; +// Register-Register Logical Or with EFLAGS result +def : Pat<(parallel (X86or_flag GR64:$src1, GR64:$src2), + (implicit EFLAGS)), + (OR64rr GR64:$src1, GR64:$src2)>; + +// Register-Integer Logical Or with EFLAGS result +def : Pat<(parallel (X86or_flag GR64:$src1, i64immSExt8:$src2), + (implicit EFLAGS)), + (OR64ri8 GR64:$src1, i64immSExt8:$src2)>; +def : Pat<(parallel (X86or_flag GR64:$src1, i64immSExt32:$src2), + (implicit EFLAGS)), + (OR64ri32 GR64:$src1, i64immSExt32:$src2)>; + +// Register-Memory Logical Or with EFLAGS result +def : Pat<(parallel (X86or_flag GR64:$src1, (loadi64 addr:$src2)), + (implicit EFLAGS)), + (OR64rm GR64:$src1, addr:$src2)>; + +// Memory-Register Logical Or with EFLAGS result +def : Pat<(parallel (store (X86or_flag (loadi64 addr:$dst), GR64:$src2), + addr:$dst), + (implicit EFLAGS)), + (OR64mr addr:$dst, GR64:$src2)>; +def : Pat<(parallel (store (X86or_flag (loadi64 addr:$dst), i64immSExt8:$src2), + addr:$dst), + (implicit EFLAGS)), + (OR64mi8 addr:$dst, i64immSExt8:$src2)>; +def : Pat<(parallel (store (X86or_flag (loadi64 addr:$dst), i64immSExt32:$src2), + addr:$dst), + (implicit EFLAGS)), + (OR64mi32 addr:$dst, i64immSExt32:$src2)>; + +// Register-Register Logical XOr with EFLAGS result +def : Pat<(parallel (X86xor_flag GR64:$src1, GR64:$src2), + (implicit EFLAGS)), + (XOR64rr GR64:$src1, GR64:$src2)>; + +// Register-Integer Logical XOr with EFLAGS result +def : Pat<(parallel (X86xor_flag GR64:$src1, i64immSExt8:$src2), + (implicit EFLAGS)), + (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>; +def : Pat<(parallel (X86xor_flag GR64:$src1, i64immSExt32:$src2), + (implicit EFLAGS)), + (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>; + +// Register-Memory Logical XOr with EFLAGS result +def : Pat<(parallel (X86xor_flag GR64:$src1, (loadi64 addr:$src2)), + (implicit EFLAGS)), + (XOR64rm GR64:$src1, addr:$src2)>; + +// Memory-Register Logical XOr with EFLAGS result +def : Pat<(parallel (store (X86xor_flag (loadi64 addr:$dst), GR64:$src2), + addr:$dst), + (implicit EFLAGS)), + (XOR64mr addr:$dst, GR64:$src2)>; +def : Pat<(parallel (store (X86xor_flag (loadi64 addr:$dst), i64immSExt8:$src2), + addr:$dst), + (implicit EFLAGS)), + (XOR64mi8 addr:$dst, i64immSExt8:$src2)>; +def : Pat<(parallel (store (X86xor_flag (loadi64 addr:$dst), i64immSExt32:$src2), + addr:$dst), + (implicit EFLAGS)), + (XOR64mi32 addr:$dst, i64immSExt32:$src2)>; + +// Register-Register Logical And with EFLAGS result +def : Pat<(parallel (X86and_flag GR64:$src1, GR64:$src2), + (implicit EFLAGS)), + (AND64rr GR64:$src1, GR64:$src2)>; + +// Register-Integer Logical And with EFLAGS result +def : Pat<(parallel (X86and_flag GR64:$src1, i64immSExt8:$src2), + (implicit EFLAGS)), + (AND64ri8 GR64:$src1, i64immSExt8:$src2)>; +def : Pat<(parallel (X86and_flag GR64:$src1, i64immSExt32:$src2), + (implicit EFLAGS)), + (AND64ri32 GR64:$src1, i64immSExt32:$src2)>; + +// Register-Memory Logical And with EFLAGS result +def : Pat<(parallel (X86and_flag GR64:$src1, (loadi64 addr:$src2)), + (implicit EFLAGS)), + (AND64rm GR64:$src1, addr:$src2)>; + +// Memory-Register Logical And with EFLAGS result +def : Pat<(parallel (store (X86and_flag (loadi64 addr:$dst), GR64:$src2), + addr:$dst), + (implicit EFLAGS)), + (AND64mr addr:$dst, GR64:$src2)>; +def : Pat<(parallel (store (X86and_flag (loadi64 addr:$dst), 
i64immSExt8:$src2), + addr:$dst), + (implicit EFLAGS)), + (AND64mi8 addr:$dst, i64immSExt8:$src2)>; +def : Pat<(parallel (store (X86and_flag (loadi64 addr:$dst), i64immSExt32:$src2), + addr:$dst), + (implicit EFLAGS)), + (AND64mi32 addr:$dst, i64immSExt32:$src2)>; + //===----------------------------------------------------------------------===// // X86-64 SSE Instructions //===----------------------------------------------------------------------===// @@ -1977,3 +2249,15 @@ let isTwoAddress = 1 in { } defm PINSRQ : SS41I_insert64<0x22, "pinsrq">; + +// -disable-16bit support. +def : Pat<(truncstorei16 (i64 imm:$src), addr:$dst), + (MOV16mi addr:$dst, imm:$src)>; +def : Pat<(truncstorei16 GR64:$src, addr:$dst), + (MOV16mr addr:$dst, (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit))>; +def : Pat<(i64 (sextloadi16 addr:$dst)), + (MOVSX64rm16 addr:$dst)>; +def : Pat<(i64 (zextloadi16 addr:$dst)), + (MOVZX64rm16 addr:$dst)>; +def : Pat<(i64 (extloadi16 addr:$dst)), + (MOVZX64rm16 addr:$dst)>; diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h index 6359542..c475b56 100644 --- a/lib/Target/X86/X86InstrBuilder.h +++ b/lib/Target/X86/X86InstrBuilder.h @@ -26,6 +26,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/PseudoSourceValue.h" namespace llvm { @@ -47,7 +48,7 @@ struct X86AddressMode { unsigned Scale; unsigned IndexReg; - unsigned Disp; + int Disp; GlobalValue *GV; unsigned GVOpFlags; @@ -61,20 +62,20 @@ struct X86AddressMode { /// current instruction -- that is, a dereference of an address in a register, /// with no scale, index or displacement. An example is: DWORD PTR [EAX]. /// -inline const MachineInstrBuilder &addDirectMem(const MachineInstrBuilder &MIB, - unsigned Reg) { +static inline const MachineInstrBuilder & +addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg) { // Because memory references are always represented with four // values, this adds: Reg, [1, NoReg, 0] to the instruction. return MIB.addReg(Reg).addImm(1).addReg(0).addImm(0); } -inline const MachineInstrBuilder &addLeaOffset(const MachineInstrBuilder &MIB, - int Offset) { +static inline const MachineInstrBuilder & +addLeaOffset(const MachineInstrBuilder &MIB, int Offset) { return MIB.addImm(1).addReg(0).addImm(Offset); } -inline const MachineInstrBuilder &addOffset(const MachineInstrBuilder &MIB, - int Offset) { +static inline const MachineInstrBuilder & +addOffset(const MachineInstrBuilder &MIB, int Offset) { return addLeaOffset(MIB, Offset).addReg(0); } @@ -82,29 +83,29 @@ inline const MachineInstrBuilder &addOffset(const MachineInstrBuilder &MIB, /// [Reg + Offset], i.e., one with no scale or index, but with a /// displacement. An example is: DWORD PTR [EAX + 4]. 
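The builder helpers in this header all append the same five-operand memory reference that the doc comment above describes: base register, scale immediate, index register, displacement, plus (elsewhere) a segment. A toy model of the address being encoded, with invented types rather than LLVM's X86AddressMode; note, though, that the hunk above also changes the real struct's Disp field from unsigned to int, matching the signed displacement here:

#include <cassert>
#include <cstdint>

struct AddrMode {
  uint64_t Base = 0;
  unsigned Scale = 1;   // hardware allows only 1, 2, 4 or 8
  uint64_t Index = 0;
  int32_t Disp = 0;     // signed, like the Disp field after this patch
};

static uint64_t effectiveAddress(const AddrMode &AM) {
  return AM.Base + uint64_t(AM.Scale) * AM.Index + int64_t(AM.Disp);
}

int main() {
  AddrMode AM;
  AM.Base = 0x1000; AM.Index = 3; AM.Scale = 4; AM.Disp = -8;
  assert(effectiveAddress(AM) == 0x1000 + 4 * 3 - 8);
  return 0;
}

addDirectMem's "Reg, [1, NoReg, 0]" comment is exactly this structure with Scale = 1 and no index or displacement; the addRegOffset helper the interrupted doc comment belongs to continues below.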
/// -inline const MachineInstrBuilder &addRegOffset(const MachineInstrBuilder &MIB, - unsigned Reg, bool isKill, - int Offset) { +static inline const MachineInstrBuilder & +addRegOffset(const MachineInstrBuilder &MIB, + unsigned Reg, bool isKill, int Offset) { return addOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset); } -inline const MachineInstrBuilder &addLeaRegOffset(const MachineInstrBuilder &MIB, - unsigned Reg, bool isKill, - int Offset) { +static inline const MachineInstrBuilder & +addLeaRegOffset(const MachineInstrBuilder &MIB, + unsigned Reg, bool isKill, int Offset) { return addLeaOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset); } /// addRegReg - This function is used to add a memory reference of the form: /// [Reg + Reg]. -inline const MachineInstrBuilder &addRegReg(const MachineInstrBuilder &MIB, +static inline const MachineInstrBuilder &addRegReg(const MachineInstrBuilder &MIB, unsigned Reg1, bool isKill1, unsigned Reg2, bool isKill2) { return MIB.addReg(Reg1, getKillRegState(isKill1)).addImm(1) .addReg(Reg2, getKillRegState(isKill2)).addImm(0); } -inline const MachineInstrBuilder &addLeaAddress(const MachineInstrBuilder &MIB, - const X86AddressMode &AM) { +static inline const MachineInstrBuilder & +addLeaAddress(const MachineInstrBuilder &MIB, const X86AddressMode &AM) { assert (AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8); if (AM.BaseType == X86AddressMode::RegBase) @@ -120,8 +121,9 @@ inline const MachineInstrBuilder &addLeaAddress(const MachineInstrBuilder &MIB, return MIB.addImm(AM.Disp); } -inline const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB, - const X86AddressMode &AM) { +static inline const MachineInstrBuilder & +addFullAddress(const MachineInstrBuilder &MIB, + const X86AddressMode &AM) { return addLeaAddress(MIB, AM).addReg(0); } @@ -130,7 +132,7 @@ inline const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB, /// reference has base register as the FrameIndex offset until it is resolved. /// This allows a constant offset to be specified as well... /// -inline const MachineInstrBuilder & +static inline const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) { MachineInstr *MI = MIB; MachineFunction &MF = *MI->getParent()->getParent(); @@ -141,11 +143,11 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) { Flags |= MachineMemOperand::MOLoad; if (TID.mayStore()) Flags |= MachineMemOperand::MOStore; - MachineMemOperand MMO(PseudoSourceValue::getFixedStack(FI), - Flags, - MFI.getObjectOffset(FI) + Offset, - MFI.getObjectSize(FI), - MFI.getObjectAlignment(FI)); + MachineMemOperand *MMO = + MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), + Flags, Offset, + MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); return addOffset(MIB.addFrameIndex(FI), Offset) .addMemOperand(MMO); } @@ -157,7 +159,7 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) { /// the GlobalBaseReg parameter can be used to make this a /// GlobalBaseReg-relative reference. 
/// -inline const MachineInstrBuilder & +static inline const MachineInstrBuilder & addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI, unsigned GlobalBaseReg, unsigned char OpFlags) { //FIXME: factor this diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index bc7def4..7e37373 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -303,6 +303,31 @@ def TST_Fp80 : FpI_<(outs), (ins RFP80:$src), OneArgFP, []>; } def TST_F : FPI<0xE4, RawFrm, (outs), (ins), "ftst">, D9; +// Versions of FP instructions that take a single memory operand. Added for the +// disassembler; remove as they are included with patterns elsewhere. +def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom\t$src">; +def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp\t$src">; + +def FLDENVm : FPI<0xD9, MRM4m, (outs), (ins f32mem:$src), "fldenv\t$src">; +def FSTENVm : FPI<0xD9, MRM6m, (outs f32mem:$dst), (ins), "fstenv\t$dst">; + +def FICOM32m : FPI<0xDA, MRM2m, (outs), (ins i32mem:$src), "ficom{l}\t$src">; +def FICOMP32m: FPI<0xDA, MRM3m, (outs), (ins i32mem:$src), "ficomp{l}\t$src">; + +def FCOM64m : FPI<0xDC, MRM2m, (outs), (ins f64mem:$src), "fcom\t$src">; +def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp\t$src">; + +def FISTTP32m: FPI<0xDD, MRM1m, (outs i32mem:$dst), (ins), "fisttp{l}\t$dst">; +def FRSTORm : FPI<0xDD, MRM4m, (outs f32mem:$dst), (ins), "frstor\t$dst">; +def FSAVEm : FPI<0xDD, MRM6m, (outs f32mem:$dst), (ins), "fsave\t$dst">; +def FSTSWm : FPI<0xDD, MRM7m, (outs f32mem:$dst), (ins), "fstsw\t$dst">; + +def FICOM16m : FPI<0xDE, MRM2m, (outs), (ins i16mem:$src), "ficom{w}\t$src">; +def FICOMP16m: FPI<0xDE, MRM3m, (outs), (ins i16mem:$src), "ficomp{w}\t$src">; + +def FBLDm : FPI<0xDF, MRM4m, (outs), (ins f32mem:$src), "fbld\t$src">; +def FBSTPm : FPI<0xDF, MRM6m, (outs f32mem:$dst), (ins), "fbstp\t$dst">; + // Floating point cmovs. multiclass FPCMov<PatLeaf cc> { def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2), diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index eeed5bd..abdb313 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -79,6 +79,7 @@ class XD { bits<4> Prefix = 11; } class XS { bits<4> Prefix = 12; } class T8 { bits<4> Prefix = 13; } class TA { bits<4> Prefix = 14; } +class TF { bits<4> Prefix = 15; } class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, string AsmStr> @@ -142,6 +143,24 @@ class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern> let Pattern = pattern; } +// Templates for instructions that use a 16- or 32-bit segmented address as +// their only operand: lcall (FAR CALL) and ljmp (FAR JMP) +// +// Iseg16 - 16-bit segment selector, 16-bit offset +// Iseg32 - 16-bit segment selector, 32-bit offset + +class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm, + list<dag> pattern> : X86Inst<o, f, NoImm, outs, ins, asm> { + let Pattern = pattern; + let CodeSize = 3; +} + +class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm, + list<dag> pattern> : X86Inst<o, f, NoImm, outs, ins, asm> { + let Pattern = pattern; + let CodeSize = 3; +} + // SSE1 Instruction Templates: // // SSI - SSE1 instructions with XS prefix. @@ -229,6 +248,16 @@ class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE42]>; +// SS42FI - SSE 4.2 instructions with TF prefix. 
+class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : I<o, F, outs, ins, asm, pattern>, TF, Requires<[HasSSE42]>; + +// SS42AI = SSE 4.2 instructions with TA prefix +class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : I<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE42]>; + // X86-64 Instruction templates... // @@ -282,4 +311,3 @@ class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> patter : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX]>; class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX]>; - diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index e5d84c5..e8a39d1 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -18,8 +18,8 @@ #include "X86MachineFunctionInfo.h" #include "X86Subtarget.h" #include "X86TargetMachine.h" -#include "llvm/GlobalVariable.h" #include "llvm/DerivedTypes.h" +#include "llvm/LLVMContext.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -27,24 +27,24 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/MC/MCAsmInfo.h" using namespace llvm; -namespace { - cl::opt<bool> - NoFusing("disable-spill-fusing", - cl::desc("Disable fusing of spill code into instructions")); - cl::opt<bool> - PrintFailedFusing("print-failed-fuse-candidates", - cl::desc("Print instructions that the allocator wants to" - " fuse, but the X86 backend currently can't"), - cl::Hidden); - cl::opt<bool> - ReMatPICStubLoad("remat-pic-stub-load", - cl::desc("Re-materialize load from stub in PIC mode"), - cl::init(false), cl::Hidden); -} +static cl::opt<bool> +NoFusing("disable-spill-fusing", + cl::desc("Disable fusing of spill code into instructions")); +static cl::opt<bool> +PrintFailedFusing("print-failed-fuse-candidates", + cl::desc("Print instructions that the allocator wants to" + " fuse, but the X86 backend currently can't"), + cl::Hidden); +static cl::opt<bool> +ReMatPICStubLoad("remat-pic-stub-load", + cl::desc("Re-materialize load from stub in PIC mode"), + cl::init(false), cl::Hidden); X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) : TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)), @@ -212,9 +212,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) unsigned RegOp = OpTbl2Addr[i][0]; unsigned MemOp = OpTbl2Addr[i][1]; if (!RegOp2MemOpTable2Addr.insert(std::make_pair((unsigned*)RegOp, - MemOp)).second) + std::make_pair(MemOp,0))).second) assert(false && "Duplicated entries?"); - unsigned AuxInfo = 0 | (1 << 4) | (1 << 5); // Index 0,folded load and store + // Index 0, folded load and store, no alignment requirement. + unsigned AuxInfo = 0 | (1 << 4) | (1 << 5); if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, std::make_pair(RegOp, AuxInfo))).second) @@ -222,93 +223,94 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) } // If the third value is 1, then it's folding either a load or a store. 
- static const unsigned OpTbl0[][3] = { - { X86::BT16ri8, X86::BT16mi8, 1 }, - { X86::BT32ri8, X86::BT32mi8, 1 }, - { X86::BT64ri8, X86::BT64mi8, 1 }, - { X86::CALL32r, X86::CALL32m, 1 }, - { X86::CALL64r, X86::CALL64m, 1 }, - { X86::CMP16ri, X86::CMP16mi, 1 }, - { X86::CMP16ri8, X86::CMP16mi8, 1 }, - { X86::CMP16rr, X86::CMP16mr, 1 }, - { X86::CMP32ri, X86::CMP32mi, 1 }, - { X86::CMP32ri8, X86::CMP32mi8, 1 }, - { X86::CMP32rr, X86::CMP32mr, 1 }, - { X86::CMP64ri32, X86::CMP64mi32, 1 }, - { X86::CMP64ri8, X86::CMP64mi8, 1 }, - { X86::CMP64rr, X86::CMP64mr, 1 }, - { X86::CMP8ri, X86::CMP8mi, 1 }, - { X86::CMP8rr, X86::CMP8mr, 1 }, - { X86::DIV16r, X86::DIV16m, 1 }, - { X86::DIV32r, X86::DIV32m, 1 }, - { X86::DIV64r, X86::DIV64m, 1 }, - { X86::DIV8r, X86::DIV8m, 1 }, - { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0 }, - { X86::FsMOVAPDrr, X86::MOVSDmr, 0 }, - { X86::FsMOVAPSrr, X86::MOVSSmr, 0 }, - { X86::IDIV16r, X86::IDIV16m, 1 }, - { X86::IDIV32r, X86::IDIV32m, 1 }, - { X86::IDIV64r, X86::IDIV64m, 1 }, - { X86::IDIV8r, X86::IDIV8m, 1 }, - { X86::IMUL16r, X86::IMUL16m, 1 }, - { X86::IMUL32r, X86::IMUL32m, 1 }, - { X86::IMUL64r, X86::IMUL64m, 1 }, - { X86::IMUL8r, X86::IMUL8m, 1 }, - { X86::JMP32r, X86::JMP32m, 1 }, - { X86::JMP64r, X86::JMP64m, 1 }, - { X86::MOV16ri, X86::MOV16mi, 0 }, - { X86::MOV16rr, X86::MOV16mr, 0 }, - { X86::MOV32ri, X86::MOV32mi, 0 }, - { X86::MOV32rr, X86::MOV32mr, 0 }, - { X86::MOV64ri32, X86::MOV64mi32, 0 }, - { X86::MOV64rr, X86::MOV64mr, 0 }, - { X86::MOV8ri, X86::MOV8mi, 0 }, - { X86::MOV8rr, X86::MOV8mr, 0 }, - { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, 0 }, - { X86::MOVAPDrr, X86::MOVAPDmr, 0 }, - { X86::MOVAPSrr, X86::MOVAPSmr, 0 }, - { X86::MOVDQArr, X86::MOVDQAmr, 0 }, - { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0 }, - { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0 }, - { X86::MOVPS2SSrr, X86::MOVPS2SSmr, 0 }, - { X86::MOVSDrr, X86::MOVSDmr, 0 }, - { X86::MOVSDto64rr, X86::MOVSDto64mr, 0 }, - { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0 }, - { X86::MOVSSrr, X86::MOVSSmr, 0 }, - { X86::MOVUPDrr, X86::MOVUPDmr, 0 }, - { X86::MOVUPSrr, X86::MOVUPSmr, 0 }, - { X86::MUL16r, X86::MUL16m, 1 }, - { X86::MUL32r, X86::MUL32m, 1 }, - { X86::MUL64r, X86::MUL64m, 1 }, - { X86::MUL8r, X86::MUL8m, 1 }, - { X86::SETAEr, X86::SETAEm, 0 }, - { X86::SETAr, X86::SETAm, 0 }, - { X86::SETBEr, X86::SETBEm, 0 }, - { X86::SETBr, X86::SETBm, 0 }, - { X86::SETEr, X86::SETEm, 0 }, - { X86::SETGEr, X86::SETGEm, 0 }, - { X86::SETGr, X86::SETGm, 0 }, - { X86::SETLEr, X86::SETLEm, 0 }, - { X86::SETLr, X86::SETLm, 0 }, - { X86::SETNEr, X86::SETNEm, 0 }, - { X86::SETNOr, X86::SETNOm, 0 }, - { X86::SETNPr, X86::SETNPm, 0 }, - { X86::SETNSr, X86::SETNSm, 0 }, - { X86::SETOr, X86::SETOm, 0 }, - { X86::SETPr, X86::SETPm, 0 }, - { X86::SETSr, X86::SETSm, 0 }, - { X86::TAILJMPr, X86::TAILJMPm, 1 }, - { X86::TEST16ri, X86::TEST16mi, 1 }, - { X86::TEST32ri, X86::TEST32mi, 1 }, - { X86::TEST64ri32, X86::TEST64mi32, 1 }, - { X86::TEST8ri, X86::TEST8mi, 1 } + static const unsigned OpTbl0[][4] = { + { X86::BT16ri8, X86::BT16mi8, 1, 0 }, + { X86::BT32ri8, X86::BT32mi8, 1, 0 }, + { X86::BT64ri8, X86::BT64mi8, 1, 0 }, + { X86::CALL32r, X86::CALL32m, 1, 0 }, + { X86::CALL64r, X86::CALL64m, 1, 0 }, + { X86::CMP16ri, X86::CMP16mi, 1, 0 }, + { X86::CMP16ri8, X86::CMP16mi8, 1, 0 }, + { X86::CMP16rr, X86::CMP16mr, 1, 0 }, + { X86::CMP32ri, X86::CMP32mi, 1, 0 }, + { X86::CMP32ri8, X86::CMP32mi8, 1, 0 }, + { X86::CMP32rr, X86::CMP32mr, 1, 0 }, + { X86::CMP64ri32, X86::CMP64mi32, 1, 0 }, + { X86::CMP64ri8, X86::CMP64mi8, 1, 0 }, + { 
X86::CMP64rr, X86::CMP64mr, 1, 0 }, + { X86::CMP8ri, X86::CMP8mi, 1, 0 }, + { X86::CMP8rr, X86::CMP8mr, 1, 0 }, + { X86::DIV16r, X86::DIV16m, 1, 0 }, + { X86::DIV32r, X86::DIV32m, 1, 0 }, + { X86::DIV64r, X86::DIV64m, 1, 0 }, + { X86::DIV8r, X86::DIV8m, 1, 0 }, + { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0, 16 }, + { X86::FsMOVAPDrr, X86::MOVSDmr, 0, 0 }, + { X86::FsMOVAPSrr, X86::MOVSSmr, 0, 0 }, + { X86::IDIV16r, X86::IDIV16m, 1, 0 }, + { X86::IDIV32r, X86::IDIV32m, 1, 0 }, + { X86::IDIV64r, X86::IDIV64m, 1, 0 }, + { X86::IDIV8r, X86::IDIV8m, 1, 0 }, + { X86::IMUL16r, X86::IMUL16m, 1, 0 }, + { X86::IMUL32r, X86::IMUL32m, 1, 0 }, + { X86::IMUL64r, X86::IMUL64m, 1, 0 }, + { X86::IMUL8r, X86::IMUL8m, 1, 0 }, + { X86::JMP32r, X86::JMP32m, 1, 0 }, + { X86::JMP64r, X86::JMP64m, 1, 0 }, + { X86::MOV16ri, X86::MOV16mi, 0, 0 }, + { X86::MOV16rr, X86::MOV16mr, 0, 0 }, + { X86::MOV32ri, X86::MOV32mi, 0, 0 }, + { X86::MOV32rr, X86::MOV32mr, 0, 0 }, + { X86::MOV64ri32, X86::MOV64mi32, 0, 0 }, + { X86::MOV64rr, X86::MOV64mr, 0, 0 }, + { X86::MOV8ri, X86::MOV8mi, 0, 0 }, + { X86::MOV8rr, X86::MOV8mr, 0, 0 }, + { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, 0, 0 }, + { X86::MOVAPDrr, X86::MOVAPDmr, 0, 16 }, + { X86::MOVAPSrr, X86::MOVAPSmr, 0, 16 }, + { X86::MOVDQArr, X86::MOVDQAmr, 0, 16 }, + { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 }, + { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 }, + { X86::MOVPS2SSrr, X86::MOVPS2SSmr, 0, 0 }, + { X86::MOVSDrr, X86::MOVSDmr, 0, 0 }, + { X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 }, + { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0, 0 }, + { X86::MOVSSrr, X86::MOVSSmr, 0, 0 }, + { X86::MOVUPDrr, X86::MOVUPDmr, 0, 0 }, + { X86::MOVUPSrr, X86::MOVUPSmr, 0, 0 }, + { X86::MUL16r, X86::MUL16m, 1, 0 }, + { X86::MUL32r, X86::MUL32m, 1, 0 }, + { X86::MUL64r, X86::MUL64m, 1, 0 }, + { X86::MUL8r, X86::MUL8m, 1, 0 }, + { X86::SETAEr, X86::SETAEm, 0, 0 }, + { X86::SETAr, X86::SETAm, 0, 0 }, + { X86::SETBEr, X86::SETBEm, 0, 0 }, + { X86::SETBr, X86::SETBm, 0, 0 }, + { X86::SETEr, X86::SETEm, 0, 0 }, + { X86::SETGEr, X86::SETGEm, 0, 0 }, + { X86::SETGr, X86::SETGm, 0, 0 }, + { X86::SETLEr, X86::SETLEm, 0, 0 }, + { X86::SETLr, X86::SETLm, 0, 0 }, + { X86::SETNEr, X86::SETNEm, 0, 0 }, + { X86::SETNOr, X86::SETNOm, 0, 0 }, + { X86::SETNPr, X86::SETNPm, 0, 0 }, + { X86::SETNSr, X86::SETNSm, 0, 0 }, + { X86::SETOr, X86::SETOm, 0, 0 }, + { X86::SETPr, X86::SETPm, 0, 0 }, + { X86::SETSr, X86::SETSm, 0, 0 }, + { X86::TAILJMPr, X86::TAILJMPm, 1, 0 }, + { X86::TEST16ri, X86::TEST16mi, 1, 0 }, + { X86::TEST32ri, X86::TEST32mi, 1, 0 }, + { X86::TEST64ri32, X86::TEST64mi32, 1, 0 }, + { X86::TEST8ri, X86::TEST8mi, 1, 0 } }; for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) { unsigned RegOp = OpTbl0[i][0]; unsigned MemOp = OpTbl0[i][1]; + unsigned Align = OpTbl0[i][3]; if (!RegOp2MemOpTable0.insert(std::make_pair((unsigned*)RegOp, - MemOp)).second) + std::make_pair(MemOp,Align))).second) assert(false && "Duplicated entries?"); unsigned FoldedLoad = OpTbl0[i][2]; // Index 0, folded load or store. 
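
The table-building loops above and below encode two things worth spelling out. Each RegOp2MemOpTable* entry now carries a second value: the minimum operand alignment the memory form requires (16 for most packed-SSE opcodes, 0 for no requirement), which the fold path later compares against the actual operand alignment before substituting a memory form. And MemOp2RegOpTable packs an AuxInfo word whose low four bits hold the folded operand index, with bits 4 and 5 flagging a folded load and a folded store. A minimal sketch of that bit layout (decodeAuxInfo is illustrative, not an LLVM API; the layout is taken from this constructor):

    #include <cstdint>

    // Illustrative decoder for the AuxInfo word built in the loops above.
    struct UnfoldInfo {
      unsigned OpIndex;  // low 4 bits: which operand the memory form replaces
      bool FoldedLoad;   // bit 4: the memory form performs the load
      bool FoldedStore;  // bit 5: the memory form performs the store
    };

    static UnfoldInfo decodeAuxInfo(uint32_t AuxInfo) {
      UnfoldInfo UI;
      UI.OpIndex = AuxInfo & 0xF;
      UI.FoldedLoad = (AuxInfo >> 4) & 1;
      UI.FoldedStore = (AuxInfo >> 5) & 1;
      return UI;
    }
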
@@ -319,338 +321,342 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) AmbEntries.push_back(MemOp); } - static const unsigned OpTbl1[][2] = { - { X86::CMP16rr, X86::CMP16rm }, - { X86::CMP32rr, X86::CMP32rm }, - { X86::CMP64rr, X86::CMP64rm }, - { X86::CMP8rr, X86::CMP8rm }, - { X86::CVTSD2SSrr, X86::CVTSD2SSrm }, - { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm }, - { X86::CVTSI2SDrr, X86::CVTSI2SDrm }, - { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm }, - { X86::CVTSI2SSrr, X86::CVTSI2SSrm }, - { X86::CVTSS2SDrr, X86::CVTSS2SDrm }, - { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm }, - { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm }, - { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm }, - { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm }, - { X86::FsMOVAPDrr, X86::MOVSDrm }, - { X86::FsMOVAPSrr, X86::MOVSSrm }, - { X86::IMUL16rri, X86::IMUL16rmi }, - { X86::IMUL16rri8, X86::IMUL16rmi8 }, - { X86::IMUL32rri, X86::IMUL32rmi }, - { X86::IMUL32rri8, X86::IMUL32rmi8 }, - { X86::IMUL64rri32, X86::IMUL64rmi32 }, - { X86::IMUL64rri8, X86::IMUL64rmi8 }, - { X86::Int_CMPSDrr, X86::Int_CMPSDrm }, - { X86::Int_CMPSSrr, X86::Int_CMPSSrm }, - { X86::Int_COMISDrr, X86::Int_COMISDrm }, - { X86::Int_COMISSrr, X86::Int_COMISSrm }, - { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm }, - { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm }, - { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm }, - { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm }, - { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm }, - { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm }, - { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm }, - { X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm }, - { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm }, - { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm }, - { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm }, - { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm }, - { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm }, - { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm }, - { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm }, - { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm }, - { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm }, - { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm }, - { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm }, - { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm }, - { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm }, - { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm }, - { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm }, - { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm }, - { X86::MOV16rr, X86::MOV16rm }, - { X86::MOV32rr, X86::MOV32rm }, - { X86::MOV64rr, X86::MOV64rm }, - { X86::MOV64toPQIrr, X86::MOVQI2PQIrm }, - { X86::MOV64toSDrr, X86::MOV64toSDrm }, - { X86::MOV8rr, X86::MOV8rm }, - { X86::MOVAPDrr, X86::MOVAPDrm }, - { X86::MOVAPSrr, X86::MOVAPSrm }, - { X86::MOVDDUPrr, X86::MOVDDUPrm }, - { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm }, - { X86::MOVDI2SSrr, X86::MOVDI2SSrm }, - { X86::MOVDQArr, X86::MOVDQArm }, - { X86::MOVSD2PDrr, X86::MOVSD2PDrm }, - { X86::MOVSDrr, X86::MOVSDrm }, - { X86::MOVSHDUPrr, X86::MOVSHDUPrm }, - { X86::MOVSLDUPrr, X86::MOVSLDUPrm }, - { X86::MOVSS2PSrr, X86::MOVSS2PSrm }, - { X86::MOVSSrr, X86::MOVSSrm }, - { X86::MOVSX16rr8, X86::MOVSX16rm8 }, - { X86::MOVSX32rr16, X86::MOVSX32rm16 }, - { X86::MOVSX32rr8, X86::MOVSX32rm8 }, - { X86::MOVSX64rr16, X86::MOVSX64rm16 }, - { X86::MOVSX64rr32, X86::MOVSX64rm32 }, - { X86::MOVSX64rr8, X86::MOVSX64rm8 }, - { X86::MOVUPDrr, X86::MOVUPDrm }, - { X86::MOVUPSrr, X86::MOVUPSrm }, - { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm }, - { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm }, - { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm }, - { X86::MOVZX16rr8, X86::MOVZX16rm8 }, - { 
X86::MOVZX32rr16, X86::MOVZX32rm16 }, - { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8 }, - { X86::MOVZX32rr8, X86::MOVZX32rm8 }, - { X86::MOVZX64rr16, X86::MOVZX64rm16 }, - { X86::MOVZX64rr32, X86::MOVZX64rm32 }, - { X86::MOVZX64rr8, X86::MOVZX64rm8 }, - { X86::PSHUFDri, X86::PSHUFDmi }, - { X86::PSHUFHWri, X86::PSHUFHWmi }, - { X86::PSHUFLWri, X86::PSHUFLWmi }, - { X86::RCPPSr, X86::RCPPSm }, - { X86::RCPPSr_Int, X86::RCPPSm_Int }, - { X86::RSQRTPSr, X86::RSQRTPSm }, - { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int }, - { X86::RSQRTSSr, X86::RSQRTSSm }, - { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int }, - { X86::SQRTPDr, X86::SQRTPDm }, - { X86::SQRTPDr_Int, X86::SQRTPDm_Int }, - { X86::SQRTPSr, X86::SQRTPSm }, - { X86::SQRTPSr_Int, X86::SQRTPSm_Int }, - { X86::SQRTSDr, X86::SQRTSDm }, - { X86::SQRTSDr_Int, X86::SQRTSDm_Int }, - { X86::SQRTSSr, X86::SQRTSSm }, - { X86::SQRTSSr_Int, X86::SQRTSSm_Int }, - { X86::TEST16rr, X86::TEST16rm }, - { X86::TEST32rr, X86::TEST32rm }, - { X86::TEST64rr, X86::TEST64rm }, - { X86::TEST8rr, X86::TEST8rm }, + static const unsigned OpTbl1[][3] = { + { X86::CMP16rr, X86::CMP16rm, 0 }, + { X86::CMP32rr, X86::CMP32rm, 0 }, + { X86::CMP64rr, X86::CMP64rm, 0 }, + { X86::CMP8rr, X86::CMP8rm, 0 }, + { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 }, + { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 }, + { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 }, + { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 }, + { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 }, + { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 }, + { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 }, + { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 }, + { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 }, + { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 }, + { X86::FsMOVAPDrr, X86::MOVSDrm, 0 }, + { X86::FsMOVAPSrr, X86::MOVSSrm, 0 }, + { X86::IMUL16rri, X86::IMUL16rmi, 0 }, + { X86::IMUL16rri8, X86::IMUL16rmi8, 0 }, + { X86::IMUL32rri, X86::IMUL32rmi, 0 }, + { X86::IMUL32rri8, X86::IMUL32rmi8, 0 }, + { X86::IMUL64rri32, X86::IMUL64rmi32, 0 }, + { X86::IMUL64rri8, X86::IMUL64rmi8, 0 }, + { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 }, + { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 }, + { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 }, + { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 }, + { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, 16 }, + { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm, 16 }, + { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm, 16 }, + { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, 16 }, + { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, 16 }, + { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 }, + { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm, 0 }, + { X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm, 0 }, + { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 }, + { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 }, + { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 }, + { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 }, + { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 }, + { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 }, + { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm, 0 }, + { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm, 0 }, + { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm, 16 }, + { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm, 16 }, + { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 }, + { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 }, + { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 }, + { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, 0 }, + { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, 0 }, + { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, 0 }, + { X86::MOV16rr, X86::MOV16rm, 0 }, + { X86::MOV32rr, X86::MOV32rm, 0 }, + { X86::MOV64rr, 
X86::MOV64rm, 0 }, + { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 }, + { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 }, + { X86::MOV8rr, X86::MOV8rm, 0 }, + { X86::MOVAPDrr, X86::MOVAPDrm, 16 }, + { X86::MOVAPSrr, X86::MOVAPSrm, 16 }, + { X86::MOVDDUPrr, X86::MOVDDUPrm, 0 }, + { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 }, + { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 }, + { X86::MOVDQArr, X86::MOVDQArm, 16 }, + { X86::MOVSD2PDrr, X86::MOVSD2PDrm, 0 }, + { X86::MOVSDrr, X86::MOVSDrm, 0 }, + { X86::MOVSHDUPrr, X86::MOVSHDUPrm, 16 }, + { X86::MOVSLDUPrr, X86::MOVSLDUPrm, 16 }, + { X86::MOVSS2PSrr, X86::MOVSS2PSrm, 0 }, + { X86::MOVSSrr, X86::MOVSSrm, 0 }, + { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 }, + { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 }, + { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 }, + { X86::MOVSX64rr16, X86::MOVSX64rm16, 0 }, + { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 }, + { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 }, + { X86::MOVUPDrr, X86::MOVUPDrm, 16 }, + { X86::MOVUPSrr, X86::MOVUPSrm, 16 }, + { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm, 0 }, + { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 }, + { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, 16 }, + { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 }, + { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 }, + { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 }, + { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 }, + { X86::MOVZX64rr16, X86::MOVZX64rm16, 0 }, + { X86::MOVZX64rr32, X86::MOVZX64rm32, 0 }, + { X86::MOVZX64rr8, X86::MOVZX64rm8, 0 }, + { X86::PSHUFDri, X86::PSHUFDmi, 16 }, + { X86::PSHUFHWri, X86::PSHUFHWmi, 16 }, + { X86::PSHUFLWri, X86::PSHUFLWmi, 16 }, + { X86::RCPPSr, X86::RCPPSm, 16 }, + { X86::RCPPSr_Int, X86::RCPPSm_Int, 16 }, + { X86::RSQRTPSr, X86::RSQRTPSm, 16 }, + { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int, 16 }, + { X86::RSQRTSSr, X86::RSQRTSSm, 0 }, + { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 }, + { X86::SQRTPDr, X86::SQRTPDm, 16 }, + { X86::SQRTPDr_Int, X86::SQRTPDm_Int, 16 }, + { X86::SQRTPSr, X86::SQRTPSm, 16 }, + { X86::SQRTPSr_Int, X86::SQRTPSm_Int, 16 }, + { X86::SQRTSDr, X86::SQRTSDm, 0 }, + { X86::SQRTSDr_Int, X86::SQRTSDm_Int, 0 }, + { X86::SQRTSSr, X86::SQRTSSm, 0 }, + { X86::SQRTSSr_Int, X86::SQRTSSm_Int, 0 }, + { X86::TEST16rr, X86::TEST16rm, 0 }, + { X86::TEST32rr, X86::TEST32rm, 0 }, + { X86::TEST64rr, X86::TEST64rm, 0 }, + { X86::TEST8rr, X86::TEST8rm, 0 }, // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0 - { X86::UCOMISDrr, X86::UCOMISDrm }, - { X86::UCOMISSrr, X86::UCOMISSrm } + { X86::UCOMISDrr, X86::UCOMISDrm, 0 }, + { X86::UCOMISSrr, X86::UCOMISSrm, 0 } }; for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { unsigned RegOp = OpTbl1[i][0]; unsigned MemOp = OpTbl1[i][1]; + unsigned Align = OpTbl1[i][2]; if (!RegOp2MemOpTable1.insert(std::make_pair((unsigned*)RegOp, - MemOp)).second) + std::make_pair(MemOp,Align))).second) assert(false && "Duplicated entries?"); - unsigned AuxInfo = 1 | (1 << 4); // Index 1, folded load + // Index 1, folded load + unsigned AuxInfo = 1 | (1 << 4); if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr) if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, std::make_pair(RegOp, AuxInfo))).second) AmbEntries.push_back(MemOp); } - static const unsigned OpTbl2[][2] = { - { X86::ADC32rr, X86::ADC32rm }, - { X86::ADC64rr, X86::ADC64rm }, - { X86::ADD16rr, X86::ADD16rm }, - { X86::ADD32rr, X86::ADD32rm }, - { X86::ADD64rr, X86::ADD64rm }, - { X86::ADD8rr, X86::ADD8rm }, - { X86::ADDPDrr, X86::ADDPDrm }, - { X86::ADDPSrr, X86::ADDPSrm }, - { X86::ADDSDrr, X86::ADDSDrm }, - { X86::ADDSSrr, X86::ADDSSrm }, - { X86::ADDSUBPDrr, 
X86::ADDSUBPDrm }, - { X86::ADDSUBPSrr, X86::ADDSUBPSrm }, - { X86::AND16rr, X86::AND16rm }, - { X86::AND32rr, X86::AND32rm }, - { X86::AND64rr, X86::AND64rm }, - { X86::AND8rr, X86::AND8rm }, - { X86::ANDNPDrr, X86::ANDNPDrm }, - { X86::ANDNPSrr, X86::ANDNPSrm }, - { X86::ANDPDrr, X86::ANDPDrm }, - { X86::ANDPSrr, X86::ANDPSrm }, - { X86::CMOVA16rr, X86::CMOVA16rm }, - { X86::CMOVA32rr, X86::CMOVA32rm }, - { X86::CMOVA64rr, X86::CMOVA64rm }, - { X86::CMOVAE16rr, X86::CMOVAE16rm }, - { X86::CMOVAE32rr, X86::CMOVAE32rm }, - { X86::CMOVAE64rr, X86::CMOVAE64rm }, - { X86::CMOVB16rr, X86::CMOVB16rm }, - { X86::CMOVB32rr, X86::CMOVB32rm }, - { X86::CMOVB64rr, X86::CMOVB64rm }, - { X86::CMOVBE16rr, X86::CMOVBE16rm }, - { X86::CMOVBE32rr, X86::CMOVBE32rm }, - { X86::CMOVBE64rr, X86::CMOVBE64rm }, - { X86::CMOVE16rr, X86::CMOVE16rm }, - { X86::CMOVE32rr, X86::CMOVE32rm }, - { X86::CMOVE64rr, X86::CMOVE64rm }, - { X86::CMOVG16rr, X86::CMOVG16rm }, - { X86::CMOVG32rr, X86::CMOVG32rm }, - { X86::CMOVG64rr, X86::CMOVG64rm }, - { X86::CMOVGE16rr, X86::CMOVGE16rm }, - { X86::CMOVGE32rr, X86::CMOVGE32rm }, - { X86::CMOVGE64rr, X86::CMOVGE64rm }, - { X86::CMOVL16rr, X86::CMOVL16rm }, - { X86::CMOVL32rr, X86::CMOVL32rm }, - { X86::CMOVL64rr, X86::CMOVL64rm }, - { X86::CMOVLE16rr, X86::CMOVLE16rm }, - { X86::CMOVLE32rr, X86::CMOVLE32rm }, - { X86::CMOVLE64rr, X86::CMOVLE64rm }, - { X86::CMOVNE16rr, X86::CMOVNE16rm }, - { X86::CMOVNE32rr, X86::CMOVNE32rm }, - { X86::CMOVNE64rr, X86::CMOVNE64rm }, - { X86::CMOVNO16rr, X86::CMOVNO16rm }, - { X86::CMOVNO32rr, X86::CMOVNO32rm }, - { X86::CMOVNO64rr, X86::CMOVNO64rm }, - { X86::CMOVNP16rr, X86::CMOVNP16rm }, - { X86::CMOVNP32rr, X86::CMOVNP32rm }, - { X86::CMOVNP64rr, X86::CMOVNP64rm }, - { X86::CMOVNS16rr, X86::CMOVNS16rm }, - { X86::CMOVNS32rr, X86::CMOVNS32rm }, - { X86::CMOVNS64rr, X86::CMOVNS64rm }, - { X86::CMOVO16rr, X86::CMOVO16rm }, - { X86::CMOVO32rr, X86::CMOVO32rm }, - { X86::CMOVO64rr, X86::CMOVO64rm }, - { X86::CMOVP16rr, X86::CMOVP16rm }, - { X86::CMOVP32rr, X86::CMOVP32rm }, - { X86::CMOVP64rr, X86::CMOVP64rm }, - { X86::CMOVS16rr, X86::CMOVS16rm }, - { X86::CMOVS32rr, X86::CMOVS32rm }, - { X86::CMOVS64rr, X86::CMOVS64rm }, - { X86::CMPPDrri, X86::CMPPDrmi }, - { X86::CMPPSrri, X86::CMPPSrmi }, - { X86::CMPSDrr, X86::CMPSDrm }, - { X86::CMPSSrr, X86::CMPSSrm }, - { X86::DIVPDrr, X86::DIVPDrm }, - { X86::DIVPSrr, X86::DIVPSrm }, - { X86::DIVSDrr, X86::DIVSDrm }, - { X86::DIVSSrr, X86::DIVSSrm }, - { X86::FsANDNPDrr, X86::FsANDNPDrm }, - { X86::FsANDNPSrr, X86::FsANDNPSrm }, - { X86::FsANDPDrr, X86::FsANDPDrm }, - { X86::FsANDPSrr, X86::FsANDPSrm }, - { X86::FsORPDrr, X86::FsORPDrm }, - { X86::FsORPSrr, X86::FsORPSrm }, - { X86::FsXORPDrr, X86::FsXORPDrm }, - { X86::FsXORPSrr, X86::FsXORPSrm }, - { X86::HADDPDrr, X86::HADDPDrm }, - { X86::HADDPSrr, X86::HADDPSrm }, - { X86::HSUBPDrr, X86::HSUBPDrm }, - { X86::HSUBPSrr, X86::HSUBPSrm }, - { X86::IMUL16rr, X86::IMUL16rm }, - { X86::IMUL32rr, X86::IMUL32rm }, - { X86::IMUL64rr, X86::IMUL64rm }, - { X86::MAXPDrr, X86::MAXPDrm }, - { X86::MAXPDrr_Int, X86::MAXPDrm_Int }, - { X86::MAXPSrr, X86::MAXPSrm }, - { X86::MAXPSrr_Int, X86::MAXPSrm_Int }, - { X86::MAXSDrr, X86::MAXSDrm }, - { X86::MAXSDrr_Int, X86::MAXSDrm_Int }, - { X86::MAXSSrr, X86::MAXSSrm }, - { X86::MAXSSrr_Int, X86::MAXSSrm_Int }, - { X86::MINPDrr, X86::MINPDrm }, - { X86::MINPDrr_Int, X86::MINPDrm_Int }, - { X86::MINPSrr, X86::MINPSrm }, - { X86::MINPSrr_Int, X86::MINPSrm_Int }, - { X86::MINSDrr, X86::MINSDrm }, - { X86::MINSDrr_Int, 
X86::MINSDrm_Int }, - { X86::MINSSrr, X86::MINSSrm }, - { X86::MINSSrr_Int, X86::MINSSrm_Int }, - { X86::MULPDrr, X86::MULPDrm }, - { X86::MULPSrr, X86::MULPSrm }, - { X86::MULSDrr, X86::MULSDrm }, - { X86::MULSSrr, X86::MULSSrm }, - { X86::OR16rr, X86::OR16rm }, - { X86::OR32rr, X86::OR32rm }, - { X86::OR64rr, X86::OR64rm }, - { X86::OR8rr, X86::OR8rm }, - { X86::ORPDrr, X86::ORPDrm }, - { X86::ORPSrr, X86::ORPSrm }, - { X86::PACKSSDWrr, X86::PACKSSDWrm }, - { X86::PACKSSWBrr, X86::PACKSSWBrm }, - { X86::PACKUSWBrr, X86::PACKUSWBrm }, - { X86::PADDBrr, X86::PADDBrm }, - { X86::PADDDrr, X86::PADDDrm }, - { X86::PADDQrr, X86::PADDQrm }, - { X86::PADDSBrr, X86::PADDSBrm }, - { X86::PADDSWrr, X86::PADDSWrm }, - { X86::PADDWrr, X86::PADDWrm }, - { X86::PANDNrr, X86::PANDNrm }, - { X86::PANDrr, X86::PANDrm }, - { X86::PAVGBrr, X86::PAVGBrm }, - { X86::PAVGWrr, X86::PAVGWrm }, - { X86::PCMPEQBrr, X86::PCMPEQBrm }, - { X86::PCMPEQDrr, X86::PCMPEQDrm }, - { X86::PCMPEQWrr, X86::PCMPEQWrm }, - { X86::PCMPGTBrr, X86::PCMPGTBrm }, - { X86::PCMPGTDrr, X86::PCMPGTDrm }, - { X86::PCMPGTWrr, X86::PCMPGTWrm }, - { X86::PINSRWrri, X86::PINSRWrmi }, - { X86::PMADDWDrr, X86::PMADDWDrm }, - { X86::PMAXSWrr, X86::PMAXSWrm }, - { X86::PMAXUBrr, X86::PMAXUBrm }, - { X86::PMINSWrr, X86::PMINSWrm }, - { X86::PMINUBrr, X86::PMINUBrm }, - { X86::PMULDQrr, X86::PMULDQrm }, - { X86::PMULHUWrr, X86::PMULHUWrm }, - { X86::PMULHWrr, X86::PMULHWrm }, - { X86::PMULLDrr, X86::PMULLDrm }, - { X86::PMULLDrr_int, X86::PMULLDrm_int }, - { X86::PMULLWrr, X86::PMULLWrm }, - { X86::PMULUDQrr, X86::PMULUDQrm }, - { X86::PORrr, X86::PORrm }, - { X86::PSADBWrr, X86::PSADBWrm }, - { X86::PSLLDrr, X86::PSLLDrm }, - { X86::PSLLQrr, X86::PSLLQrm }, - { X86::PSLLWrr, X86::PSLLWrm }, - { X86::PSRADrr, X86::PSRADrm }, - { X86::PSRAWrr, X86::PSRAWrm }, - { X86::PSRLDrr, X86::PSRLDrm }, - { X86::PSRLQrr, X86::PSRLQrm }, - { X86::PSRLWrr, X86::PSRLWrm }, - { X86::PSUBBrr, X86::PSUBBrm }, - { X86::PSUBDrr, X86::PSUBDrm }, - { X86::PSUBSBrr, X86::PSUBSBrm }, - { X86::PSUBSWrr, X86::PSUBSWrm }, - { X86::PSUBWrr, X86::PSUBWrm }, - { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm }, - { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm }, - { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm }, - { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm }, - { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm }, - { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm }, - { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm }, - { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm }, - { X86::PXORrr, X86::PXORrm }, - { X86::SBB32rr, X86::SBB32rm }, - { X86::SBB64rr, X86::SBB64rm }, - { X86::SHUFPDrri, X86::SHUFPDrmi }, - { X86::SHUFPSrri, X86::SHUFPSrmi }, - { X86::SUB16rr, X86::SUB16rm }, - { X86::SUB32rr, X86::SUB32rm }, - { X86::SUB64rr, X86::SUB64rm }, - { X86::SUB8rr, X86::SUB8rm }, - { X86::SUBPDrr, X86::SUBPDrm }, - { X86::SUBPSrr, X86::SUBPSrm }, - { X86::SUBSDrr, X86::SUBSDrm }, - { X86::SUBSSrr, X86::SUBSSrm }, + static const unsigned OpTbl2[][3] = { + { X86::ADC32rr, X86::ADC32rm, 0 }, + { X86::ADC64rr, X86::ADC64rm, 0 }, + { X86::ADD16rr, X86::ADD16rm, 0 }, + { X86::ADD32rr, X86::ADD32rm, 0 }, + { X86::ADD64rr, X86::ADD64rm, 0 }, + { X86::ADD8rr, X86::ADD8rm, 0 }, + { X86::ADDPDrr, X86::ADDPDrm, 16 }, + { X86::ADDPSrr, X86::ADDPSrm, 16 }, + { X86::ADDSDrr, X86::ADDSDrm, 0 }, + { X86::ADDSSrr, X86::ADDSSrm, 0 }, + { X86::ADDSUBPDrr, X86::ADDSUBPDrm, 16 }, + { X86::ADDSUBPSrr, X86::ADDSUBPSrm, 16 }, + { X86::AND16rr, X86::AND16rm, 0 }, + { X86::AND32rr, X86::AND32rm, 0 }, + { X86::AND64rr, X86::AND64rm, 0 }, + { X86::AND8rr, X86::AND8rm, 0 }, + { 
X86::ANDNPDrr, X86::ANDNPDrm, 16 }, + { X86::ANDNPSrr, X86::ANDNPSrm, 16 }, + { X86::ANDPDrr, X86::ANDPDrm, 16 }, + { X86::ANDPSrr, X86::ANDPSrm, 16 }, + { X86::CMOVA16rr, X86::CMOVA16rm, 0 }, + { X86::CMOVA32rr, X86::CMOVA32rm, 0 }, + { X86::CMOVA64rr, X86::CMOVA64rm, 0 }, + { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 }, + { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 }, + { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 }, + { X86::CMOVB16rr, X86::CMOVB16rm, 0 }, + { X86::CMOVB32rr, X86::CMOVB32rm, 0 }, + { X86::CMOVB64rr, X86::CMOVB64rm, 0 }, + { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 }, + { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 }, + { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 }, + { X86::CMOVE16rr, X86::CMOVE16rm, 0 }, + { X86::CMOVE32rr, X86::CMOVE32rm, 0 }, + { X86::CMOVE64rr, X86::CMOVE64rm, 0 }, + { X86::CMOVG16rr, X86::CMOVG16rm, 0 }, + { X86::CMOVG32rr, X86::CMOVG32rm, 0 }, + { X86::CMOVG64rr, X86::CMOVG64rm, 0 }, + { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 }, + { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 }, + { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 }, + { X86::CMOVL16rr, X86::CMOVL16rm, 0 }, + { X86::CMOVL32rr, X86::CMOVL32rm, 0 }, + { X86::CMOVL64rr, X86::CMOVL64rm, 0 }, + { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 }, + { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 }, + { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 }, + { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 }, + { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 }, + { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 }, + { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 }, + { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 }, + { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 }, + { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 }, + { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 }, + { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 }, + { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 }, + { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 }, + { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 }, + { X86::CMOVO16rr, X86::CMOVO16rm, 0 }, + { X86::CMOVO32rr, X86::CMOVO32rm, 0 }, + { X86::CMOVO64rr, X86::CMOVO64rm, 0 }, + { X86::CMOVP16rr, X86::CMOVP16rm, 0 }, + { X86::CMOVP32rr, X86::CMOVP32rm, 0 }, + { X86::CMOVP64rr, X86::CMOVP64rm, 0 }, + { X86::CMOVS16rr, X86::CMOVS16rm, 0 }, + { X86::CMOVS32rr, X86::CMOVS32rm, 0 }, + { X86::CMOVS64rr, X86::CMOVS64rm, 0 }, + { X86::CMPPDrri, X86::CMPPDrmi, 16 }, + { X86::CMPPSrri, X86::CMPPSrmi, 16 }, + { X86::CMPSDrr, X86::CMPSDrm, 0 }, + { X86::CMPSSrr, X86::CMPSSrm, 0 }, + { X86::DIVPDrr, X86::DIVPDrm, 16 }, + { X86::DIVPSrr, X86::DIVPSrm, 16 }, + { X86::DIVSDrr, X86::DIVSDrm, 0 }, + { X86::DIVSSrr, X86::DIVSSrm, 0 }, + { X86::FsANDNPDrr, X86::FsANDNPDrm, 16 }, + { X86::FsANDNPSrr, X86::FsANDNPSrm, 16 }, + { X86::FsANDPDrr, X86::FsANDPDrm, 16 }, + { X86::FsANDPSrr, X86::FsANDPSrm, 16 }, + { X86::FsORPDrr, X86::FsORPDrm, 16 }, + { X86::FsORPSrr, X86::FsORPSrm, 16 }, + { X86::FsXORPDrr, X86::FsXORPDrm, 16 }, + { X86::FsXORPSrr, X86::FsXORPSrm, 16 }, + { X86::HADDPDrr, X86::HADDPDrm, 16 }, + { X86::HADDPSrr, X86::HADDPSrm, 16 }, + { X86::HSUBPDrr, X86::HSUBPDrm, 16 }, + { X86::HSUBPSrr, X86::HSUBPSrm, 16 }, + { X86::IMUL16rr, X86::IMUL16rm, 0 }, + { X86::IMUL32rr, X86::IMUL32rm, 0 }, + { X86::IMUL64rr, X86::IMUL64rm, 0 }, + { X86::MAXPDrr, X86::MAXPDrm, 16 }, + { X86::MAXPDrr_Int, X86::MAXPDrm_Int, 16 }, + { X86::MAXPSrr, X86::MAXPSrm, 16 }, + { X86::MAXPSrr_Int, X86::MAXPSrm_Int, 16 }, + { X86::MAXSDrr, X86::MAXSDrm, 0 }, + { X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 }, + { X86::MAXSSrr, X86::MAXSSrm, 0 }, + { X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 }, + { X86::MINPDrr, X86::MINPDrm, 16 }, + { X86::MINPDrr_Int, X86::MINPDrm_Int, 16 }, + { X86::MINPSrr, X86::MINPSrm, 16 }, + { 
X86::MINPSrr_Int, X86::MINPSrm_Int, 16 }, + { X86::MINSDrr, X86::MINSDrm, 0 }, + { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 }, + { X86::MINSSrr, X86::MINSSrm, 0 }, + { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 }, + { X86::MULPDrr, X86::MULPDrm, 16 }, + { X86::MULPSrr, X86::MULPSrm, 16 }, + { X86::MULSDrr, X86::MULSDrm, 0 }, + { X86::MULSSrr, X86::MULSSrm, 0 }, + { X86::OR16rr, X86::OR16rm, 0 }, + { X86::OR32rr, X86::OR32rm, 0 }, + { X86::OR64rr, X86::OR64rm, 0 }, + { X86::OR8rr, X86::OR8rm, 0 }, + { X86::ORPDrr, X86::ORPDrm, 16 }, + { X86::ORPSrr, X86::ORPSrm, 16 }, + { X86::PACKSSDWrr, X86::PACKSSDWrm, 16 }, + { X86::PACKSSWBrr, X86::PACKSSWBrm, 16 }, + { X86::PACKUSWBrr, X86::PACKUSWBrm, 16 }, + { X86::PADDBrr, X86::PADDBrm, 16 }, + { X86::PADDDrr, X86::PADDDrm, 16 }, + { X86::PADDQrr, X86::PADDQrm, 16 }, + { X86::PADDSBrr, X86::PADDSBrm, 16 }, + { X86::PADDSWrr, X86::PADDSWrm, 16 }, + { X86::PADDWrr, X86::PADDWrm, 16 }, + { X86::PANDNrr, X86::PANDNrm, 16 }, + { X86::PANDrr, X86::PANDrm, 16 }, + { X86::PAVGBrr, X86::PAVGBrm, 16 }, + { X86::PAVGWrr, X86::PAVGWrm, 16 }, + { X86::PCMPEQBrr, X86::PCMPEQBrm, 16 }, + { X86::PCMPEQDrr, X86::PCMPEQDrm, 16 }, + { X86::PCMPEQWrr, X86::PCMPEQWrm, 16 }, + { X86::PCMPGTBrr, X86::PCMPGTBrm, 16 }, + { X86::PCMPGTDrr, X86::PCMPGTDrm, 16 }, + { X86::PCMPGTWrr, X86::PCMPGTWrm, 16 }, + { X86::PINSRWrri, X86::PINSRWrmi, 16 }, + { X86::PMADDWDrr, X86::PMADDWDrm, 16 }, + { X86::PMAXSWrr, X86::PMAXSWrm, 16 }, + { X86::PMAXUBrr, X86::PMAXUBrm, 16 }, + { X86::PMINSWrr, X86::PMINSWrm, 16 }, + { X86::PMINUBrr, X86::PMINUBrm, 16 }, + { X86::PMULDQrr, X86::PMULDQrm, 16 }, + { X86::PMULHUWrr, X86::PMULHUWrm, 16 }, + { X86::PMULHWrr, X86::PMULHWrm, 16 }, + { X86::PMULLDrr, X86::PMULLDrm, 16 }, + { X86::PMULLDrr_int, X86::PMULLDrm_int, 16 }, + { X86::PMULLWrr, X86::PMULLWrm, 16 }, + { X86::PMULUDQrr, X86::PMULUDQrm, 16 }, + { X86::PORrr, X86::PORrm, 16 }, + { X86::PSADBWrr, X86::PSADBWrm, 16 }, + { X86::PSLLDrr, X86::PSLLDrm, 16 }, + { X86::PSLLQrr, X86::PSLLQrm, 16 }, + { X86::PSLLWrr, X86::PSLLWrm, 16 }, + { X86::PSRADrr, X86::PSRADrm, 16 }, + { X86::PSRAWrr, X86::PSRAWrm, 16 }, + { X86::PSRLDrr, X86::PSRLDrm, 16 }, + { X86::PSRLQrr, X86::PSRLQrm, 16 }, + { X86::PSRLWrr, X86::PSRLWrm, 16 }, + { X86::PSUBBrr, X86::PSUBBrm, 16 }, + { X86::PSUBDrr, X86::PSUBDrm, 16 }, + { X86::PSUBSBrr, X86::PSUBSBrm, 16 }, + { X86::PSUBSWrr, X86::PSUBSWrm, 16 }, + { X86::PSUBWrr, X86::PSUBWrm, 16 }, + { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, 16 }, + { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, 16 }, + { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, 16 }, + { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, 16 }, + { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, 16 }, + { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, 16 }, + { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, 16 }, + { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, 16 }, + { X86::PXORrr, X86::PXORrm, 16 }, + { X86::SBB32rr, X86::SBB32rm, 0 }, + { X86::SBB64rr, X86::SBB64rm, 0 }, + { X86::SHUFPDrri, X86::SHUFPDrmi, 16 }, + { X86::SHUFPSrri, X86::SHUFPSrmi, 16 }, + { X86::SUB16rr, X86::SUB16rm, 0 }, + { X86::SUB32rr, X86::SUB32rm, 0 }, + { X86::SUB64rr, X86::SUB64rm, 0 }, + { X86::SUB8rr, X86::SUB8rm, 0 }, + { X86::SUBPDrr, X86::SUBPDrm, 16 }, + { X86::SUBPSrr, X86::SUBPSrm, 16 }, + { X86::SUBSDrr, X86::SUBSDrm, 0 }, + { X86::SUBSSrr, X86::SUBSSrm, 0 }, // FIXME: TEST*rr -> swapped operand of TEST*mr. 
- { X86::UNPCKHPDrr, X86::UNPCKHPDrm }, - { X86::UNPCKHPSrr, X86::UNPCKHPSrm }, - { X86::UNPCKLPDrr, X86::UNPCKLPDrm }, - { X86::UNPCKLPSrr, X86::UNPCKLPSrm }, - { X86::XOR16rr, X86::XOR16rm }, - { X86::XOR32rr, X86::XOR32rm }, - { X86::XOR64rr, X86::XOR64rm }, - { X86::XOR8rr, X86::XOR8rm }, - { X86::XORPDrr, X86::XORPDrm }, - { X86::XORPSrr, X86::XORPSrm } + { X86::UNPCKHPDrr, X86::UNPCKHPDrm, 16 }, + { X86::UNPCKHPSrr, X86::UNPCKHPSrm, 16 }, + { X86::UNPCKLPDrr, X86::UNPCKLPDrm, 16 }, + { X86::UNPCKLPSrr, X86::UNPCKLPSrm, 16 }, + { X86::XOR16rr, X86::XOR16rm, 0 }, + { X86::XOR32rr, X86::XOR32rm, 0 }, + { X86::XOR64rr, X86::XOR64rm, 0 }, + { X86::XOR8rr, X86::XOR8rm, 0 }, + { X86::XORPDrr, X86::XORPDrm, 16 }, + { X86::XORPSrr, X86::XORPSrm, 16 } }; for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) { unsigned RegOp = OpTbl2[i][0]; unsigned MemOp = OpTbl2[i][1]; + unsigned Align = OpTbl2[i][2]; if (!RegOp2MemOpTable2.insert(std::make_pair((unsigned*)RegOp, - MemOp)).second) + std::make_pair(MemOp,Align))).second) assert(false && "Duplicated entries?"); - unsigned AuxInfo = 2 | (1 << 4); // Index 2, folded load + // Index 2, folded load + unsigned AuxInfo = 2 | (1 << 4); if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, std::make_pair(RegOp, AuxInfo))).second) AmbEntries.push_back(MemOp); @@ -760,7 +766,6 @@ unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI, return 0; } - /// regIsPICBase - Return true if register is PIC base (i.e.g defined by /// X86::MOVPC32r. static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) { @@ -776,37 +781,9 @@ static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) { return isPICBase; } -/// isGVStub - Return true if the GV requires an extra load to get the -/// real address. -static inline bool isGVStub(GlobalValue *GV, X86TargetMachine &TM) { - return TM.getSubtarget<X86Subtarget>().GVRequiresExtraLoad(GV, TM, false); -} - -/// CanRematLoadWithDispOperand - Return true if a load with the specified -/// operand is a candidate for remat: for this to be true we need to know that -/// the load will always return the same value, even if moved. -static bool CanRematLoadWithDispOperand(const MachineOperand &MO, - X86TargetMachine &TM) { - // Loads from constant pool entries can be remat'd. - if (MO.isCPI()) return true; - - // We can remat globals in some cases. - if (MO.isGlobal()) { - // If this is a load of a stub, not of the global, we can remat it. This - // access will always return the address of the global. - if (isGVStub(MO.getGlobal(), TM)) - return true; - - // If the global itself is constant, we can remat the load. 
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(MO.getGlobal())) - if (GV->isConstant()) - return true; - } - return false; -} - bool -X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const { +X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, + AliasAnalysis *AA) const { switch (MI->getOpcode()) { default: break; case X86::MOV8rm: @@ -825,7 +802,7 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const { if (MI->getOperand(1).isReg() && MI->getOperand(2).isImm() && MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && - CanRematLoadWithDispOperand(MI->getOperand(4), TM)) { + MI->isInvariantLoad(AA)) { unsigned BaseReg = MI->getOperand(1).getReg(); if (BaseReg == 0 || BaseReg == X86::RIP) return true; @@ -876,7 +853,7 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const { /// isSafeToClobberEFLAGS - Return true if it's safe insert an instruction that /// would clobber the EFLAGS condition register. Note the result may be /// conservative. If it cannot definitely determine the safety after visiting -/// two instructions it assumes it's not safe. +/// a few instructions in each direction it assumes it's not safe. static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) { // It's always safe to clobber EFLAGS at the end of a block. @@ -884,11 +861,13 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB, return true; // For compile time consideration, if we are not able to determine the - // safety after visiting 2 instructions, we will assume it's not safe. - for (unsigned i = 0; i < 2; ++i) { + // safety after visiting 4 instructions in each direction, we will assume + // it's not safe. + MachineBasicBlock::iterator Iter = I; + for (unsigned i = 0; i < 4; ++i) { bool SeenDef = false; - for (unsigned j = 0, e = I->getNumOperands(); j != e; ++j) { - MachineOperand &MO = I->getOperand(j); + for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) { + MachineOperand &MO = Iter->getOperand(j); if (!MO.isReg()) continue; if (MO.getReg() == X86::EFLAGS) { @@ -901,10 +880,33 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB, if (SeenDef) // This instruction defines EFLAGS, no need to look any further. return true; - ++I; + ++Iter; // If we make it to the end of the block, it's safe to clobber EFLAGS. - if (I == MBB.end()) + if (Iter == MBB.end()) + return true; + } + + Iter = I; + for (unsigned i = 0; i < 4; ++i) { + // If we make it to the beginning of the block, it's safe to clobber + // EFLAGS iff EFLAGS is not live-in. + if (Iter == MBB.begin()) + return !MBB.isLiveIn(X86::EFLAGS); + + --Iter; + bool SawKill = false; + for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) { + MachineOperand &MO = Iter->getOperand(j); + if (MO.isReg() && MO.getReg() == X86::EFLAGS) { + if (MO.isDef()) return MO.isDead(); + if (MO.isKill()) SawKill = true; + } + } + + if (SawKill) + // This instruction kills EFLAGS and doesn't redefine it, so + // there's no need to look further. return true; } @@ -914,14 +916,11 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB, void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned DestReg, + unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig) const { DebugLoc DL = DebugLoc::getUnknownLoc(); if (I != MBB.end()) DL = I->getDebugLoc(); - unsigned SubIdx = Orig->getOperand(0).isReg() - ? 
Orig->getOperand(0).getSubReg() : 0; - bool ChangeSubIdx = SubIdx != 0; if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) { DestReg = RI.getSubReg(DestReg, SubIdx); SubIdx = 0; @@ -929,76 +928,36 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, // MOV32r0 etc. are implemented with xor which clobbers condition code. // Re-materialize them as movri instructions to avoid side effects. - bool Emitted = false; - switch (Orig->getOpcode()) { + bool Clone = true; + unsigned Opc = Orig->getOpcode(); + switch (Opc) { default: break; case X86::MOV8r0: case X86::MOV16r0: - case X86::MOV32r0: - case X86::MOV64r0: { + case X86::MOV32r0: { if (!isSafeToClobberEFLAGS(MBB, I)) { - unsigned Opc = 0; - switch (Orig->getOpcode()) { + switch (Opc) { default: break; case X86::MOV8r0: Opc = X86::MOV8ri; break; case X86::MOV16r0: Opc = X86::MOV16ri; break; case X86::MOV32r0: Opc = X86::MOV32ri; break; - case X86::MOV64r0: Opc = X86::MOV64ri32; break; } - BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0); - Emitted = true; + Clone = false; } break; } } - if (!Emitted) { + if (Clone) { MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); MI->getOperand(0).setReg(DestReg); MBB.insert(I, MI); + } else { + BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0); } - if (ChangeSubIdx) { - MachineInstr *NewMI = prior(I); - NewMI->getOperand(0).setSubReg(SubIdx); - } -} - -/// isInvariantLoad - Return true if the specified instruction (which is marked -/// mayLoad) is loading from a location whose value is invariant across the -/// function. For example, loading a value from the constant pool or from -/// from the argument area of a function if it does not change. This should -/// only return true of *all* loads the instruction does are invariant (if it -/// does multiple loads). -bool X86InstrInfo::isInvariantLoad(const MachineInstr *MI) const { - // This code cares about loads from three cases: constant pool entries, - // invariant argument slots, and global stubs. In order to handle these cases - // for all of the myriad of X86 instructions, we just scan for a CP/FI/GV - // operand and base our analysis on it. This is safe because the address of - // none of these three cases is ever used as anything other than a load base - // and X86 doesn't have any instructions that load from multiple places. - - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - // Loads from constant pools are trivially invariant. - if (MO.isCPI()) - return true; - - if (MO.isGlobal()) - return isGVStub(MO.getGlobal(), TM); - - // If this is a load from an invariant stack slot, the load is a constant. - if (MO.isFI()) { - const MachineFrameInfo &MFI = - *MI->getParent()->getParent()->getFrameInfo(); - int Idx = MO.getIndex(); - return MFI.isFixedObjectIndex(Idx) && MFI.isImmutableObjectIndex(Idx); - } - } - - // All other instances of these instructions are presumed to have other - // issues. - return false; + MachineInstr *NewMI = prior(I); + NewMI->getOperand(0).setSubReg(SubIdx); } /// hasLiveCondCodeDef - True if MI has a condition code def, e.g. 
EFLAGS, that @@ -1304,7 +1263,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { unsigned Opc; unsigned Size; switch (MI->getOpcode()) { - default: assert(0 && "Unreachable!"); + default: llvm_unreachable("Unreachable!"); case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break; case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break; case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break; @@ -1459,7 +1418,7 @@ static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) { unsigned X86::GetCondBranchFromCond(X86::CondCode CC) { switch (CC) { - default: assert(0 && "Illegal condition code!"); + default: llvm_unreachable("Illegal condition code!"); case X86::COND_E: return X86::JE; case X86::COND_NE: return X86::JNE; case X86::COND_L: return X86::JL; @@ -1483,7 +1442,7 @@ unsigned X86::GetCondBranchFromCond(X86::CondCode CC) { /// e.g. turning COND_E to COND_NE. X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) { switch (CC) { - default: assert(0 && "Illegal condition code!"); + default: llvm_unreachable("Illegal condition code!"); case X86::COND_E: return X86::COND_NE; case X86::COND_NE: return X86::COND_E; case X86::COND_L: return X86::COND_GE; @@ -1699,14 +1658,26 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, /* Source and destination have the same register class. */; else if (CommonRC->hasSuperClass(SrcRC)) CommonRC = SrcRC; - else if (!DestRC->hasSubClass(SrcRC)) - CommonRC = 0; + else if (!DestRC->hasSubClass(SrcRC)) { + // Neither of GR64_NOREX or GR64_NOSP is a superclass of the other, + // but we want to copy then as GR64. Similarly, for GR32_NOREX and + // GR32_NOSP, copy as GR32. + if (SrcRC->hasSuperClass(&X86::GR64RegClass) && + DestRC->hasSuperClass(&X86::GR64RegClass)) + CommonRC = &X86::GR64RegClass; + else if (SrcRC->hasSuperClass(&X86::GR32RegClass) && + DestRC->hasSuperClass(&X86::GR32RegClass)) + CommonRC = &X86::GR32RegClass; + else + CommonRC = 0; + } if (CommonRC) { unsigned Opc; - if (CommonRC == &X86::GR64RegClass) { + if (CommonRC == &X86::GR64RegClass || CommonRC == &X86::GR64_NOSPRegClass) { Opc = X86::MOV64rr; - } else if (CommonRC == &X86::GR32RegClass) { + } else if (CommonRC == &X86::GR32RegClass || + CommonRC == &X86::GR32_NOSPRegClass) { Opc = X86::MOV32rr; } else if (CommonRC == &X86::GR16RegClass) { Opc = X86::MOV16rr; @@ -1731,7 +1702,8 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, Opc = X86::MOV8rr_NOREX; else Opc = X86::MOV8rr; - } else if (CommonRC == &X86::GR64_NOREXRegClass) { + } else if (CommonRC == &X86::GR64_NOREXRegClass || + CommonRC == &X86::GR64_NOREX_NOSPRegClass) { Opc = X86::MOV64rr; } else if (CommonRC == &X86::GR32_NOREXRegClass) { Opc = X86::MOV32rr; @@ -1759,16 +1731,17 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, BuildMI(MBB, MI, DL, get(Opc), DestReg).addReg(SrcReg); return true; } - + // Moving EFLAGS to / from another register requires a push and a pop. 
if (SrcRC == &X86::CCRRegClass) { if (SrcReg != X86::EFLAGS) return false; - if (DestRC == &X86::GR64RegClass) { + if (DestRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) { BuildMI(MBB, MI, DL, get(X86::PUSHFQ)); BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg); return true; - } else if (DestRC == &X86::GR32RegClass) { + } else if (DestRC == &X86::GR32RegClass || + DestRC == &X86::GR32_NOSPRegClass) { BuildMI(MBB, MI, DL, get(X86::PUSHFD)); BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg); return true; @@ -1776,11 +1749,12 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, } else if (DestRC == &X86::CCRRegClass) { if (DestReg != X86::EFLAGS) return false; - if (SrcRC == &X86::GR64RegClass) { + if (SrcRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) { BuildMI(MBB, MI, DL, get(X86::PUSH64r)).addReg(SrcReg); BuildMI(MBB, MI, DL, get(X86::POPFQ)); return true; - } else if (SrcRC == &X86::GR32RegClass) { + } else if (SrcRC == &X86::GR32RegClass || + DestRC == &X86::GR32_NOSPRegClass) { BuildMI(MBB, MI, DL, get(X86::PUSH32r)).addReg(SrcReg); BuildMI(MBB, MI, DL, get(X86::POPFD)); return true; @@ -1838,9 +1812,9 @@ static unsigned getStoreRegOpcode(unsigned SrcReg, bool isStackAligned, TargetMachine &TM) { unsigned Opc = 0; - if (RC == &X86::GR64RegClass) { + if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) { Opc = X86::MOV64mr; - } else if (RC == &X86::GR32RegClass) { + } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) { Opc = X86::MOV32mr; } else if (RC == &X86::GR16RegClass) { Opc = X86::MOV16mr; @@ -1865,7 +1839,8 @@ static unsigned getStoreRegOpcode(unsigned SrcReg, Opc = X86::MOV8mr_NOREX; else Opc = X86::MOV8mr; - } else if (RC == &X86::GR64_NOREXRegClass) { + } else if (RC == &X86::GR64_NOREXRegClass || + RC == &X86::GR64_NOREX_NOSPRegClass) { Opc = X86::MOV64mr; } else if (RC == &X86::GR32_NOREXRegClass) { Opc = X86::MOV32mr; @@ -1889,8 +1864,7 @@ static unsigned getStoreRegOpcode(unsigned SrcReg, } else if (RC == &X86::VR64RegClass) { Opc = X86::MMX_MOVQ64mr; } else { - assert(0 && "Unknown regclass"); - abort(); + llvm_unreachable("Unknown regclass"); } return Opc; @@ -1914,6 +1888,8 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, SmallVectorImpl<MachineOperand> &Addr, const TargetRegisterClass *RC, + MachineInstr::mmo_iterator MMOBegin, + MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl<MachineInstr*> &NewMIs) const { bool isAligned = (RI.getStackAlignment() >= 16) || RI.needsStackRealignment(MF); @@ -1923,6 +1899,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, for (unsigned i = 0, e = Addr.size(); i != e; ++i) MIB.addOperand(Addr[i]); MIB.addReg(SrcReg, getKillRegState(isKill)); + (*MIB).setMemRefs(MMOBegin, MMOEnd); NewMIs.push_back(MIB); } @@ -1931,9 +1908,9 @@ static unsigned getLoadRegOpcode(unsigned DestReg, bool isStackAligned, const TargetMachine &TM) { unsigned Opc = 0; - if (RC == &X86::GR64RegClass) { + if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) { Opc = X86::MOV64rm; - } else if (RC == &X86::GR32RegClass) { + } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) { Opc = X86::MOV32rm; } else if (RC == &X86::GR16RegClass) { Opc = X86::MOV16rm; @@ -1958,7 +1935,8 @@ static unsigned getLoadRegOpcode(unsigned DestReg, Opc = X86::MOV8rm_NOREX; else Opc = X86::MOV8rm; - } else if (RC == &X86::GR64_NOREXRegClass) { + } else if (RC == &X86::GR64_NOREXRegClass || + RC == &X86::GR64_NOREX_NOSPRegClass) { Opc = 
X86::MOV64rm; } else if (RC == &X86::GR32_NOREXRegClass) { Opc = X86::MOV32rm; @@ -1982,8 +1960,7 @@ static unsigned getLoadRegOpcode(unsigned DestReg, } else if (RC == &X86::VR64RegClass) { Opc = X86::MMX_MOVQ64rm; } else { - assert(0 && "Unknown regclass"); - abort(); + llvm_unreachable("Unknown regclass"); } return Opc; @@ -2005,6 +1982,8 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, SmallVectorImpl<MachineOperand> &Addr, const TargetRegisterClass *RC, + MachineInstr::mmo_iterator MMOBegin, + MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl<MachineInstr*> &NewMIs) const { bool isAligned = (RI.getStackAlignment() >= 16) || RI.needsStackRealignment(MF); @@ -2013,6 +1992,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); for (unsigned i = 0, e = Addr.size(); i != e; ++i) MIB.addOperand(Addr[i]); + (*MIB).setMemRefs(MMOBegin, MMOEnd); NewMIs.push_back(MIB); } @@ -2026,9 +2006,11 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, if (MI != MBB.end()) DL = MI->getDebugLoc(); bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit(); + bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64(); unsigned SlotSize = is64Bit ? 8 : 4; MachineFunction &MF = *MBB.getParent(); + unsigned FPReg = RI.getFrameRegister(MF); X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); unsigned CalleeFrameSize = 0; @@ -2038,10 +2020,12 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, const TargetRegisterClass *RegClass = CSI[i-1].getRegClass(); // Add the callee-saved register as live-in. It's killed at the spill. MBB.addLiveIn(Reg); - if (RegClass != &X86::VR128RegClass) { + if (Reg == FPReg) + // X86RegisterInfo::emitPrologue will handle spilling of frame register. + continue; + if (RegClass != &X86::VR128RegClass && !isWin64) { CalleeFrameSize += SlotSize; - BuildMI(MBB, MI, DL, get(Opc)) - .addReg(Reg, RegState::Kill); + BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill); } else { storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass); } @@ -2060,13 +2044,18 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, DebugLoc DL = DebugLoc::getUnknownLoc(); if (MI != MBB.end()) DL = MI->getDebugLoc(); + MachineFunction &MF = *MBB.getParent(); + unsigned FPReg = RI.getFrameRegister(MF); bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit(); - + bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64(); unsigned Opc = is64Bit ? X86::POP64r : X86::POP32r; for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); + if (Reg == FPReg) + // X86RegisterInfo::emitEpilogue will handle restoring of frame register. 
+ continue; const TargetRegisterClass *RegClass = CSI[i].getRegClass(); - if (RegClass != &X86::VR128RegClass) { + if (RegClass != &X86::VR128RegClass && !isWin64) { BuildMI(MBB, MI, DL, get(Opc), Reg); } else { loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass); @@ -2143,8 +2132,9 @@ static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode, MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, unsigned i, - const SmallVectorImpl<MachineOperand> &MOs) const{ - const DenseMap<unsigned*, unsigned> *OpcodeTablePtr = NULL; + const SmallVectorImpl<MachineOperand> &MOs, + unsigned Size, unsigned Align) const { + const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL; bool isTwoAddrFold = false; unsigned NumOps = MI->getDesc().getNumOperands(); bool isTwoAddr = NumOps > 1 && @@ -2165,8 +2155,6 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI); else if (MI->getOpcode() == X86::MOV32r0) NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI); - else if (MI->getOpcode() == X86::MOV64r0) - NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI); else if (MI->getOpcode() == X86::MOV8r0) NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI); if (NewMI) @@ -2182,60 +2170,82 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // If table selected... if (OpcodeTablePtr) { // Find the Opcode to fuse - DenseMap<unsigned*, unsigned>::iterator I = + DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I = OpcodeTablePtr->find((unsigned*)MI->getOpcode()); if (I != OpcodeTablePtr->end()) { + unsigned Opcode = I->second.first; + unsigned MinAlign = I->second.second; + if (Align < MinAlign) + return NULL; + bool NarrowToMOV32rm = false; + if (Size) { + unsigned RCSize = MI->getDesc().OpInfo[i].getRegClass(&RI)->getSize(); + if (Size < RCSize) { + // Check if it's safe to fold the load. If the size of the object is + // narrower than the load width, then it's not. + if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4) + return NULL; + // If this is a 64-bit load, but the spill slot is 32, then we can do + // a 32-bit load which is implicitly zero-extended. This likely is due + // to liveintervalanalysis remat'ing a load from stack slot. + if (MI->getOperand(0).getSubReg() || MI->getOperand(1).getSubReg()) + return NULL; + Opcode = X86::MOV32rm; + NarrowToMOV32rm = true; + } + } + if (isTwoAddrFold) - NewMI = FuseTwoAddrInst(MF, I->second, MOs, MI, *this); + NewMI = FuseTwoAddrInst(MF, Opcode, MOs, MI, *this); else - NewMI = FuseInst(MF, I->second, i, MOs, MI, *this); + NewMI = FuseInst(MF, Opcode, i, MOs, MI, *this); + + if (NarrowToMOV32rm) { + // If this is the special case where we use a MOV32rm to load a 32-bit + // value and zero-extend the top bits. Change the destination register + // to a 32-bit one. 
+ unsigned DstReg = NewMI->getOperand(0).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(DstReg)) + NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, + 4/*x86_subreg_32bit*/)); + else + NewMI->getOperand(0).setSubReg(4/*x86_subreg_32bit*/); + } return NewMI; } } // No fusion if (PrintFailedFusing) - cerr << "We failed to fuse operand " << i << " in " << *MI; + errs() << "We failed to fuse operand " << i << " in " << *MI; return NULL; } MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, + const SmallVectorImpl<unsigned> &Ops, int FrameIndex) const { // Check switch flag if (NoFusing) return NULL; const MachineFrameInfo *MFI = MF.getFrameInfo(); + unsigned Size = MFI->getObjectSize(FrameIndex); unsigned Alignment = MFI->getObjectAlignment(FrameIndex); - // FIXME: Move alignment requirement into tables? - if (Alignment < 16) { - switch (MI->getOpcode()) { - default: break; - // Not always safe to fold movsd into these instructions since their load - // folding variants expects the address to be 16 byte aligned. - case X86::FsANDNPDrr: - case X86::FsANDNPSrr: - case X86::FsANDPDrr: - case X86::FsANDPSrr: - case X86::FsORPDrr: - case X86::FsORPSrr: - case X86::FsXORPDrr: - case X86::FsXORPSrr: - return NULL; - } - } - if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { unsigned NewOpc = 0; + unsigned RCSize = 0; switch (MI->getOpcode()) { default: return NULL; - case X86::TEST8rr: NewOpc = X86::CMP8ri; break; - case X86::TEST16rr: NewOpc = X86::CMP16ri; break; - case X86::TEST32rr: NewOpc = X86::CMP32ri; break; - case X86::TEST64rr: NewOpc = X86::CMP64ri32; break; + case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break; + case X86::TEST16rr: NewOpc = X86::CMP16ri; RCSize = 2; break; + case X86::TEST32rr: NewOpc = X86::CMP32ri; RCSize = 4; break; + case X86::TEST64rr: NewOpc = X86::CMP64ri32; RCSize = 8; break; } + // Check if it's safe to fold the load. If the size of the object is + // narrower than the load width, then it's not. + if (Size < RCSize) + return NULL; // Change to CMPXXri r, 0 first. MI->setDesc(get(NewOpc)); MI->getOperand(1).ChangeToImmediate(0); @@ -2244,12 +2254,12 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, SmallVector<MachineOperand,4> MOs; MOs.push_back(MachineOperand::CreateFI(FrameIndex)); - return foldMemoryOperandImpl(MF, MI, Ops[0], MOs); + return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment); } MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, + const SmallVectorImpl<unsigned> &Ops, MachineInstr *LoadMI) const { // Check switch flag if (NoFusing) return NULL; @@ -2257,26 +2267,22 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // Determine the alignment of the load. unsigned Alignment = 0; if (LoadMI->hasOneMemOperand()) - Alignment = LoadMI->memoperands_begin()->getAlignment(); - - // FIXME: Move alignment requirement into tables? - if (Alignment < 16) { - switch (MI->getOpcode()) { - default: break; - // Not always safe to fold movsd into these instructions since their load - // folding variants expects the address to be 16 byte aligned. 
- case X86::FsANDNPDrr: - case X86::FsANDNPSrr: - case X86::FsANDPDrr: - case X86::FsANDPSrr: - case X86::FsORPDrr: - case X86::FsORPSrr: - case X86::FsXORPDrr: - case X86::FsXORPSrr: - return NULL; + Alignment = (*LoadMI->memoperands_begin())->getAlignment(); + else + switch (LoadMI->getOpcode()) { + case X86::V_SET0: + case X86::V_SETALLONES: + Alignment = 16; + break; + case X86::FsFLD0SD: + Alignment = 8; + break; + case X86::FsFLD0SS: + Alignment = 4; + break; + default: + llvm_unreachable("Don't know how to fold this instruction!"); } - } - if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { unsigned NewOpc = 0; switch (MI->getOpcode()) { @@ -2293,28 +2299,40 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, return NULL; SmallVector<MachineOperand,X86AddrNumOperands> MOs; - if (LoadMI->getOpcode() == X86::V_SET0 || - LoadMI->getOpcode() == X86::V_SETALLONES) { + switch (LoadMI->getOpcode()) { + case X86::V_SET0: + case X86::V_SETALLONES: + case X86::FsFLD0SD: + case X86::FsFLD0SS: { // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure. // Create a constant-pool entry and operands to load from it. // x86-32 PIC requires a PIC base register for constant pools. unsigned PICBase = 0; - if (TM.getRelocationModel() == Reloc::PIC_ && - !TM.getSubtarget<X86Subtarget>().is64Bit()) - // FIXME: PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF); - // This doesn't work for several reasons. - // 1. GlobalBaseReg may have been spilled. - // 2. It may not be live at MI. - return false; + if (TM.getRelocationModel() == Reloc::PIC_) { + if (TM.getSubtarget<X86Subtarget>().is64Bit()) + PICBase = X86::RIP; + else + // FIXME: PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF); + // This doesn't work for several reasons. + // 1. GlobalBaseReg may have been spilled. + // 2. It may not be live at MI. + return NULL; + } - // Create a v4i32 constant-pool entry. + // Create a constant-pool entry. MachineConstantPool &MCP = *MF.getConstantPool(); - const VectorType *Ty = VectorType::get(Type::Int32Ty, 4); - Constant *C = LoadMI->getOpcode() == X86::V_SET0 ? - ConstantVector::getNullValue(Ty) : - ConstantVector::getAllOnesValue(Ty); - unsigned CPI = MCP.getConstantPoolIndex(C, 16); + const Type *Ty; + if (LoadMI->getOpcode() == X86::FsFLD0SS) + Ty = Type::getFloatTy(MF.getFunction()->getContext()); + else if (LoadMI->getOpcode() == X86::FsFLD0SD) + Ty = Type::getDoubleTy(MF.getFunction()->getContext()); + else + Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4); + Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ? + Constant::getAllOnesValue(Ty) : + Constant::getNullValue(Ty); + unsigned CPI = MCP.getConstantPoolIndex(C, Alignment); // Create operands to load from the constant pool entry. MOs.push_back(MachineOperand::CreateReg(PICBase, false)); @@ -2322,13 +2340,17 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MOs.push_back(MachineOperand::CreateReg(0, false)); MOs.push_back(MachineOperand::CreateCPI(CPI, 0)); MOs.push_back(MachineOperand::CreateReg(0, false)); - } else { + break; + } + default: { // Folding a normal load. Just copy the load's address operands. 
unsigned NumOps = LoadMI->getDesc().getNumOperands(); for (unsigned i = NumOps - X86AddrNumOperands; i != NumOps; ++i) MOs.push_back(LoadMI->getOperand(i)); + break; + } } - return foldMemoryOperandImpl(MF, MI, Ops[0], MOs); + return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment); } @@ -2360,15 +2382,14 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, // Folding a memory location into the two-address part of a two-address // instruction is different than folding it other places. It requires // replacing the *two* registers with the memory location. - const DenseMap<unsigned*, unsigned> *OpcodeTablePtr = NULL; + const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL; if (isTwoAddr && NumOps >= 2 && OpNum < 2) { OpcodeTablePtr = &RegOp2MemOpTable2Addr; } else if (OpNum == 0) { // If operand 0 switch (Opc) { + case X86::MOV8r0: case X86::MOV16r0: case X86::MOV32r0: - case X86::MOV64r0: - case X86::MOV8r0: return true; default: break; } @@ -2381,7 +2402,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, if (OpcodeTablePtr) { // Find the Opcode to fuse - DenseMap<unsigned*, unsigned>::iterator I = + DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I = OpcodeTablePtr->find((unsigned*)Opc); if (I != OpcodeTablePtr->end()) return true; @@ -2410,8 +2431,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, const TargetInstrDesc &TID = get(Opc); const TargetOperandInfo &TOI = TID.OpInfo[Index]; - const TargetRegisterClass *RC = TOI.isLookupPtrRegClass() - ? RI.getPointerRegClass() : RI.getRegClass(TOI.RegClass); + const TargetRegisterClass *RC = TOI.getRegClass(&RI); SmallVector<MachineOperand, X86AddrNumOperands> AddrOps; SmallVector<MachineOperand,2> BeforeOps; SmallVector<MachineOperand,2> AfterOps; @@ -2430,7 +2450,11 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, // Emit the load instruction. if (UnfoldLoad) { - loadRegFromAddr(MF, Reg, AddrOps, RC, NewMIs); + std::pair<MachineInstr::mmo_iterator, + MachineInstr::mmo_iterator> MMOs = + MF.extractLoadMemRefs(MI->memoperands_begin(), + MI->memoperands_end()); + loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs); if (UnfoldStore) { // Address operands cannot be marked isKill. for (unsigned i = 1; i != 1 + X86AddrNumOperands; ++i) { @@ -2489,10 +2513,12 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, // Emit the store instruction. if (UnfoldStore) { - const TargetOperandInfo &DstTOI = TID.OpInfo[0]; - const TargetRegisterClass *DstRC = DstTOI.isLookupPtrRegClass() - ? RI.getPointerRegClass() : RI.getRegClass(DstTOI.RegClass); - storeRegToAddr(MF, Reg, true, AddrOps, DstRC, NewMIs); + const TargetRegisterClass *DstRC = TID.OpInfo[0].getRegClass(&RI); + std::pair<MachineInstr::mmo_iterator, + MachineInstr::mmo_iterator> MMOs = + MF.extractStoreMemRefs(MI->memoperands_begin(), + MI->memoperands_end()); + storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs.first, MMOs.second, NewMIs); } return true; @@ -2513,9 +2539,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, bool FoldedLoad = I->second.second & (1 << 4); bool FoldedStore = I->second.second & (1 << 5); const TargetInstrDesc &TID = get(Opc); - const TargetOperandInfo &TOI = TID.OpInfo[Index]; - const TargetRegisterClass *RC = TOI.isLookupPtrRegClass() - ? 
RI.getPointerRegClass() : RI.getRegClass(TOI.RegClass); + const TargetRegisterClass *RC = TID.OpInfo[Index].getRegClass(&RI); unsigned NumDefs = TID.NumDefs; std::vector<SDValue> AddrOps; std::vector<SDValue> BeforeOps; @@ -2536,35 +2560,40 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, // Emit the load instruction. SDNode *Load = 0; - const MachineFunction &MF = DAG.getMachineFunction(); + MachineFunction &MF = DAG.getMachineFunction(); if (FoldedLoad) { - MVT VT = *RC->vt_begin(); + EVT VT = *RC->vt_begin(); bool isAligned = (RI.getStackAlignment() >= 16) || RI.needsStackRealignment(MF); - Load = DAG.getTargetNode(getLoadRegOpcode(0, RC, isAligned, TM), dl, - VT, MVT::Other, &AddrOps[0], AddrOps.size()); + Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl, + VT, MVT::Other, &AddrOps[0], AddrOps.size()); NewNodes.push_back(Load); + + // Preserve memory reference information. + std::pair<MachineInstr::mmo_iterator, + MachineInstr::mmo_iterator> MMOs = + MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), + cast<MachineSDNode>(N)->memoperands_end()); + cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second); } // Emit the data processing instruction. - std::vector<MVT> VTs; + std::vector<EVT> VTs; const TargetRegisterClass *DstRC = 0; if (TID.getNumDefs() > 0) { - const TargetOperandInfo &DstTOI = TID.OpInfo[0]; - DstRC = DstTOI.isLookupPtrRegClass() - ? RI.getPointerRegClass() : RI.getRegClass(DstTOI.RegClass); + DstRC = TID.OpInfo[0].getRegClass(&RI); VTs.push_back(*DstRC->vt_begin()); } for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { - MVT VT = N->getValueType(i); + EVT VT = N->getValueType(i); if (VT != MVT::Other && i >= (unsigned)TID.getNumDefs()) VTs.push_back(VT); } if (Load) BeforeOps.push_back(SDValue(Load, 0)); std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps)); - SDNode *NewNode= DAG.getTargetNode(Opc, dl, VTs, &BeforeOps[0], - BeforeOps.size()); + SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, &BeforeOps[0], + BeforeOps.size()); NewNodes.push_back(NewNode); // Emit the store instruction. @@ -2574,11 +2603,18 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, AddrOps.push_back(Chain); bool isAligned = (RI.getStackAlignment() >= 16) || RI.needsStackRealignment(MF); - SDNode *Store = DAG.getTargetNode(getStoreRegOpcode(0, DstRC, - isAligned, TM), - dl, MVT::Other, - &AddrOps[0], AddrOps.size()); + SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC, + isAligned, TM), + dl, MVT::Other, + &AddrOps[0], AddrOps.size()); NewNodes.push_back(Store); + + // Preserve memory reference information. 
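A hedged sketch of the memoperand bookkeeping being added here: when a folded instruction is split back into a load, an operation, and a store, its memory references are partitioned so each new node keeps only references of its own kind. This is a simplified model; extractLoadMemRefs/extractStoreMemRefs return iterator ranges rather than vectors:

  #include <vector>

  // Simplified stand-in for a memory reference: just kind flags.
  struct MemRef { bool IsLoad; bool IsStore; };

  // Keep only the load-side references for the new load node.
  static std::vector<MemRef> loadRefs(const std::vector<MemRef> &All) {
    std::vector<MemRef> Out;
    for (const MemRef &M : All)
      if (M.IsLoad)
        Out.push_back(M);
    return Out;
  }
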
+      std::pair<MachineInstr::mmo_iterator,
+                MachineInstr::mmo_iterator> MMOs =
+        MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
+                               cast<MachineSDNode>(N)->memoperands_end());
+      cast<MachineSDNode>(Store)->setMemRefs(MMOs.first, MMOs.second);
   }

   return true;
@@ -2644,7 +2680,7 @@ unsigned X86InstrInfo::sizeOfImm(const TargetInstrDesc *Desc) {
   case X86II::Imm16: return 2;
   case X86II::Imm32: return 4;
   case X86II::Imm64: return 8;
-  default: assert(0 && "Immediate size not set!");
+  default: llvm_unreachable("Immediate size not set!");
     return 0;
   }
 }
@@ -2829,7 +2865,7 @@ static unsigned getDisplacementFieldSize(const MachineOperand *RelocOp) {
   } else if (RelocOp->isJTI()) {
     FinalSize += sizeJumpTableAddress(false);
   } else {
-    assert(0 && "Unknown value to relocate!");
+    llvm_unreachable("Unknown value to relocate!");
   }
   return FinalSize;
 }
@@ -2926,7 +2962,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
   case X86II::GS:
     ++FinalSize;
     break;
-  default: assert(0 && "Invalid segment!");
+  default: llvm_unreachable("Invalid segment!");
   case 0: break;  // No segment override!
   }
@@ -2946,6 +2982,10 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
   case X86II::TA:  // 0F 3A
     Need0FPrefix = true;
     break;
+  case X86II::TF: // F2 0F 38
+    ++FinalSize;
+    Need0FPrefix = true;
+    break;
   case X86II::REP: break; // already handled.
   case X86II::XS:   // F3 0F
     ++FinalSize;
@@ -2959,7 +2999,7 @@
   case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
     ++FinalSize;
     break; // Two-byte opcode prefix
-  default: assert(0 && "Invalid prefix!");
+  default: llvm_unreachable("Invalid prefix!");
   case 0: break;  // No prefix!
   }
@@ -2981,6 +3021,9 @@
   case X86II::TA:    // 0F 3A
     ++FinalSize;
     break;
+  case X86II::TF: // F2 0F 38
+    ++FinalSize;
+    break;
   }

   // If this is a two-address instruction, skip one of the register operands.
@@ -2993,7 +3036,7 @@
     --NumOps;

   switch (Desc->TSFlags & X86II::FormMask) {
-  default: assert(0 && "Unknown FormMask value in X86 MachineCodeEmitter!");
+  default: llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!");
   case X86II::Pseudo:
     // Remember the current PC offset, this is the PIC relocation
     // base address.
@@ -3002,16 +3045,16 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, break; case TargetInstrInfo::INLINEASM: { const MachineFunction *MF = MI.getParent()->getParent(); - const char *AsmStr = MI.getOperand(0).getSymbolName(); - const TargetAsmInfo* AI = MF->getTarget().getTargetAsmInfo(); - FinalSize += AI->getInlineAsmLength(AsmStr); + const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); + FinalSize += TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(), + *MF->getTarget().getMCAsmInfo()); break; } case TargetInstrInfo::DBG_LABEL: case TargetInstrInfo::EH_LABEL: break; case TargetInstrInfo::IMPLICIT_DEF: - case TargetInstrInfo::DECLARE: + case TargetInstrInfo::KILL: case X86::DWARF_LOC: case X86::FP_REG_KILL: break; @@ -3038,7 +3081,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, } else if (MO.isImm()) { FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); } else { - assert(0 && "Unknown RawFrm operand!"); + llvm_unreachable("Unknown RawFrm operand!"); } } break; @@ -3196,10 +3239,10 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, } if (!Desc->isVariadic() && CurOp != NumOps) { - cerr << "Cannot determine size: "; - MI.dump(); - cerr << '\n'; - abort(); + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Cannot determine size: " << MI; + llvm_report_error(Msg.str()); } @@ -3209,7 +3252,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, unsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { const TargetInstrDesc &Desc = MI->getDesc(); - bool IsPIC = (TM.getRelocationModel() == Reloc::PIC_); + bool IsPIC = TM.getRelocationModel() == Reloc::PIC_; bool Is64BitMode = TM.getSubtargetImpl()->is64Bit(); unsigned Size = GetInstSizeWithDesc(*MI, &Desc, IsPIC, Is64BitMode); if (Desc.getOpcode() == X86::MOVPC32r) @@ -3245,12 +3288,11 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { // If we're using vanilla 'GOT' PIC style, we should use relative addressing // not to pc, but to _GLOBAL_OFFSET_TABLE_ external. - if (TM.getRelocationModel() == Reloc::PIC_ && - TM.getSubtarget<X86Subtarget>().isPICStyleGOT()) { + if (TM.getSubtarget<X86Subtarget>().isPICStyleGOT()) { GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass); // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg) - .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_", 0, + .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_", X86II::MO_GOT_ABSOLUTE_ADDRESS); } else { GlobalBaseReg = PC; diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 83f0194..2237c8b 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -69,35 +69,36 @@ namespace X86 { /// instruction info tracks. /// namespace X86II { - enum { + /// Target Operand Flag enum. + enum TOF { //===------------------------------------------------------------------===// // X86 Specific MachineOperand flags. - MO_NO_FLAG = 0, + MO_NO_FLAG, /// MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a /// relocation of: /// SYMBOL_LABEL + [. 
- PICBASELABEL] - MO_GOT_ABSOLUTE_ADDRESS = 1, + MO_GOT_ABSOLUTE_ADDRESS, /// MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the /// immediate should get the value of the symbol minus the PIC base label: /// SYMBOL_LABEL - PICBASELABEL - MO_PIC_BASE_OFFSET = 2, + MO_PIC_BASE_OFFSET, /// MO_GOT - On a symbol operand this indicates that the immediate is the /// offset to the GOT entry for the symbol name from the base of the GOT. /// /// See the X86-64 ELF ABI supplement for more details. /// SYMBOL_LABEL @GOT - MO_GOT = 3, + MO_GOT, /// MO_GOTOFF - On a symbol operand this indicates that the immediate is /// the offset to the location of the symbol name from the base of the GOT. /// /// See the X86-64 ELF ABI supplement for more details. /// SYMBOL_LABEL @GOTOFF - MO_GOTOFF = 4, + MO_GOTOFF, /// MO_GOTPCREL - On a symbol operand this indicates that the immediate is /// offset to the GOT entry for the symbol name from the current code @@ -105,50 +106,115 @@ namespace X86II { /// /// See the X86-64 ELF ABI supplement for more details. /// SYMBOL_LABEL @GOTPCREL - MO_GOTPCREL = 5, + MO_GOTPCREL, /// MO_PLT - On a symbol operand this indicates that the immediate is /// offset to the PLT entry of symbol name from the current code location. /// /// See the X86-64 ELF ABI supplement for more details. /// SYMBOL_LABEL @PLT - MO_PLT = 6, + MO_PLT, /// MO_TLSGD - On a symbol operand this indicates that the immediate is /// some TLS offset. /// /// See 'ELF Handling for Thread-Local Storage' for more details. /// SYMBOL_LABEL @TLSGD - MO_TLSGD = 7, + MO_TLSGD, /// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is /// some TLS offset. /// /// See 'ELF Handling for Thread-Local Storage' for more details. /// SYMBOL_LABEL @GOTTPOFF - MO_GOTTPOFF = 8, + MO_GOTTPOFF, /// MO_INDNTPOFF - On a symbol operand this indicates that the immediate is /// some TLS offset. /// /// See 'ELF Handling for Thread-Local Storage' for more details. /// SYMBOL_LABEL @INDNTPOFF - MO_INDNTPOFF = 9, + MO_INDNTPOFF, /// MO_TPOFF - On a symbol operand this indicates that the immediate is /// some TLS offset. /// /// See 'ELF Handling for Thread-Local Storage' for more details. /// SYMBOL_LABEL @TPOFF - MO_TPOFF = 10, + MO_TPOFF, /// MO_NTPOFF - On a symbol operand this indicates that the immediate is /// some TLS offset. /// /// See 'ELF Handling for Thread-Local Storage' for more details. /// SYMBOL_LABEL @NTPOFF - MO_NTPOFF = 11, + MO_NTPOFF, + + /// MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the + /// reference is actually to the "__imp_FOO" symbol. This is used for + /// dllimport linkage on windows. + MO_DLLIMPORT, + + /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the + /// reference is actually to the "FOO$stub" symbol. This is used for calls + /// and jumps to external functions on Tiger and before. + MO_DARWIN_STUB, + /// MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the + /// reference is actually to the "FOO$non_lazy_ptr" symbol, which is a + /// non-PIC-base-relative reference to a non-hidden dyld lazy pointer stub. + MO_DARWIN_NONLAZY, + + /// MO_DARWIN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this indicates + /// that the reference is actually to "FOO$non_lazy_ptr - PICBASE", which is + /// a PIC-base-relative reference to a non-hidden dyld lazy pointer stub. 
+ MO_DARWIN_NONLAZY_PIC_BASE, + + /// MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this + /// indicates that the reference is actually to "FOO$non_lazy_ptr -PICBASE", + /// which is a PIC-base-relative reference to a hidden dyld lazy pointer + /// stub. + MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE + }; +} + +/// isGlobalStubReference - Return true if the specified TargetFlag operand is +/// a reference to a stub for a global, not the global itself. +inline static bool isGlobalStubReference(unsigned char TargetFlag) { + switch (TargetFlag) { + case X86II::MO_DLLIMPORT: // dllimport stub. + case X86II::MO_GOTPCREL: // rip-relative GOT reference. + case X86II::MO_GOT: // normal GOT reference. + case X86II::MO_DARWIN_NONLAZY_PIC_BASE: // Normal $non_lazy_ptr ref. + case X86II::MO_DARWIN_NONLAZY: // Normal $non_lazy_ptr ref. + case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: // Hidden $non_lazy_ptr ref. + return true; + default: + return false; + } +} + +/// isGlobalRelativeToPICBase - Return true if the specified global value +/// reference is relative to a 32-bit PIC base (X86ISD::GlobalBaseReg). If this +/// is true, the addressing mode has the PIC base register added in (e.g. EBX). +inline static bool isGlobalRelativeToPICBase(unsigned char TargetFlag) { + switch (TargetFlag) { + case X86II::MO_GOTOFF: // isPICStyleGOT: local global. + case X86II::MO_GOT: // isPICStyleGOT: other global. + case X86II::MO_PIC_BASE_OFFSET: // Darwin local global. + case X86II::MO_DARWIN_NONLAZY_PIC_BASE: // Darwin/32 external global. + case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: // Darwin/32 hidden global. + return true; + default: + return false; + } +} + +/// X86II - This namespace holds all of the target specific flags that +/// instruction info tracks. +/// +namespace X86II { + enum { //===------------------------------------------------------------------===// // Instruction encodings. These are the standard/most common forms for X86 // instructions. @@ -249,6 +315,9 @@ namespace X86II { // T8, TA - Prefix after the 0x0F prefix. T8 = 13 << Op0Shift, TA = 14 << Op0Shift, + + // TF - Prefix before and after 0x0F + TF = 15 << Op0Shift, //===------------------------------------------------------------------===// // REX_W - REX prefixes are instruction prefixes used in 64-bit mode. @@ -355,10 +424,10 @@ class X86InstrInfo : public TargetInstrInfoImpl { /// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1, /// RegOp2MemOpTable2 - Load / store folding opcode maps. /// - DenseMap<unsigned*, unsigned> RegOp2MemOpTable2Addr; - DenseMap<unsigned*, unsigned> RegOp2MemOpTable0; - DenseMap<unsigned*, unsigned> RegOp2MemOpTable1; - DenseMap<unsigned*, unsigned> RegOp2MemOpTable2; + DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable2Addr; + DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable0; + DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable1; + DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable2; /// MemOp2RegOpTable - Load / store unfolding opcode map. 
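A short hedged usage sketch for the two new predicates above, as a later consumer (address-mode or call lowering, say) might use them; the wrapper function itself is illustrative only:

  // Given the target flag on a global-value operand, decide whether an
  // extra indirection (stub load) is required and whether the PIC base
  // register participates in the address.
  static void classifyGlobalRef(unsigned char TF,
                                bool &ViaStub, bool &PlusPICBase) {
    ViaStub = isGlobalStubReference(TF);          // e.g. MO_GOT, MO_DLLIMPORT
    PlusPICBase = isGlobalRelativeToPICBase(TF);  // e.g. MO_GOTOFF
  }
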
/// @@ -382,11 +451,11 @@ public: unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; - bool isReallyTriviallyReMaterializable(const MachineInstr *MI) const; + bool isReallyTriviallyReMaterializable(const MachineInstr *MI, + AliasAnalysis *AA) const; void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - unsigned DestReg, const MachineInstr *Orig) const; - - bool isInvariantLoad(const MachineInstr *MI) const; + unsigned DestReg, unsigned SubIdx, + const MachineInstr *Orig) const; /// convertToThreeAddress - This method must be implemented by targets that /// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target @@ -430,6 +499,8 @@ public: virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, SmallVectorImpl<MachineOperand> &Addr, const TargetRegisterClass *RC, + MachineInstr::mmo_iterator MMOBegin, + MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl<MachineInstr*> &NewMIs) const; virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, @@ -440,6 +511,8 @@ public: virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, SmallVectorImpl<MachineOperand> &Addr, const TargetRegisterClass *RC, + MachineInstr::mmo_iterator MMOBegin, + MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl<MachineInstr*> &NewMIs) const; virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, @@ -530,9 +603,10 @@ public: private: MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - unsigned OpNum, - const SmallVectorImpl<MachineOperand> &MOs) const; + MachineInstr* MI, + unsigned OpNum, + const SmallVectorImpl<MachineOperand> &MOs, + unsigned Size, unsigned Alignment) const; }; } // End llvm namespace diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 03df10d..30b57d8 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -56,6 +56,10 @@ def SDT_X86CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, def SDT_X86Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>; +def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>, + SDTCisVT<1, iPTR>, + SDTCisVT<2, iPTR>]>; + def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>; def SDTX86RdTsc : SDTypeProfile<0, 0, []>; @@ -114,6 +118,11 @@ def X86AtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary, def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret, [SDNPHasChain, SDNPOptInFlag]>; +def X86vastart_save_xmm_regs : + SDNode<"X86ISD::VASTART_SAVE_XMM_REGS", + SDT_X86VASTART_SAVE_XMM_REGS, + [SDNPHasChain]>; + def X86callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart, [SDNPHasChain, SDNPOutFlag]>; @@ -124,9 +133,6 @@ def X86callseq_end : def X86call : SDNode<"X86ISD::CALL", SDT_X86Call, [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>; -def X86tailcall: SDNode<"X86ISD::TAILCALL", SDT_X86Call, - [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>; - def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr, [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore]>; def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr, @@ -156,6 +162,9 @@ def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags>; def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags>; def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>; def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>; +def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags>; +def X86xor_flag : 
SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags>; +def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags>; def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; @@ -167,57 +176,80 @@ def i32imm_pcrel : Operand<i32> { let PrintMethod = "print_pcrel_imm"; } +// A version of ptr_rc which excludes SP, ESP, and RSP. This is used for +// the index operand of an address, to conform to x86 encoding restrictions. +def ptr_rc_nosp : PointerLikeRegClass<1>; // *mem - Operand definitions for the funky X86 addressing mode operands. // +def X86MemAsmOperand : AsmOperandClass { + let Name = "Mem"; + let SuperClass = ?; +} class X86MemOperand<string printMethod> : Operand<iPTR> { let PrintMethod = printMethod; - let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm, i8imm); + let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm); + let ParserMatchClass = X86MemAsmOperand; } +def opaque32mem : X86MemOperand<"printopaquemem">; +def opaque48mem : X86MemOperand<"printopaquemem">; +def opaque80mem : X86MemOperand<"printopaquemem">; + def i8mem : X86MemOperand<"printi8mem">; def i16mem : X86MemOperand<"printi16mem">; def i32mem : X86MemOperand<"printi32mem">; def i64mem : X86MemOperand<"printi64mem">; def i128mem : X86MemOperand<"printi128mem">; -def i256mem : X86MemOperand<"printi256mem">; +//def i256mem : X86MemOperand<"printi256mem">; def f32mem : X86MemOperand<"printf32mem">; def f64mem : X86MemOperand<"printf64mem">; def f80mem : X86MemOperand<"printf80mem">; def f128mem : X86MemOperand<"printf128mem">; -def f256mem : X86MemOperand<"printf256mem">; +//def f256mem : X86MemOperand<"printf256mem">; // A version of i8mem for use on x86-64 that uses GR64_NOREX instead of // plain GR64, so that it doesn't potentially require a REX prefix. def i8mem_NOREX : Operand<i64> { let PrintMethod = "printi8mem"; - let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX, i32imm, i8imm); + let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX_NOSP, i32imm, i8imm); + let ParserMatchClass = X86MemAsmOperand; } def lea32mem : Operand<i32> { let PrintMethod = "printlea32mem"; - let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm); + let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm); + let ParserMatchClass = X86MemAsmOperand; } def SSECC : Operand<i8> { let PrintMethod = "printSSECC"; } -def piclabel: Operand<i32> { - let PrintMethod = "printPICLabel"; +def ImmSExt8AsmOperand : AsmOperandClass { + let Name = "ImmSExt8"; + let SuperClass = ImmAsmOperand; } // A couple of more descriptive operand definitions. // 16-bits but only 8 bits are significant. -def i16i8imm : Operand<i16>; +def i16i8imm : Operand<i16> { + let ParserMatchClass = ImmSExt8AsmOperand; +} // 32-bits but only 8 bits are significant. -def i32i8imm : Operand<i32>; +def i32i8imm : Operand<i32> { + let ParserMatchClass = ImmSExt8AsmOperand; +} // Branch targets have OtherVT type and print as pc-relative values. def brtarget : Operand<OtherVT> { let PrintMethod = "print_pcrel_imm"; } +def brtarget8 : Operand<OtherVT> { + let PrintMethod = "print_pcrel_imm"; +} + //===----------------------------------------------------------------------===// // X86 Complex Pattern Definitions. // @@ -225,7 +257,8 @@ def brtarget : Operand<OtherVT> { // Define X86 specific addressing mode. 
def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], []>; def lea32addr : ComplexPattern<i32, 4, "SelectLEAAddr", - [add, sub, mul, shl, or, frameindex], []>; + [add, sub, mul, X86mul_imm, shl, or, frameindex], + []>; def tls32addr : ComplexPattern<i32, 4, "SelectTLSADDRAddr", [tglobaltlsaddr], []>; @@ -246,8 +279,14 @@ def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">; def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">; def In32BitMode : Predicate<"!Subtarget->is64Bit()">; def In64BitMode : Predicate<"Subtarget->is64Bit()">; +def IsWin64 : Predicate<"Subtarget->isTargetWin64()">; +def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">; def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">; -def NotSmallCode : Predicate<"TM.getCodeModel() != CodeModel::Small">; +def KernelCode : Predicate<"TM.getCodeModel() == CodeModel::Kernel">; +def FarData : Predicate<"TM.getCodeModel() != CodeModel::Small &&" + "TM.getCodeModel() != CodeModel::Kernel">; +def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||" + "TM.getCodeModel() == CodeModel::Kernel">; def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">; def OptForSpeed : Predicate<"!OptForSize">; def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">; @@ -484,15 +523,35 @@ def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), Requires<[In32BitMode]>; } +// x86-64 va_start lowering magic. +let usesCustomDAGSchedInserter = 1 in +def VASTART_SAVE_XMM_REGS : I<0, Pseudo, + (outs), + (ins GR8:$al, + i64imm:$regsavefi, i64imm:$offset, + variable_ops), + "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset", + [(X86vastart_save_xmm_regs GR8:$al, + imm:$regsavefi, + imm:$offset)]>; + // Nop -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1 in { def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>; + def NOOPL : I<0x1f, MRM0m, (outs), (ins i32mem:$zero), + "nopl\t$zero", []>, TB; +} -// PIC base +// Trap +def INT3 : I<0xcc, RawFrm, (outs), (ins), "int 3", []>; +def INT : I<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>; + +// PIC base construction. This expands to code that looks like this: +// call $next_inst +// popl %destreg" let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in - def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins piclabel:$label), - "call\t$label\n\t" - "pop{l}\t$reg", []>; + def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label), + "", []>; //===----------------------------------------------------------------------===// // Control Flow Instructions... 
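Adding X86mul_imm to the lea32addr roots above lets small constant multiplies select as LEA. A hedged example of the arithmetic identity this exploits:

  #include <cstdint>

  // x*5 = x + x*4 fits base + index*scale exactly, so it can become a
  // single "leal (%reg,%reg,4), %reg" with no flag clobber.
  static uint32_t mulByFive(uint32_t X) {
    return X + X * 4;
  }
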
@@ -506,7 +565,11 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
                     [(X86retflag 0)]>;
   def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
                   "ret\t$amt",
-                  [(X86retflag imm:$amt)]>;
+                  [(X86retflag timm:$amt)]>;
+  def LRET : I   <0xCB, RawFrm, (outs), (ins),
+                  "lret", []>;
+  def LRETI : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
+                  "lret\t$amt", []>;
 }

 // All branches are RawFrm, Void, Branch, and Terminators
@@ -514,8 +577,10 @@ let isBranch = 1, isTerminator = 1 in
   class IBr<bits<8> opcode, dag ins, string asm, list<dag> pattern> :
   I<opcode, RawFrm, (outs), ins, asm, pattern>;

-let isBranch = 1, isBarrier = 1 in
+let isBranch = 1, isBarrier = 1 in {
   def JMP : IBr<0xE9, (ins brtarget:$dst), "jmp\t$dst", [(br bb:$dst)]>;
+  def JMP8 : IBr<0xEB, (ins brtarget8:$dst), "jmp\t$dst", []>;
+}

 // Indirect branches
 let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
@@ -523,10 +588,42 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
                    [(brind GR32:$dst)]>;
   def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
                  [(brind (loadi32 addr:$dst))]>;
+
+  def FARJMP16i : Iseg16<0xEA, RawFrm, (outs),
+                         (ins i16imm:$seg, i16imm:$off),
+                         "ljmp{w}\t$seg, $off", []>, OpSize;
+  def FARJMP32i : Iseg32<0xEA, RawFrm, (outs),
+                         (ins i16imm:$seg, i32imm:$off),
+                         "ljmp{l}\t$seg, $off", []>;
+
+  def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst),
+                    "ljmp{w}\t{*}$dst", []>, OpSize;
+  def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst),
+                    "ljmp{l}\t{*}$dst", []>;
 }

 // Conditional branches
 let Uses = [EFLAGS] in {
+// Short conditional jumps
+def JO8 : IBr<0x70, (ins brtarget8:$dst), "jo\t$dst", []>;
+def JNO8 : IBr<0x71, (ins brtarget8:$dst), "jno\t$dst", []>;
+def JB8 : IBr<0x72, (ins brtarget8:$dst), "jb\t$dst", []>;
+def JAE8 : IBr<0x73, (ins brtarget8:$dst), "jae\t$dst", []>;
+def JE8 : IBr<0x74, (ins brtarget8:$dst), "je\t$dst", []>;
+def JNE8 : IBr<0x75, (ins brtarget8:$dst), "jne\t$dst", []>;
+def JBE8 : IBr<0x76, (ins brtarget8:$dst), "jbe\t$dst", []>;
+def JA8 : IBr<0x77, (ins brtarget8:$dst), "ja\t$dst", []>;
+def JS8 : IBr<0x78, (ins brtarget8:$dst), "js\t$dst", []>;
+def JNS8 : IBr<0x79, (ins brtarget8:$dst), "jns\t$dst", []>;
+def JP8 : IBr<0x7A, (ins brtarget8:$dst), "jp\t$dst", []>;
+def JNP8 : IBr<0x7B, (ins brtarget8:$dst), "jnp\t$dst", []>;
+def JL8 : IBr<0x7C, (ins brtarget8:$dst), "jl\t$dst", []>;
+def JGE8 : IBr<0x7D, (ins brtarget8:$dst), "jge\t$dst", []>;
+def JLE8 : IBr<0x7E, (ins brtarget8:$dst), "jle\t$dst", []>;
+def JG8 : IBr<0x7F, (ins brtarget8:$dst), "jg\t$dst", []>;
+
+def JCXZ8 : IBr<0xE3, (ins brtarget8:$dst), "jcxz\t$dst", []>;
+
 def JE  : IBr<0x84, (ins brtarget:$dst), "je\t$dst",
               [(X86brcond bb:$dst, X86_COND_E, EFLAGS)]>, TB;
 def JNE : IBr<0x85, (ins brtarget:$dst), "jne\t$dst",
@@ -563,6 +660,12 @@ def JNO : IBr<0x81, (ins brtarget:$dst), "jno\t$dst",
               [(X86brcond bb:$dst, X86_COND_NO, EFLAGS)]>, TB;
 } // Uses = [EFLAGS]

+// Loop instructions
+
+def LOOP   : I<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>;
+def LOOPE  : I<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>;
+def LOOPNE : I<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>;
+
 //===----------------------------------------------------------------------===//
 //  Call Instructions...
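On the new *8 branch forms defined above, a hedged note on encoding sizes: the rel8 forms (0x70..0x7F, 0xEB) are two bytes, while the rel32 conditional forms (0x0F 0x8x) are six. The *8 defs carry empty patterns because only assembler-level branch relaxation, not instruction selection, should choose them.

  // Illustrative size accounting only; real layout lives in the emitter.
  static unsigned condBranchBytes(bool ShortForm) {
    return ShortForm ? 2   // opcode 0x7x + disp8
                     : 6;  // 0x0F, 0x8x + disp32
  }
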
// @@ -583,13 +686,26 @@ let isCall = 1 in "call\t{*}$dst", [(X86call GR32:$dst)]>; def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops), "call\t{*}$dst", [(X86call (loadi32 addr:$dst))]>; + + def FARCALL16i : Iseg16<0x9A, RawFrm, (outs), + (ins i16imm:$seg, i16imm:$off), + "lcall{w}\t$seg, $off", []>, OpSize; + def FARCALL32i : Iseg32<0x9A, RawFrm, (outs), + (ins i16imm:$seg, i32imm:$off), + "lcall{l}\t$seg, $off", []>; + + def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst), + "lcall{w}\t{*}$dst", []>, OpSize; + def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst), + "lcall{l}\t{*}$dst", []>; } -// Tail call stuff. +// Constructing a stack frame. + +def ENTER : I<0xC8, RawFrm, (outs), (ins i16imm:$len, i8imm:$lvl), + "enter\t$len, $lvl", []>; -def TAILCALL : I<0, Pseudo, (outs), (ins), - "#TAILCALL", - []>; +// Tail call stuff. let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in def TCRETURNdi : I<0, Pseudo, (outs), (ins i32imm:$dst, i32imm:$offset, variable_ops), @@ -620,11 +736,29 @@ def LEAVE : I<0xC9, RawFrm, (outs), (ins), "leave", []>; let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in { -let mayLoad = 1 in -def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>; +let mayLoad = 1 in { +def POP16r : I<0x58, AddRegFrm, (outs GR16:$reg), (ins), "pop{w}\t$reg", []>, + OpSize; +def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>; +def POP16rmr: I<0x8F, MRM0r, (outs GR16:$reg), (ins), "pop{w}\t$reg", []>, + OpSize; +def POP16rmm: I<0x8F, MRM0m, (outs i16mem:$dst), (ins), "pop{w}\t$dst", []>, + OpSize; +def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>; +def POP32rmm: I<0x8F, MRM0m, (outs i32mem:$dst), (ins), "pop{l}\t$dst", []>; +} -let mayStore = 1 in +let mayStore = 1 in { +def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[]>, + OpSize; def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>; +def PUSH16rmr: I<0xFF, MRM6r, (outs), (ins GR16:$reg), "push{w}\t$reg",[]>, + OpSize; +def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[]>, + OpSize; +def PUSH32rmr: I<0xFF, MRM6r, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>; +def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[]>; +} } let Defs = [ESP], Uses = [ESP], neverHasSideEffects = 1, mayStore = 1 in { @@ -710,6 +844,14 @@ let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI] in def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}", [(X86rep_stos i32)]>, REP; +def SCAS8 : I<0xAE, RawFrm, (outs), (ins), "scas{b}", []>; +def SCAS16 : I<0xAF, RawFrm, (outs), (ins), "scas{w}", []>, OpSize; +def SCAS32 : I<0xAF, RawFrm, (outs), (ins), "scas{l}", []>; + +def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmps{b}", []>; +def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmps{w}", []>, OpSize; +def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l}", []>; + let Defs = [RAX, RDX] in def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, TB; @@ -718,6 +860,18 @@ let isBarrier = 1, hasCtrlDep = 1 in { def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB; } +def SYSCALL : I<0x05, RawFrm, + (outs), (ins), "syscall", []>, TB; +def SYSRET : I<0x07, RawFrm, + (outs), (ins), "sysret", []>, TB; +def SYSENTER : I<0x34, RawFrm, + (outs), (ins), "sysenter", []>, TB; +def SYSEXIT : I<0x35, RawFrm, + (outs), (ins), "sysexit", []>, TB; + +def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>; + + 
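The new PUSH/POP variants above exist because x86 has two encodings for the same register push or pop. A hedged sketch of the short form; the ModRM form (0xFF /6 for push, 0x8F /0 for pop) costs one more byte but also accepts memory operands:

  #include <cstdint>

  // "push r32" short form: the low three register bits fold into the
  // opcode byte itself (0x50+rd). PUSH32rmr is the 0xFF /6 alternative.
  static uint8_t pushShortOpcode(unsigned RegEncoding) {
    return (uint8_t)(0x50 + (RegEncoding & 7));
  }
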
//===----------------------------------------------------------------------===// // Input/Output Instructions... // @@ -793,6 +947,30 @@ def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src), "mov{l}\t{$src, $dst|$dst, $src}", [(store (i32 imm:$src), addr:$dst)]>; +def MOV8o8a : Ii8 <0xA0, RawFrm, (outs), (ins i8imm:$src), + "mov{b}\t{$src, %al|%al, $src}", []>; +def MOV16o16a : Ii16 <0xA1, RawFrm, (outs), (ins i16imm:$src), + "mov{w}\t{$src, %ax|%ax, $src}", []>, OpSize; +def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins i32imm:$src), + "mov{l}\t{$src, %eax|%eax, $src}", []>; + +def MOV8ao8 : Ii8 <0xA2, RawFrm, (outs i8imm:$dst), (ins), + "mov{b}\t{%al, $dst|$dst, %al}", []>; +def MOV16ao16 : Ii16 <0xA3, RawFrm, (outs i16imm:$dst), (ins), + "mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize; +def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs i32imm:$dst), (ins), + "mov{l}\t{%eax, $dst|$dst, %eax}", []>; + +// Moves to and from segment registers +def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src), + "mov{w}\t{$src, $dst|$dst, $src}", []>; +def MOV16ms : I<0x8C, MRMDestMem, (outs i16mem:$dst), (ins SEGMENT_REG:$src), + "mov{w}\t{$src, $dst|$dst, $src}", []>; +def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src), + "mov{w}\t{$src, $dst|$dst, $src}", []>; +def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src), + "mov{w}\t{$src, $dst|$dst, $src}", []>; + let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in { def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src), "mov{b}\t{$src, $dst|$dst, $src}", @@ -950,6 +1128,20 @@ let isTwoAddress = 1 in { // Conditional moves let Uses = [EFLAGS] in { + +// X86 doesn't have 8-bit conditional moves. Use a customDAGSchedInserter to +// emit control flow. An alternative to this is to mark i8 SELECT as Promote, +// however that requires promoting the operands, and can induce additional +// i8 register pressure. Note that CMOV_GR8 is conservatively considered to +// clobber EFLAGS, because if one of the operands is zero, the expansion +// could involve an xor. 
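A hedged C-level picture of what the custom inserter must emit for the 8-bit select, since there is no cmovb instruction; control flow replaces the conditional move, which is part of why the CMOV_GR8 pseudo defined next is conservatively marked as clobbering EFLAGS:

  // The two-way diamond shape the pseudo expands to; for 16- and 32-bit
  // types a real cmov{w,l} is used instead.
  static unsigned char select8(bool Cond, unsigned char A, unsigned char B) {
    if (Cond)
      return A;   // one arm of the diamond
    return B;     // the join block merges the result with a PHI
  }
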
+let usesCustomDAGSchedInserter = 1, isTwoAddress = 0, Defs = [EFLAGS] in +def CMOV_GR8 : I<0, Pseudo, + (outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond), + "#CMOV_GR8 PSEUDO!", + [(set GR8:$dst, (X86cmov GR8:$src1, GR8:$src2, + imm:$cond, EFLAGS))]>; + let isCommutable = 1 in { def CMOVB16rr : I<0x42, MRMSrcReg, // if <u, GR16 = GR16 (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), @@ -1549,6 +1741,14 @@ let isTwoAddress = 0 in { "and{l}\t{$src, $dst|$dst, $src}", [(store (and (load addr:$dst), i32immSExt8:$src), addr:$dst), (implicit EFLAGS)]>; + + def AND8i8 : Ii8<0x24, RawFrm, (outs), (ins i8imm:$src), + "and{b}\t{$src, %al|%al, $src}", []>; + def AND16i16 : Ii16<0x25, RawFrm, (outs), (ins i16imm:$src), + "and{w}\t{$src, %ax|%ax, $src}", []>, OpSize; + def AND32i32 : Ii32<0x25, RawFrm, (outs), (ins i32imm:$src), + "and{l}\t{$src, %eax|%eax, $src}", []>; + } @@ -1635,6 +1835,13 @@ let isTwoAddress = 0 in { "or{l}\t{$src, $dst|$dst, $src}", [(store (or (load addr:$dst), i32immSExt8:$src), addr:$dst), (implicit EFLAGS)]>; + + def OR8i8 : Ii8 <0x0C, RawFrm, (outs), (ins i8imm:$src), + "or{b}\t{$src, %al|%al, $src}", []>; + def OR16i16 : Ii16 <0x0D, RawFrm, (outs), (ins i16imm:$src), + "or{w}\t{$src, %ax|%ax, $src}", []>, OpSize; + def OR32i32 : Ii32 <0x0D, RawFrm, (outs), (ins i32imm:$src), + "or{l}\t{$src, %eax|%eax, $src}", []>; } // isTwoAddress = 0 @@ -1744,6 +1951,13 @@ let isTwoAddress = 0 in { "xor{l}\t{$src, $dst|$dst, $src}", [(store (xor (load addr:$dst), i32immSExt8:$src), addr:$dst), (implicit EFLAGS)]>; + + def XOR8i8 : Ii8 <0x34, RawFrm, (outs), (ins i8imm:$src), + "xor{b}\t{$src, %al|%al, $src}", []>; + def XOR16i16 : Ii16 <0x35, RawFrm, (outs), (ins i16imm:$src), + "xor{w}\t{$src, %ax|%ax, $src}", []>, OpSize; + def XOR32i32 : Ii32 <0x35, RawFrm, (outs), (ins i32imm:$src), + "xor{l}\t{$src, %eax|%eax, $src}", []>; } // isTwoAddress = 0 } // Defs = [EFLAGS] @@ -1771,8 +1985,17 @@ def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), "shl{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))]>; -// NOTE: We don't use shifts of a register by one, because 'add reg,reg' is -// cheaper. + +// NOTE: We don't include patterns for shifts of a register by one, because +// 'add reg,reg' is cheaper. 
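On the note above about shifts by one: the 0xD0/0xD1 forms below are still defined for the assembler, but codegen prefers an add, which is at least as cheap on the relevant microarchitectures. A hedged illustration:

  #include <cstdint>

  // For x << 1 the selector emits "addl %eax, %eax" rather than SHL32r1;
  // same result, cheaper or equal in practice.
  static uint32_t shlOne(uint32_t X) {
    return X + X;
  }
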
+ +def SHL8r1 : I<0xD0, MRM4r, (outs GR8:$dst), (ins GR8:$src1), + "shl{b}\t$dst", []>; +def SHL16r1 : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1), + "shl{w}\t$dst", []>, OpSize; +def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1), + "shl{l}\t$dst", []>; + } // isConvertibleToThreeAddress = 1 let isTwoAddress = 0 in { @@ -1951,6 +2174,97 @@ let isTwoAddress = 0 in { } // Rotate instructions + +def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src), + "rcl{b}\t{1, $dst|$dst, 1}", []>; +def RCL8m1 : I<0xD0, MRM2m, (outs i8mem:$dst), (ins i8mem:$src), + "rcl{b}\t{1, $dst|$dst, 1}", []>; +let Uses = [CL] in { +def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src), + "rcl{b}\t{%cl, $dst|$dst, CL}", []>; +def RCL8mCL : I<0xD2, MRM2m, (outs i8mem:$dst), (ins i8mem:$src), + "rcl{b}\t{%cl, $dst|$dst, CL}", []>; +} +def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt), + "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCL8mi : Ii8<0xC0, MRM2m, (outs i8mem:$dst), (ins i8mem:$src, i8imm:$cnt), + "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; + +def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src), + "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize; +def RCL16m1 : I<0xD1, MRM2m, (outs i16mem:$dst), (ins i16mem:$src), + "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize; +let Uses = [CL] in { +def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src), + "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; +def RCL16mCL : I<0xD3, MRM2m, (outs i16mem:$dst), (ins i16mem:$src), + "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; +} +def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt), + "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; +def RCL16mi : Ii8<0xC1, MRM2m, (outs i16mem:$dst), (ins i16mem:$src, i8imm:$cnt), + "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; + +def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src), + "rcl{l}\t{1, $dst|$dst, 1}", []>; +def RCL32m1 : I<0xD1, MRM2m, (outs i32mem:$dst), (ins i32mem:$src), + "rcl{l}\t{1, $dst|$dst, 1}", []>; +let Uses = [CL] in { +def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src), + "rcl{l}\t{%cl, $dst|$dst, CL}", []>; +def RCL32mCL : I<0xD3, MRM2m, (outs i32mem:$dst), (ins i32mem:$src), + "rcl{l}\t{%cl, $dst|$dst, CL}", []>; +} +def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt), + "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCL32mi : Ii8<0xC1, MRM2m, (outs i32mem:$dst), (ins i32mem:$src, i8imm:$cnt), + "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; + +def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src), + "rcr{b}\t{1, $dst|$dst, 1}", []>; +def RCR8m1 : I<0xD0, MRM3m, (outs i8mem:$dst), (ins i8mem:$src), + "rcr{b}\t{1, $dst|$dst, 1}", []>; +let Uses = [CL] in { +def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src), + "rcr{b}\t{%cl, $dst|$dst, CL}", []>; +def RCR8mCL : I<0xD2, MRM3m, (outs i8mem:$dst), (ins i8mem:$src), + "rcr{b}\t{%cl, $dst|$dst, CL}", []>; +} +def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt), + "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCR8mi : Ii8<0xC0, MRM3m, (outs i8mem:$dst), (ins i8mem:$src, i8imm:$cnt), + "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; + +def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src), + "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize; +def RCR16m1 : I<0xD1, MRM3m, (outs i16mem:$dst), (ins i16mem:$src), + "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize; +let Uses = [CL] in { +def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src), + "rcr{w}\t{%cl, $dst|$dst, CL}", []>, 
OpSize;
+def RCR16mCL : I<0xD3, MRM3m, (outs i16mem:$dst), (ins i16mem:$src),
+                 "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+}
+def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt),
+                  "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+def RCR16mi : Ii8<0xC1, MRM3m, (outs i16mem:$dst), (ins i16mem:$src, i8imm:$cnt),
+                  "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+
+def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src),
+                "rcr{l}\t{1, $dst|$dst, 1}", []>;
+def RCR32m1 : I<0xD1, MRM3m, (outs i32mem:$dst), (ins i32mem:$src),
+                "rcr{l}\t{1, $dst|$dst, 1}", []>;
+let Uses = [CL] in {
+def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src),
+                 "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
+def RCR32mCL : I<0xD3, MRM3m, (outs i32mem:$dst), (ins i32mem:$src),
+                 "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
+}
+def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt),
+                  "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCR32mi : Ii8<0xC1, MRM3m, (outs i32mem:$dst), (ins i32mem:$src, i8imm:$cnt),
+                  "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+
 // FIXME: provide shorter instructions when imm8 == 1
 let Uses = [CL] in {
 def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src),
@@ -2228,6 +2542,15 @@ def ADD32rm  : I<0x03, MRMSrcMem, (outs GR32:$dst),
                  "add{l}\t{$src2, $dst|$dst, $src2}",
                  [(set GR32:$dst, (add GR32:$src1, (load addr:$src2))),
                   (implicit EFLAGS)]>;
+
+// Register-Register Addition - Equivalent to the normal rr forms (ADD8rr,
+// ADD16rr, and ADD32rr), but differently encoded.
+def ADD8mrmrr: I<0x02, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+                 "add{b}\t{$src2, $dst|$dst, $src2}", []>;
+def ADD16mrmrr: I<0x03, MRMSrcReg,(outs GR16:$dst),(ins GR16:$src1, GR16:$src2),
+                 "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
+def ADD32mrmrr: I<0x03, MRMSrcReg,(outs GR32:$dst),(ins GR32:$src1, GR32:$src2),
+                 "add{l}\t{$src2, $dst|$dst, $src2}", []>;

 // Register-Integer Addition
 def ADD8ri   : Ii8<0x80, MRM0r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
@@ -2295,6 +2618,14 @@ let isTwoAddress = 0 in {
                    [(store (add (load addr:$dst), i32immSExt8:$src2),
                            addr:$dst),
                     (implicit EFLAGS)]>;
+
+  // addition to rAX
+  def ADD8i8 : Ii8<0x04, RawFrm, (outs), (ins i8imm:$src),
+                   "add{b}\t{$src, %al|%al, $src}", []>;
+  def ADD16i16 : Ii16<0x05, RawFrm, (outs), (ins i16imm:$src),
+                      "add{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+  def ADD32i32 : Ii32<0x05, RawFrm, (outs), (ins i32imm:$src),
+                      "add{l}\t{$src, %eax|%eax, $src}", []>;
 }
 let Uses = [EFLAGS] in {
@@ -2373,6 +2704,13 @@ let isTwoAddress = 0 in {
   def ADC32mi8 : Ii8<0x83, MRM2m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
                      "adc{l}\t{$src2, $dst|$dst, $src2}",
                      [(store (adde (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
+
+  def ADC8i8 : Ii8<0x14, RawFrm, (outs), (ins i8imm:$src),
+                   "adc{b}\t{$src, %al|%al, $src}", []>;
+  def ADC16i16 : Ii16<0x15, RawFrm, (outs), (ins i16imm:$src),
+                      "adc{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+  def ADC32i32 : Ii32<0x15, RawFrm, (outs), (ins i32imm:$src),
+                      "adc{l}\t{$src, %eax|%eax, $src}", []>;
 }
 } // Uses = [EFLAGS]
@@ -2472,6 +2810,13 @@ let isTwoAddress = 0 in {
                     [(store (sub (load addr:$dst), i32immSExt8:$src2),
                             addr:$dst),
                      (implicit EFLAGS)]>;
+
+  def SUB8i8 : Ii8<0x2C, RawFrm, (outs), (ins i8imm:$src),
+                   "sub{b}\t{$src, %al|%al, $src}", []>;
+  def SUB16i16 : Ii16<0x2D, RawFrm, (outs), (ins i16imm:$src),
+                      "sub{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+  def SUB32i32 : Ii32<0x2D, RawFrm, (outs), (ins i32imm:$src),
+                      "sub{l}\t{$src, %eax|%eax, $src}", []>;
 }
 let
Uses = [EFLAGS] in { @@ -2516,6 +2861,13 @@ let isTwoAddress = 0 in { def SBB32mi8 : Ii8<0x83, MRM3m, (outs), (ins i32mem:$dst, i32i8imm :$src2), "sbb{l}\t{$src2, $dst|$dst, $src2}", [(store (sube (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>; + + def SBB8i8 : Ii8<0x1C, RawFrm, (outs), (ins i8imm:$src), + "sbb{b}\t{$src, %al|%al, $src}", []>; + def SBB16i16 : Ii16<0x1D, RawFrm, (outs), (ins i16imm:$src), + "sbb{w}\t{$src, %ax|%ax, $src}", []>, OpSize; + def SBB32i32 : Ii32<0x1D, RawFrm, (outs), (ins i32imm:$src), + "sbb{l}\t{$src, %eax|%eax, $src}", []>; } def SBB8rm : I<0x1A, MRMSrcMem, (outs GR8:$dst), (ins GR8:$src1, i8mem:$src2), "sbb{b}\t{$src2, $dst|$dst, $src2}", @@ -2647,6 +2999,13 @@ def TEST32rr : I<0x85, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), (implicit EFLAGS)]>; } +def TEST8i8 : Ii8<0xA8, RawFrm, (outs), (ins i8imm:$src), + "test{b}\t{$src, %al|%al, $src}", []>; +def TEST16i16 : Ii16<0xA9, RawFrm, (outs), (ins i16imm:$src), + "test{w}\t{$src, %ax|%ax, $src}", []>, OpSize; +def TEST32i32 : Ii32<0xA9, RawFrm, (outs), (ins i32imm:$src), + "test{l}\t{$src, %eax|%eax, $src}", []>; + def TEST8rm : I<0x84, MRMSrcMem, (outs), (ins GR8 :$src1, i8mem :$src2), "test{b}\t{$src2, $src1|$src1, $src2}", [(X86cmp (and GR8:$src1, (loadi8 addr:$src2)), 0), @@ -2878,6 +3237,13 @@ def SETNOm : I<0x91, MRM0m, // Integer comparisons let Defs = [EFLAGS] in { +def CMP8i8 : Ii8<0x3C, RawFrm, (outs), (ins i8imm:$src), + "cmp{b}\t{$src, %al|%al, $src}", []>; +def CMP16i16 : Ii16<0x3D, RawFrm, (outs), (ins i16imm:$src), + "cmp{w}\t{$src, %ax|%ax, $src}", []>, OpSize; +def CMP32i32 : Ii32<0x3D, RawFrm, (outs), (ins i32imm:$src), + "cmp{l}\t{$src, %eax|%eax, $src}", []>; + def CMP8rr : I<0x38, MRMDestReg, (outs), (ins GR8 :$src1, GR8 :$src2), "cmp{b}\t{$src2, $src1|$src1, $src2}", @@ -2920,6 +3286,12 @@ def CMP32rm : I<0x3B, MRMSrcMem, "cmp{l}\t{$src2, $src1|$src1, $src2}", [(X86cmp GR32:$src1, (loadi32 addr:$src2)), (implicit EFLAGS)]>; +def CMP8mrmrr : I<0x3A, MRMSrcReg, (outs), (ins GR8:$src1, GR8:$src2), + "cmp{b}\t{$src2, $src1|$src1, $src2}", []>; +def CMP16mrmrr : I<0x3B, MRMSrcReg, (outs), (ins GR16:$src1, GR16:$src2), + "cmp{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize; +def CMP32mrmrr : I<0x3B, MRMSrcReg, (outs), (ins GR32:$src1, GR32:$src2), + "cmp{l}\t{$src2, $src1|$src1, $src2}", []>; def CMP8ri : Ii8<0x80, MRM7r, (outs), (ins GR8:$src1, i8imm:$src2), "cmp{b}\t{$src2, $src1|$src1, $src2}", @@ -3095,7 +3467,8 @@ let neverHasSideEffects = 1 in { // Alias instructions that map movr0 to xor. // FIXME: remove when we can teach regalloc that xor reg, reg is ok. 
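Context for the movr0-as-xor aliasing referenced just above and defined in the lines that follow: "xorl %reg, %reg" is shorter than "movl $0, %reg" (2 bytes versus 5) and is a recognized zero idiom, but it writes EFLAGS, which is why the defs carry Defs = [EFLAGS]. A hedged illustration:

  #include <cstdint>

  // Materializing zero; selected as "xorl %eax, %eax" via MOV32r0.
  // The xor form breaks dependencies but cannot be placed where the
  // flags are live, hence the FIXME about teaching regalloc.
  static uint32_t zero() {
    return 0;
  }
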
-let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1 in { +let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1, + isCodeGenOnly = 1 in { def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "xor{b}\t$dst, $dst", [(set GR8:$dst, 0)]>; @@ -3127,12 +3500,12 @@ def TLS_addr32 : I<0, Pseudo, (outs), (ins lea32mem:$sym), [(X86tlsaddr tls32addr:$sym)]>, Requires<[In32BitMode]>; -let AddedComplexity = 5 in +let AddedComplexity = 5, isCodeGenOnly = 1 in def GS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "movl\t%gs:$src, $dst", [(set GR32:$dst, (gsload addr:$src))]>, SegGS; -let AddedComplexity = 5 in +let AddedComplexity = 5, isCodeGenOnly = 1 in def FS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "movl\t%fs:$src, $dst", [(set GR32:$dst, (fsload addr:$src))]>, SegFS; @@ -3143,7 +3516,7 @@ def FS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), def DWARF_LOC : I<0, Pseudo, (outs), (ins i32imm:$line, i32imm:$col, i32imm:$file), - ".loc\t${file:debug} ${line:debug} ${col:debug}", + ".loc\t$file $line $col", [(dwarf_loc (i32 imm:$line), (i32 imm:$col), (i32 imm:$file))]>; @@ -3151,7 +3524,7 @@ def DWARF_LOC : I<0, Pseudo, (outs), // EH Pseudo Instructions // let isTerminator = 1, isReturn = 1, isBarrier = 1, - hasCtrlDep = 1 in { + hasCtrlDep = 1, isCodeGenOnly = 1 in { def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr), "ret\t#eh_return, addr: $addr", [(X86ehret GR32:$addr)]>; @@ -3223,6 +3596,78 @@ def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins i8mem:$ptr, GR8:$val), TB, LOCK; } +// Optimized codegen when the non-memory output is not used. +// FIXME: Use normal add / sub instructions and add lock prefix dynamically. +def LOCK_ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), + "lock\n\t" + "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_ADD16mr : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), + "lock\n\t" + "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK; +def LOCK_ADD32mr : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), + "lock\n\t" + "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_ADD8mi : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2), + "lock\n\t" + "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_ADD16mi : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2), + "lock\n\t" + "add{w}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_ADD32mi : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2), + "lock\n\t" + "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2), + "lock\n\t" + "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK; +def LOCK_ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2), + "lock\n\t" + "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; + +def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), + "lock\n\t" + "inc{b}\t$dst", []>, LOCK; +def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), + "lock\n\t" + "inc{w}\t$dst", []>, OpSize, LOCK; +def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), + "lock\n\t" + "inc{l}\t$dst", []>, LOCK; + +def LOCK_SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2), + "lock\n\t" + "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_SUB16mr : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), + "lock\n\t" + "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK; +def LOCK_SUB32mr : I<0x29, MRMDestMem, (outs), 
(ins i32mem:$dst, GR32:$src2), + "lock\n\t" + "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_SUB8mi : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2), + "lock\n\t" + "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_SUB16mi : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2), + "lock\n\t" + "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK; +def LOCK_SUB32mi : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2), + "lock\n\t" + "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2), + "lock\n\t" + "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK; +def LOCK_SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2), + "lock\n\t" + "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; + +def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), + "lock\n\t" + "dec{b}\t$dst", []>, LOCK; +def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), + "lock\n\t" + "dec{w}\t$dst", []>, OpSize, LOCK; +def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), + "lock\n\t" + "dec{l}\t$dst", []>, LOCK; + // Atomic exchange, and, or, xor let Constraints = "$val = $dst", Defs = [EFLAGS], usesCustomDAGSchedInserter = 1 in { @@ -3318,6 +3763,25 @@ def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), "#ATOMSWAP6432 PSEUDO!", []>; } +// Segmentation support instructions. + +def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), + "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; +def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), + "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; + +// i16mem operand in LAR32rm and GR32 operand in LAR32rr is not a typo. +def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), + "lar{l}\t{$src, $dst|$dst, $src}", []>, TB; +def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), + "lar{l}\t{$src, $dst|$dst, $src}", []>, TB; + +// String manipulation instructions + +def LODSB : I<0xAC, RawFrm, (outs), (ins), "lodsb", []>; +def LODSW : I<0xAD, RawFrm, (outs), (ins), "lodsw", []>, OpSize; +def LODSD : I<0xAD, RawFrm, (outs), (ins), "lodsd", []>; + //===----------------------------------------------------------------------===// // Non-Instruction Patterns //===----------------------------------------------------------------------===// @@ -3345,14 +3809,6 @@ def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst), // Calls // tailcall stuff -def : Pat<(X86tailcall GR32:$dst), - (TAILCALL)>; - -def : Pat<(X86tailcall (i32 tglobaladdr:$dst)), - (TAILCALL)>; -def : Pat<(X86tailcall (i32 texternalsym:$dst)), - (TAILCALL)>; - def : Pat<(X86tcret GR32:$dst, imm:$off), (TCRETURNri GR32:$dst, imm:$off)>; @@ -3362,6 +3818,7 @@ def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off), def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off), (TCRETURNdi texternalsym:$dst, imm:$off)>; +// Normal calls, with various flavors of addresses. 
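On the TCRETURN patterns kept above (the unused X86tailcall patterns are dropped here): a hedged source-level example of the call shape they exist for, where the call becomes a jmp that reuses the caller's frame:

  // With tail-call optimization enabled this compiles to "jmp callee"
  // (TCRETURNdi for a direct symbol); no new frame, no ret in the caller.
  int callee(int);            // assumed external
  static int caller(int X) {
    return callee(X + 1);     // call in tail position
  }
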
def : Pat<(X86call (i32 tglobaladdr:$dst)), (CALLpcrel32 tglobaladdr:$dst)>; def : Pat<(X86call (i32 texternalsym:$dst)), @@ -3472,21 +3929,17 @@ def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>; // extload bool -> extload byte def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>; -def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>, - Requires<[In32BitMode]>; +def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>; def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>; -def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>, - Requires<[In32BitMode]>; +def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>; def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>; def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>; -// anyext -def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>, - Requires<[In32BitMode]>; -def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>, - Requires<[In32BitMode]>; -def : Pat<(i32 (anyext GR16:$src)), - (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>; +// anyext. Define these to do an explicit zero-extend to +// avoid partial-register updates. +def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>; +def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>; +def : Pat<(i32 (anyext GR16:$src)), (MOVZX32rr16 GR16:$src)>; // (and (i32 load), 255) -> (zextload i8) def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 255))), @@ -3567,6 +4020,10 @@ def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))), (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD), x86_subreg_8bit_hi))>, Requires<[In32BitMode]>; +def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))), + (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD), + x86_subreg_8bit_hi))>, + Requires<[In32BitMode]>; def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD), x86_subreg_8bit_hi))>, @@ -3961,6 +4418,243 @@ def : Pat<(parallel (store (i32 (X86dec_flag (loadi32 addr:$dst))), addr:$dst), (implicit EFLAGS)), (DEC32m addr:$dst)>, Requires<[In32BitMode]>; +// Register-Register Or with EFLAGS result +def : Pat<(parallel (X86or_flag GR8:$src1, GR8:$src2), + (implicit EFLAGS)), + (OR8rr GR8:$src1, GR8:$src2)>; +def : Pat<(parallel (X86or_flag GR16:$src1, GR16:$src2), + (implicit EFLAGS)), + (OR16rr GR16:$src1, GR16:$src2)>; +def : Pat<(parallel (X86or_flag GR32:$src1, GR32:$src2), + (implicit EFLAGS)), + (OR32rr GR32:$src1, GR32:$src2)>; + +// Register-Memory Or with EFLAGS result +def : Pat<(parallel (X86or_flag GR8:$src1, (loadi8 addr:$src2)), + (implicit EFLAGS)), + (OR8rm GR8:$src1, addr:$src2)>; +def : Pat<(parallel (X86or_flag GR16:$src1, (loadi16 addr:$src2)), + (implicit EFLAGS)), + (OR16rm GR16:$src1, addr:$src2)>; +def : Pat<(parallel (X86or_flag GR32:$src1, (loadi32 addr:$src2)), + (implicit EFLAGS)), + (OR32rm GR32:$src1, addr:$src2)>; + +// Register-Integer Or with EFLAGS result +def : Pat<(parallel (X86or_flag GR8:$src1, imm:$src2), + (implicit EFLAGS)), + (OR8ri GR8:$src1, imm:$src2)>; +def : Pat<(parallel (X86or_flag GR16:$src1, imm:$src2), + (implicit EFLAGS)), + (OR16ri GR16:$src1, imm:$src2)>; +def : Pat<(parallel (X86or_flag GR32:$src1, imm:$src2), + (implicit EFLAGS)), + (OR32ri GR32:$src1, imm:$src2)>; +def : Pat<(parallel (X86or_flag GR16:$src1, i16immSExt8:$src2), + (implicit EFLAGS)), + (OR16ri8 GR16:$src1, i16immSExt8:$src2)>; +def : Pat<(parallel (X86or_flag GR32:$src1, i32immSExt8:$src2), + (implicit EFLAGS)), + 
(OR32ri8 GR32:$src1, i32immSExt8:$src2)>; + +// Memory-Register Or with EFLAGS result +def : Pat<(parallel (store (X86or_flag (loadi8 addr:$dst), GR8:$src2), + addr:$dst), + (implicit EFLAGS)), + (OR8mr addr:$dst, GR8:$src2)>; +def : Pat<(parallel (store (X86or_flag (loadi16 addr:$dst), GR16:$src2), + addr:$dst), + (implicit EFLAGS)), + (OR16mr addr:$dst, GR16:$src2)>; +def : Pat<(parallel (store (X86or_flag (loadi32 addr:$dst), GR32:$src2), + addr:$dst), + (implicit EFLAGS)), + (OR32mr addr:$dst, GR32:$src2)>; + +// Memory-Integer Or with EFLAGS result +def : Pat<(parallel (store (X86or_flag (loadi8 addr:$dst), imm:$src2), + addr:$dst), + (implicit EFLAGS)), + (OR8mi addr:$dst, imm:$src2)>; +def : Pat<(parallel (store (X86or_flag (loadi16 addr:$dst), imm:$src2), + addr:$dst), + (implicit EFLAGS)), + (OR16mi addr:$dst, imm:$src2)>; +def : Pat<(parallel (store (X86or_flag (loadi32 addr:$dst), imm:$src2), + addr:$dst), + (implicit EFLAGS)), + (OR32mi addr:$dst, imm:$src2)>; +def : Pat<(parallel (store (X86or_flag (loadi16 addr:$dst), i16immSExt8:$src2), + addr:$dst), + (implicit EFLAGS)), + (OR16mi8 addr:$dst, i16immSExt8:$src2)>; +def : Pat<(parallel (store (X86or_flag (loadi32 addr:$dst), i32immSExt8:$src2), + addr:$dst), + (implicit EFLAGS)), + (OR32mi8 addr:$dst, i32immSExt8:$src2)>; + +// Register-Register XOr with EFLAGS result +def : Pat<(parallel (X86xor_flag GR8:$src1, GR8:$src2), + (implicit EFLAGS)), + (XOR8rr GR8:$src1, GR8:$src2)>; +def : Pat<(parallel (X86xor_flag GR16:$src1, GR16:$src2), + (implicit EFLAGS)), + (XOR16rr GR16:$src1, GR16:$src2)>; +def : Pat<(parallel (X86xor_flag GR32:$src1, GR32:$src2), + (implicit EFLAGS)), + (XOR32rr GR32:$src1, GR32:$src2)>; + +// Register-Memory XOr with EFLAGS result +def : Pat<(parallel (X86xor_flag GR8:$src1, (loadi8 addr:$src2)), + (implicit EFLAGS)), + (XOR8rm GR8:$src1, addr:$src2)>; +def : Pat<(parallel (X86xor_flag GR16:$src1, (loadi16 addr:$src2)), + (implicit EFLAGS)), + (XOR16rm GR16:$src1, addr:$src2)>; +def : Pat<(parallel (X86xor_flag GR32:$src1, (loadi32 addr:$src2)), + (implicit EFLAGS)), + (XOR32rm GR32:$src1, addr:$src2)>; + +// Register-Integer XOr with EFLAGS result +def : Pat<(parallel (X86xor_flag GR8:$src1, imm:$src2), + (implicit EFLAGS)), + (XOR8ri GR8:$src1, imm:$src2)>; +def : Pat<(parallel (X86xor_flag GR16:$src1, imm:$src2), + (implicit EFLAGS)), + (XOR16ri GR16:$src1, imm:$src2)>; +def : Pat<(parallel (X86xor_flag GR32:$src1, imm:$src2), + (implicit EFLAGS)), + (XOR32ri GR32:$src1, imm:$src2)>; +def : Pat<(parallel (X86xor_flag GR16:$src1, i16immSExt8:$src2), + (implicit EFLAGS)), + (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>; +def : Pat<(parallel (X86xor_flag GR32:$src1, i32immSExt8:$src2), + (implicit EFLAGS)), + (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>; + +// Memory-Register XOr with EFLAGS result +def : Pat<(parallel (store (X86xor_flag (loadi8 addr:$dst), GR8:$src2), + addr:$dst), + (implicit EFLAGS)), + (XOR8mr addr:$dst, GR8:$src2)>; +def : Pat<(parallel (store (X86xor_flag (loadi16 addr:$dst), GR16:$src2), + addr:$dst), + (implicit EFLAGS)), + (XOR16mr addr:$dst, GR16:$src2)>; +def : Pat<(parallel (store (X86xor_flag (loadi32 addr:$dst), GR32:$src2), + addr:$dst), + (implicit EFLAGS)), + (XOR32mr addr:$dst, GR32:$src2)>; + +// Memory-Integer XOr with EFLAGS result +def : Pat<(parallel (store (X86xor_flag (loadi8 addr:$dst), imm:$src2), + addr:$dst), + (implicit EFLAGS)), + (XOR8mi addr:$dst, imm:$src2)>; +def : Pat<(parallel (store (X86xor_flag (loadi16 addr:$dst), imm:$src2), + addr:$dst), + (implicit 
EFLAGS)), + (XOR16mi addr:$dst, imm:$src2)>; +def : Pat<(parallel (store (X86xor_flag (loadi32 addr:$dst), imm:$src2), + addr:$dst), + (implicit EFLAGS)), + (XOR32mi addr:$dst, imm:$src2)>; +def : Pat<(parallel (store (X86xor_flag (loadi16 addr:$dst), i16immSExt8:$src2), + addr:$dst), + (implicit EFLAGS)), + (XOR16mi8 addr:$dst, i16immSExt8:$src2)>; +def : Pat<(parallel (store (X86xor_flag (loadi32 addr:$dst), i32immSExt8:$src2), + addr:$dst), + (implicit EFLAGS)), + (XOR32mi8 addr:$dst, i32immSExt8:$src2)>; + +// Register-Register And with EFLAGS result +def : Pat<(parallel (X86and_flag GR8:$src1, GR8:$src2), + (implicit EFLAGS)), + (AND8rr GR8:$src1, GR8:$src2)>; +def : Pat<(parallel (X86and_flag GR16:$src1, GR16:$src2), + (implicit EFLAGS)), + (AND16rr GR16:$src1, GR16:$src2)>; +def : Pat<(parallel (X86and_flag GR32:$src1, GR32:$src2), + (implicit EFLAGS)), + (AND32rr GR32:$src1, GR32:$src2)>; + +// Register-Memory And with EFLAGS result +def : Pat<(parallel (X86and_flag GR8:$src1, (loadi8 addr:$src2)), + (implicit EFLAGS)), + (AND8rm GR8:$src1, addr:$src2)>; +def : Pat<(parallel (X86and_flag GR16:$src1, (loadi16 addr:$src2)), + (implicit EFLAGS)), + (AND16rm GR16:$src1, addr:$src2)>; +def : Pat<(parallel (X86and_flag GR32:$src1, (loadi32 addr:$src2)), + (implicit EFLAGS)), + (AND32rm GR32:$src1, addr:$src2)>; + +// Register-Integer And with EFLAGS result +def : Pat<(parallel (X86and_flag GR8:$src1, imm:$src2), + (implicit EFLAGS)), + (AND8ri GR8:$src1, imm:$src2)>; +def : Pat<(parallel (X86and_flag GR16:$src1, imm:$src2), + (implicit EFLAGS)), + (AND16ri GR16:$src1, imm:$src2)>; +def : Pat<(parallel (X86and_flag GR32:$src1, imm:$src2), + (implicit EFLAGS)), + (AND32ri GR32:$src1, imm:$src2)>; +def : Pat<(parallel (X86and_flag GR16:$src1, i16immSExt8:$src2), + (implicit EFLAGS)), + (AND16ri8 GR16:$src1, i16immSExt8:$src2)>; +def : Pat<(parallel (X86and_flag GR32:$src1, i32immSExt8:$src2), + (implicit EFLAGS)), + (AND32ri8 GR32:$src1, i32immSExt8:$src2)>; + +// Memory-Register And with EFLAGS result +def : Pat<(parallel (store (X86and_flag (loadi8 addr:$dst), GR8:$src2), + addr:$dst), + (implicit EFLAGS)), + (AND8mr addr:$dst, GR8:$src2)>; +def : Pat<(parallel (store (X86and_flag (loadi16 addr:$dst), GR16:$src2), + addr:$dst), + (implicit EFLAGS)), + (AND16mr addr:$dst, GR16:$src2)>; +def : Pat<(parallel (store (X86and_flag (loadi32 addr:$dst), GR32:$src2), + addr:$dst), + (implicit EFLAGS)), + (AND32mr addr:$dst, GR32:$src2)>; + +// Memory-Integer And with EFLAGS result +def : Pat<(parallel (store (X86and_flag (loadi8 addr:$dst), imm:$src2), + addr:$dst), + (implicit EFLAGS)), + (AND8mi addr:$dst, imm:$src2)>; +def : Pat<(parallel (store (X86and_flag (loadi16 addr:$dst), imm:$src2), + addr:$dst), + (implicit EFLAGS)), + (AND16mi addr:$dst, imm:$src2)>; +def : Pat<(parallel (store (X86and_flag (loadi32 addr:$dst), imm:$src2), + addr:$dst), + (implicit EFLAGS)), + (AND32mi addr:$dst, imm:$src2)>; +def : Pat<(parallel (store (X86and_flag (loadi16 addr:$dst), i16immSExt8:$src2), + addr:$dst), + (implicit EFLAGS)), + (AND16mi8 addr:$dst, i16immSExt8:$src2)>; +def : Pat<(parallel (store (X86and_flag (loadi32 addr:$dst), i32immSExt8:$src2), + addr:$dst), + (implicit EFLAGS)), + (AND32mi8 addr:$dst, i32immSExt8:$src2)>; + +// -disable-16bit support. 
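// With 16-bit support disabled, i16 is not a legal type, so 16-bit memory
// accesses reach instruction selection as i32 operations wrapped in
// truncate/extend nodes. The patterns below match those wrapped forms back
// onto the plain 16-bit moves; roughly, for the register store case:
//   store i16 (trunc i32 %x)  ==>  movw %x16, (mem)
// where %x16 is the low 16-bit subregister extracted via EXTRACT_SUBREG.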
+def : Pat<(truncstorei16 (i32 imm:$src), addr:$dst), + (MOV16mi addr:$dst, imm:$src)>; +def : Pat<(truncstorei16 GR32:$src, addr:$dst), + (MOV16mr addr:$dst, (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit))>; +def : Pat<(i32 (sextloadi16 addr:$dst)), + (MOVSX32rm16 addr:$dst)>; +def : Pat<(i32 (zextloadi16 addr:$dst)), + (MOVZX32rm16 addr:$dst)>; +def : Pat<(i32 (extloadi16 addr:$dst)), + (MOVZX32rm16 addr:$dst)>; + //===----------------------------------------------------------------------===// // Floating Point Stack Support //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index b79a006..ce76b4e 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -1,10 +1,10 @@ //====- X86InstrMMX.td - Describe the X86 Instruction Set --*- tablegen -*-===// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file describes the X86 MMX instruction set, defining the instructions, @@ -67,16 +67,18 @@ def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs), // MMX Multiclasses //===----------------------------------------------------------------------===// -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { // MMXI_binop_rm - Simple MMX binary operator. multiclass MMXI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, ValueType OpVT, bit Commutable = 0> { - def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), + def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), + (ins VR64:$src1, VR64:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR64:$dst, (OpVT (OpNode VR64:$src1, VR64:$src2)))]> { let isCommutable = Commutable; } - def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), + def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst), + (ins VR64:$src1, i64mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR64:$dst, (OpVT (OpNode VR64:$src1, (bitconvert @@ -85,12 +87,14 @@ let isTwoAddress = 1 in { multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId, bit Commutable = 0> { - def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), + def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), + (ins VR64:$src1, VR64:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]> { let isCommutable = Commutable; } - def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), + def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst), + (ins VR64:$src1, i64mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR64:$dst, (IntId VR64:$src1, (bitconvert (load_mmx addr:$src2))))]>; @@ -139,8 +143,10 @@ let isTwoAddress = 1 in { // MMX EMMS & FEMMS Instructions //===----------------------------------------------------------------------===// -def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms", [(int_x86_mmx_emms)]>; -def MMX_FEMMS : MMXI<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>; +def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms", + [(int_x86_mmx_emms)]>; +def MMX_FEMMS : MMXI<0x0E, RawFrm, (outs), (ins), "femms", + [(int_x86_mmx_femms)]>; //===----------------------------------------------------------------------===// // MMX Scalar Instructions @@ 
-149,12 +155,14 @@ def MMX_FEMMS : MMXI<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)] // Data Transfer Instructions def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (v2i32 (scalar_to_vector GR32:$src)))]>; + [(set VR64:$dst, + (v2i32 (scalar_to_vector GR32:$src)))]>; let canFoldAsLoad = 1, isReMaterializable = 1 in def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (v2i32 (scalar_to_vector (loadi32 addr:$src))))]>; -let mayStore = 1 in + [(set VR64:$dst, + (v2i32 (scalar_to_vector (loadi32 addr:$src))))]>; +let mayStore = 1 in def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src), "movd\t{$src, $dst|$dst, $src}", []>; @@ -164,9 +172,16 @@ def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), []>; let neverHasSideEffects = 1 in -def MMX_MOVD64from64rr : MMXRI<0x7E, MRMSrcReg, +// These are 64 bit moves, but since the OS X assembler doesn't +// recognize a register-register movq, we write them as +// movd. +def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR64:$src), "movd\t{$src, $dst|$dst, $src}", []>; +def MMX_MOVD64rrv164 : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set VR64:$dst, + (v1i64 (scalar_to_vector GR64:$src)))]>; let neverHasSideEffects = 1 in def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), @@ -179,21 +194,21 @@ def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), "movq\t{$src, $dst|$dst, $src}", [(store (v1i64 VR64:$src), addr:$dst)]>; -def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMDestMem, (outs VR64:$dst), (ins VR128:$src), +def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (v1i64 (bitconvert (i64 (vector_extract (v2i64 VR128:$src), (iPTR 0))))))]>; -def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMDestMem, (outs VR128:$dst), (ins VR64:$src), +def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (movl immAllZerosV, (v2i64 (scalar_to_vector (i64 (bitconvert VR64:$src))))))]>; let neverHasSideEffects = 1 in -def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMDestMem, (outs FR64:$dst), (ins VR64:$src), +def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", []>; def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), @@ -207,7 +222,8 @@ def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src), [(set VR64:$dst, (v2i32 (X86vzmovl (v2i32 (scalar_to_vector GR32:$src)))))]>; let AddedComplexity = 20 in -def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src), +def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), + (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (v2i32 (X86vzmovl (v2i32 @@ -265,7 +281,7 @@ defm MMX_PAND : MMXI_binop_rm_v1i64<0xDB, "pand", and, 1>; defm MMX_POR : MMXI_binop_rm_v1i64<0xEB, "por" , or, 1>; defm MMX_PXOR : MMXI_binop_rm_v1i64<0xEF, "pxor", xor, 1>; -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { def MMX_PANDNrr : MMXI<0xDF, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "pandn\t{$src2, $dst|$dst, $src2}", @@ -316,33 +332,33 @@ defm MMX_PCMPGTD : MMXI_binop_rm_int<0x66, "pcmpgtd", 
int_x86_mmx_pcmpgt_d>; // Conversion Instructions // -- Unpack Instructions -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { // Unpack High Packed Data Instructions - def MMX_PUNPCKHBWrr : MMXI<0x68, MRMSrcReg, + def MMX_PUNPCKHBWrr : MMXI<0x68, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpckhbw\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, (v8i8 (mmx_unpckh VR64:$src1, VR64:$src2)))]>; - def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem, + def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), "punpckhbw\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, (v8i8 (mmx_unpckh VR64:$src1, (bc_v8i8 (load_mmx addr:$src2)))))]>; - def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg, + def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpckhwd\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, (v4i16 (mmx_unpckh VR64:$src1, VR64:$src2)))]>; - def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem, + def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), "punpckhwd\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, (v4i16 (mmx_unpckh VR64:$src1, (bc_v4i16 (load_mmx addr:$src2)))))]>; - def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg, + def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpckhdq\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, @@ -379,12 +395,12 @@ let isTwoAddress = 1 in { (v4i16 (mmx_unpckl VR64:$src1, (bc_v4i16 (load_mmx addr:$src2)))))]>; - def MMX_PUNPCKLDQrr : MMXI<0x62, MRMSrcReg, + def MMX_PUNPCKLDQrr : MMXI<0x62, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpckldq\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, (v2i32 (mmx_unpckl VR64:$src1, VR64:$src2)))]>; - def MMX_PUNPCKLDQrm : MMXI<0x62, MRMSrcMem, + def MMX_PUNPCKLDQrm : MMXI<0x62, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), "punpckldq\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, @@ -415,19 +431,22 @@ let neverHasSideEffects = 1 in { def MMX_CVTPD2PIrr : MMX2I<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), "cvtpd2pi\t{$src, $dst|$dst, $src}", []>; let mayLoad = 1 in -def MMX_CVTPD2PIrm : MMX2I<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src), +def MMX_CVTPD2PIrm : MMX2I<0x2D, MRMSrcMem, (outs VR64:$dst), + (ins f128mem:$src), "cvtpd2pi\t{$src, $dst|$dst, $src}", []>; def MMX_CVTPI2PDrr : MMX2I<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src), "cvtpi2pd\t{$src, $dst|$dst, $src}", []>; let mayLoad = 1 in -def MMX_CVTPI2PDrm : MMX2I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), +def MMX_CVTPI2PDrm : MMX2I<0x2A, MRMSrcMem, (outs VR128:$dst), + (ins i64mem:$src), "cvtpi2pd\t{$src, $dst|$dst, $src}", []>; def MMX_CVTPI2PSrr : MMXI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src), "cvtpi2ps\t{$src, $dst|$dst, $src}", []>; let mayLoad = 1 in -def MMX_CVTPI2PSrm : MMXI<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), +def MMX_CVTPI2PSrm : MMXI<0x2A, MRMSrcMem, (outs VR128:$dst), + (ins i64mem:$src), "cvtpi2ps\t{$src, $dst|$dst, $src}", []>; def MMX_CVTPS2PIrr : MMXI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), @@ -439,7 +458,8 @@ def MMX_CVTPS2PIrm : MMXI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src), def MMX_CVTTPD2PIrr : MMX2I<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), "cvttpd2pi\t{$src, $dst|$dst, $src}", []>; let mayLoad = 1 in -def MMX_CVTTPD2PIrm : MMX2I<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src), +def MMX_CVTTPD2PIrm : MMX2I<0x2C, MRMSrcMem, (outs VR64:$dst), + (ins 
f128mem:$src), "cvttpd2pi\t{$src, $dst|$dst, $src}", []>; def MMX_CVTTPS2PIrr : MMXI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), @@ -459,14 +479,16 @@ def MMX_PEXTRWri : MMXIi8<0xC5, MRMSrcReg, "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (MMX_X86pextrw (v4i16 VR64:$src1), (iPTR imm:$src2)))]>; -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { def MMX_PINSRWrri : MMXIi8<0xC4, MRMSrcReg, - (outs VR64:$dst), (ins VR64:$src1, GR32:$src2, i16i8imm:$src3), + (outs VR64:$dst), (ins VR64:$src1, GR32:$src2, + i16i8imm:$src3), "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR64:$dst, (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1), - GR32:$src2, (iPTR imm:$src3))))]>; + GR32:$src2,(iPTR imm:$src3))))]>; def MMX_PINSRWrmi : MMXIi8<0xC4, MRMSrcMem, - (outs VR64:$dst), (ins VR64:$src1, i16mem:$src2, i16i8imm:$src3), + (outs VR64:$dst), (ins VR64:$src1, i16mem:$src2, + i16i8imm:$src3), "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR64:$dst, (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1), @@ -494,7 +516,7 @@ def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask), //===----------------------------------------------------------------------===// // Alias instructions that map zero vector to pxor. -let isReMaterializable = 1 in { +let isReMaterializable = 1, isCodeGenOnly = 1 in { def MMX_V_SET0 : MMXI<0xEF, MRMInitReg, (outs VR64:$dst), (ins), "pxor\t$dst, $dst", [(set VR64:$dst, (v2i32 immAllZerosV))]>; @@ -579,7 +601,7 @@ def : Pat<(f64 (bitconvert (v8i8 VR64:$src))), let AddedComplexity = 20 in { def : Pat<(v2i32 (X86vzmovl (bc_v2i32 (load_mmx addr:$src)))), - (MMX_MOVZDI2PDIrm addr:$src)>; + (MMX_MOVZDI2PDIrm addr:$src)>; } // Clear top half. @@ -657,6 +679,33 @@ def : Pat<(v8i8 (bitconvert (i64 (vector_extract (v2i64 VR128:$src), (iPTR 0))))), (v8i8 (MMX_MOVDQ2Qrr VR128:$src))>; +// Patterns for vector comparisons +def : Pat<(v8i8 (X86pcmpeqb VR64:$src1, VR64:$src2)), + (MMX_PCMPEQBrr VR64:$src1, VR64:$src2)>; +def : Pat<(v8i8 (X86pcmpeqb VR64:$src1, (bitconvert (load_mmx addr:$src2)))), + (MMX_PCMPEQBrm VR64:$src1, addr:$src2)>; +def : Pat<(v4i16 (X86pcmpeqw VR64:$src1, VR64:$src2)), + (MMX_PCMPEQWrr VR64:$src1, VR64:$src2)>; +def : Pat<(v4i16 (X86pcmpeqw VR64:$src1, (bitconvert (load_mmx addr:$src2)))), + (MMX_PCMPEQWrm VR64:$src1, addr:$src2)>; +def : Pat<(v2i32 (X86pcmpeqd VR64:$src1, VR64:$src2)), + (MMX_PCMPEQDrr VR64:$src1, VR64:$src2)>; +def : Pat<(v2i32 (X86pcmpeqd VR64:$src1, (bitconvert (load_mmx addr:$src2)))), + (MMX_PCMPEQDrm VR64:$src1, addr:$src2)>; + +def : Pat<(v8i8 (X86pcmpgtb VR64:$src1, VR64:$src2)), + (MMX_PCMPGTBrr VR64:$src1, VR64:$src2)>; +def : Pat<(v8i8 (X86pcmpgtb VR64:$src1, (bitconvert (load_mmx addr:$src2)))), + (MMX_PCMPGTBrm VR64:$src1, addr:$src2)>; +def : Pat<(v4i16 (X86pcmpgtw VR64:$src1, VR64:$src2)), + (MMX_PCMPGTWrr VR64:$src1, VR64:$src2)>; +def : Pat<(v4i16 (X86pcmpgtw VR64:$src1, (bitconvert (load_mmx addr:$src2)))), + (MMX_PCMPGTWrm VR64:$src1, addr:$src2)>; +def : Pat<(v2i32 (X86pcmpgtd VR64:$src1, VR64:$src2)), + (MMX_PCMPGTDrr VR64:$src1, VR64:$src2)>; +def : Pat<(v2i32 (X86pcmpgtd VR64:$src1, (bitconvert (load_mmx addr:$src2)))), + (MMX_PCMPGTDrm VR64:$src1, addr:$src2)>; + // CMOV* - Used to implement the SELECT DAG operation. Expanded by the // scheduler into a branch sequence. // These are expanded by the scheduler. 
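// A sketch of that expansion (the shape, not the exact machine-instruction
// sequence): each CMOV pseudo becomes a small diamond of blocks joined by
// a phi,
//     jcc   take_true
//     <copy $false into a temp>
//     jmp   done
//   take_true:
//     <copy $true into a temp>
//   done:
//     <phi of the two copies>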
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 5d6ef36..96fc932 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1,10 +1,10 @@ //====- X86InstrSSE.td - Describe the X86 Instruction Set --*- tablegen -*-===// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file describes the X86 SSE instruction set, defining the instructions, @@ -36,22 +36,22 @@ def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>; def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>; def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>; def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>; -def X86pshufb : SDNode<"X86ISD::PSHUFB", +def X86pshufb : SDNode<"X86ISD::PSHUFB", SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; def X86pextrb : SDNode<"X86ISD::PEXTRB", SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>; def X86pextrw : SDNode<"X86ISD::PEXTRW", SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>; -def X86pinsrb : SDNode<"X86ISD::PINSRB", +def X86pinsrb : SDNode<"X86ISD::PINSRB", SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>; -def X86pinsrw : SDNode<"X86ISD::PINSRW", +def X86pinsrw : SDNode<"X86ISD::PINSRW", SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>, SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>; -def X86insrtps : SDNode<"X86ISD::INSERTPS", +def X86insrtps : SDNode<"X86ISD::INSERTPS", SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>, - SDTCisVT<2, f32>, SDTCisPtrTy<3>]>>; + SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>; def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL", SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>; def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad, @@ -69,6 +69,10 @@ def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>; def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>; def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>; +def SDTX86CmpPTest : SDTypeProfile<0, 2, [SDTCisVT<0, v4f32>, + SDTCisVT<1, v4f32>]>; +def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; + //===----------------------------------------------------------------------===// // SSE Complex Patterns //===----------------------------------------------------------------------===// @@ -83,11 +87,13 @@ def sse_load_f64 : ComplexPattern<v2f64, 5, "SelectScalarSSELoad", [], def ssmem : Operand<v4f32> { let PrintMethod = "printf32mem"; - let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm, i8imm); + let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm); + let ParserMatchClass = X86MemAsmOperand; } def sdmem : Operand<v2f64> { let PrintMethod = "printf64mem"; - let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm, i8imm); + let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm); + let ParserMatchClass = X86MemAsmOperand; } //===----------------------------------------------------------------------===// @@ -179,13 +185,13 @@ def SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{ return getI8Imm(X86::getShuffleSHUFImmediate(N)); }]>; -// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to +// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to // PSHUFHW imm. 
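// Each of these SDNodeXForm bodies runs at selection time: it inspects a
// vector_shuffle node's mask and folds it into the single imm8 the hardware
// shuffle expects, so output patterns can apply the transform wherever an
// immediate operand is needed, e.g. (illustrative use):
//   (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$mask))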
def SHUFFLE_get_pshufhw_imm : SDNodeXForm<vector_shuffle, [{ return getI8Imm(X86::getShufflePSHUFHWImmediate(N)); }]>; -// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to +// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to // PSHUFLW imm. def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{ return getI8Imm(X86::getShufflePSHUFLWImmediate(N)); @@ -360,25 +366,25 @@ def Int_CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), [(set VR64:$dst, (int_x86_sse_cvtps2pi VR128:$src))]>; def Int_CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src), "cvtps2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvtps2pi + [(set VR64:$dst, (int_x86_sse_cvtps2pi (load addr:$src)))]>; def Int_CVTTPS2PIrr: PSI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), "cvttps2pi\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (int_x86_sse_cvttps2pi VR128:$src))]>; def Int_CVTTPS2PIrm: PSI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src), "cvttps2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvttps2pi + [(set VR64:$dst, (int_x86_sse_cvttps2pi (load addr:$src)))]>; let Constraints = "$src1 = $dst" in { - def Int_CVTPI2PSrr : PSI<0x2A, MRMSrcReg, + def Int_CVTPI2PSrr : PSI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR64:$src2), "cvtpi2ps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1, VR64:$src2))]>; - def Int_CVTPI2PSrm : PSI<0x2A, MRMSrcMem, + def Int_CVTPI2PSrm : PSI<0x2A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i64mem:$src2), "cvtpi2ps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1, + [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1, (load addr:$src2)))]>; } @@ -407,11 +413,11 @@ let Constraints = "$src1 = $dst" in { // Comparison instructions let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in { - def CMPSSrr : SSIi8<0xC2, MRMSrcReg, + def CMPSSrr : SSIi8<0xC2, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src, SSECC:$cc), "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>; let mayLoad = 1 in - def CMPSSrm : SSIi8<0xC2, MRMSrcMem, + def CMPSSrm : SSIi8<0xC2, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, SSECC:$cc), "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>; } @@ -428,13 +434,15 @@ def UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs), (ins FR32:$src1, f32mem:$src2), // Aliases to match intrinsics which expect XMM operand(s). let Constraints = "$src1 = $dst" in { - def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc), + def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src, + SSECC:$cc), "cmp${cc}ss\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1, - VR128:$src, imm:$cc))]>; - def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f32mem:$src, SSECC:$cc), + VR128:$src, imm:$cc))]>; + def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f32mem:$src, + SSECC:$cc), "cmp${cc}ss\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1, (load addr:$src), imm:$cc))]>; @@ -460,18 +468,19 @@ def Int_COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), (implicit EFLAGS)]>; } // Defs = [EFLAGS] -// Aliases of packed SSE1 instructions for scalar use. These all have names that -// start with 'Fs'. +// Aliases of packed SSE1 instructions for scalar use. These all have names +// that start with 'Fs'. 
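// The 'Fs' aliases constrain instructions that really operate on the whole
// XMM register to FR32, so scalar f32 code can use cheap packed operations
// (xorps, andps, movaps) whose upper lanes it never observes. The shape of
// such an alias (FsMOVAPSrr appears below):
//   def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
//                        "movaps\t{$src, $dst|$dst, $src}", []>;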
// Alias instructions that map fld0 to pxor for sse. -let isReMaterializable = 1, isAsCheapAsAMove = 1 in +let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1, + canFoldAsLoad = 1 in def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "pxor\t$dst, $dst", [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1]>, TB, OpSize; // Alias instruction to do FR32 reg-to-reg copy using movaps. Upper bits are // disregarded. -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1 in def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), "movaps\t{$src, $dst|$dst, $src}", []>; @@ -552,7 +561,7 @@ multiclass basic_sse1_fp_binop_rm<bits<8> opc, string OpcodeStr, (ins FR32:$src1, f32mem:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>; - + // Vector operation, reg+reg. def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), @@ -616,7 +625,7 @@ multiclass sse1_fp_binop_rm<bits<8> opc, string OpcodeStr, (ins FR32:$src1, f32mem:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>; - + // Vector operation, reg+reg. def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), @@ -671,7 +680,7 @@ defm MIN : sse1_fp_binop_rm<0x5D, "min", X86fmin, // SSE packed FP Instructions // Move Instructions -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1 in def MOVAPSrr : PSI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movaps\t{$src, $dst|$dst, $src}", []>; let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in @@ -708,13 +717,13 @@ let Constraints = "$src1 = $dst" in { def MOVLPSrm : PSI<0x12, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movlps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, + [(set VR128:$dst, (movlp VR128:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>; def MOVHPSrm : PSI<0x16, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movhps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, + [(set VR128:$dst, (movhp VR128:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>; } // AddedComplexity @@ -789,7 +798,7 @@ multiclass sse1_fp_unop_rm<bits<8> opc, string OpcodeStr, def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), [(set FR32:$dst, (OpNode (load addr:$src)))]>; - + // Vector operation, reg. 
def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), @@ -890,12 +899,12 @@ let Constraints = "$src1 = $dst" in { } let Constraints = "$src1 = $dst" in { - def CMPPSrri : PSIi8<0xC2, MRMSrcReg, + def CMPPSrri : PSIi8<0xC2, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc), "cmp${cc}ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1, VR128:$src, imm:$cc))]>; - def CMPPSrmi : PSIi8<0xC2, MRMSrcMem, + def CMPPSrmi : PSIi8<0xC2, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc), "cmp${cc}ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1, @@ -909,13 +918,13 @@ def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)), // Shuffle and unpack instructions let Constraints = "$src1 = $dst" in { let isConvertibleToThreeAddress = 1 in // Convert to pshufd - def SHUFPSrri : PSIi8<0xC6, MRMSrcReg, + def SHUFPSrri : PSIi8<0xC6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, (v4f32 (shufp:$src3 VR128:$src1, VR128:$src2)))]>; - def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem, + def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2, i8imm:$src3), "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}", @@ -924,24 +933,24 @@ let Constraints = "$src1 = $dst" in { VR128:$src1, (memopv4f32 addr:$src2))))]>; let AddedComplexity = 10 in { - def UNPCKHPSrr : PSI<0x15, MRMSrcReg, + def UNPCKHPSrr : PSI<0x15, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "unpckhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4f32 (unpckh VR128:$src1, VR128:$src2)))]>; - def UNPCKHPSrm : PSI<0x15, MRMSrcMem, + def UNPCKHPSrm : PSI<0x15, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), "unpckhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4f32 (unpckh VR128:$src1, (memopv4f32 addr:$src2))))]>; - def UNPCKLPSrr : PSI<0x14, MRMSrcReg, + def UNPCKLPSrr : PSI<0x14, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "unpcklps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4f32 (unpckl VR128:$src1, VR128:$src2)))]>; - def UNPCKLPSrm : PSI<0x14, MRMSrcMem, + def UNPCKLPSrm : PSI<0x14, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), "unpcklps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, @@ -984,7 +993,8 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), // Alias instructions that map zero vector to pxor / xorp* for sse. // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-zeros value if folding it would be beneficial. 
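// "xorps %xmm, %xmm" has no input dependency, so rematerializing it is
// always safe and usually cheaper than a spill/reload; isCodeGenOnly keeps
// this compiler-internal alias out of the assembler tables. The folded
// alternative, when profitable, is a constant-pool load (label illustrative):
//   xorps  %xmm0, %xmm0         ## remat form
//   movaps LCPI0_0, %xmm0       ## folded all-zeros load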
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1 in +let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, + isCodeGenOnly = 1 in def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "xorps\t$dst, $dst", [(set VR128:$dst, (v4i32 immAllZerosV))]>; @@ -1046,14 +1056,14 @@ let AddedComplexity = 20 in def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src), "movss\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4f32 (X86vzmovl (v4f32 (scalar_to_vector - (loadf32 addr:$src))))))]>; + (loadf32 addr:$src))))))]>; def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), (MOVZSS2PSrm addr:$src)>; -//===----------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// // SSE2 Instructions -//===----------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// // Move Instructions let neverHasSideEffects = 1 in @@ -1077,7 +1087,7 @@ def CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f64mem:$src), def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (fround FR64:$src))]>; -def CVTSD2SSrm : SDI<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), +def CVTSD2SSrm : SDI<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (fround (loadf64 addr:$src)))]>; def CVTSI2SDrr : SDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR32:$src), @@ -1087,6 +1097,27 @@ def CVTSI2SDrm : SDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i32mem:$src), "cvtsi2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>; +def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvtpd2dq\t{$src, $dst|$dst, $src}", []>; +def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtpd2dq\t{$src, $dst|$dst, $src}", []>; +def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvtdq2pd\t{$src, $dst|$dst, $src}", []>; +def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtdq2pd\t{$src, $dst|$dst, $src}", []>; +def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtps2dq\t{$src, $dst|$dst, $src}", []>; +def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvtps2dq\t{$src, $dst|$dst, $src}", []>; +def CVTDQ2PSrr : PSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtdq2ps\t{$src, $dst|$dst, $src}", []>; +def CVTDQ2PSrm : PSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvtdq2ps\t{$src, $dst|$dst, $src}", []>; +def COMISDrr: PDI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), + "comisd\t{$src2, $src1|$src1, $src2}", []>; +def COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), + "comisd\t{$src2, $src1|$src1, $src2}", []>; + // SSE2 instructions with XS prefix def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", @@ -1112,21 +1143,21 @@ def Int_CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), [(set VR64:$dst, (int_x86_sse_cvtpd2pi VR128:$src))]>; def Int_CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src), "cvtpd2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvtpd2pi + [(set VR64:$dst, (int_x86_sse_cvtpd2pi (memop addr:$src)))]>; def 
Int_CVTTPD2PIrr: PDI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), "cvttpd2pi\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (int_x86_sse_cvttpd2pi VR128:$src))]>; def Int_CVTTPD2PIrm: PDI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src), "cvttpd2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvttpd2pi + [(set VR64:$dst, (int_x86_sse_cvttpd2pi (memop addr:$src)))]>; def Int_CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src), "cvtpi2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse_cvtpi2pd VR64:$src))]>; def Int_CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "cvtpi2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse_cvtpi2pd + [(set VR128:$dst, (int_x86_sse_cvtpi2pd (load addr:$src)))]>; // Aliases for intrinsics @@ -1141,11 +1172,11 @@ def Int_CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src), // Comparison instructions let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in { - def CMPSDrr : SDIi8<0xC2, MRMSrcReg, + def CMPSDrr : SDIi8<0xC2, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src, SSECC:$cc), "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>; let mayLoad = 1 in - def CMPSDrm : SDIi8<0xC2, MRMSrcMem, + def CMPSDrm : SDIi8<0xC2, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, SSECC:$cc), "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>; } @@ -1162,13 +1193,15 @@ def UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs), (ins FR64:$src1, f64mem:$src2), // Aliases to match intrinsics which expect XMM operand(s). let Constraints = "$src1 = $dst" in { - def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc), + def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src, + SSECC:$cc), "cmp${cc}sd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1, VR128:$src, imm:$cc))]>; - def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f64mem:$src, SSECC:$cc), + def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f64mem:$src, + SSECC:$cc), "cmp${cc}sd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1, (load addr:$src), imm:$cc))]>; @@ -1194,11 +1227,12 @@ def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), (implicit EFLAGS)]>; } // Defs = [EFLAGS] -// Aliases of packed SSE2 instructions for scalar use. These all have names that -// start with 'Fs'. +// Aliases of packed SSE2 instructions for scalar use. These all have names +// that start with 'Fs'. // Alias instructions that map fld0 to pxor for sse. -let isReMaterializable = 1, isAsCheapAsAMove = 1 in +let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1, + canFoldAsLoad = 1 in def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "pxor\t$dst, $dst", [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2]>, TB, OpSize; @@ -1286,7 +1320,7 @@ multiclass basic_sse2_fp_binop_rm<bits<8> opc, string OpcodeStr, (ins FR64:$src1, f64mem:$src2), !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"), [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>; - + // Vector operation, reg+reg. 
def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), @@ -1350,7 +1384,7 @@ multiclass sse2_fp_binop_rm<bits<8> opc, string OpcodeStr, (ins FR64:$src1, f64mem:$src2), !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"), [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>; - + // Vector operation, reg+reg. def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), @@ -1402,7 +1436,7 @@ defm MAX : sse2_fp_binop_rm<0x5F, "max", X86fmax, defm MIN : sse2_fp_binop_rm<0x5D, "min", X86fmin, int_x86_sse2_min_sd, int_x86_sse2_min_pd>; -//===----------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// // SSE packed FP Instructions // Move Instructions @@ -1442,13 +1476,13 @@ let Constraints = "$src1 = $dst" in { def MOVLPDrm : PDI<0x12, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movlpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, + [(set VR128:$dst, (v2f64 (movlp VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))))]>; def MOVHPDrm : PDI<0x16, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movhpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, + [(set VR128:$dst, (v2f64 (movhp VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))))]>; } // AddedComplexity @@ -1564,7 +1598,7 @@ def Int_CVTSD2SSrr: SDI<0x5A, MRMSrcReg, [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))]>; def Int_CVTSD2SSrm: SDI<0x5A, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), + (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "cvtsd2ss\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, (load addr:$src2)))]>; @@ -1612,7 +1646,7 @@ multiclass sse2_fp_unop_rm<bits<8> opc, string OpcodeStr, def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), [(set FR64:$dst, (OpNode (load addr:$src)))]>; - + // Vector operation, reg. 
def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), @@ -1712,12 +1746,12 @@ let Constraints = "$src1 = $dst" in { } let Constraints = "$src1 = $dst" in { - def CMPPDrri : PDIi8<0xC2, MRMSrcReg, + def CMPPDrri : PDIi8<0xC2, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc), "cmp${cc}pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1, VR128:$src, imm:$cc))]>; - def CMPPDrmi : PDIi8<0xC2, MRMSrcMem, + def CMPPDrmi : PDIi8<0xC2, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc), "cmp${cc}pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1, @@ -1730,12 +1764,12 @@ def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)), // Shuffle and unpack instructions let Constraints = "$src1 = $dst" in { - def SHUFPDrri : PDIi8<0xC6, MRMSrcReg, + def SHUFPDrri : PDIi8<0xC6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, (v2f64 (shufp:$src3 VR128:$src1, VR128:$src2)))]>; - def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem, + def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2, i8imm:$src3), "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}", @@ -1744,24 +1778,24 @@ let Constraints = "$src1 = $dst" in { VR128:$src1, (memopv2f64 addr:$src2))))]>; let AddedComplexity = 10 in { - def UNPCKHPDrr : PDI<0x15, MRMSrcReg, + def UNPCKHPDrr : PDI<0x15, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "unpckhpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v2f64 (unpckh VR128:$src1, VR128:$src2)))]>; - def UNPCKHPDrm : PDI<0x15, MRMSrcMem, + def UNPCKHPDrm : PDI<0x15, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), "unpckhpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v2f64 (unpckh VR128:$src1, (memopv2f64 addr:$src2))))]>; - def UNPCKLPDrr : PDI<0x14, MRMSrcReg, + def UNPCKLPDrr : PDI<0x14, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "unpcklpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v2f64 (unpckl VR128:$src1, VR128:$src2)))]>; - def UNPCKLPDrm : PDI<0x14, MRMSrcMem, + def UNPCKLPDrm : PDI<0x14, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), "unpcklpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, @@ -1770,7 +1804,7 @@ let Constraints = "$src1 = $dst" in { } // Constraints = "$src1 = $dst" -//===----------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// // SSE integer instructions // Move Instructions @@ -1825,14 +1859,17 @@ multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId, multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm, string OpcodeStr, Intrinsic IntId, Intrinsic IntId2> { - def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, + VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>; - def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, + i128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (IntId VR128:$src1, - (bitconvert (memopv2i64 addr:$src2))))]>; - def ri : PDIi8<opc2, ImmForm, (outs 
VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), + (bitconvert (memopv2i64 addr:$src2))))]>; + def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst), (ins VR128:$src1, + i32i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>; } @@ -1840,15 +1877,17 @@ multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm, /// PDI_binop_rm - Simple SSE2 binary operator. multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, ValueType OpVT, bit Commutable = 0> { - def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, + VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]> { let isCommutable = Commutable; } - def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, + i128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (OpVT (OpNode VR128:$src1, - (bitconvert (memopv2i64 addr:$src2)))))]>; + (bitconvert (memopv2i64 addr:$src2)))))]>; } /// PDI_binop_rm_v2i64 - Simple SSE2 binary operator whose type is v2i64. @@ -1858,14 +1897,17 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, /// multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode, bit Commutable = 0> { - def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]> { let isCommutable = Commutable; } - def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (OpNode VR128:$src1,(memopv2i64 addr:$src2)))]>; + [(set VR128:$dst, (OpNode VR128:$src1, + (memopv2i64 addr:$src2)))]>; } } // Constraints = "$src1 = $dst" @@ -2029,8 +2071,8 @@ def PSHUFDmi : PDIi8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (v4i32 (pshufd:$src2 - (bc_v4i32(memopv2i64 addr:$src1)), - (undef))))]>; + (bc_v4i32(memopv2i64 addr:$src1)), + (undef))))]>; // SSE2 with ImmT == Imm8 and XS prefix. def PSHUFHWri : Ii8<0x70, MRMSrcReg, @@ -2043,8 +2085,8 @@ def PSHUFHWmi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (v8i16 (pshufhw:$src2 - (bc_v8i16 (memopv2i64 addr:$src1)), - (undef))))]>, + (bc_v8i16 (memopv2i64 addr:$src1)), + (undef))))]>, XS, Requires<[HasSSE2]>; // SSE2 with ImmT == Imm8 and XD prefix. 
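// For all of these pshuf* forms the imm8 packs four 2-bit lane selectors,
// one per destination element. An illustrative encoding:
//   pshufd $0x1B, %xmm1, %xmm0  ## 0x1B = 00 01 10 11: reverse the 4 dwords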
@@ -2064,90 +2106,90 @@ def PSHUFLWmi : Ii8<0x70, MRMSrcMem, let Constraints = "$src1 = $dst" in { - def PUNPCKLBWrr : PDI<0x60, MRMSrcReg, + def PUNPCKLBWrr : PDI<0x60, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpcklbw\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v16i8 (unpckl VR128:$src1, VR128:$src2)))]>; - def PUNPCKLBWrm : PDI<0x60, MRMSrcMem, + def PUNPCKLBWrm : PDI<0x60, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpcklbw\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (unpckl VR128:$src1, (bc_v16i8 (memopv2i64 addr:$src2))))]>; - def PUNPCKLWDrr : PDI<0x61, MRMSrcReg, + def PUNPCKLWDrr : PDI<0x61, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpcklwd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v8i16 (unpckl VR128:$src1, VR128:$src2)))]>; - def PUNPCKLWDrm : PDI<0x61, MRMSrcMem, + def PUNPCKLWDrm : PDI<0x61, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpcklwd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (unpckl VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2))))]>; - def PUNPCKLDQrr : PDI<0x62, MRMSrcReg, + def PUNPCKLDQrr : PDI<0x62, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckldq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4i32 (unpckl VR128:$src1, VR128:$src2)))]>; - def PUNPCKLDQrm : PDI<0x62, MRMSrcMem, + def PUNPCKLDQrm : PDI<0x62, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckldq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (unpckl VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2))))]>; - def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, + def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpcklqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v2i64 (unpckl VR128:$src1, VR128:$src2)))]>; - def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, + def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpcklqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v2i64 (unpckl VR128:$src1, (memopv2i64 addr:$src2))))]>; - - def PUNPCKHBWrr : PDI<0x68, MRMSrcReg, + + def PUNPCKHBWrr : PDI<0x68, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckhbw\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v16i8 (unpckh VR128:$src1, VR128:$src2)))]>; - def PUNPCKHBWrm : PDI<0x68, MRMSrcMem, + def PUNPCKHBWrm : PDI<0x68, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckhbw\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (unpckh VR128:$src1, + [(set VR128:$dst, + (unpckh VR128:$src1, (bc_v16i8 (memopv2i64 addr:$src2))))]>; - def PUNPCKHWDrr : PDI<0x69, MRMSrcReg, + def PUNPCKHWDrr : PDI<0x69, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckhwd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v8i16 (unpckh VR128:$src1, VR128:$src2)))]>; - def PUNPCKHWDrm : PDI<0x69, MRMSrcMem, + def PUNPCKHWDrm : PDI<0x69, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckhwd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (unpckh VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2))))]>; - def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg, + def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckhdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4i32 (unpckh VR128:$src1, VR128:$src2)))]>; - def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem, + def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckhdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (unpckh VR128:$src1, 
(bc_v4i32 (memopv2i64 addr:$src2))))]>; - def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg, + def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckhqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v2i64 (unpckh VR128:$src1, VR128:$src2)))]>; - def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, + def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckhqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, @@ -2172,7 +2214,7 @@ let Constraints = "$src1 = $dst" in { (outs VR128:$dst), (ins VR128:$src1, i16mem:$src2, i32i8imm:$src3), "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR128:$dst, + [(set VR128:$dst, (X86pinsrw VR128:$src1, (extloadi16 addr:$src2), imm:$src3))]>; } @@ -2202,7 +2244,7 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>; def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "movnti\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>, + [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>, TB, Requires<[HasSSE2]>; // Flush cache @@ -2217,17 +2259,18 @@ def MFENCE : I<0xAE, MRM6r, (outs), (ins), "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>; //TODO: custom lower this so as to never even generate the noop -def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss), +def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss), (i8 0)), (NOOP)>; def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>; def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>; -def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss), +def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss), (i8 1)), (MFENCE)>; // Alias instructions that map zero vector to pxor / xorp* for sse. // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-ones value if folding it would be beneficial. 
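// pcmpeqd of a register with itself compares equal in every lane, so it
// materializes all-ones without touching memory; like V_SET0 above, it is
// rematerializable and marked isCodeGenOnly. Roughly:
//   pcmpeqd %xmm0, %xmm0        ## %xmm0 = all ones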
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1 in +let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, + isCodeGenOnly = 1 in def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "pcmpeqd\t$dst, $dst", [(set VR128:$dst, (v4i32 immAllOnesV))]>; @@ -2240,7 +2283,7 @@ def MOVSD2PDrr : SDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR64:$src), (v2f64 (scalar_to_vector FR64:$src)))]>; def MOVSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "movsd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, + [(set VR128:$dst, (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>; def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), @@ -2399,9 +2442,9 @@ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))), (MOVZPQILo2PQIrm addr:$src)>; } -//===----------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// // SSE3 Instructions -//===----------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// // Move Instructions def MOVSHDUPrr : S3SI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), @@ -2525,9 +2568,9 @@ let AddedComplexity = 20 in def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))), (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>; -//===----------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// // SSSE3 Instructions -//===----------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// /// SS3I_unop_rm_int_8 - Simple SSSE3 unary operator whose type is v*i8. multiclass SS3I_unop_rm_int_8<bits<8> opc, string OpcodeStr, @@ -2801,12 +2844,13 @@ def : Pat<(X86pshufb VR128:$src, VR128:$mask), def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), (PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>; -//===----------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// // Non-Instruction Patterns -//===----------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// -// extload f32 -> f64. This matches load+fextend because we have a hack in -// the isel (PreprocessForFPConvert) that can introduce loads after dag combine. +// extload f32 -> f64. This matches load+fextend because we have a hack in +// the isel (PreprocessForFPConvert) that can introduce loads after dag +// combine. // Since these loads aren't folded into the fextend, we have to match it // explicitly here. let Predicates = [HasSSE2] in @@ -2884,12 +2928,12 @@ def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))), Requires<[HasSSE2]>; // Special unary SHUFPDrri case. def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))), - (SHUFPDrri VR128:$src1, VR128:$src1, + (SHUFPDrri VR128:$src1, VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE2]>; // Special unary SHUFPDrri case. def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))), - (SHUFPDrri VR128:$src1, VR128:$src1, + (SHUFPDrri VR128:$src1, VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE2]>; // Unary v4f32 shuffle with PSHUF* in order to fold a load. 
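// PSHUFD reads a single source and has a memory form, so a unary shuffle of
// a loaded v4f32 can fold the load even though two-source SHUFPS cannot;
// the bitcasts merely view the data as v4i32. Roughly:
//   (v4f32 shuffle (load p), undef)  ==>  pshufd $imm, (p), %xmm0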
@@ -2899,16 +2943,16 @@ def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)), // Special binary v4i32 shuffle cases with SHUFPS. def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))), - (SHUFPSrri VR128:$src1, VR128:$src2, + (SHUFPSrri VR128:$src1, VR128:$src2, (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE2]>; def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))), - (SHUFPSrmi VR128:$src1, addr:$src2, + (SHUFPSrmi VR128:$src1, addr:$src2, (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE2]>; // Special binary v2i64 shuffle cases using SHUFPDrri. def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)), - (SHUFPDrri VR128:$src1, VR128:$src2, + (SHUFPDrri VR128:$src1, VR128:$src2, (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE2]>; @@ -3030,7 +3074,7 @@ def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)), // vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but // fall back to this for SSE1) def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))), - (SHUFPSrri VR128:$src2, VR128:$src1, + (SHUFPSrri VR128:$src2, VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE1]>; // Set lowest element and zero upper elements. @@ -3097,7 +3141,7 @@ def : Pat<(store (v8i16 VR128:$src), addr:$dst), (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; def : Pat<(store (v16i8 VR128:$src), addr:$dst), (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; - + //===----------------------------------------------------------------------===// // SSE4.1 Instructions //===----------------------------------------------------------------------===// @@ -3108,7 +3152,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, Intrinsic V2F64Int> { // Intrinsic operation, reg. // Vector intrinsic operation, reg - def PSr_Int : SS4AIi8<opcps, MRMSrcReg, + def PSr_Int : SS4AIi8<opcps, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), @@ -3149,41 +3193,41 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd, Intrinsic F64Int> { // Intrinsic operation, reg. def SSr_Int : SS4AIi8<opcss, MRMSrcReg, - (outs VR128:$dst), + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), !strconcat(OpcodeStr, "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, + [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>, OpSize; // Intrinsic operation, mem. - def SSm_Int : SS4AIi8<opcss, MRMSrcMem, - (outs VR128:$dst), + def SSm_Int : SS4AIi8<opcss, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, + !strconcat(OpcodeStr, "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, + [(set VR128:$dst, (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>, OpSize; // Intrinsic operation, reg. def SDr_Int : SS4AIi8<opcsd, MRMSrcReg, - (outs VR128:$dst), + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), !strconcat(OpcodeStr, "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, + [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>, OpSize; // Intrinsic operation, mem. 
def SDm_Int : SS4AIi8<opcsd, MRMSrcMem, - (outs VR128:$dst), + (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3), !strconcat(OpcodeStr, "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, + [(set VR128:$dst, (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>, OpSize; } @@ -3302,9 +3346,9 @@ let Constraints = "$src1 = $dst" in { Intrinsic IntId128, bit Commutable = 0> { def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, + !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, + [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2, imm:$src3))]>, OpSize { let isCommutable = Commutable; @@ -3339,7 +3383,7 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in { multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> { def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, + !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"), [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>, OpSize; @@ -3471,13 +3515,13 @@ def : Pat<(int_x86_sse41_pmovzxbq multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> { def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), - !strconcat(OpcodeStr, + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set GR32:$dst, (X86pextrb (v16i8 VR128:$src1), imm:$src2))]>, OpSize; def mr : SS4AIi8<opc, MRMDestMem, (outs), (ins i8mem:$dst, VR128:$src1, i32i8imm:$src2), - !strconcat(OpcodeStr, + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, OpSize; // FIXME: @@ -3492,7 +3536,7 @@ defm PEXTRB : SS41I_extract8<0x14, "pextrb">; multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> { def mr : SS4AIi8<opc, MRMDestMem, (outs), (ins i16mem:$dst, VR128:$src1, i32i8imm:$src2), - !strconcat(OpcodeStr, + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, OpSize; // FIXME: @@ -3507,13 +3551,13 @@ defm PEXTRW : SS41I_extract16<0x15, "pextrw">; multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> { def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), - !strconcat(OpcodeStr, + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set GR32:$dst, (extractelt (v4i32 VR128:$src1), imm:$src2))]>, OpSize; def mr : SS4AIi8<opc, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src1, i32i8imm:$src2), - !strconcat(OpcodeStr, + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(store (extractelt (v4i32 VR128:$src1), imm:$src2), addr:$dst)]>, OpSize; @@ -3527,14 +3571,14 @@ defm PEXTRD : SS41I_extract32<0x16, "pextrd">; multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> { def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), - !strconcat(OpcodeStr, + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>, OpSize; - def mr : SS4AIi8<opc, MRMDestMem, (outs), + def mr : SS4AIi8<opc, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src1, i32i8imm:$src2), - !strconcat(OpcodeStr, + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2), addr:$dst)]>, OpSize; @@ -3553,15 +3597,15 @@ let Constraints = "$src1 = $dst" in { multiclass SS41I_insert8<bits<8> opc, string OpcodeStr> { def rr : SS4AIi8<opc, MRMSrcReg, 
(outs VR128:$dst), (ins VR128:$src1, GR32:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, + !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, + [(set VR128:$dst, (X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>, OpSize; def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i8mem:$src2, i32i8imm:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, + [(set VR128:$dst, (X86pinsrb VR128:$src1, (extloadi8 addr:$src2), imm:$src3))]>, OpSize; } @@ -3573,16 +3617,16 @@ let Constraints = "$src1 = $dst" in { multiclass SS41I_insert32<bits<8> opc, string OpcodeStr> { def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, GR32:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, + !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, + [(set VR128:$dst, (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>, OpSize; def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i32mem:$src2, i32i8imm:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, + [(set VR128:$dst, (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2), imm:$src3)))]>, OpSize; } @@ -3590,37 +3634,57 @@ let Constraints = "$src1 = $dst" in { defm PINSRD : SS41I_insert32<0x22, "pinsrd">; +// insertps has a few different modes, there's the first two here below which +// are optimized inserts that won't zero arbitrary elements in the destination +// vector. The next one matches the intrinsic and could zero arbitrary elements +// in the target vector. let Constraints = "$src1 = $dst" in { multiclass SS41I_insertf32<bits<8> opc, string OpcodeStr> { def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, FR32:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, + (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), + !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (X86insrtps VR128:$src1, FR32:$src2, imm:$src3))]>, OpSize; + [(set VR128:$dst, + (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>, + OpSize; def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2, i32i8imm:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (X86insrtps VR128:$src1, (loadf32 addr:$src2), + [(set VR128:$dst, + (X86insrtps VR128:$src1, + (v4f32 (scalar_to_vector (loadf32 addr:$src2))), imm:$src3))]>, OpSize; } } defm INSERTPS : SS41I_insertf32<0x21, "insertps">; +def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3), + (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>; + +// ptest instruction we'll lower to this in X86ISelLowering primarily from +// the intel intrinsic that corresponds to this. 
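To make the two insertps behaviors described above concrete (the ptest definitions follow right below), here is how they surface through the SSE4.1 intrinsic. In the immediate, bits [7:6] pick the source lane, [5:4] the destination lane, and [3:0] form the zero mask (illustrative function names):

    #include <smmintrin.h>

    // Insert lane 0 of b into lane 2 of a; zero mask clear, so no lane is zeroed.
    __m128 insertOnly(__m128 a, __m128 b)    { return _mm_insert_ps(a, b, 0x20); }

    // Same insert, but the zero mask's low bit also zeroes lane 0 of the result.
    __m128 insertAndZero(__m128 a, __m128 b) { return _mm_insert_ps(a, b, 0x21); }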
let Defs = [EFLAGS] in { def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "ptest \t{$src2, $src1|$src1, $src2}", []>, OpSize; + "ptest \t{$src2, $src1|$src1, $src2}", + [(X86ptest VR128:$src1, VR128:$src2), + (implicit EFLAGS)]>, OpSize; def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2), - "ptest \t{$src2, $src1|$src1, $src2}", []>, OpSize; + "ptest \t{$src2, $src1|$src1, $src2}", + [(X86ptest VR128:$src1, (load addr:$src2)), + (implicit EFLAGS)]>, OpSize; } def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movntdqa\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>; + +//===----------------------------------------------------------------------===// +// SSE4.2 Instructions +//===----------------------------------------------------------------------===// + /// SS42I_binop_rm_int - Simple SSE 4.2 binary operator let Constraints = "$src1 = $dst" in { multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr, @@ -3647,3 +3711,171 @@ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)), (PCMPGTQrr VR128:$src1, VR128:$src2)>; def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))), (PCMPGTQrm VR128:$src1, addr:$src2)>; + +// crc intrinsic instruction +// This set of instructions are only rm, the only difference is the size +// of r and m. +let Constraints = "$src1 = $dst" in { + def CRC32m8 : SS42FI<0xF0, MRMSrcMem, (outs GR32:$dst), + (ins GR32:$src1, i8mem:$src2), + "crc32 \t{$src2, $src1|$src1, $src2}", + [(set GR32:$dst, + (int_x86_sse42_crc32_8 GR32:$src1, + (load addr:$src2)))]>, OpSize; + def CRC32r8 : SS42FI<0xF0, MRMSrcReg, (outs GR32:$dst), + (ins GR32:$src1, GR8:$src2), + "crc32 \t{$src2, $src1|$src1, $src2}", + [(set GR32:$dst, + (int_x86_sse42_crc32_8 GR32:$src1, GR8:$src2))]>, + OpSize; + def CRC32m16 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst), + (ins GR32:$src1, i16mem:$src2), + "crc32 \t{$src2, $src1|$src1, $src2}", + [(set GR32:$dst, + (int_x86_sse42_crc32_16 GR32:$src1, + (load addr:$src2)))]>, + OpSize; + def CRC32r16 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst), + (ins GR32:$src1, GR16:$src2), + "crc32 \t{$src2, $src1|$src1, $src2}", + [(set GR32:$dst, + (int_x86_sse42_crc32_16 GR32:$src1, GR16:$src2))]>, + OpSize; + def CRC32m32 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst), + (ins GR32:$src1, i32mem:$src2), + "crc32 \t{$src2, $src1|$src1, $src2}", + [(set GR32:$dst, + (int_x86_sse42_crc32_32 GR32:$src1, + (load addr:$src2)))]>, OpSize; + def CRC32r32 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst), + (ins GR32:$src1, GR32:$src2), + "crc32 \t{$src2, $src1|$src1, $src2}", + [(set GR32:$dst, + (int_x86_sse42_crc32_32 GR32:$src1, GR32:$src2))]>, + OpSize; + def CRC64m64 : SS42FI<0xF0, MRMSrcMem, (outs GR64:$dst), + (ins GR64:$src1, i64mem:$src2), + "crc32 \t{$src2, $src1|$src1, $src2}", + [(set GR64:$dst, + (int_x86_sse42_crc32_64 GR64:$src1, + (load addr:$src2)))]>, + OpSize, REX_W; + def CRC64r64 : SS42FI<0xF0, MRMSrcReg, (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2), + "crc32 \t{$src2, $src1|$src1, $src2}", + [(set GR64:$dst, + (int_x86_sse42_crc32_64 GR64:$src1, GR64:$src2))]>, + OpSize, REX_W; +} + +// String/text processing instructions. 
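Before the string instructions just announced, a brief sketch of how the crc32 definitions above are reached from source via the SSE4.2 intrinsics (the wrapper and its invert-at-both-ends convention are illustrative, following common CRC-32C usage):

    #include <nmmintrin.h>
    #include <cstddef>

    unsigned crc32c(const unsigned char *p, std::size_t n) {
      unsigned crc = ~0u;               // customary CRC-32C initial value
      for (std::size_t i = 0; i != n; ++i)
        crc = _mm_crc32_u8(crc, p[i]);  // maps onto the CRC32r8/CRC32m8 defs above
      return ~crc;                      // customary final inversion
    }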
+let Defs = [EFLAGS], usesCustomDAGSchedInserter = 1 in { +def PCMPISTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + "#PCMPISTRM128rr PSEUDO!", + [(set VR128:$dst, + (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2, + imm:$src3))]>, OpSize; +def PCMPISTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + "#PCMPISTRM128rm PSEUDO!", + [(set VR128:$dst, + (int_x86_sse42_pcmpistrm128 VR128:$src1, + (load addr:$src2), + imm:$src3))]>, OpSize; +} + +let Defs = [XMM0, EFLAGS] in { +def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", + []>, OpSize; +def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", + []>, OpSize; +} + +let Defs = [EFLAGS], Uses = [EAX, EDX], + usesCustomDAGSchedInserter = 1 in { +def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + "#PCMPESTRM128rr PSEUDO!", + [(set VR128:$dst, + (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX, + VR128:$src3, + EDX, imm:$src5))]>, OpSize; +def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + "#PCMPESTRM128rm PSEUDO!", + [(set VR128:$dst, + (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX, + (load addr:$src3), + EDX, imm:$src5))]>, OpSize; +} + +let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in { +def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", + []>, OpSize; +def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", + []>, OpSize; +} + +let Defs = [ECX, EFLAGS] in { + multiclass SS42AI_pcmpistri<Intrinsic IntId128> { + def rr : SS42AI<0x63, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}", + [(set ECX, + (IntId128 VR128:$src1, VR128:$src2, imm:$src3)), + (implicit EFLAGS)]>, + OpSize; + def rm : SS42AI<0x63, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}", + [(set ECX, + (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)), + (implicit EFLAGS)]>, + OpSize; + } +} + +defm PCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128>; +defm PCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128>; +defm PCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128>; +defm PCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128>; +defm PCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128>; +defm PCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128>; + +let Defs = [ECX, EFLAGS] in { +let Uses = [EAX, EDX] in { + multiclass SS42AI_pcmpestri<Intrinsic IntId128> { + def rr : SS42AI<0x61, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}", + [(set ECX, + (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)), + (implicit EFLAGS)]>, + OpSize; + def rm : SS42AI<0x61, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}", + [(set ECX, + (IntId128 VR128:$src1, EAX, (load addr:$src3), + EDX, imm:$src5)), + (implicit EFLAGS)]>, + OpSize; + } +} 
+} + +defm PCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128>; +defm PCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128>; +defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>; +defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>; +defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>; +defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>; diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index f923106..62ca47f 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -15,15 +15,16 @@ #include "X86JITInfo.h" #include "X86Relocations.h" #include "X86Subtarget.h" +#include "X86TargetMachine.h" #include "llvm/Function.h" -#include "llvm/Config/alloca.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include <cstdlib> #include <cstring> using namespace llvm; // Determine the platform we're running on -#if defined (__x86_64__) || defined (_M_AMD64) +#if defined (__x86_64__) || defined (_M_AMD64) || defined (_M_X64) # define X86_64_JIT #elif defined(__i386__) || defined(i386) || defined(_M_IX86) # define X86_32_JIT @@ -51,13 +52,6 @@ static TargetJITInfo::JITCompilerFn JITCompilerFunction; #define GETASMPREFIX(X) GETASMPREFIX2(X) #define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__) -// Check if building with -fPIC -#if defined(__PIC__) && __PIC__ && defined(__linux__) -#define ASMCALLSUFFIX "@PLT" -#else -#define ASMCALLSUFFIX -#endif - // For ELF targets, use a .size and .type directive, to let tools // know the extent of functions defined in assembler. #if defined(__ELF__) @@ -130,7 +124,7 @@ extern "C" { // JIT callee "movq %rbp, %rdi\n" // Pass prev frame and return address "movq 8(%rbp), %rsi\n" - "call " ASMPREFIX "X86CompilationCallback2" ASMCALLSUFFIX "\n" + "call " ASMPREFIX "X86CompilationCallback2\n" // Restore all XMM arg registers "movaps 112(%rsp), %xmm7\n" "movaps 96(%rsp), %xmm6\n" @@ -206,7 +200,7 @@ extern "C" { "movl 4(%ebp), %eax\n" // Pass prev frame and return address "movl %eax, 4(%esp)\n" "movl %ebp, (%esp)\n" - "call " ASMPREFIX "X86CompilationCallback2" ASMCALLSUFFIX "\n" + "call " ASMPREFIX "X86CompilationCallback2\n" "movl %ebp, %esp\n" // Restore ESP CFI(".cfi_def_cfa_register %esp\n") "subl $12, %esp\n" @@ -262,7 +256,7 @@ extern "C" { "movl 4(%ebp), %eax\n" // Pass prev frame and return address "movl %eax, 4(%esp)\n" "movl %ebp, (%esp)\n" - "call " ASMPREFIX "X86CompilationCallback2" ASMCALLSUFFIX "\n" + "call " ASMPREFIX "X86CompilationCallback2\n" "addl $16, %esp\n" "movaps 48(%esp), %xmm3\n" CFI(".cfi_restore %xmm3\n") @@ -321,8 +315,7 @@ extern "C" { #else // Not an i386 host void X86CompilationCallback() { - assert(0 && "Cannot call X86CompilationCallback() on a non-x86 arch!\n"); - abort(); + llvm_unreachable("Cannot call X86CompilationCallback() on a non-x86 arch!"); } #endif } @@ -331,14 +324,21 @@ extern "C" { /// function stub when we did not know the real target of a call. This function /// must locate the start of the stub or call site and pass it into the JIT /// compiler function. 
-extern "C" void ATTRIBUTE_USED +extern "C" { +#if !(defined (X86_64_JIT) && defined(_MSC_VER)) + // the following function is called only from this translation unit, + // unless we are under 64bit Windows with MSC, where there is + // no support for inline assembly +static +#endif +void ATTRIBUTE_USED X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { intptr_t *RetAddrLoc = &StackPtr[1]; assert(*RetAddrLoc == RetAddr && "Could not find return address on the stack!"); // It's a stub if there is an interrupt marker after the call. - bool isStub = ((unsigned char*)RetAddr)[0] == 0xCD; + bool isStub = ((unsigned char*)RetAddr)[0] == 0xCE; // The call instruction should have pushed the return value onto the stack... #if defined (X86_64_JIT) @@ -348,10 +348,10 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { #endif #if 0 - DOUT << "In callback! Addr=" << (void*)RetAddr - << " ESP=" << (void*)StackPtr - << ": Resolving call to function: " - << TheVM->getFunctionReferencedName((void*)RetAddr) << "\n"; + DEBUG(errs() << "In callback! Addr=" << (void*)RetAddr + << " ESP=" << (void*)StackPtr + << ": Resolving call to function: " + << TheVM->getFunctionReferencedName((void*)RetAddr) << "\n"); #endif // Sanity check to make sure this really is a call instruction. @@ -377,7 +377,7 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { // If this is a stub, rewrite the call into an unconditional branch // instruction so that two return addresses are not pushed onto the stack // when the requested function finally gets called. This also makes the - // 0xCD byte (interrupt) dead, so the marker doesn't effect anything. + // 0xCE byte (interrupt) dead, so the marker doesn't effect anything. #if defined (X86_64_JIT) // If the target address is within 32-bit range of the stub, use a // PC-relative branch instead of loading the actual address. (This is @@ -403,31 +403,26 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { *RetAddrLoc -= 5; #endif } +} TargetJITInfo::LazyResolverFn X86JITInfo::getLazyResolverFunction(JITCompilerFn F) { JITCompilerFunction = F; #if defined (X86_32_JIT) && !defined (_MSC_VER) - unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; - union { - unsigned u[3]; - char c[12]; - } text; - - if (!X86::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1)) { - // FIXME: support for AMD family of processors. - if (memcmp(text.c, "GenuineIntel", 12) == 0) { - X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); - if ((EDX >> 25) & 0x1) - return X86CompilationCallback_SSE; - } - } + if (Subtarget->hasSSE1()) + return X86CompilationCallback_SSE; #endif return X86CompilationCallback; } +X86JITInfo::X86JITInfo(X86TargetMachine &tm) : TM(tm) { + Subtarget = &TM.getSubtarget<X86Subtarget>(); + useGOT = 0; + TLSOffset = 0; +} + void *X86JITInfo::emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr, JITCodeEmitter &JCE) { #if defined (X86_64_JIT) @@ -485,7 +480,10 @@ void *X86JITInfo::emitFunctionStub(const Function* F, void *Fn, JCE.emitWordLE((intptr_t)Fn-JCE.getCurrentPCValue()-4); #endif - JCE.emitByte(0xCD); // Interrupt - Just a marker identifying the stub! + // This used to use 0xCD, but that value is used by JITMemoryManager to + // initialize the buffer with garbage, which means it may follow a + // noreturn function call, confusing X86CompilationCallback2. PR 4929. + JCE.emitByte(0xCE); // Interrupt - Just a marker identifying the stub! 
return JCE.finishGVStub(F); } @@ -495,9 +493,11 @@ void X86JITInfo::emitFunctionStubAtAddr(const Function* F, void *Fn, void *Stub, // complains about casting a function pointer to a normal pointer. JCE.startGVStub(F, Stub, 5); JCE.emitByte(0xE9); -#if defined (X86_64_JIT) - assert(((((intptr_t)Fn-JCE.getCurrentPCValue()-5) << 32) >> 32) == - ((intptr_t)Fn-JCE.getCurrentPCValue()-5) +#if defined (X86_64_JIT) && !defined (NDEBUG) + // Yes, we need both of these casts, or some broken versions of GCC (4.2.4) + // get the signed-ness of the expression wrong. Go figure. + intptr_t Displacement = (intptr_t)Fn - (intptr_t)JCE.getCurrentPCValue() - 5; + assert(((Displacement << 32) >> 32) == Displacement && "PIC displacement does not fit in displacement field!"); #endif JCE.emitWordLE((intptr_t)Fn-JCE.getCurrentPCValue()-4); @@ -538,6 +538,7 @@ void X86JITInfo::relocate(void *Function, MachineRelocation *MR, break; } case X86::reloc_absolute_word: + case X86::reloc_absolute_word_sext: // Absolute relocation, just add the relocated value to the value already // in memory. *((unsigned*)RelocPos) += (unsigned)ResultPtr; @@ -554,7 +555,7 @@ char* X86JITInfo::allocateThreadLocalMemory(size_t size) { TLSOffset -= size; return TLSOffset; #else - assert(0 && "Cannot allocate thread local storage on this arch!\n"); + llvm_unreachable("Cannot allocate thread local storage on this arch!"); return 0; #endif } diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h index 6a4e214..c381433 100644 --- a/lib/Target/X86/X86JITInfo.h +++ b/lib/Target/X86/X86JITInfo.h @@ -20,16 +20,15 @@ namespace llvm { class X86TargetMachine; + class X86Subtarget; class X86JITInfo : public TargetJITInfo { X86TargetMachine &TM; + const X86Subtarget *Subtarget; uintptr_t PICBase; char* TLSOffset; public: - explicit X86JITInfo(X86TargetMachine &tm) : TM(tm) { - useGOT = 0; - TLSOffset = 0; - } + explicit X86JITInfo(X86TargetMachine &tm); /// replaceMachineCodeForFunction - Make it so that calling the function /// whose machine code is at OLD turns into a call to NEW, perhaps by diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp new file mode 100644 index 0000000..9d7e66d --- /dev/null +++ b/lib/Target/X86/X86MCAsmInfo.cpp @@ -0,0 +1,123 @@ +//===-- X86MCAsmInfo.cpp - X86 asm properties -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the X86MCAsmInfo properties. +// +//===----------------------------------------------------------------------===// + +#include "X86MCAsmInfo.h" +#include "X86TargetMachine.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +enum AsmWriterFlavorTy { + // Note: This numbering has to match the GCC assembler dialects for inline + // asm alternatives to work right. 
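// Aside (illustration; not in the patch -- the dialect enum continues below):
// the displacement assertion in emitFunctionStubAtAddr above says "the 64-bit
// distance must survive truncation to the 5-byte jump's rel32 field". A
// shift-free equivalent, valid on the two's-complement targets x86 cares
// about, that also sidesteps the GCC 4.2.4 signedness bug mentioned there:
//
//   #include <cstdint>
//   inline bool fitsInRel32(int64_t D) {
//     return D == static_cast<int64_t>(static_cast<int32_t>(D));
//   }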
+ ATT = 0, Intel = 1 +}; + +static cl::opt<AsmWriterFlavorTy> +AsmWriterFlavor("x86-asm-syntax", cl::init(ATT), + cl::desc("Choose style of code to emit from X86 backend:"), + cl::values(clEnumValN(ATT, "att", "Emit AT&T-style assembly"), + clEnumValN(Intel, "intel", "Emit Intel-style assembly"), + clEnumValEnd)); + + +static const char *const x86_asm_table[] = { + "{si}", "S", + "{di}", "D", + "{ax}", "a", + "{cx}", "c", + "{memory}", "memory", + "{flags}", "", + "{dirflag}", "", + "{fpsr}", "", + "{cc}", "cc", + 0,0}; + +X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) { + AsmTransCBE = x86_asm_table; + AssemblerDialect = AsmWriterFlavor; + + bool is64Bit = Triple.getArch() == Triple::x86_64; + + TextAlignFillValue = 0x90; + + if (!is64Bit) + Data64bitsDirective = 0; // we can't emit a 64-bit unit + + // Leopard and above support aligned common symbols. + COMMDirectiveTakesAlignment = Triple.getDarwinMajorNumber() >= 9; + + CommentString = "##"; + PCSymbol = "."; + + SupportsDebugInformation = true; + DwarfUsesInlineInfoSection = true; + + // Exceptions handling + ExceptionsType = ExceptionHandling::Dwarf; + AbsoluteEHSectionOffsets = false; +} + +X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &Triple) { + AsmTransCBE = x86_asm_table; + AssemblerDialect = AsmWriterFlavor; + + PrivateGlobalPrefix = ".L"; + WeakRefDirective = "\t.weak\t"; + SetDirective = "\t.set\t"; + PCSymbol = "."; + + // Set up DWARF directives + HasLEB128 = true; // Target asm supports leb128 directives (little-endian) + + // Debug Information + AbsoluteDebugSectionOffsets = true; + SupportsDebugInformation = true; + + // Exceptions handling + ExceptionsType = ExceptionHandling::Dwarf; + AbsoluteEHSectionOffsets = false; + + // On Linux we must declare when we can use a non-executable stack. + if (Triple.getOS() == Triple::Linux) + NonexecutableStackDirective = "\t.section\t.note.GNU-stack,\"\",@progbits"; +} + +X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) { + AsmTransCBE = x86_asm_table; + AssemblerDialect = AsmWriterFlavor; +} + + +X86WinMCAsmInfo::X86WinMCAsmInfo(const Triple &Triple) { + AsmTransCBE = x86_asm_table; + AssemblerDialect = AsmWriterFlavor; + + GlobalPrefix = "_"; + CommentString = ";"; + + PrivateGlobalPrefix = "$"; + AlignDirective = "\tALIGN\t"; + ZeroDirective = "\tdb\t"; + ZeroDirectiveSuffix = " dup(0)"; + AsciiDirective = "\tdb\t"; + AscizDirective = 0; + Data8bitsDirective = "\tdb\t"; + Data16bitsDirective = "\tdw\t"; + Data32bitsDirective = "\tdd\t"; + Data64bitsDirective = "\tdq\t"; + HasDotTypeDotSizeDirective = false; + HasSingleParameterDotFile = false; + + AlignmentIsInBytes = true; +} diff --git a/lib/Target/X86/X86MCAsmInfo.h b/lib/Target/X86/X86MCAsmInfo.h new file mode 100644 index 0000000..18e2bdb --- /dev/null +++ b/lib/Target/X86/X86MCAsmInfo.h @@ -0,0 +1,42 @@ +//=====-- X86MCAsmInfo.h - X86 asm properties -----------------*- C++ -*--====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the X86MCAsmInfo class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef X86TARGETASMINFO_H +#define X86TARGETASMINFO_H + +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCAsmInfoCOFF.h" +#include "llvm/MC/MCAsmInfoDarwin.h" + +namespace llvm { + class Triple; + + struct X86MCAsmInfoDarwin : public MCAsmInfoDarwin { + explicit X86MCAsmInfoDarwin(const Triple &Triple); + }; + + struct X86ELFMCAsmInfo : public MCAsmInfo { + explicit X86ELFMCAsmInfo(const Triple &Triple); + }; + + struct X86MCAsmInfoCOFF : public MCAsmInfoCOFF { + explicit X86MCAsmInfoCOFF(const Triple &Triple); + }; + + struct X86WinMCAsmInfo : public MCAsmInfo { + explicit X86WinMCAsmInfo(const Triple &Triple); + }; + +} // namespace llvm + +#endif diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index a2f319f..f03723a 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -30,14 +30,16 @@ #include "llvm/CodeGen/MachineLocation.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, @@ -54,6 +56,7 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, Is64Bit = Subtarget->is64Bit(); IsWin64 = Subtarget->isTargetWin64(); StackAlign = TM.getFrameInfo()->getStackAlignment(); + if (Is64Bit) { SlotSize = 8; StackPtr = X86::RSP; @@ -65,12 +68,12 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, } } -// getDwarfRegNum - This function maps LLVM register identifiers to the -// Dwarf specific numbering, used in debug info and exception tables. - +/// getDwarfRegNum - This function maps LLVM register identifiers to the DWARF +/// specific numbering, used in debug info and exception tables. int X86RegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const { const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>(); unsigned Flavour = DWARFFlavour::X86_64; + if (!Subtarget->is64Bit()) { if (Subtarget->isTargetDarwin()) { if (isEH) @@ -88,9 +91,8 @@ int X86RegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const { return X86GenRegisterInfo::getDwarfRegNumFull(RegNo, Flavour); } -// getX86RegNum - This function maps LLVM register identifiers to their X86 -// specific numbering, which is used in various places encoding instructions. -// +/// getX86RegNum - This function maps LLVM register identifiers to their X86 +/// specific numbering, which is used in various places encoding instructions. 
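// Aside (illustration; not part of the patch): the "X86 specific numbering"
// that getX86RegNum returns below is the 3-bit register field used in ModRM
// and SIB bytes, ordered:
//
//   EAX = 0, ECX = 1, EDX = 2, EBX = 3, ESP = 4, EBP = 5, ESI = 6, EDI = 7
//
// REX.R/X/B each supply a fourth bit for one of those fields, which is how
// R8-R15 and XMM8-XMM15 are reached in 64-bit mode.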
unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) { switch(RegNo) { case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX; @@ -146,17 +148,131 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) { default: assert(isVirtualRegister(RegNo) && "Unknown physical register!"); - assert(0 && "Register allocator hasn't allocated reg correctly yet!"); + llvm_unreachable("Register allocator hasn't allocated reg correctly yet!"); return 0; } } -const TargetRegisterClass *X86RegisterInfo::getPointerRegClass() const { - const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>(); - if (Subtarget->is64Bit()) - return &X86::GR64RegClass; - else +const TargetRegisterClass * +X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, + const TargetRegisterClass *B, + unsigned SubIdx) const { + switch (SubIdx) { + default: return 0; + case 1: + // 8-bit + if (B == &X86::GR8RegClass) { + if (A->getSize() == 2 || A->getSize() == 4 || A->getSize() == 8) + return A; + } else if (B == &X86::GR8_ABCD_LRegClass || B == &X86::GR8_ABCD_HRegClass) { + if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass || + A == &X86::GR64_NOREXRegClass || + A == &X86::GR64_NOSPRegClass || + A == &X86::GR64_NOREX_NOSPRegClass) + return &X86::GR64_ABCDRegClass; + else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass || + A == &X86::GR32_NOREXRegClass || + A == &X86::GR32_NOSPRegClass) + return &X86::GR32_ABCDRegClass; + else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass || + A == &X86::GR16_NOREXRegClass) + return &X86::GR16_ABCDRegClass; + } else if (B == &X86::GR8_NOREXRegClass) { + if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass || + A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass) + return &X86::GR64_NOREXRegClass; + else if (A == &X86::GR64_ABCDRegClass) + return &X86::GR64_ABCDRegClass; + else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass || + A == &X86::GR32_NOSPRegClass) + return &X86::GR32_NOREXRegClass; + else if (A == &X86::GR32_ABCDRegClass) + return &X86::GR32_ABCDRegClass; + else if (A == &X86::GR16RegClass || A == &X86::GR16_NOREXRegClass) + return &X86::GR16_NOREXRegClass; + else if (A == &X86::GR16_ABCDRegClass) + return &X86::GR16_ABCDRegClass; + } + break; + case 2: + // 8-bit hi + if (B == &X86::GR8_ABCD_HRegClass) { + if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass || + A == &X86::GR64_NOREXRegClass || + A == &X86::GR64_NOSPRegClass || + A == &X86::GR64_NOREX_NOSPRegClass) + return &X86::GR64_ABCDRegClass; + else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass || + A == &X86::GR32_NOREXRegClass || A == &X86::GR32_NOSPRegClass) + return &X86::GR32_ABCDRegClass; + else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass || + A == &X86::GR16_NOREXRegClass) + return &X86::GR16_ABCDRegClass; + } + break; + case 3: + // 16-bit + if (B == &X86::GR16RegClass) { + if (A->getSize() == 4 || A->getSize() == 8) + return A; + } else if (B == &X86::GR16_ABCDRegClass) { + if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass || + A == &X86::GR64_NOREXRegClass || + A == &X86::GR64_NOSPRegClass || + A == &X86::GR64_NOREX_NOSPRegClass) + return &X86::GR64_ABCDRegClass; + else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass || + A == &X86::GR32_NOREXRegClass || A == &X86::GR32_NOSPRegClass) + return &X86::GR32_ABCDRegClass; + } else if (B == &X86::GR16_NOREXRegClass) { + if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass || + A == 
&X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass) + return &X86::GR64_NOREXRegClass; + else if (A == &X86::GR64_ABCDRegClass) + return &X86::GR64_ABCDRegClass; + else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass || + A == &X86::GR32_NOSPRegClass) + return &X86::GR32_NOREXRegClass; + else if (A == &X86::GR32_ABCDRegClass) + return &X86::GR64_ABCDRegClass; + } + break; + case 4: + // 32-bit + if (B == &X86::GR32RegClass || B == &X86::GR32_NOSPRegClass) { + if (A->getSize() == 8) + return A; + } else if (B == &X86::GR32_ABCDRegClass) { + if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass || + A == &X86::GR64_NOREXRegClass || + A == &X86::GR64_NOSPRegClass || + A == &X86::GR64_NOREX_NOSPRegClass) + return &X86::GR64_ABCDRegClass; + } else if (B == &X86::GR32_NOREXRegClass) { + if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass || + A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass) + return &X86::GR64_NOREXRegClass; + else if (A == &X86::GR64_ABCDRegClass) + return &X86::GR64_ABCDRegClass; + } + break; + } + return 0; +} + +const TargetRegisterClass * +X86RegisterInfo::getPointerRegClass(unsigned Kind) const { + switch (Kind) { + default: llvm_unreachable("Unexpected Kind in getPointerRegClass!"); + case 0: // Normal GPRs. + if (TM.getSubtarget<X86Subtarget>().is64Bit()) + return &X86::GR64RegClass; return &X86::GR32RegClass; + case 1: // Normal GRPs except the stack pointer (for encoding reasons). + if (TM.getSubtarget<X86Subtarget>().is64Bit()) + return &X86::GR64_NOSPRegClass; + return &X86::GR32_NOSPRegClass; + } } const TargetRegisterClass * @@ -276,6 +392,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(X86::ESP); Reserved.set(X86::SP); Reserved.set(X86::SPL); + // Set the frame-pointer register and its aliases as reserved if needed. if (hasFP(MF)) { Reserved.set(X86::RBP); @@ -283,10 +400,10 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(X86::BP); Reserved.set(X86::BPL); } - // Mark the x87 stack registers as reserved, since they don't - // behave normally with respect to liveness. We don't fully - // model the effects of x87 stack pushes and pops after - // stackification. + + // Mark the x87 stack registers as reserved, since they don't behave normally + // with respect to liveness. We don't fully model the effects of x87 stack + // pushes and pops after stackification. Reserved.set(X86::ST0); Reserved.set(X86::ST1); Reserved.set(X86::ST2); @@ -304,10 +421,12 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) { unsigned MaxAlign = 0; + for (int i = FFI->getObjectIndexBegin(), e = FFI->getObjectIndexEnd(); i != e; ++i) { if (FFI->isDeadObjectIndex(i)) continue; + unsigned Align = FFI->getObjectAlignment(i); MaxAlign = std::max(MaxAlign, Align); } @@ -315,10 +434,9 @@ static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) { return MaxAlign; } -// hasFP - Return true if the specified function should have a dedicated frame -// pointer register. This is true if the function has variable sized allocas or -// if frame pointer elimination is disabled. -// +/// hasFP - Return true if the specified function should have a dedicated frame +/// pointer register. This is true if the function has variable sized allocas +/// or if frame pointer elimination is disabled. 
bool X86RegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); @@ -335,7 +453,7 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); // FIXME: Currently we don't support stack realignment for functions with - // variable-sized allocas + // variable-sized allocas return (RealignStack && (MFI->getMaxAlignment() > StackAlign && !MFI->hasVarSizedObjects())); @@ -345,34 +463,45 @@ bool X86RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { return !MF.getFrameInfo()->hasVarSizedObjects(); } +bool X86RegisterInfo::hasReservedSpillSlot(MachineFunction &MF, unsigned Reg, + int &FrameIdx) const { + if (Reg == FramePtr && hasFP(MF)) { + FrameIdx = MF.getFrameInfo()->getObjectIndexBegin(); + return true; + } + return false; +} + int X86RegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const { - int Offset = MF.getFrameInfo()->getObjectOffset(FI) + SlotSize; - uint64_t StackSize = MF.getFrameInfo()->getStackSize(); + const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + int Offset = MFI->getObjectOffset(FI) - TFI.getOffsetOfLocalArea(); + uint64_t StackSize = MFI->getStackSize(); if (needsStackRealignment(MF)) { - if (FI < 0) - // Skip the saved EBP + if (FI < 0) { + // Skip the saved EBP. Offset += SlotSize; - else { - unsigned Align = MF.getFrameInfo()->getObjectAlignment(FI); + } else { + unsigned Align = MFI->getObjectAlignment(FI); assert( (-(Offset + StackSize)) % Align == 0); Align = 0; return Offset + StackSize; } - // FIXME: Support tail calls } else { if (!hasFP(MF)) return Offset + StackSize; - // Skip the saved EBP + // Skip the saved EBP. Offset += SlotSize; // Skip the RETADDR move area X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); - if (TailCallReturnAddrDelta < 0) Offset -= TailCallReturnAddrDelta; + if (TailCallReturnAddrDelta < 0) + Offset -= TailCallReturnAddrDelta; } return Offset; @@ -392,24 +521,29 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // We need to keep the stack aligned properly. To do this, we round the // amount of space needed for the outgoing arguments up to the next // alignment boundary. - Amount = (Amount+StackAlign-1)/StackAlign*StackAlign; + Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign; MachineInstr *New = 0; if (Old->getOpcode() == getCallFrameSetupOpcode()) { New = BuildMI(MF, Old->getDebugLoc(), TII.get(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri), - StackPtr).addReg(StackPtr).addImm(Amount); + StackPtr) + .addReg(StackPtr) + .addImm(Amount); } else { assert(Old->getOpcode() == getCallFrameDestroyOpcode()); - // factor out the amount the callee already popped. + + // Factor out the amount the callee already popped. uint64_t CalleeAmt = Old->getOperand(1).getImm(); Amount -= CalleeAmt; - if (Amount) { + + if (Amount) { unsigned Opc = (Amount < 128) ? (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) : (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri); New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), StackPtr) - .addReg(StackPtr).addImm(Amount); + .addReg(StackPtr) + .addImm(Amount); } } @@ -417,7 +551,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // The EFLAGS implicit def is dead. 
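// Aside (illustration; not in the patch): the Amount computation above is the
// usual round-up-to-a-multiple idiom. For Amount = 20 and StackAlign = 16:
//
//   (20 + 16 - 1) / 16 * 16  ==  35 / 16 * 16  ==  2 * 16  ==  32
//
// so the outgoing-argument area grows to the next 16-byte boundary.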
New->getOperand(3).setIsDead(); - // Replace the pseudo instruction with a new instruction... + // Replace the pseudo instruction with a new instruction. MBB.insert(I, New); } } @@ -432,10 +566,12 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineInstr *Old = I; MachineInstr *New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), - StackPtr).addReg(StackPtr).addImm(CalleeAmt); + StackPtr) + .addReg(StackPtr) + .addImm(CalleeAmt); + // The EFLAGS implicit def is dead. New->getOperand(3).setIsDead(); - MBB.insert(I, New); } } @@ -443,21 +579,24 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MBB.erase(I); } -void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const{ +unsigned +X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value, + RegScavenger *RS) const{ assert(SPAdj == 0 && "Unexpected"); unsigned i = 0; MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); + while (!MI.getOperand(i).isFI()) { ++i; assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); } int FrameIndex = MI.getOperand(i).getIndex(); - unsigned BasePtr; + if (needsStackRealignment(MF)) BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr); else @@ -471,34 +610,33 @@ void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (MI.getOperand(i+3).isImm()) { // Offset is a 32-bit integer. int Offset = getFrameIndexOffset(MF, FrameIndex) + - (int)(MI.getOperand(i+3).getImm()); + (int)(MI.getOperand(i + 3).getImm()); - MI.getOperand(i+3).ChangeToImmediate(Offset); + MI.getOperand(i + 3).ChangeToImmediate(Offset); } else { // Offset is symbolic. This is extremely rare. uint64_t Offset = getFrameIndexOffset(MF, FrameIndex) + (uint64_t)MI.getOperand(i+3).getOffset(); MI.getOperand(i+3).setOffset(Offset); } + return 0; } void X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { - MachineFrameInfo *FFI = MF.getFrameInfo(); + MachineFrameInfo *MFI = MF.getFrameInfo(); // Calculate and set max stack object alignment early, so we can decide // whether we will need stack realignment (and thus FP). - unsigned MaxAlign = std::max(FFI->getMaxAlignment(), - calculateMaxStackAlignment(FFI)); + unsigned MaxAlign = std::max(MFI->getMaxAlignment(), + calculateMaxStackAlignment(MFI)); - FFI->setMaxAlignment(MaxAlign); -} + MFI->setMaxAlignment(MaxAlign); -void -X86RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) const{ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); + if (TailCallReturnAddrDelta < 0) { // create RETURNADDR area // arg @@ -509,18 +647,21 @@ X86RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) const{ // ... // } // [EBP] - MF.getFrameInfo()-> - CreateFixedObject(-TailCallReturnAddrDelta, - (-1*SlotSize)+TailCallReturnAddrDelta); + MFI->CreateFixedObject(-TailCallReturnAddrDelta, + (-1U*SlotSize)+TailCallReturnAddrDelta); } + if (hasFP(MF)) { assert((TailCallReturnAddrDelta <= 0) && "The Delta should always be zero or negative"); + const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo(); + // Create a frame entry for the EBP register that must be saved. 
- int FrameIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, - (int)SlotSize * -2+ - TailCallReturnAddrDelta); - assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() && + int FrameIdx = MFI->CreateFixedObject(SlotSize, + -(int)SlotSize + + TFI.getOffsetOfLocalArea() + + TailCallReturnAddrDelta); + assert(FrameIdx == MFI->getObjectIndexBegin() && "Slot for EBP register must be last in order to be found!"); FrameIdx = 0; } @@ -549,14 +690,14 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset; MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) - .addReg(StackPtr).addImm(ThisVal); - // The EFLAGS implicit def is dead. - MI->getOperand(3).setIsDead(); + .addReg(StackPtr) + .addImm(ThisVal); + MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. Offset -= ThisVal; } } -// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator. +/// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator. static void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, unsigned StackPtr, uint64_t *NumBytes = NULL) { @@ -579,11 +720,12 @@ void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, } } -// mergeSPUpdatesUp - Merge two stack-manipulating instructions lower iterator. +/// mergeSPUpdatesUp - Merge two stack-manipulating instructions lower iterator. static void mergeSPUpdatesDown(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, unsigned StackPtr, uint64_t *NumBytes = NULL) { + // FIXME: THIS ISN'T RUN!!! return; if (MBBI == MBB.end()) return; @@ -610,23 +752,22 @@ void mergeSPUpdatesDown(MachineBasicBlock &MBB, } /// mergeSPUpdates - Checks the instruction before/after the passed -/// instruction. If it is an ADD/SUB instruction it is deleted -/// argument and the stack adjustment is returned as a positive value for ADD -/// and a negative for SUB. +/// instruction. If it is an ADD/SUB instruction it is deleted argument and the +/// stack adjustment is returned as a positive value for ADD and a negative for +/// SUB. static int mergeSPUpdates(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, unsigned StackPtr, bool doMergeWithPrevious) { - if ((doMergeWithPrevious && MBBI == MBB.begin()) || (!doMergeWithPrevious && MBBI == MBB.end())) return 0; - int Offset = 0; - MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI; MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : next(MBBI); unsigned Opc = PI->getOpcode(); + int Offset = 0; + if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || Opc == X86::ADD32ri || Opc == X86::ADD32ri8) && PI->getOperand(0).getReg() == StackPtr){ @@ -644,122 +785,116 @@ static int mergeSPUpdates(MachineBasicBlock &MBB, return Offset; } -void X86RegisterInfo::emitFrameMoves(MachineFunction &MF, - unsigned FrameLabelId, - unsigned ReadyLabelId) const { +void X86RegisterInfo::emitCalleeSavedFrameMoves(MachineFunction &MF, + unsigned LabelId, + unsigned FramePtr) const { MachineFrameInfo *MFI = MF.getFrameInfo(); MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); - if (!MMI) - return; + if (!MMI) return; + + // Add callee saved registers to move list. 
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + if (CSI.empty()) return; - uint64_t StackSize = MFI->getStackSize(); std::vector<MachineMove> &Moves = MMI->getFrameMoves(); const TargetData *TD = MF.getTarget().getTargetData(); + bool HasFP = hasFP(MF); - // Calculate amount of bytes used for return address storing + // Calculate amount of bytes used for return address storing. int stackGrowth = (MF.getTarget().getFrameInfo()->getStackGrowthDirection() == TargetFrameInfo::StackGrowsUp ? TD->getPointerSize() : -TD->getPointerSize()); - MachineLocation FPDst(hasFP(MF) ? FramePtr : StackPtr); - MachineLocation FPSrc(MachineLocation::VirtualFP); - Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc)); - - if (StackSize) { - // Show update of SP. - if (hasFP(MF)) { - // Adjust SP - MachineLocation SPDst(MachineLocation::VirtualFP); - MachineLocation SPSrc(MachineLocation::VirtualFP, 2*stackGrowth); - Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc)); - } else { - MachineLocation SPDst(MachineLocation::VirtualFP); - MachineLocation SPSrc(MachineLocation::VirtualFP, - -StackSize+stackGrowth); - Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc)); - } - } else { - // FIXME: Verify & implement for FP - MachineLocation SPDst(StackPtr); - MachineLocation SPSrc(StackPtr, stackGrowth); - Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc)); - } - - // Add callee saved registers to move list. - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - // FIXME: This is dirty hack. The code itself is pretty mess right now. // It should be rewritten from scratch and generalized sometimes. - // Determine maximum offset (minumum due to stack growth) + // Determine maximum offset (minumum due to stack growth). int64_t MaxOffset = 0; - for (unsigned I = 0, E = CSI.size(); I!=E; ++I) + for (std::vector<CalleeSavedInfo>::const_iterator + I = CSI.begin(), E = CSI.end(); I != E; ++I) MaxOffset = std::min(MaxOffset, - MFI->getObjectOffset(CSI[I].getFrameIdx())); - - // Calculate offsets - int64_t saveAreaOffset = (hasFP(MF) ? 3 : 2)*stackGrowth; - for (unsigned I = 0, E = CSI.size(); I!=E; ++I) { - int64_t Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); - unsigned Reg = CSI[I].getReg(); - Offset = (MaxOffset-Offset+saveAreaOffset); + MFI->getObjectOffset(I->getFrameIdx())); + + // Calculate offsets. + int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth; + for (std::vector<CalleeSavedInfo>::const_iterator + I = CSI.begin(), E = CSI.end(); I != E; ++I) { + int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); + unsigned Reg = I->getReg(); + Offset = MaxOffset - Offset + saveAreaOffset; + + // Don't output a new machine move if we're re-saving the frame + // pointer. This happens when the PrologEpilogInserter has inserted an extra + // "PUSH" of the frame pointer -- the "emitPrologue" method automatically + // generates one when frame pointers are used. If we generate a "machine + // move" for this extra "PUSH", the linker will lose track of the fact that + // the frame pointer should have the value of the first "PUSH" when it's + // trying to unwind. + // + // FIXME: This looks inelegant. It's possibly correct, but it's covering up + // another bug. I.e., one where we generate a prolog like this: + // + // pushl %ebp + // movl %esp, %ebp + // pushl %ebp + // pushl %esi + // ... + // + // The immediate re-push of EBP is unnecessary. At the least, it's an + // optimization bug. 
EBP can be used as a scratch register in certain + // cases, but probably not when we have a frame pointer. + if (HasFP && FramePtr == Reg) + continue; + MachineLocation CSDst(MachineLocation::VirtualFP, Offset); MachineLocation CSSrc(Reg); - Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc)); - } - - if (hasFP(MF)) { - // Save FP - MachineLocation FPDst(MachineLocation::VirtualFP, 2*stackGrowth); - MachineLocation FPSrc(FramePtr); - Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc)); + Moves.push_back(MachineMove(LabelId, CSDst, CSSrc)); } } - +/// emitPrologue - Push callee-saved registers onto the stack, which +/// automatically adjust the stack pointer. Adjust the stack pointer to allocate +/// space for local variables. Also emit labels used by the exception handler to +/// generate the exception handling frames. void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB + MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB. + MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); - const Function* Fn = MF.getFunction(); - const X86Subtarget* Subtarget = &MF.getTarget().getSubtarget<X86Subtarget>(); + const Function *Fn = MF.getFunction(); + const X86Subtarget *Subtarget = &MF.getTarget().getSubtarget<X86Subtarget>(); MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); - MachineBasicBlock::iterator MBBI = MBB.begin(); bool needsFrameMoves = (MMI && MMI->hasDebugInfo()) || - !Fn->doesNotThrow() || - UnwindTablesMandatory; + !Fn->doesNotThrow() || UnwindTablesMandatory; + uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment. + uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate. + bool HasFP = hasFP(MF); DebugLoc DL; - // Prepare for frame info. - unsigned FrameLabelId = 0; - - // Get the number of bytes to allocate from the FrameInfo. - uint64_t StackSize = MFI->getStackSize(); - - // Get desired stack alignment - uint64_t MaxAlign = MFI->getMaxAlignment(); - // Add RETADDR move area to callee saved frame size. int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); if (TailCallReturnAddrDelta < 0) X86FI->setCalleeSavedFrameSize( - X86FI->getCalleeSavedFrameSize() +(-TailCallReturnAddrDelta)); + X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta); // If this is x86-64 and the Red Zone is not disabled, if we are a leaf // function, and use up to 128 bytes of stack space, don't have a frame // pointer, calls, or dynamic alloca then we do not need to adjust the // stack pointer (we fit in the Red Zone). - bool DisableRedZone = Fn->hasFnAttr(Attribute::NoRedZone); - if (Is64Bit && !DisableRedZone && + if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) && !needsStackRealignment(MF) && !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->hasCalls() && // No calls. !Subtarget->isTargetWin64()) { // Win64 has no Red Zone uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); - if (hasFP(MF)) MinSize += SlotSize; - StackSize = std::max(MinSize, - StackSize > 128 ? StackSize - 128 : 0); + if (HasFP) MinSize += SlotSize; + StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); + MFI->setStackSize(StackSize); + } else if (Subtarget->isTargetWin64()) { + // We need to always allocate 32 bytes as register spill area. + // FIXME: We might reuse these 32 bytes for leaf functions. 
+ StackSize += 32; MFI->setStackSize(StackSize); } @@ -769,33 +904,73 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { if (TailCallReturnAddrDelta < 0) { MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Is64Bit? X86::SUB64ri32 : X86::SUB32ri), - StackPtr).addReg(StackPtr).addImm(-TailCallReturnAddrDelta); - // The EFLAGS implicit def is dead. - MI->getOperand(3).setIsDead(); + StackPtr) + .addReg(StackPtr) + .addImm(-TailCallReturnAddrDelta); + MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. } + // Mapping for machine moves: + // + // DST: VirtualFP AND + // SRC: VirtualFP => DW_CFA_def_cfa_offset + // ELSE => DW_CFA_def_cfa + // + // SRC: VirtualFP AND + // DST: Register => DW_CFA_def_cfa_register + // + // ELSE + // OFFSET < 0 => DW_CFA_offset_extended_sf + // REG < 64 => DW_CFA_offset + Reg + // ELSE => DW_CFA_offset_extended + + std::vector<MachineMove> &Moves = MMI->getFrameMoves(); + const TargetData *TD = MF.getTarget().getTargetData(); uint64_t NumBytes = 0; - if (hasFP(MF)) { - // Calculate required stack adjustment + int stackGrowth = + (MF.getTarget().getFrameInfo()->getStackGrowthDirection() == + TargetFrameInfo::StackGrowsUp ? + TD->getPointerSize() : -TD->getPointerSize()); + + if (HasFP) { + // Calculate required stack adjustment. uint64_t FrameSize = StackSize - SlotSize; if (needsStackRealignment(MF)) - FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign; + FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign; NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize(); - // Get the offset of the stack slot for the EBP register... which is + // Get the offset of the stack slot for the EBP register, which is // guaranteed to be the last slot by processFunctionBeforeFrameFinalized. // Update the frame offset adjustment. MFI->setOffsetAdjustment(-NumBytes); - // Save EBP into the appropriate stack slot... + // Save EBP/RBP into the appropriate stack slot. BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) .addReg(FramePtr, RegState::Kill); if (needsFrameMoves) { - // Mark effective beginning of when frame pointer becomes valid. - FrameLabelId = MMI->NextLabelID(); + // Mark the place where EBP/RBP was saved. + unsigned FrameLabelId = MMI->NextLabelID(); BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(FrameLabelId); + + // Define the current CFA rule to use the provided offset. + if (StackSize) { + MachineLocation SPDst(MachineLocation::VirtualFP); + MachineLocation SPSrc(MachineLocation::VirtualFP, 2 * stackGrowth); + Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc)); + } else { + // FIXME: Verify & implement for FP + MachineLocation SPDst(StackPtr); + MachineLocation SPSrc(StackPtr, stackGrowth); + Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc)); + } + + // Change the rule for the FramePtr to be an "offset" rule. + MachineLocation FPDst(MachineLocation::VirtualFP, + 2 * stackGrowth); + MachineLocation FPSrc(FramePtr); + Moves.push_back(MachineMove(FrameLabelId, FPDst, FPSrc)); } // Update EBP with the new base value... @@ -803,6 +978,17 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr) .addReg(StackPtr); + if (needsFrameMoves) { + // Mark effective beginning of when frame pointer becomes valid. + unsigned FrameLabelId = MMI->NextLabelID(); + BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(FrameLabelId); + + // Define the current CFA to use the EBP/RBP register. 
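// Aside (illustration; not part of the patch -- the MachineLocation code
// continues below): for a standard frame-pointer prologue, the machine moves
// built here correspond to the familiar assembler-level CFI sequence on
// x86-64:
//
//   pushq %rbp          # .cfi_def_cfa_offset 16
//                       # .cfi_offset %rbp, -16
//   movq  %rsp, %rbp    # .cfi_def_cfa_register %rbp
//
// (AT&T syntax; the 16 reflects the 8-byte return address plus the saved
// %rbp, matching the "2 * stackGrowth" offsets in the moves.)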
+ MachineLocation FPDst(FramePtr); + MachineLocation FPSrc(MachineLocation::VirtualFP); + Moves.push_back(MachineMove(FrameLabelId, FPDst, FPSrc)); + } + // Mark the FramePtr as live-in in every block except the entry. for (MachineFunction::iterator I = next(MF.begin()), E = MF.end(); I != E; ++I) @@ -814,6 +1000,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr).addReg(StackPtr).addImm(-MaxAlign); + // The EFLAGS implicit def is dead. MI->getOperand(3).setIsDead(); } @@ -822,11 +1009,30 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { } // Skip the callee-saved push instructions. + bool PushedRegs = false; + int StackOffset = 2 * stackGrowth; + while (MBBI != MBB.end() && (MBBI->getOpcode() == X86::PUSH32r || - MBBI->getOpcode() == X86::PUSH64r)) + MBBI->getOpcode() == X86::PUSH64r)) { + PushedRegs = true; ++MBBI; + if (!HasFP && needsFrameMoves) { + // Mark callee-saved push instruction. + unsigned LabelId = MMI->NextLabelID(); + BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(LabelId); + + // Define the current CFA rule to use the provided offset. + unsigned Ptr = StackSize ? + MachineLocation::VirtualFP : StackPtr; + MachineLocation SPDst(Ptr); + MachineLocation SPSrc(Ptr, StackOffset); + Moves.push_back(MachineMove(LabelId, SPDst, SPSrc)); + StackOffset += stackGrowth; + } + } + if (MBBI != MBB.end()) DL = MBBI->getDebugLoc(); @@ -883,12 +1089,29 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII); } - if (needsFrameMoves) { - unsigned ReadyLabelId = 0; - // Mark effective beginning of when frame pointer is ready. - ReadyLabelId = MMI->NextLabelID(); - BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId); - emitFrameMoves(MF, FrameLabelId, ReadyLabelId); + if ((NumBytes || PushedRegs) && needsFrameMoves) { + // Mark end of stack pointer adjustment. + unsigned LabelId = MMI->NextLabelID(); + BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(LabelId); + + if (!HasFP && NumBytes) { + // Define the current CFA rule to use the provided offset. + if (StackSize) { + MachineLocation SPDst(MachineLocation::VirtualFP); + MachineLocation SPSrc(MachineLocation::VirtualFP, + -StackSize + stackGrowth); + Moves.push_back(MachineMove(LabelId, SPDst, SPSrc)); + } else { + // FIXME: Verify & implement for FP + MachineLocation SPDst(StackPtr); + MachineLocation SPSrc(StackPtr, stackGrowth); + Moves.push_back(MachineMove(LabelId, SPDst, SPSrc)); + } + } + + // Emit DWARF info specifying the offsets of the callee-saved registers. + if (PushedRegs) + emitCalleeSavedFrameMoves(MF, LabelId, HasFP ? FramePtr : StackPtr); } } @@ -901,6 +1124,8 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, DebugLoc DL = MBBI->getDebugLoc(); switch (RetOpcode) { + default: + llvm_unreachable("Can only insert epilog into returning blocks"); case X86::RET: case X86::RETI: case X86::TCRETURNdi: @@ -911,26 +1136,25 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, case X86::EH_RETURN64: case X86::TAILJMPd: case X86::TAILJMPr: - case X86::TAILJMPm: break; // These are ok - default: - assert(0 && "Can only insert epilog into returning blocks"); + case X86::TAILJMPm: + break; // These are ok } - // Get the number of bytes to allocate from the FrameInfo + // Get the number of bytes to allocate from the FrameInfo. 
uint64_t StackSize = MFI->getStackSize(); uint64_t MaxAlign = MFI->getMaxAlignment(); unsigned CSSize = X86FI->getCalleeSavedFrameSize(); uint64_t NumBytes = 0; if (hasFP(MF)) { - // Calculate required stack adjustment + // Calculate required stack adjustment. uint64_t FrameSize = StackSize - SlotSize; if (needsStackRealignment(MF)) FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign; NumBytes = FrameSize - CSSize; - // pop EBP. + // Pop EBP. BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr); } else { @@ -942,9 +1166,11 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, while (MBBI != MBB.begin()) { MachineBasicBlock::iterator PI = prior(MBBI); unsigned Opc = PI->getOpcode(); + if (Opc != X86::POP32r && Opc != X86::POP64r && !PI->getDesc().isTerminator()) break; + --MBBI; } @@ -957,10 +1183,10 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, // If dynamic alloca is used, then reset esp to point to the last callee-saved // slot before popping them off! The same applies when the stack was - // realigned + // realigned. if (needsStackRealignment(MF)) { // We cannot use LEA here, because stack pointer was realigned. We need to - // deallocate local frame back + // deallocate local frame back. if (CSSize) { emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII); MBBI = prior(LastCSPop); @@ -972,17 +1198,18 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, } else if (MFI->hasVarSizedObjects()) { if (CSSize) { unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r; - MachineInstr *MI = addLeaRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr), - FramePtr, false, -CSSize); + MachineInstr *MI = + addLeaRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr), + FramePtr, false, -CSSize); MBB.insert(MBBI, MI); - } else - BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), - StackPtr).addReg(FramePtr); - - } else { - // adjust stack pointer back: ESP += numbytes - if (NumBytes) - emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII); + } else { + BuildMI(MBB, MBBI, DL, + TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr) + .addReg(FramePtr); + } + } else if (NumBytes) { + // Adjust stack pointer back: ESP += numbytes. + emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII); } // We're returning from function via eh_return. @@ -993,9 +1220,9 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr).addReg(DestAddr.getReg()); - // Tail call return: adjust the stack pointer and jump to callee } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64) { + // Tail call return: adjust the stack pointer and jump to callee. MBBI = prior(MBB.end()); MachineOperand &JumpTarget = MBBI->getOperand(0); MachineOperand &StackAdjust = MBBI->getOperand(1); @@ -1006,6 +1233,7 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, int MaxTCDelta = X86FI->getTCReturnAddrDelta(); int Offset = 0; assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive"); + // Incorporate the retaddr area. Offset = StackAdj-MaxTCDelta; assert(Offset >= 0 && "Offset should never be negative"); @@ -1032,6 +1260,7 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, // Add the return addr area delta back since we are not tail calling. int delta = -1*X86FI->getTCReturnAddrDelta(); MBBI = prior(MBB.end()); + // Check for possible merge with preceding ADD instruction.
delta += mergeSPUpdates(MBB, MBBI, StackPtr, true); emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII); @@ -1039,18 +1268,16 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, } unsigned X86RegisterInfo::getRARegister() const { - if (Is64Bit) - return X86::RIP; // Should have dwarf #16 - else - return X86::EIP; // Should have dwarf #8 + return Is64Bit ? X86::RIP // Should have dwarf #16. + : X86::EIP; // Should have dwarf #8. } unsigned X86RegisterInfo::getFrameRegister(MachineFunction &MF) const { return hasFP(MF) ? FramePtr : StackPtr; } -void X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) - const { +void +X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const { // Calculate amount of bytes used for return address storing int stackGrowth = (Is64Bit ? -8 : -4); @@ -1066,18 +1293,18 @@ void X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) } unsigned X86RegisterInfo::getEHExceptionRegister() const { - assert(0 && "What is the exception register"); + llvm_unreachable("What is the exception register"); return 0; } unsigned X86RegisterInfo::getEHHandlerRegister() const { - assert(0 && "What is the exception handler register"); + llvm_unreachable("What is the exception handler register"); return 0; } namespace llvm { -unsigned getX86SubSuperRegister(unsigned Reg, MVT VT, bool High) { - switch (VT.getSimpleVT()) { +unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) { + switch (VT.getSimpleVT().SimpleTy) { default: return Reg; case MVT::i8: if (High) { @@ -1264,14 +1491,21 @@ namespace { RegNum < RI.getLastVirtReg(); ++RegNum) MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment()); - FFI->setMaxAlignment(MaxAlign); + if (FFI->getMaxAlignment() == MaxAlign) + return false; - return false; + FFI->setMaxAlignment(MaxAlign); + return true; } virtual const char *getPassName() const { return "X86 Maximal Stack Alignment Calculator"; } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } }; char MSAC::ID = 0; diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index 33b9f5e..f635707 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -93,9 +93,16 @@ public: /// Code Generation virtual methods... /// + /// getMatchingSuperRegClass - Return a subclass of the specified register + /// class A so that each register in it has a sub-register of the + /// specified sub-register index which is in the specified register class B. + virtual const TargetRegisterClass * + getMatchingSuperRegClass(const TargetRegisterClass *A, + const TargetRegisterClass *B, unsigned Idx) const; + /// getPointerRegClass - Returns a TargetRegisterClass used for pointer /// values. - const TargetRegisterClass *getPointerRegClass() const; + const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const; /// getCrossCopyRegClass - Returns a legal register class to copy a register /// in the specified class to or from. 
Returns NULL if it is possible to copy @@ -125,23 +132,25 @@ public: bool hasReservedCallFrame(MachineFunction &MF) const; + bool hasReservedSpillSlot(MachineFunction &MF, unsigned Reg, + int &FrameIdx) const; + void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const; - void eliminateFrameIndex(MachineBasicBlock::iterator MI, - int SPAdj, RegScavenger *RS = NULL) const; + unsigned eliminateFrameIndex(MachineBasicBlock::iterator MI, + int SPAdj, int *Value = NULL, + RegScavenger *RS = NULL) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; + void emitCalleeSavedFrameMoves(MachineFunction &MF, unsigned LabelId, + unsigned FramePtr) const; void emitPrologue(MachineFunction &MF) const; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - void emitFrameMoves(MachineFunction &MF, - unsigned FrameLabelId, unsigned ReadyLabelId) const; - // Debug information queries. unsigned getRARegister() const; unsigned getFrameRegister(MachineFunction &MF) const; @@ -155,8 +164,8 @@ public: // getX86SubSuperRegister - X86 utility function. It returns the sub or super // register of a specific X86 register. -// e.g. getX86SubSuperRegister(X86::EAX, MVT::i16) return X86:AX -unsigned getX86SubSuperRegister(unsigned, MVT, bool High=false); +// e.g. getX86SubSuperRegister(X86::EAX, EVT::i16) returns X86::AX +unsigned getX86SubSuperRegister(unsigned, EVT, bool High=false); } // End llvm namespace diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 2e6f017..7bf074d 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -270,42 +270,27 @@ def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, // require a REX prefix. For example, "addb %ah, %dil" and "movzbl %ah, %r8d" // cannot be encoded. def GR8 : RegisterClass<"X86", [i8], 8, - [AL, CL, DL, BL, AH, CH, DH, BH, SIL, DIL, BPL, SPL, + [AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL, R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B]> { let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; }]; let MethodBodies = [{ - // Does the function dedicate RBP / EBP to being a frame ptr? - // If so, don't allocate SPL or BPL. - static const unsigned X86_GR8_AO_64_fp[] = { - X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL, - X86::R8B, X86::R9B, X86::R10B, X86::R11B, - X86::BL, X86::R14B, X86::R15B, X86::R12B, X86::R13B - }; - // If not, just don't allocate SPL. static const unsigned X86_GR8_AO_64[] = { X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL, X86::R8B, X86::R9B, X86::R10B, X86::R11B, X86::BL, X86::R14B, X86::R15B, X86::R12B, X86::R13B, X86::BPL }; - // In 32-mode, none of the 8-bit registers aliases EBP or ESP.
- static const unsigned X86_GR8_AO_32[] = { - X86::AL, X86::CL, X86::DL, X86::AH, X86::CH, X86::DH, X86::BL, X86::BH - }; GR8Class::iterator GR8Class::allocation_order_begin(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); - if (!Subtarget.is64Bit()) - return X86_GR8_AO_32; - else if (RI->hasFP(MF)) - return X86_GR8_AO_64_fp; - else + if (Subtarget.is64Bit()) return X86_GR8_AO_64; + else + return begin(); } GR8Class::iterator @@ -313,17 +298,20 @@ def GR8 : RegisterClass<"X86", [i8], 8, const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + // Does the function dedicate RBP / EBP to being a frame ptr? if (!Subtarget.is64Bit()) - return X86_GR8_AO_32 + (sizeof(X86_GR8_AO_32) / sizeof(unsigned)); + // In 32-mode, none of the 8-bit registers aliases EBP or ESP. + return begin() + 8; else if (RI->hasFP(MF)) - return X86_GR8_AO_64_fp + (sizeof(X86_GR8_AO_64_fp) / sizeof(unsigned)); + // If so, don't allocate SPL or BPL. + return array_endof(X86_GR8_AO_64) - 1; else - return X86_GR8_AO_64 + (sizeof(X86_GR8_AO_64) / sizeof(unsigned)); + // If not, just don't allocate SPL. + return array_endof(X86_GR8_AO_64); } }]; } - def GR16 : RegisterClass<"X86", [i16], 16, [AX, CX, DX, SI, DI, BX, BP, SP, R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W]> { @@ -333,42 +321,20 @@ def GR16 : RegisterClass<"X86", [i16], 16, iterator allocation_order_end(const MachineFunction &MF) const; }]; let MethodBodies = [{ - // Does the function dedicate RBP / EBP to being a frame ptr? - // If so, don't allocate SP or BP. - static const unsigned X86_GR16_AO_64_fp[] = { - X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, - X86::R8W, X86::R9W, X86::R10W, X86::R11W, - X86::BX, X86::R14W, X86::R15W, X86::R12W, X86::R13W - }; - static const unsigned X86_GR16_AO_32_fp[] = { - X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX - }; - // If not, just don't allocate SP. static const unsigned X86_GR16_AO_64[] = { X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::R8W, X86::R9W, X86::R10W, X86::R11W, X86::BX, X86::R14W, X86::R15W, X86::R12W, X86::R13W, X86::BP }; - static const unsigned X86_GR16_AO_32[] = { - X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX, X86::BP - }; GR16Class::iterator GR16Class::allocation_order_begin(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); - if (Subtarget.is64Bit()) { - if (RI->hasFP(MF)) - return X86_GR16_AO_64_fp; - else - return X86_GR16_AO_64; - } else { - if (RI->hasFP(MF)) - return X86_GR16_AO_32_fp; - else - return X86_GR16_AO_32; - } + if (Subtarget.is64Bit()) + return X86_GR16_AO_64; + else + return begin(); } GR16Class::iterator @@ -377,21 +343,26 @@ def GR16 : RegisterClass<"X86", [i16], 16, const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); if (Subtarget.is64Bit()) { + // Does the function dedicate RBP to being a frame ptr? if (RI->hasFP(MF)) - return X86_GR16_AO_64_fp+(sizeof(X86_GR16_AO_64_fp)/sizeof(unsigned)); + // If so, don't allocate SP or BP. + return array_endof(X86_GR16_AO_64) - 1; else - return X86_GR16_AO_64 + (sizeof(X86_GR16_AO_64) / sizeof(unsigned)); + // If not, just don't allocate SP. 
+ return array_endof(X86_GR16_AO_64); } else { + // Does the function dedicate EBP to being a frame ptr? if (RI->hasFP(MF)) - return X86_GR16_AO_32_fp+(sizeof(X86_GR16_AO_32_fp)/sizeof(unsigned)); + // If so, don't allocate SP or BP. + return begin() + 6; else - return X86_GR16_AO_32 + (sizeof(X86_GR16_AO_32) / sizeof(unsigned)); + // If not, just don't allocate SP. + return begin() + 7; } } }]; } - def GR32 : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP, R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> { @@ -401,42 +372,20 @@ def GR32 : RegisterClass<"X86", [i32], 32, iterator allocation_order_end(const MachineFunction &MF) const; }]; let MethodBodies = [{ - // Does the function dedicate RBP / EBP to being a frame ptr? - // If so, don't allocate ESP or EBP. - static const unsigned X86_GR32_AO_64_fp[] = { - X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, - X86::R8D, X86::R9D, X86::R10D, X86::R11D, - X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D - }; - static const unsigned X86_GR32_AO_32_fp[] = { - X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX - }; - // If not, just don't allocate ESP. static const unsigned X86_GR32_AO_64[] = { X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::R8D, X86::R9D, X86::R10D, X86::R11D, X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D, X86::EBP }; - static const unsigned X86_GR32_AO_32[] = { - X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP - }; GR32Class::iterator GR32Class::allocation_order_begin(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); - if (Subtarget.is64Bit()) { - if (RI->hasFP(MF)) - return X86_GR32_AO_64_fp; - else - return X86_GR32_AO_64; - } else { - if (RI->hasFP(MF)) - return X86_GR32_AO_32_fp; - else - return X86_GR32_AO_32; - } + if (Subtarget.is64Bit()) + return X86_GR32_AO_64; + else + return begin(); } GR32Class::iterator @@ -445,21 +394,29 @@ def GR32 : RegisterClass<"X86", [i32], 32, const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); if (Subtarget.is64Bit()) { + // Does the function dedicate RBP to being a frame ptr? if (RI->hasFP(MF)) - return X86_GR32_AO_64_fp+(sizeof(X86_GR32_AO_64_fp)/sizeof(unsigned)); + // If so, don't allocate ESP or EBP. + return array_endof(X86_GR32_AO_64) - 1; else - return X86_GR32_AO_64 + (sizeof(X86_GR32_AO_64) / sizeof(unsigned)); + // If not, just don't allocate ESP. + return array_endof(X86_GR32_AO_64); } else { + // Does the function dedicate EBP to being a frame ptr? if (RI->hasFP(MF)) - return X86_GR32_AO_32_fp+(sizeof(X86_GR32_AO_32_fp)/sizeof(unsigned)); + // If so, don't allocate ESP or EBP. + return begin() + 6; else - return X86_GR32_AO_32 + (sizeof(X86_GR32_AO_32) / sizeof(unsigned)); + // If not, just don't allocate ESP. + return begin() + 7; } } }]; } - +// GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since +// RIP isn't really a register and it can't be used anywhere except in an +// address, but it doesn't cause trouble. def GR64 : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, RBX, R14, R15, R12, R13, RBP, RSP, RIP]> { @@ -483,6 +440,11 @@ def GR64 : RegisterClass<"X86", [i64], 64, }]; } +// Segment registers for use by MOV instructions (and others) that have a +// segment register as one operand. Always contain a 16-bit segment +// descriptor. 
+def SEGMENT_REG : RegisterClass<"X86", [i16], 16, [CS, DS, SS, ES, FS, GS]> { +} // GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of // GR8, GR16, GR32, and GR64 which contain just the "a" "b", "c", and "d" @@ -509,38 +471,25 @@ def GR64_ABCD : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> { // On x86-64, GR64_NOREX, GR32_NOREX and GR16_NOREX are the classes // of registers which do not by themselves require a REX prefix. def GR8_NOREX : RegisterClass<"X86", [i8], 8, - [AL, CL, DL, BL, AH, CH, DH, BH, + [AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL]> { let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; }]; let MethodBodies = [{ - // Does the function dedicate RBP / EBP to being a frame ptr? - // If so, don't allocate SPL or BPL. - static const unsigned X86_GR8_NOREX_AO_64_fp[] = { - X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL, X86::BL - }; - // If not, just don't allocate SPL. static const unsigned X86_GR8_NOREX_AO_64[] = { X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL, X86::BL, X86::BPL }; - // In 32-mode, none of the 8-bit registers aliases EBP or ESP. - static const unsigned X86_GR8_NOREX_AO_32[] = { - X86::AL, X86::CL, X86::DL, X86::AH, X86::CH, X86::DH, X86::BL, X86::BH - }; GR8_NOREXClass::iterator GR8_NOREXClass::allocation_order_begin(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); - if (!Subtarget.is64Bit()) - return X86_GR8_NOREX_AO_32; - else if (RI->hasFP(MF)) - return X86_GR8_NOREX_AO_64_fp; - else + if (Subtarget.is64Bit()) return X86_GR8_NOREX_AO_64; + else + return begin(); } GR8_NOREXClass::iterator @@ -548,15 +497,16 @@ def GR8_NOREX : RegisterClass<"X86", [i8], 8, const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + // Does the function dedicate RBP / EBP to being a frame ptr? if (!Subtarget.is64Bit()) - return X86_GR8_NOREX_AO_32 + - (sizeof(X86_GR8_NOREX_AO_32) / sizeof(unsigned)); + // In 32-mode, none of the 8-bit registers aliases EBP or ESP. + return begin() + 8; else if (RI->hasFP(MF)) - return X86_GR8_NOREX_AO_64_fp + - (sizeof(X86_GR8_NOREX_AO_64_fp) / sizeof(unsigned)); + // If so, don't allocate SPL or BPL. + return array_endof(X86_GR8_NOREX_AO_64) - 1; else - return X86_GR8_NOREX_AO_64 + - (sizeof(X86_GR8_NOREX_AO_64) / sizeof(unsigned)); + // If not, just don't allocate SPL. + return array_endof(X86_GR8_NOREX_AO_64); } }]; } @@ -564,38 +514,20 @@ def GR16_NOREX : RegisterClass<"X86", [i16], 16, [AX, CX, DX, SI, DI, BX, BP, SP]> { let SubRegClassList = [GR8_NOREX, GR8_NOREX]; let MethodProtos = [{ - iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; }]; let MethodBodies = [{ - // Does the function dedicate RBP / EBP to being a frame ptr? - // If so, don't allocate SP or BP. - static const unsigned X86_GR16_AO_fp[] = { - X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX - }; - // If not, just don't allocate SP. 
- static const unsigned X86_GR16_AO[] = { - X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX, X86::BP - }; - - GR16_NOREXClass::iterator - GR16_NOREXClass::allocation_order_begin(const MachineFunction &MF) const { - const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); - if (RI->hasFP(MF)) - return X86_GR16_AO_fp; - else - return X86_GR16_AO; - } - GR16_NOREXClass::iterator GR16_NOREXClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); + // Does the function dedicate RBP / EBP to being a frame ptr? if (RI->hasFP(MF)) - return X86_GR16_AO_fp+(sizeof(X86_GR16_AO_fp)/sizeof(unsigned)); + // If so, don't allocate SP or BP. + return end() - 2; else - return X86_GR16_AO + (sizeof(X86_GR16_AO) / sizeof(unsigned)); + // If not, just don't allocate SP. + return end() - 1; } }]; } @@ -604,89 +536,149 @@ def GR32_NOREX : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]> { let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX]; let MethodProtos = [{ - iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; }]; let MethodBodies = [{ - // Does the function dedicate RBP / EBP to being a frame ptr? - // If so, don't allocate ESP or EBP. - static const unsigned X86_GR32_NOREX_AO_fp[] = { - X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX - }; - // If not, just don't allocate ESP. - static const unsigned X86_GR32_NOREX_AO[] = { - X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP - }; - GR32_NOREXClass::iterator - GR32_NOREXClass::allocation_order_begin(const MachineFunction &MF) const { + GR32_NOREXClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); + // Does the function dedicate RBP / EBP to being a frame ptr? if (RI->hasFP(MF)) - return X86_GR32_NOREX_AO_fp; + // If so, don't allocate ESP or EBP. + return end() - 2; else - return X86_GR32_NOREX_AO; + // If not, just don't allocate ESP. + return end() - 1; } - - GR32_NOREXClass::iterator - GR32_NOREXClass::allocation_order_end(const MachineFunction &MF) const { + }]; +} +// GR64_NOREX - GR64 registers which do not require a REX prefix. +def GR64_NOREX : RegisterClass<"X86", [i64], 64, + [RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP]> { + let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX]; + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + GR64_NOREXClass::iterator + GR64_NOREXClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); + // Does the function dedicate RBP to being a frame ptr? if (RI->hasFP(MF)) - return X86_GR32_NOREX_AO_fp + - (sizeof(X86_GR32_NOREX_AO_fp) / sizeof(unsigned)); + // If so, don't allocate RIP, RSP or RBP. + return end() - 3; else - return X86_GR32_NOREX_AO + - (sizeof(X86_GR32_NOREX_AO) / sizeof(unsigned)); + // If not, just don't allocate RIP or RSP. + return end() - 2; } }]; } -// GR64_NOREX - GR64 registers which do not require a REX prefix. -def GR64_NOREX : RegisterClass<"X86", [i64], 64, - [RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP]> { - let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX]; +// GR32_NOSP - GR32 registers except ESP. 
+def GR32_NOSP : RegisterClass<"X86", [i32], 32, + [EAX, ECX, EDX, ESI, EDI, EBX, EBP, + R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> { + let SubRegClassList = [GR8, GR8, GR16]; let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; }]; let MethodBodies = [{ - // Does the function dedicate RBP / EBP to being a frame ptr? - // If so, don't allocate RSP or RBP. - static const unsigned X86_GR64_NOREX_AO_fp[] = { - X86::RAX, X86::RCX, X86::RDX, X86::RSI, X86::RDI, X86::RBX - }; - // If not, just don't allocate RSP. - static const unsigned X86_GR64_NOREX_AO[] = { - X86::RAX, X86::RCX, X86::RDX, X86::RSI, X86::RDI, X86::RBX, X86::RBP + static const unsigned X86_GR32_NOSP_AO_64[] = { + X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, + X86::R8D, X86::R9D, X86::R10D, X86::R11D, + X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D, X86::EBP }; - GR64_NOREXClass::iterator - GR64_NOREXClass::allocation_order_begin(const MachineFunction &MF) const { + GR32_NOSPClass::iterator + GR32_NOSPClass::allocation_order_begin(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + if (Subtarget.is64Bit()) + return X86_GR32_NOSP_AO_64; + else + return begin(); + } + + GR32_NOSPClass::iterator + GR32_NOSPClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); - if (RI->hasFP(MF)) - return X86_GR64_NOREX_AO_fp; + const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + if (Subtarget.is64Bit()) { + // Does the function dedicate RBP to being a frame ptr? + if (RI->hasFP(MF)) + // If so, don't allocate EBP. + return array_endof(X86_GR32_NOSP_AO_64) - 1; + else + // If not, any reg in this class is ok. + return array_endof(X86_GR32_NOSP_AO_64); + } else { + // Does the function dedicate EBP to being a frame ptr? + if (RI->hasFP(MF)) + // If so, don't allocate EBP. + return begin() + 6; + else + // If not, any reg in this class is ok. + return begin() + 7; + } + } + }]; +} + +// GR64_NOSP - GR64 registers except RSP (and RIP). +def GR64_NOSP : RegisterClass<"X86", [i64], 64, + [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, + RBX, R14, R15, R12, R13, RBP]> { + let SubRegClassList = [GR8, GR8, GR16, GR32_NOSP]; + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + GR64_NOSPClass::iterator + GR64_NOSPClass::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + if (!Subtarget.is64Bit()) + return begin(); // None of these are allocatable in 32-bit. + if (RI->hasFP(MF)) // Does the function dedicate RBP to being a frame ptr? + return end()-1; // If so, don't allocate RBP else - return X86_GR64_NOREX_AO; + return end(); // If not, any reg in this class is ok. } + }]; +} - GR64_NOREXClass::iterator - GR64_NOREXClass::allocation_order_end(const MachineFunction &MF) const { +// GR64_NOREX_NOSP - GR64_NOREX registers except RSP. 
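// All of the *_NOSP and *_NOREX classes in this file share one idiom: the
// register allocator walks the half-open range [allocation_order_begin(MF),
// allocation_order_end(MF)), so returning a trimmed end() is how a class
// makes its trailing registers (BP/RBP here) unallocatable for a given
// function without keeping a separate _fp array. A minimal sketch of a
// consumer of that interface follows; it is illustrative only, and isFree
// is a hypothetical predicate named just for this example.
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetRegisterInfo.h"
static unsigned pickFirstFreeReg(const llvm::TargetRegisterClass *RC,
                                 const llvm::MachineFunction &MF,
                                 bool (*isFree)(unsigned)) {
  for (llvm::TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
         E = RC->allocation_order_end(MF); I != E; ++I)
    if (isFree(*I))
      return *I;  // First free register in this function's allocation order.
  return 0;       // 0 is not a register; the class is exhausted.
}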
+def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64, + [RAX, RCX, RDX, RSI, RDI, RBX, RBP]> { + let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX]; + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + GR64_NOREX_NOSPClass::iterator + GR64_NOREX_NOSPClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); + // Does the function dedicate RBP to being a frame ptr? if (RI->hasFP(MF)) - return X86_GR64_NOREX_AO_fp + - (sizeof(X86_GR64_NOREX_AO_fp) / sizeof(unsigned)); + // If so, don't allocate RBP. + return end() - 1; else - return X86_GR64_NOREX_AO + - (sizeof(X86_GR64_NOREX_AO) / sizeof(unsigned)); + // If not, any reg in this class is ok. + return end(); } }]; } // A class to support the 'A' assembler constraint: EAX then EDX. -def GRAD : RegisterClass<"X86", [i32], 32, [EAX, EDX]>; +def GR32_AD : RegisterClass<"X86", [i32], 32, [EAX, EDX]> { + let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD]; +} // Scalar SSE2 floating point registers. def FR32 : RegisterClass<"X86", [f32], 32, diff --git a/lib/Target/X86/X86Relocations.h b/lib/Target/X86/X86Relocations.h index b225f48..990962d 100644 --- a/lib/Target/X86/X86Relocations.h +++ b/lib/Target/X86/X86Relocations.h @@ -20,21 +20,31 @@ namespace llvm { namespace X86 { /// RelocationType - An enum for the x86 relocation codes. Note that /// the terminology here doesn't follow x86 convention - word means - /// 32-bit and dword means 64-bit. + /// 32-bit and dword means 64-bit. The relocations are handled by the JIT + /// or ObjectCode emitters; this is transparent to the x86 code emitter, + /// but the JIT and ObjectCode emitters treat them differently. enum RelocationType { - // reloc_pcrel_word - PC relative relocation, add the relocated value to - // the value already in memory, after we adjust it for where the PC is. + /// reloc_pcrel_word - PC relative relocation, add the relocated value to + /// the value already in memory, after we adjust it for where the PC is. reloc_pcrel_word = 0, - // reloc_picrel_word - PIC base relative relocation, add the relocated - // value to the value already in memory, after we adjust it for where the - // PIC base is. + /// reloc_picrel_word - PIC base relative relocation, add the relocated + /// value to the value already in memory, after we adjust it for where the + /// PIC base is. reloc_picrel_word = 1, - - // reloc_absolute_word, reloc_absolute_dword - Absolute relocation, just - // add the relocated value to the value already in memory. + + /// reloc_absolute_word - absolute relocation, just add the relocated + /// value to the value already in memory. reloc_absolute_word = 2, - reloc_absolute_dword = 3 + + /// reloc_absolute_word_sext - absolute relocation, just add the relocated + /// value to the value already in memory. In object files, it represents a + /// value which must be sign-extended when resolving the relocation. + reloc_absolute_word_sext = 3, + + /// reloc_absolute_dword - absolute relocation, just add the relocated + /// value to the value already in memory.
+ reloc_absolute_dword = 4 }; } } diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 8506fa6..fb76aeb 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -13,80 +13,111 @@ #define DEBUG_TYPE "subtarget" #include "X86Subtarget.h" +#include "X86InstrInfo.h" #include "X86GenSubtarget.inc" -#include "llvm/Module.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/GlobalValue.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; #if defined(_MSC_VER) - #include <intrin.h> +#include <intrin.h> #endif -static cl::opt<X86Subtarget::AsmWriterFlavorTy> -AsmWriterFlavor("x86-asm-syntax", cl::init(X86Subtarget::Unset), - cl::desc("Choose style of code to emit from X86 backend:"), - cl::values( - clEnumValN(X86Subtarget::ATT, "att", "Emit AT&T-style assembly"), - clEnumValN(X86Subtarget::Intel, "intel", "Emit Intel-style assembly"), - clEnumValEnd)); - - -/// True if accessing the GV requires an extra load. For Windows, dllimported -/// symbols are indirect, loading the value at address GV rather then the -/// value of GV itself. This means that the GlobalAddress must be in the base -/// or index register of the address, not the GV offset field. -bool X86Subtarget::GVRequiresExtraLoad(const GlobalValue* GV, - const TargetMachine& TM, - bool isDirectCall) const -{ - // FIXME: PIC - if (TM.getRelocationModel() != Reloc::Static && - TM.getCodeModel() != CodeModel::Large) { +/// ClassifyGlobalReference - Classify a global variable reference for the +/// current subtarget according to how we should reference it in a non-pcrel +/// context. +unsigned char X86Subtarget:: +ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { + // DLLImport only exists on Windows; it is implemented as a load from a + // DLLIMPORT stub. + if (GV->hasDLLImportLinkage()) + return X86II::MO_DLLIMPORT; + + // GVs with ghost linkage (in JIT lazy compilation mode) do not require an + // extra load from a stub. + bool isDecl = GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode(); + + // X86-64 in PIC mode. + if (isPICStyleRIPRel()) { + // Large model never uses stubs. + if (TM.getCodeModel() == CodeModel::Large) + return X86II::MO_NO_FLAG; + if (isTargetDarwin()) { - if (isDirectCall) - return false; - bool isDecl = GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode(); - if (GV->hasHiddenVisibility() && - (Is64Bit || (!isDecl && !GV->hasCommonLinkage()))) - // If symbol visibility is hidden, the extra load is not needed if - // target is x86-64 or the symbol is definitely defined in the current - // translation unit. - return false; - return !isDirectCall && (isDecl || GV->isWeakForLinker()); - } else if (isTargetELF()) { + // If symbol visibility is hidden, the extra load is not needed if + // target is x86-64 or the symbol is definitely defined in the current + // translation unit. + if (GV->hasDefaultVisibility() && + (isDecl || GV->isWeakForLinker())) + return X86II::MO_GOTPCREL; + } else { + assert(isTargetELF() && "Unknown rip-relative target"); + // Extra load is needed for all externally visible globals.
- if (isDirectCall) - return false; - if (GV->hasLocalLinkage() || GV->hasHiddenVisibility()) - return false; - return true; - } else if (isTargetCygMing() || isTargetWindows()) { - return (GV->hasDLLImportLinkage()); + if (!GV->hasLocalLinkage() && GV->hasDefaultVisibility()) + return X86II::MO_GOTPCREL; } + + return X86II::MO_NO_FLAG; } - return false; -} + + if (isPICStyleGOT()) { // 32-bit ELF targets. + // Extra load is needed for all externally visible globals. + if (GV->hasLocalLinkage() || GV->hasHiddenVisibility()) + return X86II::MO_GOTOFF; + return X86II::MO_GOT; + } + + if (isPICStyleStubPIC()) { // Darwin/32 in PIC mode. + // Determine whether we have a stub reference and/or whether the reference + // is relative to the PIC base or not. + + // If this is a strong reference to a definition, it is definitely not + // through a stub. + if (!isDecl && !GV->isWeakForLinker()) + return X86II::MO_PIC_BASE_OFFSET; -/// True if accessing the GV requires a register. This is a superset of the -/// cases where GVRequiresExtraLoad is true. Some variations of PIC require -/// a register, but not an extra load. -bool X86Subtarget::GVRequiresRegister(const GlobalValue *GV, - const TargetMachine& TM, - bool isDirectCall) const -{ - if (GVRequiresExtraLoad(GV, TM, isDirectCall)) - return true; - // Code below here need only consider cases where GVRequiresExtraLoad - // returns false. - if (TM.getRelocationModel() == Reloc::PIC_) - return !isDirectCall && - (GV->hasLocalLinkage() || GV->hasExternalLinkage()); - return false; + // Unless we have a symbol with hidden visibility, we have to go through a + // normal $non_lazy_ptr stub because this symbol might be resolved late. + if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference. + return X86II::MO_DARWIN_NONLAZY_PIC_BASE; + + // If symbol visibility is hidden, we have a stub for common symbol + // references and external declarations. + if (isDecl || GV->hasCommonLinkage()) { + // Hidden $non_lazy_ptr reference. + return X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE; + } + + // Otherwise, no stub. + return X86II::MO_PIC_BASE_OFFSET; + } + + if (isPICStyleStubNoDynamic()) { // Darwin/32 in -mdynamic-no-pic mode. + // Determine whether we have a stub reference. + + // If this is a strong reference to a definition, it is definitely not + // through a stub. + if (!isDecl && !GV->isWeakForLinker()) + return X86II::MO_NO_FLAG; + + // Unless we have a symbol with hidden visibility, we have to go through a + // normal $non_lazy_ptr stub because this symbol might be resolved late. + if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference. + return X86II::MO_DARWIN_NONLAZY; + + // Otherwise, no stub. + return X86II::MO_NO_FLAG; + } + + // Direct static reference to global. + return X86II::MO_NO_FLAG; } + /// getBZeroEntry - This function returns the name of a function which has an /// interface like the non-standard bzero function, if such a function exists on /// the current subtarget and it is considered preferable over memset with zero /// @@ -120,9 +151,9 @@ unsigned X86Subtarget::getSpecialAddressLatency() const { /// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the /// specified arguments. If we can't run cpuid on the host, return true.
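// For readers without the surrounding file: the probe below is open-coded
// inline assembly, but the same query can be expressed with the GCC/Clang
// <cpuid.h> helper. A minimal, self-contained sketch follows; using
// <cpuid.h> is an assumption made only for this illustration, the LLVM
// code itself does not depend on it.
#if defined(__GNUC__)
#include <cpuid.h>
static bool hostHasSSE2() {
  unsigned EAX, EBX, ECX, EDX;
  if (!__get_cpuid(1, &EAX, &EBX, &ECX, &EDX))
    return false;          // cpuid is unavailable on this host.
  return (EDX >> 26) & 1;  // Leaf 1, EDX bit 26 = SSE2 (bit 25 = SSE1).
}
#endif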
-bool X86::GetCpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, - unsigned *rECX, unsigned *rEDX) { -#if defined(__x86_64__) || defined(_M_AMD64) +static bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX, + unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { +#if defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64) #if defined(__GNUC__) // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. asm ("movq\t%%rbx, %%rsi\n\t" @@ -192,18 +223,19 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { char c[12]; } text; - if (X86::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1)) + if (GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1)) return; - X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); + GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); - if ((EDX >> 23) & 0x1) X86SSELevel = MMX; - if ((EDX >> 25) & 0x1) X86SSELevel = SSE1; - if ((EDX >> 26) & 0x1) X86SSELevel = SSE2; - if (ECX & 0x1) X86SSELevel = SSE3; - if ((ECX >> 9) & 0x1) X86SSELevel = SSSE3; - if ((ECX >> 19) & 0x1) X86SSELevel = SSE41; - if ((ECX >> 20) & 0x1) X86SSELevel = SSE42; + if ((EDX >> 15) & 1) HasCMov = true; + if ((EDX >> 23) & 1) X86SSELevel = MMX; + if ((EDX >> 25) & 1) X86SSELevel = SSE1; + if ((EDX >> 26) & 1) X86SSELevel = SSE2; + if (ECX & 0x1) X86SSELevel = SSE3; + if ((ECX >> 9) & 1) X86SSELevel = SSSE3; + if ((ECX >> 19) & 1) X86SSELevel = SSE41; + if ((ECX >> 20) & 1) X86SSELevel = SSE42; bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0; bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0; @@ -218,7 +250,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { DetectFamilyModel(EAX, Family, Model); IsBTMemSlow = IsAMD || (Family == 6 && Model >= 13); - X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); + GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); HasX86_64 = (EDX >> 29) & 0x1; HasSSE4A = IsAMD && ((ECX >> 6) & 0x1); HasFMA4 = IsAMD && ((ECX >> 16) & 0x1); @@ -227,13 +259,13 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { static const char *GetCurrentX86CPU() { unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; - if (X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX)) + if (GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX)) return "generic"; unsigned Family = 0; unsigned Model = 0; DetectFamilyModel(EAX, Family, Model); - X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); + GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); bool Em64T = (EDX >> 29) & 0x1; bool HasSSE3 = (ECX & 0x1); @@ -242,7 +274,7 @@ static const char *GetCurrentX86CPU() { char c[12]; } text; - X86::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1); + GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1); if (memcmp(text.c, "GenuineIntel", 12) == 0) { switch (Family) { case 3: @@ -319,9 +351,7 @@ static const char *GetCurrentX86CPU() { } case 15: if (HasSSE3) { - switch (Model) { - default: return "k8-sse3"; - } + return "k8-sse3"; } else { switch (Model) { case 1: return "opteron"; @@ -330,9 +360,7 @@ static const char *GetCurrentX86CPU() { } } case 16: - switch (Model) { - default: return "amdfam10"; - } + return "amdfam10"; default: return "generic"; } @@ -341,11 +369,12 @@ static const char *GetCurrentX86CPU() { } } -X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit) - : AsmFlavor(AsmWriterFlavor) - , PICStyle(PICStyles::None) +X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, + bool is64Bit) + : PICStyle(PICStyles::None) , X86SSELevel(NoMMXSSE) , X863DNowLevel(NoThreeDNow) + , HasCMov(false) ,
HasX86_64(false) , HasSSE4A(false) , HasAVX(false) @@ -384,15 +413,14 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit) if (Is64Bit) HasX86_64 = true; - DOUT << "Subtarget features: SSELevel " << X86SSELevel - << ", 3DNowLevel " << X863DNowLevel - << ", 64bit " << HasX86_64 << "\n"; + DEBUG(errs() << "Subtarget features: SSELevel " << X86SSELevel + << ", 3DNowLevel " << X863DNowLevel + << ", 64bit " << HasX86_64 << "\n"); assert((!Is64Bit || HasX86_64) && "64-bit code requested on a subtarget that doesn't support it!"); // Set the boolean corresponding to the current target triple, or the default // if one cannot be determined, to true. - const std::string& TT = M.getTargetTriple(); if (TT.length() > 5) { size_t Pos; if ((Pos = TT.find("-darwin")) != std::string::npos) { @@ -415,38 +443,10 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit) TargetType = isWindows; } else if (TT.find("windows") != std::string::npos) { TargetType = isWindows; - } - else if (TT.find("-cl") != std::string::npos) { + } else if (TT.find("-cl") != std::string::npos) { TargetType = isDarwin; DarwinVers = 9; } - } else if (TT.empty()) { -#if defined(__CYGWIN__) - TargetType = isCygwin; -#elif defined(__MINGW32__) || defined(__MINGW64__) - TargetType = isMingw; -#elif defined(__APPLE__) - TargetType = isDarwin; -#if __APPLE_CC__ > 5400 - DarwinVers = 9; // GCC 5400+ is Leopard. -#else - DarwinVers = 8; // Minimum supported darwin is Tiger. -#endif - -#elif defined(_WIN32) || defined(_WIN64) - TargetType = isWindows; -#elif defined(__linux__) - // Linux doesn't imply ELF, but we don't currently support anything else. - TargetType = isELF; - IsLinux = true; -#endif - } - - // If the asm syntax hasn't been overridden on the command line, use whatever - // the target wants. - if (AsmFlavor == X86Subtarget::Unset) { - AsmFlavor = (TargetType == isWindows) - ? X86Subtarget::Intel : X86Subtarget::ATT; } // Stack alignment is 16 bytes on Darwin (both 32 and 64 bit) and for all 64 diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 0d1434f..a2e368d 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -18,23 +18,22 @@ #include <string> namespace llvm { -class Module; class GlobalValue; class TargetMachine; +/// PICStyles - The X86 backend supports a number of different styles of PIC. +/// namespace PICStyles { enum Style { - Stub, GOT, RIPRel, WinPIC, None + StubPIC, // Used on i386-darwin in -fPIC mode. + StubDynamicNoPIC, // Used on i386-darwin in -mdynamic-no-pic mode. + GOT, // Used on many 32-bit unices in -fPIC mode. + RIPRel, // Used on X86-64 when not in -static mode. + None // Set when in -static mode (not PIC or DynamicNoPIC mode). }; } class X86Subtarget : public TargetSubtarget { -public: - enum AsmWriterFlavorTy { - // Note: This numbering has to match the GCC assembler dialects for inline - // asm alternatives to work right. - ATT = 0, Intel = 1, Unset - }; protected: enum X86SSEEnum { NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42 @@ -44,10 +43,6 @@ protected: NoThreeDNow, ThreeDNow, ThreeDNowA }; - /// AsmFlavor - Which x86 asm dialect to use. - /// - AsmWriterFlavorTy AsmFlavor; - /// PICStyle - Which PIC style to use /// PICStyles::Style PICStyle; @@ -60,6 +55,10 @@ protected: /// X863DNowEnum X863DNowLevel; + /// HasCMov - True if this processor has conditional move instructions + /// (generally pentium pro+). 
+ bool HasCMov; + /// HasX86_64 - True if the processor supports X86-64 instructions. /// bool HasX86_64; @@ -95,7 +94,7 @@ protected: unsigned MaxInlineSizeThreshold; private: - /// Is64Bit - True if the processor supports 64-bit instructions and module + /// Is64Bit - True if the processor supports 64-bit instructions and /// pointer size is 64 bit. bool Is64Bit; @@ -105,9 +104,9 @@ public: } TargetType; /// This constructor initializes the data members to match that - /// of the specified module. + /// of the specified triple. /// - X86Subtarget(const Module &M, const std::string &FS, bool is64Bit); + X86Subtarget(const std::string &TT, const std::string &FS, bool is64Bit); /// getStackAlignment - Returns the minimum alignment known to hold of the /// stack frame on entry to the function and which must be maintained by every @@ -145,66 +144,67 @@ public: bool hasAVX() const { return HasAVX; } bool hasFMA3() const { return HasFMA3; } bool hasFMA4() const { return HasFMA4; } - bool isBTMemSlow() const { return IsBTMemSlow; } - unsigned getAsmFlavor() const { - return AsmFlavor != Unset ? unsigned(AsmFlavor) : 0; - } - - bool isFlavorAtt() const { return AsmFlavor == ATT; } - bool isFlavorIntel() const { return AsmFlavor == Intel; } - bool isTargetDarwin() const { return TargetType == isDarwin; } - bool isTargetELF() const { - return TargetType == isELF; - } + bool isTargetELF() const { return TargetType == isELF; } + bool isTargetWindows() const { return TargetType == isWindows; } bool isTargetMingw() const { return TargetType == isMingw; } - bool isTargetCygMing() const { return (TargetType == isMingw || - TargetType == isCygwin); } bool isTargetCygwin() const { return TargetType == isCygwin; } + bool isTargetCygMing() const { + return TargetType == isMingw || TargetType == isCygwin; + } + + /// isTargetCOFF - Return true if this is any COFF/Windows target variant. + bool isTargetCOFF() const { + return TargetType == isMingw || TargetType == isCygwin || + TargetType == isWindows; + } + bool isTargetWin64() const { - return (Is64Bit && (TargetType == isMingw || TargetType == isWindows)); + return Is64Bit && (TargetType == isMingw || TargetType == isWindows); } std::string getDataLayout() const { const char *p; if (is64Bit()) p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128"; - else { - if (isTargetDarwin()) - p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128"; - else - p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32"; - } + else if (isTargetDarwin()) + p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128"; + else + p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32"; return std::string(p); } bool isPICStyleSet() const { return PICStyle != PICStyles::None; } bool isPICStyleGOT() const { return PICStyle == PICStyles::GOT; } - bool isPICStyleStub() const { return PICStyle == PICStyles::Stub; } bool isPICStyleRIPRel() const { return PICStyle == PICStyles::RIPRel; } - bool isPICStyleWinPIC() const { return PICStyle == PICStyles::WinPIC; } + + bool isPICStyleStubPIC() const { + return PICStyle == PICStyles::StubPIC; + } + + bool isPICStyleStubNoDynamic() const { + return PICStyle == PICStyles::StubDynamicNoPIC; + } + bool isPICStyleStubAny() const { + return PICStyle == PICStyles::StubDynamicNoPIC || + PICStyle == PICStyles::StubPIC; } - /// getDarwinVers - Return the darwin version number, 8 = tiger, 9 = leopard. + /// getDarwinVers - Return the darwin version number, 8 = Tiger, 9 = Leopard, + /// 10 = Snow Leopard, etc. 
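// The constructor derives DarwinVers from the target triple; that parse sits
// outside the hunks shown in this patch, so the following is only a plausible
// sketch. It assumes the version digits directly follow "-darwin" in the
// triple, e.g. "i386-apple-darwin9" yields 9 (Leopard).
#include <cstdlib>   // std::atoi
#include <cstring>   // std::strlen
#include <string>
static unsigned darwinVersFromTriple(const std::string &TT) {
  std::string::size_type Pos = TT.find("-darwin");
  if (Pos == std::string::npos)
    return 0;                                   // Not a Darwin triple.
  const char *Vers = TT.c_str() + Pos + std::strlen("-darwin");
  return *Vers ? (unsigned)std::atoi(Vers) : 8; // Assume Tiger if unversioned.
}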
unsigned getDarwinVers() const { return DarwinVers; } /// isLinux - Return true if the target is "Linux". bool isLinux() const { return IsLinux; } - /// True if accessing the GV requires an extra load. For Windows, dllimported - /// symbols are indirect, loading the value at address GV rather then the - /// value of GV itself. This means that the GlobalAddress must be in the base - /// or index register of the address, not the GV offset field. - bool GVRequiresExtraLoad(const GlobalValue* GV, const TargetMachine& TM, - bool isDirectCall) const; - - /// True if accessing the GV requires a register. This is a superset of the - /// cases where GVRequiresExtraLoad is true. Some variations of PIC require - /// a register, but not an extra load. - bool GVRequiresRegister(const GlobalValue* GV, const TargetMachine& TM, - bool isDirectCall) const; + + /// ClassifyGlobalReference - Classify a global variable reference for the + /// current subtarget according to how we should reference it in a non-pcrel + /// context. + unsigned char ClassifyGlobalReference(const GlobalValue *GV, + const TargetMachine &TM)const; /// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls /// to immediate address. @@ -224,13 +224,6 @@ public: unsigned getSpecialAddressLatency() const; }; -namespace X86 { - /// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in - /// the specified arguments. If we can't run cpuid on the host, return true. - bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, - unsigned *rECX, unsigned *rEDX); -} - } // End llvm namespace #endif diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index b000914..a61de1c 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -11,172 +11,134 @@ // //===----------------------------------------------------------------------===// -#include "X86TargetAsmInfo.h" +#include "X86MCAsmInfo.h" #include "X86TargetMachine.h" #include "X86.h" -#include "llvm/Module.h" #include "llvm/PassManager.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetMachineRegistry.h" +#include "llvm/Target/TargetRegistry.h" using namespace llvm; -/// X86TargetMachineModule - Note that this is used on hosts that cannot link -/// in a library unless there are references into the library. In particular, -/// it seems that it is not possible to get things to work on Win32 without -/// this. Though it is unused, do not remove it. -extern "C" int X86TargetMachineModule; -int X86TargetMachineModule = 0; - -// Register the target. -static RegisterTarget<X86_32TargetMachine> -X("x86", "32-bit X86: Pentium-Pro and above"); -static RegisterTarget<X86_64TargetMachine> -Y("x86-64", "64-bit X86: EM64T and AMD64"); - -// Force static initialization. 
-extern "C" void LLVMInitializeX86Target() { } - -// No assembler printer by default -X86TargetMachine::AsmPrinterCtorFn X86TargetMachine::AsmPrinterCtor = 0; - -const TargetAsmInfo *X86TargetMachine::createTargetAsmInfo() const { - if (Subtarget.isFlavorIntel()) - return new X86WinTargetAsmInfo(*this); - else - switch (Subtarget.TargetType) { - case X86Subtarget::isDarwin: - return new X86DarwinTargetAsmInfo(*this); - case X86Subtarget::isELF: - return new X86ELFTargetAsmInfo(*this); - case X86Subtarget::isMingw: - case X86Subtarget::isCygwin: - return new X86COFFTargetAsmInfo(*this); - case X86Subtarget::isWindows: - return new X86WinTargetAsmInfo(*this); - default: - return new X86GenericTargetAsmInfo(*this); - } -} - -unsigned X86_32TargetMachine::getJITMatchQuality() { -#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86) - return 10; -#endif - return 0; -} - -unsigned X86_64TargetMachine::getJITMatchQuality() { -#if defined(__x86_64__) || defined(_M_AMD64) - return 10; -#endif - return 0; +static const MCAsmInfo *createMCAsmInfo(const Target &T, + const StringRef &TT) { + Triple TheTriple(TT); + switch (TheTriple.getOS()) { + case Triple::Darwin: + return new X86MCAsmInfoDarwin(TheTriple); + case Triple::MinGW32: + case Triple::MinGW64: + case Triple::Cygwin: + return new X86MCAsmInfoCOFF(TheTriple); + case Triple::Win32: + return new X86WinMCAsmInfo(TheTriple); + default: + return new X86ELFMCAsmInfo(TheTriple); + } } -unsigned X86_32TargetMachine::getModuleMatchQuality(const Module &M) { - // We strongly match "i[3-9]86-*". - std::string TT = M.getTargetTriple(); - if (TT.size() >= 5 && TT[0] == 'i' && TT[2] == '8' && TT[3] == '6' && - TT[4] == '-' && TT[1] - '3' < 6) - return 20; - // If the target triple is something non-X86, we don't match. - if (!TT.empty()) return 0; +extern "C" void LLVMInitializeX86Target() { + // Register the target. + RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target); + RegisterTargetMachine<X86_64TargetMachine> Y(TheX86_64Target); - if (M.getEndianness() == Module::LittleEndian && - M.getPointerSize() == Module::Pointer32) - return 10; // Weak match - else if (M.getEndianness() != Module::AnyEndianness || - M.getPointerSize() != Module::AnyPointerSize) - return 0; // Match for some other target + // Register the target asm info. + RegisterAsmInfoFn A(TheX86_32Target, createMCAsmInfo); + RegisterAsmInfoFn B(TheX86_64Target, createMCAsmInfo); - return getJITMatchQuality()/2; + // Register the code emitter. + TargetRegistry::RegisterCodeEmitter(TheX86_32Target, createX86MCCodeEmitter); + TargetRegistry::RegisterCodeEmitter(TheX86_64Target, createX86MCCodeEmitter); } -unsigned X86_64TargetMachine::getModuleMatchQuality(const Module &M) { - // We strongly match "x86_64-*". - std::string TT = M.getTargetTriple(); - if (TT.size() >= 7 && TT[0] == 'x' && TT[1] == '8' && TT[2] == '6' && - TT[3] == '_' && TT[4] == '6' && TT[5] == '4' && TT[6] == '-') - return 20; - - // We strongly match "amd64-*". - if (TT.size() >= 6 && TT[0] == 'a' && TT[1] == 'm' && TT[2] == 'd' && - TT[3] == '6' && TT[4] == '4' && TT[5] == '-') - return 20; - - // If the target triple is something non-X86-64, we don't match. 
- if (!TT.empty()) return 0; - - if (M.getEndianness() == Module::LittleEndian && - M.getPointerSize() == Module::Pointer64) - return 10; // Weak match - else if (M.getEndianness() != Module::AnyEndianness || - M.getPointerSize() != Module::AnyPointerSize) - return 0; // Match for some other target - return getJITMatchQuality()/2; -} - -X86_32TargetMachine::X86_32TargetMachine(const Module &M, const std::string &FS) - : X86TargetMachine(M, FS, false) { +X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT, + const std::string &FS) + : X86TargetMachine(T, TT, FS, false) { } -X86_64TargetMachine::X86_64TargetMachine(const Module &M, const std::string &FS) - : X86TargetMachine(M, FS, true) { +X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT, + const std::string &FS) + : X86TargetMachine(T, TT, FS, true) { } -/// X86TargetMachine ctor - Create an ILP32 architecture model +/// X86TargetMachine ctor - Create an X86 target. /// -X86TargetMachine::X86TargetMachine(const Module &M, const std::string &FS, - bool is64Bit) - : Subtarget(M, FS, is64Bit), +X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT, + const std::string &FS, bool is64Bit) + : LLVMTargetMachine(T, TT), + Subtarget(TT, FS, is64Bit), DataLayout(Subtarget.getDataLayout()), FrameInfo(TargetFrameInfo::StackGrowsDown, - Subtarget.getStackAlignment(), Subtarget.is64Bit() ? -8 : -4), + Subtarget.getStackAlignment(), + (Subtarget.isTargetWin64() ? -40 : + (Subtarget.is64Bit() ? -8 : -4))), InstrInfo(*this), JITInfo(*this), TLInfo(*this), ELFWriterInfo(*this) { DefRelocModel = getRelocationModel(); - // FIXME: Correctly select PIC model for Win64 stuff + + // If no relocation model was picked, default as appropriate for the target. if (getRelocationModel() == Reloc::Default) { - if (Subtarget.isTargetDarwin() || - (Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64())) - setRelocationModel(Reloc::DynamicNoPIC); - else + if (!Subtarget.isTargetDarwin()) setRelocationModel(Reloc::Static); + else if (Subtarget.is64Bit()) + setRelocationModel(Reloc::PIC_); + else + setRelocationModel(Reloc::DynamicNoPIC); } - // ELF doesn't have a distinct dynamic-no-PIC model. Dynamic-no-PIC - // is defined as a model for code which may be used in static or - // dynamic executables but not necessarily a shared library. On ELF - // implement this by using the Static model. - if (Subtarget.isTargetELF() && - getRelocationModel() == Reloc::DynamicNoPIC) - setRelocationModel(Reloc::Static); - - if (Subtarget.is64Bit()) { - // No DynamicNoPIC support under X86-64. - if (getRelocationModel() == Reloc::DynamicNoPIC) + assert(getRelocationModel() != Reloc::Default && + "Relocation mode not picked"); + + // If no code model is picked, default to small. + if (getCodeModel() == CodeModel::Default) + setCodeModel(CodeModel::Small); + + // ELF and X86-64 don't have a distinct DynamicNoPIC model. DynamicNoPIC + // is defined as a model for code which may be used in static or dynamic + // executables but not necessarily a shared library. On X86-32 we just + // compile in -static mode, in x86-64 we use PIC. + if (getRelocationModel() == Reloc::DynamicNoPIC) { + if (is64Bit) setRelocationModel(Reloc::PIC_); - // Default X86-64 code model is small. 
-    if (getCodeModel() == CodeModel::Default)
-      setCodeModel(CodeModel::Small);
+    else if (!Subtarget.isTargetDarwin())
+      setRelocationModel(Reloc::Static);
   }
 
-  if (Subtarget.isTargetCygMing())
-    Subtarget.setPICStyle(PICStyles::WinPIC);
-  else if (Subtarget.isTargetDarwin()) {
+  // If we are on Darwin, disallow static relocation model in X86-64 mode, since
+  // the Mach-O file format doesn't support it.
+  if (getRelocationModel() == Reloc::Static &&
+      Subtarget.isTargetDarwin() &&
+      is64Bit)
+    setRelocationModel(Reloc::PIC_);
+
+  // Determine the PICStyle based on the target selected.
+  if (getRelocationModel() == Reloc::Static) {
+    // Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
+    Subtarget.setPICStyle(PICStyles::None);
+  } else if (Subtarget.isTargetCygMing()) {
+    Subtarget.setPICStyle(PICStyles::None);
+  } else if (Subtarget.isTargetDarwin()) {
     if (Subtarget.is64Bit())
       Subtarget.setPICStyle(PICStyles::RIPRel);
-    else
-      Subtarget.setPICStyle(PICStyles::Stub);
+    else if (getRelocationModel() == Reloc::PIC_)
+      Subtarget.setPICStyle(PICStyles::StubPIC);
+    else {
+      assert(getRelocationModel() == Reloc::DynamicNoPIC);
+      Subtarget.setPICStyle(PICStyles::StubDynamicNoPIC);
+    }
   } else if (Subtarget.isTargetELF()) {
     if (Subtarget.is64Bit())
       Subtarget.setPICStyle(PICStyles::RIPRel);
     else
       Subtarget.setPICStyle(PICStyles::GOT);
   }
+
+  // Finally, if we have "none" as our PIC style, force to static mode.
+  if (Subtarget.getPICStyle() == PICStyles::None)
+    setRelocationModel(Reloc::Static);
 }
 
 //===----------------------------------------------------------------------===//
@@ -212,33 +174,16 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM,
   return true;  // -print-machineinstr should print after this.
 }
 
-bool X86TargetMachine::addAssemblyEmitter(PassManagerBase &PM,
-                                          CodeGenOpt::Level OptLevel,
-                                          bool Verbose,
-                                          raw_ostream &Out) {
-  // FIXME: Move this somewhere else!
-  // On Darwin, override 64-bit static relocation to pic_ since the
-  // assembler doesn't support it.
-  if (DefRelocModel == Reloc::Static &&
-      Subtarget.isTargetDarwin() && Subtarget.is64Bit() &&
-      getCodeModel() == CodeModel::Small)
-    setRelocationModel(Reloc::PIC_);
-
-  assert(AsmPrinterCtor && "AsmPrinter was not linked in");
-  if (AsmPrinterCtor)
-    PM.add(AsmPrinterCtor(Out, *this, Verbose));
-  return false;
-}
-
 bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
                                       CodeGenOpt::Level OptLevel,
-                                      bool DumpAsm,
                                       MachineCodeEmitter &MCE) {
   // FIXME: Move this to TargetJITInfo!
   // On Darwin, do not override 64-bit setting made in X86TargetMachine().
   if (DefRelocModel == Reloc::Default &&
-      (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit()))
+      (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit())) {
     setRelocationModel(Reloc::Static);
+    Subtarget.setPICStyle(PICStyles::None);
+  }
 
   // 64-bit JIT places everything in the same buffer except external functions.
   // On Darwin, use small code model but hack the call instruction for
@@ -251,24 +196,20 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
   }
 
   PM.add(createX86CodeEmitterPass(*this, MCE));
-  if (DumpAsm) {
-    assert(AsmPrinterCtor && "AsmPrinter was not linked in");
-    if (AsmPrinterCtor)
-      PM.add(AsmPrinterCtor(errs(), *this, true));
-  }
 
   return false;
 }
 
 bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
                                       CodeGenOpt::Level OptLevel,
-                                      bool DumpAsm,
                                       JITCodeEmitter &JCE) {
   // FIXME: Move this to TargetJITInfo!
   // On Darwin, do not override 64-bit setting made in X86TargetMachine().
   if (DefRelocModel == Reloc::Default &&
-      (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit()))
+      (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit())) {
     setRelocationModel(Reloc::Static);
+    Subtarget.setPICStyle(PICStyles::None);
+  }
 
   // 64-bit JIT places everything in the same buffer except external functions.
   // On Darwin, use small code model but hack the call instruction for
@@ -281,40 +222,34 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
   }
 
   PM.add(createX86JITCodeEmitterPass(*this, JCE));
-  if (DumpAsm) {
-    assert(AsmPrinterCtor && "AsmPrinter was not linked in");
-    if (AsmPrinterCtor)
-      PM.add(AsmPrinterCtor(errs(), *this, true));
-  }
 
   return false;
 }
 
+bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
+                                      CodeGenOpt::Level OptLevel,
+                                      ObjectCodeEmitter &OCE) {
+  PM.add(createX86ObjectCodeEmitterPass(*this, OCE));
+  return false;
+}
+
 bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
                                             CodeGenOpt::Level OptLevel,
-                                            bool DumpAsm,
                                             MachineCodeEmitter &MCE) {
   PM.add(createX86CodeEmitterPass(*this, MCE));
-  if (DumpAsm) {
-    assert(AsmPrinterCtor && "AsmPrinter was not linked in");
-    if (AsmPrinterCtor)
-      PM.add(AsmPrinterCtor(errs(), *this, true));
-  }
-
   return false;
 }
 
 bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
                                             CodeGenOpt::Level OptLevel,
-                                            bool DumpAsm,
                                             JITCodeEmitter &JCE) {
   PM.add(createX86JITCodeEmitterPass(*this, JCE));
-  if (DumpAsm) {
-    assert(AsmPrinterCtor && "AsmPrinter was not linked in");
-    if (AsmPrinterCtor)
-      PM.add(AsmPrinterCtor(errs(), *this, true));
-  }
-
   return false;
 }
+
+bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
+                                            CodeGenOpt::Level OptLevel,
+                                            ObjectCodeEmitter &OCE) {
+  PM.add(createX86ObjectCodeEmitterPass(*this, OCE));
+  return false;
+}
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 90a5cc2..b538408 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -26,7 +26,7 @@
 namespace llvm {
 
-class raw_ostream;
+class formatted_raw_ostream;
 
 class X86TargetMachine : public LLVMTargetMachine {
   X86Subtarget Subtarget;
@@ -38,18 +38,9 @@ class X86TargetMachine : public LLVMTargetMachine {
   X86ELFWriterInfo ELFWriterInfo;
   Reloc::Model DefRelocModel; // Reloc model before it's overridden.
 
-protected:
-  virtual const TargetAsmInfo *createTargetAsmInfo() const;
-
-  // To avoid having target depend on the asmprinter stuff libraries, asmprinter
-  // set this functions to ctor pointer at startup time if they are linked in.
-  typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
-                                            X86TargetMachine &tm,
-                                            bool verbose);
-  static AsmPrinterCtorFn AsmPrinterCtor;
-
 public:
-  X86TargetMachine(const Module &M, const std::string &FS, bool is64Bit);
+  X86TargetMachine(const Target &T, const std::string &TT,
+                   const std::string &FS, bool is64Bit);
 
   virtual const X86InstrInfo *getInstrInfo() const { return &InstrInfo; }
   virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
@@ -66,50 +57,41 @@ public:
     return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
   }
 
-  static unsigned getModuleMatchQuality(const Module &M);
-  static unsigned getJITMatchQuality();
-
-  static void registerAsmPrinter(AsmPrinterCtorFn F) {
-    AsmPrinterCtor = F;
-  }
-
   // Set up the pass pipeline.
   virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
   virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
   virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
-  virtual bool addAssemblyEmitter(PassManagerBase &PM,
-                                  CodeGenOpt::Level OptLevel,
-                                  bool Verbose, raw_ostream &Out);
   virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
-                              bool DumpAsm, MachineCodeEmitter &MCE);
+                              MachineCodeEmitter &MCE);
   virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
-                              bool DumpAsm, JITCodeEmitter &JCE);
+                              JITCodeEmitter &JCE);
+  virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+                              ObjectCodeEmitter &OCE);
+  virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
+                                    CodeGenOpt::Level OptLevel,
+                                    MachineCodeEmitter &MCE);
   virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
                                     CodeGenOpt::Level OptLevel,
-                                    bool DumpAsm, MachineCodeEmitter &MCE);
+                                    JITCodeEmitter &JCE);
   virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
                                     CodeGenOpt::Level OptLevel,
-                                    bool DumpAsm, JITCodeEmitter &JCE);
+                                    ObjectCodeEmitter &OCE);
 };
 
 /// X86_32TargetMachine - X86 32-bit target machine.
 ///
 class X86_32TargetMachine : public X86TargetMachine {
 public:
-  X86_32TargetMachine(const Module &M, const std::string &FS);
-
-  static unsigned getJITMatchQuality();
-  static unsigned getModuleMatchQuality(const Module &M);
+  X86_32TargetMachine(const Target &T, const std::string &TT,
+                      const std::string &FS);
 };
 
 /// X86_64TargetMachine - X86 64-bit target machine.
 ///
 class X86_64TargetMachine : public X86TargetMachine {
 public:
-  X86_64TargetMachine(const Module &M, const std::string &FS);
-
-  static unsigned getJITMatchQuality();
-  static unsigned getModuleMatchQuality(const Module &M);
+  X86_64TargetMachine(const Target &T, const std::string &TT,
+                      const std::string &FS);
 };
 
 } // End llvm namespace
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
new file mode 100644
index 0000000..d39b3c4
--- /dev/null
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -0,0 +1,65 @@
+//===-- llvm/Target/X86/X86TargetObjectFile.cpp - X86 Object Info ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86TargetObjectFile.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+using namespace llvm;
+
+const MCExpr *X8632_MachoTargetObjectFile::
+getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+                                 MachineModuleInfo *MMI,
+                                 bool &IsIndirect, bool &IsPCRel) const {
+  // The Mach-O version of this method defaults to returning a stub reference.
+  IsIndirect = true;
+  IsPCRel = false;
+
+  MachineModuleInfoMachO &MachOMMI =
+    MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+  SmallString<128> Name;
+  Mang->getNameWithPrefix(Name, GV, true);
+  Name += "$non_lazy_ptr";
+
+  // Add information about the stub reference to MachOMMI so that the stub gets
+  // emitted by the asmprinter.
+  MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str());
+  const MCSymbol *&StubSym = MachOMMI.getGVStubEntry(Sym);
+  if (StubSym == 0) {
+    Name.clear();
+    Mang->getNameWithPrefix(Name, GV, false);
+    StubSym = getContext().GetOrCreateSymbol(Name.str());
+  }
+
+  return MCSymbolRefExpr::Create(Sym, getContext());
+}
+
+const MCExpr *X8664_MachoTargetObjectFile::
+getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+                                 MachineModuleInfo *MMI,
+                                 bool &IsIndirect, bool &IsPCRel) const {
+
+  // On Darwin/X86-64, we can reference DWARF symbols with foo@GOTPCREL+4, which
+  // is an indirect pc-relative reference.
+  IsIndirect = true;
+  IsPCRel = true;
+
+  SmallString<128> Name;
+  Mang->getNameWithPrefix(Name, GV, false);
+  Name += "@GOTPCREL";
+  const MCExpr *Res =
+    MCSymbolRefExpr::Create(Name.str(), getContext());
+  const MCExpr *Four = MCConstantExpr::Create(4, getContext());
+  return MCBinaryExpr::CreateAdd(Res, Four, getContext());
+}
+
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
new file mode 100644
index 0000000..377a93b
--- /dev/null
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -0,0 +1,40 @@
+//===-- llvm/Target/X86/X86TargetObjectFile.h - X86 Object Info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_X86_TARGETOBJECTFILE_H
+#define LLVM_TARGET_X86_TARGETOBJECTFILE_H
+
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+namespace llvm {
+
+  /// X8632_MachoTargetObjectFile - This TLOF implementation is used for
+  /// Darwin/x86-32.
+  class X8632_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
+  public:
+
+    virtual const MCExpr *
+    getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+                                     MachineModuleInfo *MMI,
+                                     bool &IsIndirect, bool &IsPCRel) const;
+  };
+
+  /// X8664_MachoTargetObjectFile - This TLOF implementation is used for
+  /// Darwin/x86-64.
+  class X8664_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
+  public:
+
+    virtual const MCExpr *
+    getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+                                     MachineModuleInfo *MMI,
+                                     bool &IsIndirect, bool &IsPCRel) const;
+  };
+} // end namespace llvm
+
+#endif
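
Usage sketch: the getModuleMatchQuality/getJITMatchQuality heuristics removed above are replaced by the TargetRegistry entries that LLVMInitializeX86Target() now installs, so a driver reaches these targets by triple lookup instead of module matching. The snippet below is a minimal illustration of that path, not code from this patch: the triple string, the error handling, the helper name, and the LLVMInitializeX86TargetInfo() entry point are assumptions based on the TargetRegistry API of this period.

#include "llvm/Target/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include <string>

// These C entry points live in the X86 target libraries; they are declared
// here so the sketch is self-contained. The TargetInfo initializer is an
// assumption about the companion library, not part of this diff.
extern "C" void LLVMInitializeX86TargetInfo(); // registers TheX86_32Target/TheX86_64Target
extern "C" void LLVMInitializeX86Target();     // registers the ctors shown above

// Hypothetical helper: resolve a registered x86 target by triple and build
// a TargetMachine from it.
llvm::TargetMachine *createX86_32Machine() {
  LLVMInitializeX86TargetInfo();
  LLVMInitializeX86Target();

  const std::string TripleStr = "i386-apple-darwin9"; // example triple
  std::string Error;
  const llvm::Target *T = llvm::TargetRegistry::lookupTarget(TripleStr, Error);
  if (!T)
    return 0; // Error describes why no registered target matched.

  // Empty subtarget feature string; something like "+sse2" would go here.
  return T->createTargetMachine(TripleStr, "");
}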